;*******************************************************************
;* File: qmeter128.asm
;*
;* Source code for Inductance and Q factor meter.
;*
;* Ver: 1.1
;*
;* L. Lerner
;*
;* You are free to copy and modify this software provided you 
;* reference its origin.
;*
;*******************************************************************
.include "2313def.inc"
;*******************************************************************
;*  		GLOBAL WORKING REGISTERS
;*******************************************************************
;Scratch registers.  Note r17 is deliberately not aliased: it is used by
;name throughout as the command/data byte and as a loop counter.
.def temp0=r16		;low byte of 16-bit scratch pair temp1|temp0
.def temp1=r18		;high byte of 16-bit scratch pair temp1|temp0
.def temp2=r19		;scratch; divisor for div24by8; sample step in get_waveform
.def temp3=r20		;scratch; 8-bit multiplier for mul16by8
.def adlow=r30		;result read after atod (low byte)
.def adhigh=r31		;high byte alias; not referenced in the visible part of the file
;*******************************************************************
;*
;*					GLOBAL VARIABLES
;*
;*  These live in r2-r15, so they cannot be targets of immediate
;*  instructions (ldi/cpi/subi); values are moved in through r17.
;*
;*******************************************************************
.def Caph	= r2	;capacitor value: first digit * 10 + second digit
.def Cpower = r3	;capacitor range index 0-3 (wraps to 0 when bit 2 sets)
.def cycle  = r4	;number of measurements accumulated in current average
.def Novfl  = r5	;error flags: counts bad period totals, bit 7 = frequency unreliable
.def Free1	= r6	;not referenced in the visible part of the file
.def Osample= r7	;bit 0 set -> oversampling on
.def Ftime	= r8	;sample step in units of 100ns (1 or 8 are the values loaded here)
.def Voffset= r9	;DC level read by atod before each measurement
.def Gpower	= r10	;Gamma exponent
.def Gammal = r11	;Gamma mantissa low byte
.def Gammah	= r12	;Gamma mantissa high byte
.def Fpower = r13	;frequency exponent
.def Freql  = r14	;frequency mantissa low byte
.def Freqh  = r15	;frequency mantissa high byte
;*******************************************************************
;*  PB0 - sample/hold 					- input  	   (comp. +)
;*  PB1 - A/D C	  						- input/output (comp. -)
;*  PB2 - key row 1, LCD DB4, A/D R		- output
;*  PB3 - key row 2, LCD DB5			- output
;*  PB4 - key row 3, LCD DB6			- output
;*  PB5 - key col 1, LCD DB7			- input/output
;*  PB6 - key col 2, LCD R/W			- input/output
;*  PB7 - key col 3, LCD RS				- input/output
;*******************************************************************
;*  PD0 - hold1		  - output (immediate hold)
;*  PD1 - hold0       - output (oversampling hold)
;*  PD2 - sense		  - input  (/5 counter bit 2 sensor)
;*  PD3 - cap1		  - output (0.01uF capacitor)
;*  PD4 - cap2		  - output (0.1uF capacitor)
;*  PD5 - pulse/relax - output (exciter)
;*  PD6 - strobe      - output (strobes lcd data)
;*******************************************************************
;Port D direction mask: every pin except PD2 (sense) is an output.
.equ PDSET = (1<<PD0)+(1<<PD1)+(1<<PD3)+(1<<PD4)+(1<<PD5)+(1<<PD6)
;Port D output patterns used during acquisition (PD5|PD1|PD0).
.equ pulse_sample=0x00 ;PD5 0 (pulse), PD1 0 (sample), PD0 0 (sample)
.equ relax_sample=0x20 ;PD5 1 (relax), PD1 0 (sample), PD0 0 (sample)
.equ relax_hold_0=0x22 ;PD5 1 (relax), PD1 1 (hold0),  PD0 0 (sample)
.equ relax_hold_1=0x23 ;PD5 1 (relax), PD1 1 (hold0),  PD0 1 (hold1)
;Port B direction masks.
.equ lcd_drive		= 0xfe	;1 1 1 1 | 1 1 1 0 -> PB1-PB7 out, only PB0 in. NOTE(review): old comment said "in on comp. pins 12, 13" but bit 1 is set (output)
.equ key_drive		= 0x1e	;0 0 0 1 | 1 1 1 0 -> PB1-PB4 out, keypad columns PB5-PB7 (and PB0) in
;EEPROM layout: messages, then the two cosine tables.
.equ msg_table_size	= 0x40				; 				   =  64 bytes
.equ cos_table_size = 0x20				;cos[pi*(0-31)/32] =  32 bytes
.equ cos2_table_size= 0x20				;cos[pi*(0-31)/64] =  32 bytes
.equ location_table	= 0x01	;flash word address of the 4 LCD cursor positions (2 words, i.e. words 1-2; presumably the unused INT0/INT1 vector slots - confirm against 2313def.inc)
.equ msg_table		= 0x00
.equ cos_table 		= msg_table_size 	
.equ cos2_table		= msg_table_size + cos_table_size  
.equ sin2_table     = cos2_table     + 0x20;sin[k]=cos[32-k]
;SRAM layout and measurement parameters.
.equ SRAMSTART 		= 0x60
.equ SAMPLEN 		= 0x68		;SRAM[SRAMSTART,SRAMSTART+SAMPLEN-1]
.equ FFTSAMP 		= 0x80
.equ Pcycles		= 3			;log2(#cycles)
.equ VSATURATE		= 110		;V < Vsaturate kept
.equ PTHRESHOLD		= 16		;V > PTHRESHOLD for max to count for Gamma
.equ MTHRESHOLD		= 240		;V < MTHRESHOLD for min to count for Gamma (compared signed, i.e. -16)
.equ Mindex			= 25		;index < Mindex -> o/sample off. 128 -> 20MHz
.equ Zindex			= 3			;index < Zindex -> 'L' & 'F' -> 'E'
			
;EEPROM contents: four 16-character LCD lines followed by two signed
;cosine tables (entries are scaled so that $7f represents +1).
.eseg

.org msg_table
;msg0/msg1 form the capacitor entry screen, msg2/msg3 the result screen.
;$7e is presumably the right-arrow glyph of the LCD character ROM - confirm.
msg0: .db  $20,$20,$43,$3d,$5f,$2e,$5f,$20,$78,$31,$30,$30,$70,$46,$20,$20	;"  C=_._ x100pF  "
msg1: .db  $20,$20,$28,$31,$20,$31,$30,$20,$31,$30,$30,$29,$6e,$46,$20,$20	;"  (1 10 100)nF  "
msg2: .db  $20,$20,$4c,$7e,$20,$20,$20,$20,$75,$48,$20,$20,$20,$20,$20,$20	;"  L>    uH      "
msg3: .db  $20,$20,$46,$7e,$20,$20,$20,$20,$4d,$48,$7a,$20,$7e,$51,$20,$20	;"  F>    MHz >Q  "
.org cos_table
;cos[pi*k/32], k = 0-31, as signed bytes
cos0: .db  $7f,$7f,$7d,$7a,$76,$70,$6a,$62,$5a,$51,$47,$3c,$30,$25,$18,$c ,$0 ,$f4,$e8,$db,$d0,$c4,$b9,$af,$a6,$9e,$96,$90,$8a,$86,$83,$81
.org cos2_table
;cos[pi*k/64], k = 0-31, as signed bytes
cos2: .db  $7f,$7f,$7f,$7e,$7d,$7c,$7a,$78,$76,$73,$70,$6d,$6a,$66,$62,$5e,$5a,$55,$51,$4c,$47,$41,$3c,$36,$30,$2b,$25,$1f,$18,$12,$0c,$06

.cseg

.org location_table			
;LCD set-address commands for the cursor under each capacitor range,
;indexed by Cpower (0-3) and fetched with lpm in cap_power.
locs: .db $89, $c3, $c5, $c8
;************************************************************
;*  Signed Multiplication Macro
;*
;*  @0|@1 = @1 * @2. 
;*
;*  r17 is loop counter
;*
;************************************************************
;mmul @0, @1, @2
;  In:    @1 = multiplier, @2 = multiplicand
;  Out:   @0|@1 = 16-bit product (high|low)
;  Clobb: r17 (bit counter), flags
;Eight shift steps.  The carry left by ror holds the multiplier bit
;that was shifted out on the previous step: @2 is added when that
;previous bit was 1 and subtracted when the current bit 0 of @1 is 1
;(Booth-style recoding), then @0|@1 is shifted right arithmetically.
.MACRO mmul
	sub @0, @0			;clear high byte and carry (no "previous bit" yet)
	ldi r17, 0x08		;8 multiplier bits; ldi leaves carry untouched
mmul1:
	brcc mmul2			;previous bit 0 -> nothing to add
	add @0, @2
mmul2:
	sbrc @1, 0			;current bit 0 -> nothing to subtract
	sub @0, @2
	asr @0				;arithmetic shift keeps the sign of the partial product
	ror @1				;bit 0 of @1 drops into carry for the next step
	dec r17				;dec does not alter carry
	brne mmul1
.ENDM

;*********************************************************************
;*  Four interrupt vectors are used.  Program placement commences
;*  after last one, so they must be in ascending order.
;*  Reset -> T1 Capture -> T1 Compare -> T1 Overflow
;*********************************************************************

;Interrupt vector table.  The handlers captured, compared and clearovf
;are defined outside the part of the file shown here.
.org 0x0000
rjmp reset				;power-on / external reset
.org ICP1addr
rjmp captured			;Timer1 input capture
.org OC1addr
rjmp compared			;Timer1 compare match (ends the get_nck delay)
.org OVF1addr
rjmp clearovf			;Timer1 overflow

;**********************************************************
;*  General initialisation.  Port B and D directions set.
;*  LCD display initialised, welcome message displayed, A/D
;*  converter initialised.
;**********************************************************
;Entry point.  Sets up port D and the stack, initialises the LCD, then
;asks the user for the two capacitor digits (wq) and the capacitor
;range (cap_power) before falling into the measurement loop at i0.
reset:
	ldi r17, PDSET	;PD0,PD1,PD3-PD6 outputs, PD2 input (the old note about inputs on PD3-PD5 did not match PDSET)
	out DDRD, r17	;pin 11 = output = lcd strobe
	ldi r17, relax_hold_1
	out PORTD, r17	;dont pulse yet
	ldi r17, RAMEND	;stack grows down from the top of SRAM
	out SPL, r17
	rcall lcdinit	;initialise LCD display
		
;wq: (re)start of capacitor entry; also reached from i0 when '*' is pressed
wq:	ldi msg, low(msg0)
	rcall lcdmsg		;C=_._ x100pF ;(1 10 100)nF		;
	ldi r17, 0x84
	rcall lcdcom		;sets cursor to #Ch
	rcall display_num_key	;first digit -> temp3
	ldi temp0, 10
	rcall mul16by8		;only the low byte of the product is used below
	mov Caph, temp0		;Caph = temp3 * 10
	ldi r17, 0x86
	rcall lcdcom		;sets cursor to #Cl
	rcall display_num_key	;second digit -> temp3
	add Caph, temp3
	clr Cpower
;cap_power: park the cursor on the range selected by Cpower; '*' accepts,
;any other key steps to the next range (0,1,2,3,0,...)
cap_power:
	ldi r30, low(location_table<<1)		;byte address of locs for lpm
	ldi r31,high(location_table<<1)
	add r30,Cpower
	lpm					;r0 = locs[Cpower]
	mov r17, r0
	rcall lcdcom
wz:	rcall get_key
	cpi  r17, 10		;10 = no key
	breq wz
	rcall get_key		;read again and use this second reading
	mov temp0, r17
wy:	rcall get_key		;wait for release
	cpi r17, 10
	brne wy
	cpi temp0, $fe		;'*' confirms the range
	breq i0
	inc Cpower
	sbrc Cpower, 2		;reached 4 -> wrap to 0
	clr Cpower
	rjmp cap_power		;100pF -> Caph = 10, Cpower = 0

i0:						;this is the continuous measurement loop
;********************************************************************
;*  Obtain DC level for acquisition. Subtracting this prior to FFT
;*  and regression analysis removes contribution of DC cell in
;*  FFT and improves accuracy by reducing sizes of numbers 
;*  involved.  Also sets correct base for log estimate in regression.
;********************************************************************
	rcall atod			;A/D
	rcall atod			;A/D (result of this second conversion is the one kept)
	mov Voffset, adlow
;	sbi PORTD, PD0		;restore immediate hold
;********************************************************************
;*  Check to see if user pressed * to abort or enter new capacitor
;********************************************************************
	rcall get_key
	cpi r17, $fe
	breq wq
	ldi r17, 1			;1 = oversampling on, and also the x1 step loaded at xp
	mov Osample, r17	;this turns oversampling on initially
;xp: also entered from fu with r17 = 8 to retry at x8 stepping
xp:	mov Ftime, r17		;this selects x1 stepping initially
x0:						;this is the averaging loop

	clr Novfl			;no errors recorded yet
	clr cycle			;cycle = 0 -> first measurement starts a new average
x1:						;this is the single measurement loop
	ldi mod5, 5
	ldi mod4, 1			;sets oversampling registers for minimum delay
	clr temp1			;delay = temp1|temp0 * 100ns 
	clr temp0			;				     + Osample * f(mod5) * 25ns
	mov temp2, Ftime	;temp2 = step added to the delay per sample
	rjmp get_waveform	;returns by jumping to waveret
waveret:
;***************************************************************
;*  For decay rate evaluation we form the following regression sums:
;*
;*		Sum(x), Sum(xx), Sum(log(V)), Sum (log(V)x), Sum(1)
;*
;*  and proceed to calculation of the regression formula once 
;*  SRAMSTART+SAMPLEN is exceeded.
;*
;*  Do not need prior data conditioning, as iteration runs to 
;*	SRAMSTART+SAMPLEN only.
;*
;*  The sample buffer is scanned twice (cd_m is the start of a scan).
;*  zl doubles as the pass marker: 30 during the first scan, which
;*  only accumulates Sum(x) and the peak count; 15 during the second,
;*  which accumulates the regression sums about the mean xm.
;***************************************************************
.def xm_l  = r21
.def xm_h  = r22
.def yx_l  = r23
.def yx_h  = r24
.def xx_l  = r25
.def xx_h  = r26

	clr index			;index (r31, the high byte of Z) = 0
	ldi zl, 16
;Store 0 through Z at data addresses 16-29, which are the working
;registers r16-r29 themselves: clears all scratch and sum registers.
qqq:
	st z+, index
	cpi zl, 30
	brlo qqq			;leaves zl = 30 -> first-pass marker
cd_m:
	ldi msb, MTHRESHOLD - 1	;"previous sample" seed for the first comparison
	ldi yl,SRAMSTART 
;cd_0: r0 = current sample, msb = previous sample.  T remembers that a
;sufficiently deep minimum has been passed.
cd_0:
	ld r0, y+
	cp r0, msb				;current vs previous, signed
	brlt cd_1					;so equal not double counted
	cpi msb, MTHRESHOLD
	brge cd_4					;previous sample not below MTHRESHOLD (signed) -> ignore
	set							;T marks up transition
	rjmp cd_4	
cd_1:						;current < previous: previous may be a maximum
	cpi msb, PTHRESHOLD
	brlt cd_4					;signed comparison ensures msb > 0
	brtc cd_4					;down transition				
	clt							;clear T marks down transition
								;good point -> calculate
	cpi zl, 30
	breq cd_3				;first pass: only accumulate Sum(x) and count
;******************************************************************
;*  Routine takes natural logarithm of positive number < 256 in
;*  msb.temp1.  Returns index in temp1, mantissa in temp0, msb cleared.
;*  Based on:
;*				msb.temp1 	= 0.temp1 * 2^power 
;*
;*		-> 	 ln(msb.temp1)	~ ln(2) * power - 0.temp1 - 0.temp1^2/2
;*
;*  temp1.temp0(output) = ln(msb). msb -> 0
;*
;*  Uses: temp0-3, msb, and r17.
;*
;*  This function is called only once.  So to save space it is used 
;*  inline
;******************************************************************
;Second-pass work for one accepted peak: y = 8*ln(peak) is formed and
;the sums S(y*(xm-x)) -> yx_h|yx_l and S((xm-x)^2) -> xx_h|xx_l updated.
	rcall bin_float		;msb.rubbish -> 0.temp1 * 2^power
	neg temp1			;0.temp1 -> 1-0.temp1 (0.temp1 > 0.5)
	mov temp3, temp1
	rcall mul16by8		;msb = 0.temp1^2
	lsr msb				;msb = 0.temp1^2/2
	add temp3, msb		
	mov temp2, temp3	;temp2 = 0.temp1 + 0.temp1^2/2
	mov temp3, power
	ldi temp0, 177		;ln(2) ~ 177/256			
	clr temp1
	rcall mul16by8		;temp1.temp0 = ln(2) * power (power > 0)	
	sub temp0, temp2
	sbc temp1, msb		;msb == 0; temp1.temp0 = y-value
	ldi temp3, 8
	rcall mul16by8		;max y-precision as S(y*(xm-x)) < 5*S^32(2x)~5000
	mov temp3, xm_h
	sub temp3, yl		;temp3 = xm.0 - yl
	brsh cd_p			;temp3 > 0
	neg temp3			;work with |xm.0 - x|
	rcall mul16by8		;y(xm.0-x) = msb|temp1.temp0
	sub yx_l, temp1		;here xm.0 - x < 0, so sub product
	sbc yx_h, msb		;msb != 0 as max-ed y; y(xm-x) < y~5 * xm~30
	rjmp cd_2
cd_p:					;here xm.0 - x > 0, so add product
	rcall mul16by8
	add yx_l, temp1
	adc yx_h, msb
cd_2:
	mov temp2, temp3	;divide by (xm.0 - x) so as to get back y
	rcall div24by8
	mov temp3, xm_l		;the fractional part is always +ve, so add
	rcall mul16by8		;[y=(msb|temp1.temp0)]*[x=(0.xm_l)]
	clr temp1
	add yx_l, msb
	adc yx_h, temp1		;msb is the integer part of y * 0.xmh_l
	mov temp0, temp2	;temp2 (unaffected by mul&div) = Abs(xmh.0 - x)
	mov temp3, temp2
	rcall mul16by8		;msb=0|temp1|temp0  = (xmh.0 - x)^2
	add xx_l, temp0
	adc xx_h, temp1		;S(xmh.xml-x.0)^2 = S(xmh-x)^2 - N 0.xml^2
cd_3:					;			   !=    "      - N 0.xml xmh as before
	add temp1, yl		;first pass: temp3|temp1 accumulates S(x), x = yl
	adc temp3, index	;index zero on first run
	inc temp2			;first pass: temp2 counts accepted peaks (N)
cd_4:
	mov msb, r0			;current sample becomes "previous"
	cpi yl, SRAMSTART+SAMPLEN
	brlo cd_0			;more samples in the buffer
	subi zl, 15			;30 -> 15 after first scan, 15 -> 0 after second
	breq gam			;finished 2nd loop, now calculate gamma
;End of first scan: mean position xm = S(x)/N
	mov msb, temp3		;msb|temp1 = S(x)
	rcall div24by8		;divisor temp2 = N
	mov xm_l, temp0
	mov xm_h, temp1		;1-loop -> xmh.xml calculated
	clr temp1
	rcall mul16by8		;0|temp1.temp0 = 0.xml * N; msb cleared
	mov temp3, xm_l
	rcall mul16by8		;msb = I(N * 0.xml^2)
	sub xx_l, msb		;pre-load the -N*0.xml^2 correction into S(xm-x)^2
	sbc xx_h, index		;index = 0
;****************************************************************
;*	Check for overflow or underflow on total number of transitions
;*
;*	temp2 >= 50 -> Overflow,  excessive periods, Novfl incremented
;*
;*	temp2 < 2   -> Underflow, insufficient periods, Novfl incremented
;*
;****************************************************************
	cpi temp2, 50
	brsh ov1
	cpi temp2, 2
	brsh ov2
ov1:inc Novfl
ov2:rjmp cd_m			;start the second scan
;****************************************************************
;*  Q = omega/(2 * gamma) = f / (gamma/Pi)
;*
;*  gamma = Sum(y*(xm-x) / (Sum(xm-x)^2
;*
;*  we use 
;*				y = 8 * log(v)  
;*  so most of the 2-byte precision allocated to Sum(yx) in the 
;*  nominator is utilised, and large errors in the difference 
;*  in numerator are avoided. Result subsequently divided by 8 and 
;*  by Ftime.  The resulting gamma/Pi values are accumulated in 
;*  floating format.
;****************************************************************
;gam: form Gamma = (2/Pi) * S(y(xm-x)) / S(xm-x)^2 as a binary float,
;correct its exponent for the sampling mode, and either start a new
;average (cycle == 0) or add it to the running Gamma.
gam:
	mov msb, xx_h
	mov temp1, xx_l
	rcall bin_float		;normalise the denominator
	mov temp6, power
	mov temp2, temp1	;S(xm-x)^2 = temp2 * 2^temp6
	mov temp1, yx_h
	mov temp0, yx_l
	ldi temp3, 163		;2/Pi ~ 163/256
	rcall mul16by8		;msb|temp1.temp0 = (2/Pi) S(xm-x)y
	rcall div24by8		;				 = (2/Pi) S(xm-x)y / S(xm-x)^2 * 2^temp6
	rcall bin_float
	sub power, temp6	;(2/Pi)S(xm-x)y/S(xm-x)^2  = temp1.temp0 * 2^power
	subi power, $fc		;+8 (-> 0.temp1) -1 (2/Pi) -3 (8*y) = +4 == -$fc
	sbrc Ftime, 3		;x8 stepping -> rate is 8 times lower
	subi power, 3
	sbrc Osample, 0
	subi power, $fe		;if oversampling f *=4
	or cycle, cycle		;if this is 0 cycle start new average
	breq gs				
	mov temp6, Gammal	;otherwise add the running sum
	mov temp7, Gammah
	mov temp2, Gpower
	rcall float_add
gs:	mov Gammal, temp0	;Gamma = 0.Gammah|Gammal * 2^Gpower * 10 MHz
	mov Gammah, temp1
	mov Gpower, power	
;***************************************************************
;*  Acquired waveform conditioned prior to conversion. SRAM values 
;*  at and beyond SRAMSTART+SAMPLEN are padded with zeros, as they
;*  contain call addresses of routines used above.
;***************************************************************
;Zero-pad SRAM from the end of the sample buffer up to and including
;RAMEND.  This overwrites the stack area, hence no calls may be
;outstanding here and fft is entered and left with jumps.
	clr r17
	ldi yl,SRAMSTART+SAMPLEN ;conditioning must occur at 0 stack depth
condition:
	st y+, r17
	cpi yl, RAMEND + 1
	brlo condition
;***************************************************************
;*  FFT.  Must be called at zero stack depth, and so returns with
;*  jump to fftret.  FFT leaves r3-r15 free.  These can be used
;*  to operate FFT in call mode: 	mov r8,  low(label)
;*									mov r9, high(label)
;*									rjmp fft
;*									label:
;*  FFT will then return via: mov zl, r8; mov zh, r9; ijmp
;***************************************************************
	rjmp fft
fftret:
;***************************************************************
;*  This routine cycles through all memory locations from 
;*  SRAMSTART to RAMEND and returns:
;*
;*  MAX( SRAM[i]^2   + SRAM[i+1]^2 ) =  maxh | maxl
;*
;*  with index = i.
;*
;*  Each byte pair is also overwritten in place with its 16-bit
;*  squared magnitude (low byte first), which the code after the
;*  scan reads back for the neighbours of the maximum.
;*
;*  Since entire RAM range is free for search no calls allowed.	
;***************************************************************
;NOTE(review): several of these aliases (index, msb, power, msg, temp6)
;are already referenced earlier in the file than this point; confirm
;the assembler in use accepts a .def that follows its first use.
;Several names share a register (index/temp7/msg = r31, temp8/maxml/xm_l
;= r21, ...), so only one meaning is valid at any time.
.def maxml=r21 
.def maxmh=r22
.def maxl =r23
.def maxh =r24
.def maxpl=r25
.def maxph=r26
.def index=r31
.def msb  =r27
.def power=r29
.def temp6=r30	;use with float_add, when maxm, max, maxp valid
.def temp7=r31
.def temp8=r21
.def temp9=r22
.def tem10=r23
.def msg  =r31

findmax_start:
	ldi yl, SRAMSTART
	clr maxl			;running maximum = 0
	clr maxh
f0:	clr maxpl			;maxph|maxpl accumulates the current pair
	clr maxph
f1: ld  temp3, y+
	sbrc temp3, 7		;take the absolute value before squaring
	neg temp3
	mov temp0,  temp3
	clr temp1
	rcall mul16by8		;temp1|temp0 = temp3^2
	add maxpl, temp0
	adc maxph, temp1
	sbrc yl, 0			;if y is even have |Re|^2+|Im|^2
	rjmp f1
	subi yl, 2			;back to the first byte of the pair
	cp  maxpl, maxl
	cpc maxph, maxh		;maxph|maxpl <?> maxh|maxl
	brlo f2				;strictly smaller -> keep old maximum (a tie takes the later pair)
	mov maxl,  maxpl
	mov maxh,  maxph
	mov index, yl		;address of the winning pair
f2: st y+, maxpl		;replace the pair by its squared magnitude
	st y+, maxph
	cpi yl, RAMEND-2	;2 bytes taken by mul16by8 call
	brlo f0
;*************************************************************
;*  We now reduce maxh|maxl, maxpl|maxph, maxmh|maxml to a 
;*  single byte.
;*
;*  Note that this is not equivalent to running the previous
;*  sorting algorithm at 1 byte accuracy, since we allow the
;*  very likely possibility that maxh|maxl > 255.
;*************************************************************
;Fetch the squared magnitudes stored either side of the maximum
;(maxm below, maxp above), then shift all three right together until
;the maximum fits in one byte (maxh == 0).
	mov yl, index
	ld maxmh, -y		;SRAM[index-1] = maxmh
	ld maxml, -y
	subi yl, 0xfc		;yl += 4 -> index + 2, the pair above the maximum
	ld maxpl, y+
	ld maxph, y+
f3: or maxh, maxh
	breq f4				;maximum now fits in maxl
	lsr maxh
	ror maxl
	lsr maxph
	ror maxpl
	lsr maxmh
	ror maxml
	rjmp f3
;*************************************************************
;*  The centre frequency is now augmented by the offset temp0
;*  The offset, which is mathematically in the range +/- 1/2 
;*  is calculated from the asymmetry of values adjacent to the 
;*  maximum. We calculate:
;*
;*  y1 = max  - maxm
;*  y2 = maxp - maxm
;*
;*  and we know y2 < y1 >=0.
;*
;*  y1 - y2/2 and y2/2 are ror'ed together until former = 1 byte
;*
;*  256*(y2/2) = (y2/2)|0 / (y1-y2/2) = temp0 is formed
;*
;*  f (as integer.fraction) = (index - SRAMSTART)/2 . temp0
;*
;*  f (in 1/sample time) = f (in integers) * (1/128)   
;*
;*					     = 0.(index - SRAMSTART)|temp0
;*
;*  If y2 < 0 temp0 is complemented and carry added to index.
;*************************************************************
;f4: interpolate the peak position between cells.  On exit
;index|temp0 holds (index - SRAMSTART) with an 8-bit fraction.
f4:	mov temp2, maxl		;temp2 = maxl
	sub temp2, maxml	;temp2 = y1 > 0
	mov temp1, maxpl
	lsr temp1			;temp1, maxml >= 0
	lsr maxml
	sub temp1, maxml	;temp1 = y2/2 no ovfl as both 0 < #s < 128
	sub temp2, temp1	;temp2 = y1 - y2/2
	mov r0, temp1		;store sign of y2
	sbrc temp1, 7		;temp1, 7 is sign of y2				
	neg temp1			;temp1 = |y2/2|
	clr temp0
	clr msb				;clears dividend MSB
	rcall div24by8		;temp1|temp0/temp2 -> temp1|temp0
	subi index, SRAMSTART ;this clears carry. Then set if temp0<0
	sbrc r0, 7				  
	neg temp0		    ;(index/2+/-(temp0/2)/256)/128 -> above
	clr temp1			;clr leaves the carry from neg/subi intact
	sbc index, temp1	;temp1 normally 0 as maxpl <= maxl
;*************************************************************
;*  If: 
;*			(Osample !=0) and  (index < Mindex) 
;*
;*  frequency is too low. Switch oversampling off, start again.
;*************************************************************
;With oversampling on, a peak below Mindex means the frequency is too
;low for this mode: switch oversampling off and restart the average.
	or Osample, Osample
	breq fu
	cpi index, Mindex
	brsh fu
	clr Osample
	rjmp x0
fu:
;*************************************************************
;*  If with oversampling off
;*
;*		index < ZINDEX 
;*
;*  accuracy in frequency measurement is too low, and it is 
;*  unreliable.  First retry at x8 stepping; if already at x8,
;*  mark the error in bit 7 of Novfl.
;*************************************************************
	cpi index, Zindex
	brsh fuu
	ldi r17, 8			;x8 stepping, loaded into Ftime at xp
	sbrs Ftime, 3		;already at x8? then give up instead of retrying
	rjmp xp
	ldi r17, 128
	or Novfl, r17		;bit 7 -> frequency/inductance not displayed
fuu:
;*************************************************************
;*  We now calculate f in MHz
;*
;*  rate = time beween samples in 100 ns -> 1/rate = 10MHz/rate
;*
;*  f (in MHz) =  10 * f / rate
;*			   =  10 * 0.msb|temp1|temp0 / rate
;*			   =  10 * 0.Freqh|Freql * 2^power
;*
;*  bin_float ensures leading 1 is in bit 7 of Freqh.
;*
;*  If Ftime it is too low index will be at the low end of 
;*  the spectrum and the error will be high due to both the 
;*  contribution of negative frequency components and the much
;*  greater fractional contrinution of curve fitting in temp0. 
;*  
;*  Fourier cell maximum < Min_Index -> Ftime *= 2 < 255
;*
;*	where			index  =  2 * Fourier cell number
;**************************************************************
;Normalise the interpolated peak position to a binary float, apply the
;same exponent corrections as for Gamma, and accumulate into Freq.
	mov temp1, index	;index < 128 centred on maxl
	rcall bin_float		;f (in MHz) = 10 * 0.temp1|temp0
	sbrc Ftime, 3		;x8 stepping -> frequency is 8 times lower
	subi power, 3
	sbrc Osample, 0
	subi power, $fe		;if oversampling f *=4
	or cycle, cycle
	breq fs				;if this is zero'th cycle start new average
	mov temp6, Freql	;otherwise add the running sum
	mov temp7, Freqh
	mov temp2, Fpower
	rcall float_add
fs:	mov Freql, temp0	;f = 0.Freqh|Freql * 2^Fpower * 10 MHz
	mov Freqh, temp1
	mov Fpower,power
;*****************************************************************
;	If we have averaged Ncycles -> display, else repeat
;*****************************************************************
;Repeat the single measurement until 2^Pcycles results are summed,
;then turn both sums into averages by lowering their exponents.
	inc cycle
	sbrs cycle, Pcycles 	;sampling period of size #cycles
	rjmp x1
	ldi r17, Pcycles
	sub Fpower, r17			;average over #cycles = 2^Pcycles
	sub Gpower, r17
;*****************************************************************
;*  We now have averaged Q, and f and display the measurement screen
;*
;*	1)	calculate L = 1/(4 Pi^2 C f^2) in uH
;*	2) 	dec_float converts to 0.1->1 mantissa with decimal power
;* 	3) 	lcdnum converts binary mantissa to decimal and outputs
;*  3a) lcdnum(4 - (temp8<1) ) -> _ _ _nH
;*  3b) lcdnum(4 - (temp8=1) ) -> _._ _uH
;*  3c) lcdnum(4 - (temp8=2) ) -> _ _._uH
;*	3d) lcdnum(4 - (temp8=3) ) -> _ _ _uH
;*	4)	dec_float converts Q binary float in memory to decimal
;*  5) if power=0 f = .temp1|temp0 * 10MHz > 1MHz 
;*  6) lcdnum(power=3) _._ _ MHz output. 
;*      # got by conversion of fraction 0.temp1|temp0
;*  7) if power>0 f = .temp1|temp0 * 10^(4-power) kHz
;*  7a) lcdnum(power=1) _ _ _ kHz
;*	    lcdnum(power=2) _ _._ kHz
;* 	    lcdnum(power=3) _._ _ kHz
;*  8)  lcdmsg(k=1) -> MHz, lcdmsg(k=3) -> kHz
;*******************************************************************	
;Draw the result screen (msg2 + msg3) and compute Q as a decimal float.
	ldi msg, low(msg2)
	rcall lcdmsg		;"L->____uH____" ;"F->____MHz_->Q"
	ldi r17, 0x8b
	rcall lcdcom		;sets cursor to #Q
;*****************************************************************
;*			Q     = f / Gamma = (1/2) omega / gamma
;*  but
;*			f 	  = 0.Freqh|Freql  * 2^Fpower * 10 MHz
;*
;*			Gamma = 0.Gammh|Gammal * 2^Gpower * 10 MHz
;*  So
;*			Q = Freqh|Freql / Gammah|Gammal * 2^(Fpower-Gpower)
;*
;*  Only the high byte Gammah is used as the divisor.
;*****************************************************************		
	mov msb, Freqh	
	mov temp1, Freql	
	mov temp2, Gammah	
	rcall div24by8
	rcall bin_float
	add power, Fpower
	sub power, Gpower	;Q = 0.temp1|temp0 * 2^power
	rcall dec_float		;0.temp1|temp0 * 2^power -> 0.temp1|temp0 * 10^tem10
;**************************************************************************
;*	Q and L values are inaccurate if too few or too many periods were
;*  regsitered.
;*
;*  An error Q value is registered if:
;*
;*	Ntransitions  < 2  or Q < 1  too few periods
;*
;*  Ntransitions  > 50           too many periods
;*
;*  Fourier index < Zindex		frequency inaccurate
;**************************************************************************
;Show Q, or 'E' if any error was recorded or the decimal exponent is
;below 1; then show the frequency in MHz or kHz.
	tst Novfl
	brne q_e
	cpi tem10, 1
	brmi q_e			;signed comparison captures 0 < Q < 1
	rcall lcdnum		;tem10=2 -> _ _._ tem10=1 -> _._ _
	rjmp q_f
q_e:ldi r17, $45		;r17 = 'E'
	rcall lcdout
q_f:
	ldi r17, 0x8e
	rcall lcdcom		;position last digit of #Q
	ldi r17, 0x20
	rcall lcdout		;clear last digit
	ldi r17, 0xc4
	rcall lcdcom		;sets cursor to #F
;*******************************************************************
;*	
;*	index > Zindex -> frequency/inductance displayed
;*	index < Zindex -> frequency/inductance not displayed; Q shows 'E'
;*
;*******************************************************************
	sbrc Novfl, 7		;bit 7 set at fu -> skip frequency and inductance
	rjmp rt
f_d:mov temp1, Freqh
	mov temp0, Freql
	mov power, Fpower
	rcall dec_float
	cpi tem10, 0
	brge mh				;decimal exponent >= 0 -> MHz branch
	subi tem10, 0xfc	;tem10 += 4 for the kHz display
	rcall lcdnum
	ldi r17, 0xc8
	rcall lcdcom		;sets cursor to M
	ldi r17, 0x6b
	rcall lcdout		;out "k"
	rjmp sl
mH:	subi tem10, $ff		;output in MHz (tem10 += 1)
	rcall lcdnum
;*******************************************************************
;*  Load inverse capacitance values 1 / (4 Pi^2 C)
;*
;*  ZCl -> temp8; ZCh -> temp9; ZCpower -> tem10
;*
;*  ZC   = 1/(4 Pi^2 C) (C in 10nF)
;*
;*  cap1 = 10 pF -> ZC = 25.3 = (202/256 + 164/256^2) * 2^+5
;*					ZC = ZCh, ZCl, Zpower
;*******************************************************************
;sl: compute and display the inductance L = ZC / f^2 / Caph, scaled
;by the capacitor range, then pause and restart the measurement at i0.
sl:	ldi r17, 0x84
	rcall lcdcom		;sets cursor to #L
	ldi temp1, 164		;ZCl
	ldi msb, 202		;ZCh
	ldi tem10, $5		;+5
;*******************************************************************
;*  L (in uH) = 1  / ( (f in 10 MHz)^2 * 4 * Pi^2 * (C in 10 nF)  )
;*			  = ZC / ( (f in 10 MHz)^2 )
;*******************************************************************
	mov temp2, Freqh
	rcall div24by8		;first division by f (high byte of mantissa only)
	rcall bin_float
	sub power, Fpower
	add tem10, power	;tem10 = ZCpower - Fpower + power(1)
	mov msb, temp1
	mov temp1, temp0
	mov temp2, Freqh
	rcall div24by8		;second division by f
	mov temp2, Caph
	rcall div24by8 		;adjust for capacitor value vs. 10pF
	rcall bin_float
	add power, tem10
	sub power, Fpower	;power = ZCpower - 2 * Fpower + power(1) + power(2)
	rcall dec_float
	sub tem10, Cpower	;adjust for capacitor range vs. 10pF 
	ldi msg, $75		;msg = 'u' (ldi leaves the flags from sub intact)
	brne mHe
	ldi msg, $6e		;msg = 'n'
	ldi tem10, 3		;always have 100s of nH
mHe:cpi tem10, 4
	brlo uHe
	ldi msg, $6d		;msg = 'm'
	subi tem10, 3		;should really only get L < 10mH
uHe:rcall lcdnum
	ldi r17, 0x88
	rcall lcdcom		;cursor onto the unit prefix character
	mov r17, msg
	rcall lcdout
rt: ldi r17, 0xa0		;place blinking cursor off screen
	rcall lcdcom
	rcall atod			;this resets lm334 after lcd pulsing
	rcall delay1600
;ci0 is also the branch target used by adjust to abandon an acquisition
ci0:rjmp i0
;*********************************************************************
;*  Routine to acquire and fill a byte array starting at beginning of
;*  SRAM at SRAMSTART and of length SAMPLEN.
;*
;*  Acquistion is at minimum intervals of 100ns (10MHz clock) without
;*  oversampling and 25ns (40MHz) with.  
;*
;*  Routine operates by sampling at 
;*
;*		(temp1|temp0) * 100 ns + Osample * f(mod5) * 25ns 
;*
;*  after pulse, storing the result, and incrementing temp1|temp0 by temp2.  
;*  This repeats until SAMPLEN pulses are acquired.
;*
;*  Delay is produced using:
;*
;*  	get_1ck/get_2ck by direct coding 	for temp1|temp0 <= 2 
;*		get_mck by indirect branching 		for 3 <= temp1|temp0 <= 10 
;*		get_nck by TIMER1 compare interrupt	for temp1|temp0 >= 11
;*
;*  Inputs:  Delay 		   (temp1|temp0)*100ns. 
;*  		 Rate   		temp2*100ns.
;*
;*  Uses: oversampling registers r26=mod5, r21=mod4, 
;* 		  r17,temp3,r22,r29 dummies, yl as load, r23-r25 pulse/sample code
;*
;*  Output:  [SRAMSTART]-[SRAMSTART + SAMPLEN -1] in SRAM
;*			 Frate, Osample
;**********************************************************************
;*
.def pulse   =r23		;PORTD pattern: excite, sample
.def relax_h0=r24		;PORTD pattern written when excitation is removed
.def relax_h1=r25		;PORTD pattern: relax, both holds
.def mod5	 =r26		;oversampling phase, cycles 5..1
.def mod4	 =r21		;counts 4 oversampled points per main delay step
;adjust: entered from get_adj when a sample is saturated.  The buffer
;is restarted (the current delay is kept); once the delay high byte is
;non-zero the sampling is coarsened, and at 16 or more the acquisition
;is abandoned via ci0.
adjust:					;this adjusts to remove saturation
	cpi temp1, 1
	brlo get_waveform	;delay < 256: just restart the buffer
;rjmp i0
	cpi temp1, 16
	brsh ci0			;delay too long -> start the measurement over
	sbrs Osample, 0		;oversampling already off -> go to x8 stepping
	ldi temp2, 8
	clr Osample			;otherwise only switch oversampling off

get_waveform:			;ringing should cease in atod period
	ldi yl, SRAMSTART
	ldi pulse, pulse_sample
	ldi relax_h0, relax_hold_0
	sbrc Osample, 0		;with oversampling, do not assert hold0 on relax
	ldi relax_h0, relax_sample
	ldi relax_h1, relax_hold_1
	sbrs Cpower,1		;Cpower 0 or 1: no capacitor select bit is set
	rjmp get_sm
	ldi r17, 8			;Cpower 2: set bit 3 (PD3, cap1)
	sbrc Cpower,0
	ori r17, 16			;Cpower 3: also set bit 4 (PD4, cap2)
	or pulse, r17
	or relax_h0, r17
	or relax_h1, r17
get_sm:
	ldi r22, (1<<CS10)	;signal to start counter at clock speed
	ldi r17, (1<<SE)	;enable idle sleep mode
	out MCUCR, r17
;get_s0: top of the per-sample loop; advances the delay, then pulses
get_s0:
	clr r17				;r17 = 0 is used as the zero operand below
	or Osample, Osample
	brne oversample		;capture with oversampling 	
	add temp0, temp2
	adc temp1, r17		;(temp1|temp0)+temp2
	rjmp s05			;capture without oversampling
oversample:
	dec mod5			;advance oversampling delay
	brne s01
	ldi mod5, 5
s01:dec mod4
	brne s02
	ldi mod4, 4
	add temp0, temp2	;advance main delay
	adc temp1, r17
s02:ldi temp3, relax_sample
	out PORTD, temp3	;let counter go so can sync
	mov temp3, mod5
s03:sbis PIND, PD2		;sync on leading edge of bit 2 of 8MHz counter	
	rjmp s03
s04:dec temp3			;mod5 is delay from leading edge
	nop
	brne s04
s05:cpi temp1, 0x00		;now branch to appropriate delay routine
	brne get_nck
	cpi temp0, 0x02		;these delays too short for interrupt
	brlo get_1ck
	breq get_2ck
	cpi temp0, 11
	brlo get_mck		;3..10 -> computed jump into the nop sled
get_nck:			;delay large enough for interrupt
	rjmp gn1
gn1:nop
	out TCNT1H, r17	;clear timer1 (not done by capture int. routine)
	out TCNT1L, r17	;high byte must be written first!
	mov  r29, temp0
	mov temp3,temp1
	subi r29, 10	;compare count = delay - 10.  NOTE(review): old comment said 8 cycles; works if > 0
	sbc temp3, r17
	out OCR1AH, temp3
	out OCR1AL, r29
	ldi r17, 0xff
	out TIFR, r17	;set all flags. Writing to R/O bits has no effect
	ldi r17, (1<<TOIE1)+(1<<OCIE1A)	;unmask overflow and compare int.
	out TIMSK, r17
	sei
	out PORTD, pulse
	rcall cret			;call to a bare ret: fixed pulse-width delay
	out PORTD, relax_h0	;turn off excitation + let /5 counter latch
	out TCCR1B, r22		;(1<<CS10) starts counting till OCF1A sets
	sleep				;if use rjmp loop here delay always odd
get_s1:					;jump here on ret from interrupt handler
	rcall atod			;atod clears timer but interrupt doesnt
	sub adlow, Voffset	;remove the DC level measured at i0
	brvs get_adj		;signed overflow -> treat as saturated
	cpi adlow, VSATURATE
	brlt get_s2			;below saturation (signed) -> store it
get_adj:
	rjmp adjust
;******************************************************************
;*  These are 3 delay routines for 1 - 10 clock cycles which are too 
;*  short to be implemented via interrupts on timer1.
;*
;*  Routines: get_1ck, get_2ck, get_mck.
;*
;*  Routines jump back to get_s1
;******************************************************************
;Each routine pulses the exciter, removes the excitation (relax_h0)
;and then asserts the hold (relax_h1) after a fixed number of cycles.
;The instruction order and count set the timing - do not rearrange.
get_1ck:					;1 clock delay routine -> no interrupts
	rjmp g1
g1: nop
	out PORTD, pulse
	rcall cret				;call to a bare ret: fixed pulse-width delay
	out PORTD, relax_h0
	out PORTD, relax_h1		;hold on the very next cycle
	rjmp get_s1
get_2ck:					;2 clock delay routine -> no interrupts
	rjmp g2
g2:	out PORTD, pulse		;slightly shorter than 200 ns (170ns)
	rcall cret
	out PORTD, relax_h0
	nop						;one extra cycle before hold
	out PORTD, relax_h1
	rjmp get_s1
;get_mck: temp0 = 3..10.  Z is set to get_mck2 + 3 - temp0, so ijmp
;lands temp0 - 3 nops before get_mck2 (0 to 7 of the nops below).
get_mck:
	ldi zh, high(get_mck2 + 3)
	ldi zl,  low(get_mck2 + 3) 	;so temp0 == 3 points at base
	clr r17
	sub zl, temp0
	sbc zh, r17
	out PORTD, pulse		
	rcall cret
	out PORTD, relax_h0
	ijmp
	nop
	nop
	nop
	nop
	nop
	nop
	nop
get_mck2:
	out PORTD, relax_h1
	rjmp get_s1
;*******************************************************************
;*  Store value and loop
;*******************************************************************
;get_s2: store the offset-corrected sample; leave through waveret once
;SAMPLEN samples are in the buffer, otherwise acquire the next one.
get_s2:
	st y+, adlow
	cpi yl, SRAMSTART + SAMPLEN	
	brne cs0			
	mov Ftime, temp2	;store capture rate of sample
	rjmp waveret		;We have stored SAMPLEN values
cs0:rjmp get_s0			;long jump back to the top of the sample loop
;*******************************************************************
;*  Basic lcd command (lcdcom) and output (lcdout) routines.  These
;*  jump to delay routine delay50 for standard lcd delay.
;*
;*  Input: r17 (command or data)
;*
;*  Use temp0, temp1, temp2.  No stack used
;*******************************************************************
;*
;lcdcom: send r17 to the LCD as a command.
;lcdout: send r17 to the LCD as character data.
;The byte goes out as two nibbles, high nibble first, on PB2-PB5, each
;latched by a pulse on PD6.  r17 ends with its nibbles restored (two
;swaps).  Falls through into delay50; the ret at ec: returns to caller.
;lcd4bit is a secondary entry used by lcdinit: it outputs temp0 as-is
;and expects Z set and temp2 = 0 on entry.
lcdcom:
	ldi temp1, 0x00	;set RS R/W to command mode. Uses temp0,temp1,temp2.
	rjmp lcdbin
lcdout:
	ldi temp1, 0x80	;set RS R/W to RAM write mode. Uses temp0,temp1,temp2.
lcdbin:
	ldi temp2, lcd_drive
	out DDRB,  temp2
	ldi temp2, 2		;two nibbles to send
lcdbin_1:
	mov  temp0,r17
	swap r17			;get LSB ready into MSB position
	andi temp0,0xf0		;get MSB
	lsr  temp0
	lsr  temp0			;nibble now sits in bits 5-2 = DB7-DB4
	or   temp0, temp1	;set RS R/W
	dec temp2			;sets Z after the second nibble
lcd4bit:
	out  PORTB, temp0	;out MSB/LSB temp2 = 1/0
	sbi  PORTD, PD6	 	;strobe high
	cbi  PORTD, PD6 	;strobe low
	brne lcdbin_1		;test on temp2, zero flag still unchanged
	out  PORTB, temp2	;=0 so no pull ups are activated on PINB
;delay50: busy-wait of 255 dec/brne iterations; leaves temp2 = 0, Z set
delay50:
	ldi temp2, 0xff
d4x0:	
	dec temp2
	brne d4x0
ec:	ret
;******************************************************************
;*  lcdinit is called on reset to set 4-bit mode (lcd4bit),
;*  initiallise LCD, and select cursor, blink and clear functions.
;*  Incorporates a very long (30ms) delay to ensure recommended
;*  delay after power on prior to first command sent to LCD.
;*
;*  Calls delay1600 (which in turn calls delay50) for a 1.6ms delay.
;*  
;*  Uses temp0-temp2, r17.  Stack used to depth 2.
;******************************************************************
;*
;lcdinit: power-on initialisation of the LCD in 4-bit mode.
;  In:    none
;  Out:   LCD in 4-bit, 2-line mode, display on with blinking cursor,
;         cleared, entry mode = increment
;  Clobb: r17, temp0, temp1, temp2, flags
;
;Fix: the PORTB pattern for the initial "select 4-bit interface" nibble
;used to be loaded into r17, but the lcd4bit entry point outputs temp0,
;which is 0 after the delay loop.  The LCD therefore never received the
;function-set nibble and every later nibble pair was out of step.  The
;pattern is now loaded into temp0.
lcdinit:
	ldi temp0, 0x14	;20 x delay1600 = power-on wait (nominally ~30ms)
d3x0:
	rcall delay1600
	dec temp0
	brne d3x0		;falls through with temp0 = 0 and Z set; delay50 left temp2 = 0
	ldi r17, lcd_drive
	out DDRB,  r17
	ldi temp0, 0x08	;PB3 = LCD DB5 high, RS = R/W = 0: function-set nibble 0010
	rcall lcd4bit	;single strobe; relies on Z still set (ldi/out/rcall leave flags alone) and temp2 = 0
	ldi r17, 0x28
	rcall lcdcom	;select 2-line, 5x8 - 5x11 for 1 line mode only
	ldi r17, 0x0f	
	rcall lcdcom	;display on, cursor on, blink
	ldi r17, 0x01
	rcall lcdcom	;clear display, this gets rid of blinking cursor
	rcall delay1600	;clear needs the long delay
	ldi r17, 0x06
	rcall lcdcom	;increment mode, shift off
	ret
;delay1600: 32 calls of delay50.
;  Clobb: temp1, temp2 (both 0 on return), flags.  temp0 is preserved.
delay1600:
	ldi temp1, 0x20	;1600us = 32 x 50us
d6x0:
	rcall delay50
	dec temp1
	brne d6x0
	ret
;*******************************************************************
;*  lcdmsg is passed in msg as the message number of 32 characters
;*  alligned on the 32-byte boundary in EEPROM.
;*  The output starts at top left and fills the lcd display.
;*  
;*  Input: msg = #msg.
;*
;*  Uses temp6, r17, r0, temp0-temp3.  Stack used to depth 1.
;*******************************************************************
;*
;lcdmsg: copy two consecutive 16-byte EEPROM lines to LCD lines 1 and 2.
;  In:    msg = EEPROM address of the first line (multiple of 32)
;  Out:   msg advanced by 32
;  Clobb: r17, temp0-temp2, flags
lcdmsg:
	ldi r17, 0x80	;sets cursor to start of first line.
lcdmsg1:
	rcall lcdcom
lcdline:
	out EEAR, msg
	ldi  r17, 1 	;sets EERE read bit indicating address ready
	out EECR, r17
	in r17, EEDR	;copy EEPROM data
	rcall lcdout
	subi msg, $ff	;msg += 1 done as a subtraction so that H is affected
	brhs lcdline	;no half carry indicates 16-byte boundary
	ldi r17, 0xc0	;sets cursor to start of second line.
	sbrs msg, 4		;bit 4 set -> only the first line is done, skip the ret
;cret is also called on its own (rcall cret) by the acquisition code
;as a fixed call+return delay.
cret:
	ret
	rjmp lcdmsg1	;output the second line
;******************************************************************
;*  Routine converts binary fraction .temp1|temp0 to BCD fraction
;*  which it outputs from MSB to LSB -> LCD in ASCII form. Decimal 
;*  point is output after tem10'th digit, except when tem10=3 when
;*  it is omitted.  If tem10 is in the range [1-3] the routine
;*  always outputs 4 digits
;*
;*  Input:	temp1|temp0, tem10 place of decimal point
;*
;*  Uses:   msb, temp3, r17 (in mul16by8), temp6
;******************************************************************
;lcdnum: print three decimal digits of the fraction 0.temp1|temp0.
;Each pass multiplies the fraction by 10; the integer part that
;overflows into msb is the next digit.  After digit number tem10 a
;'.' is printed (a space if tem10 == 3).
;  Clobb: r17, temp2, temp3, temp6, msb, flags; uses 2 bytes of stack
lcdnum:
	ldi temp3, 10		;convert binary frac to base10
	ldi temp6, 1		;digit counter 1..3 -> output 3 digits
b0:	rcall mul16by8		;msb|temp1|temp0=temp1|temp0*temp3
	push temp0			;lcdout clobbers temp0-temp2: save the fraction
	push temp1
	subi msb, 0xd0		;this converts BCD digit to ASCII digit (+0x30)
	mov r17, msb		;integer part = r17 -> LCD
	rcall lcdout		;uses temp0-temp2. r17 = input
	cp temp6, tem10		;first digit whole, then fraction
	brne b2
	ldi r17, 0x2e		;'.'
	cpi tem10, 3
	brne b1
	ldi r17, 0x20		;' '
b1:	rcall lcdout		
b2:	inc temp6
	pop temp1			;restore the remaining fraction
	pop temp0
	cpi temp6, 4
	brne b0
	ret
;*****************************************************************
;*  get_key - one scan of the keypad.  Returns the code of the
;*  first key found pressed while the scan is in progress.
;*
;*  Four passes of three columns each are made.  The column mask
;*  walks PB5, PB6, PB7; r17 is incremented after every column that
;*  reads low, so its value when a column reads high is the key
;*  code.  The first pass is made before this routine has written
;*  any row strobe to PORTB; passes 2-4 drive PB2, PB3 and PB4.
;*  PORTB is cleared on exit.
;*
;*  Output: r17			Key
;*			1 - 9		1 - 9
;*			0xfe		*
;*			0xff		0
;*			0			#
;*			10			no key
;*
;*  Uses: r28, r29, msb (plus whatever delay50 uses)
;*****************************************************************
get_key:
	ldi r17, key_drive
	out DDRB, r17			;pin directions for key scanning
	ldi r28, 2				;r28 = row strobe, shifted left before use
	ldi r17, 0xfe			;r17 = key code, first pass gives *,0,#
k0:	ldi msb, (1<<PB5)		;msb = column mask, starts at key col 1
k1:	rcall delay50			;settle time before sampling
	in r29, PINB
	and r29, msb
	brne k3					;column reads high -> key found, r17 = code
	inc r17					;next key code
	lsl msb					;next column
	brcc k1					;mask shifted out of PB7 -> row finished
	lsl r28					;next row strobe: 4, 8, 16 (then 32 = stop)
	out PORTB, r28
	sbrs r28, PB5			;strobe reached bit 5 -> all 4 passes done
	rjmp k0
k3:	clr r29
	out PORTB, r29			;release row strobes
	ret
;******************************************************************
;*  Routine uses get_key to return a numerical value 0 - 9.  The
;*  # key is treated as zero and the * key is ignored.  As opposed
;*  to get_key, routine doesnt return until a key has actually been
;*  pressed and released.  The numerical value is displayed at the
;*  current LCD position.
;*
;*  Output: temp3		key
;*			0 - 9		0 - 9
;*			0			#
;*			no ret		*
;*
;*  Uses: r17, registers used by get_key, temp0 - temp2 (lcdout)
;******************************************************************
display_num_key:
	rcall get_key
	cpi r17, 10
	breq display_num_key	;10 = no key
	rcall get_key			;-> eliminate key pressed during get_key
	cpi r17, 10
	breq display_num_key
	cpi r17, $fe
	breq display_num_key	;* is ignored, keep waiting
	brlo dn					;r17 < $fe: keys 1-9, or # which is already 0
	clr r17					;r17 = $ff: the 0 key
dn:	mov temp3, r17
	subi r17, $d0			;-$d0 = +$30: digit -> ASCII
	rcall lcdout
dq:	rcall get_key			;wait here until the key is released
	cpi r17, 10
	brne dq
	ret
;******************************************************************
;*  Routine adds two binary floating point numbers with possibly 
;*  different exponents, and outputs the result as a binary floating
;*  point number.  The addition is performed by shifting the smaller
;*  float right until its exponent matches that of the larger float.
;*  This maintains precision.  Thus if the smaller float is outside
;*  the precision range of the larger float the addition has no 
;*  effect.  Bits shifted out to the right are discarded.
;*
;*  temp7|temp6 * 2^temp2 + temp1|temp0 * 2^power
;*
;*						  = temp1|temp0 * 2^power
;*
;*  Input:  temp7|temp6 * 2^temp2, temp1|temp0 *2^power
;*
;*  Output: temp1|temp0 * 2^power
;*
;*  Note:   temp7|temp6 and temp2 are altered when the second
;*          operand has to be aligned.
;******************************************************************
float_add:
	cp power, temp2
	brge fa				;use brge as power can be -ve
	lsr temp1			;power < temp2
	ror temp0
	inc power
	rjmp float_add
fa:	cp temp2, power
	brge fb
	lsr temp7			;power > temp2
	ror temp6
	inc temp2
	rjmp fa
fb:	add temp0, temp6	;power = temp2
	adc temp1, temp7
	brcc fc				;no carry out: sum fits in 16 bits
	ror temp1			;carry becomes top bit, maintains full precision
	ror temp0
	inc power			;compensate for the right shift
fc:	ret
;******************************************************************
;*  Routine converts binary fraction 0.temp1|temp0 * 2^power to a 
;*  binary number in standard decimal floating point form with
;*  0.temp1|temp0 a mantissa between 1'tenth and 1, while tem10
;*  receives the resulting power of 10.  power is stepped towards
;*  zero one binary place at a time and is 0 on return.
;*
;*  0.temp1|temp0 * 2^power -> 0.temp1|temp0 * 10^tem10
;*
;*  Input:  0.temp1|temp0, power
;*
;*  Output: 0.temp1|temp0, tem10
;*
;*  Uses: msb, temp2, temp3, r17 through mul16by8 and div24by8.
;*        Returns through the ret at label n1 in bin_float.
;******************************************************************
dec_float:
	clr tem10
	clr msb
v0:	or power, power	;test sign / zero of binary exponent
	breq n1			;ret
	brmi v1
	ldi temp3, 2	;binary power positive
	rcall mul16by8	;mantissa * 2, overflow goes to msb
	dec power
	rjmp v2
v1:	ldi temp2, 2	;binary power negative
	rcall div24by8	;mantissa / 2
	inc power
v2:	or msb, msb
	breq v3
	ldi temp2, 10	;mantissa >= 1: divide by 10, bump decimal exponent
	rcall div24by8
	inc tem10
	rjmp v0
v3:	cpi temp0, 154	;16-bit compare with 25:154 = 6554 ~ 0.1 * 65536
	ldi r17, 25		;ldi leaves the carry from cpi intact
	cpc temp1, r17
	brsh v0			;mantissa >= 0.1, nothing to do
	ldi temp3, 10	;mantissa < 0.1: multiply by 10, lower decimal exponent
	rcall mul16by8
	dec tem10
	rjmp v0
;******************************************************************
;*  Routine converts a fixed point number msb.temp1|temp0 to a
;*  floating binary number 0.temp1|temp0 * 2^power, where 
;*  temp1(7) = 1, by right and left register shifts.  This is the
;*  most suitable form for precision multiplication and division.
;*
;*  	msb.temp1|temp0 -> 0.temp1|temp0 * 2^power
;*
;*  where temp1 >= 128.
;*
;*  If msb|temp1|temp0 is zero no normalisation is possible: the
;*  routine gives up after 256 left shifts, when power has wrapped
;*  back to 0.
;*
;*  Input: 	msb.temp1|temp0
;*
;*  Output:	temp1|temp0, power  (msb = 0)
;*
;*  Label n1 (a bare ret) is also the exit of dec_float.
;******************************************************************
bin_float:
	clr power
n0:	or msb, msb
	brne n3			;integer part present -> shift right
	sbrc temp1, 7	;top bit set -> normalised, fall into ret
n1: ret
	lsl temp0		;top bit clear -> shift left
	rol temp1
	dec power
n2: breq n1			;power back at 0 -> stop (zero input guard)
	rjmp n0			
n3:	lsr msb
	ror temp1
	ror temp0
	inc power
	rjmp n2
;*******************************************************************
;*  Routine for unsigned 24 bit by 8 bit division.  Since rem is at 
;*  most 8-bit but 9-bits are required for interim calculation T-bit
;*  used for this purpose, economising one register.  The T flag
;*  also carries each quotient bit to the next pass, where bld
;*  inserts it into bit 0 of the shifted dividend.
;*
;*  msb|temp1|temp0 = (msb|temp1|temp0) / temp2, remainder -> temp3
;*
;*  Input:   msb|temp1|temp0 dividend, temp2 divisor
;*
;*  Output:  msb|temp1|temp0 result,   temp3 remainder
;*
;*  Uses:   r17 as counter, T flag.
;*******************************************************************
;*
div24by8:
	sub temp3, temp3	;clear remainder
	ldi r17, 25			;load loop counter, 24+shift last result bit
d0: rol temp0			;shift left dividend
	rol temp1
	rol msb				;carry out = next dividend bit (kept until rol temp3)
	bld temp0, 0		;result bit from T-flag, k=25 drops initial T
	dec r17				;dec leaves carry alone
	brne d1
	ret
d1:	bst temp3, 7		;store carry bit from forthcoming shift
	rol temp3			;shift latest temp1|temp0|msb bit to temp3
	sub temp3, temp2	;trial subtraction of divisor
	brcc d2				;if (C=0)||(T=1) result must be +ve
	brts d2
	add temp3, temp2	;if (C=1)&&(T=0) rem < divisor
	rjmp d0				;T is still 0 here -> quotient bit 0
d2: set					;must set T flag for case (C=0)&&(T=0)
	rjmp d0				;T = 1 -> quotient bit 1
;**************************************************************
;*  mul16by8 - unsigned shift-and-add multiply.
;*
;*  The 16-bit value in temp1|temp0 is multiplied by the 8-bit
;*  value in temp3, giving the 24-bit product msb|temp1|temp0:
;*
;*  	  temp1|temp0
;*  	            *
;*  		    temp3
;*  -----------------
;*    msb|temp1|temp0
;*
;*  One multiplier bit is examined per pass via the carry flag;
;*  the partial product is accumulated in msb and shifted right
;*  into the space vacated by the multiplier.
;*
;*  temp3 is preserved.  Uses r17 as pass counter.
;**************************************************************
mul16by8:
	ldi r17, 16			;16 passes, one per bit of temp1|temp0
	clr msb 			;high byte of product starts empty
	lsr temp1			;carry = lowest bit of temp1|temp0
	ror temp0
z0: brcc z1				;bit clear -> nothing to add this pass
	add msb, temp3		;bit set -> add temp3 into the high byte
z1: ror msb				;shift product right, pulling in any add carry
	ror temp1
	ror temp0			;carry = next bit to examine
	dec r17				;dec leaves the carry untouched
	brne z0
	ret
;******************************************************************
;*  Interrupt handlers for timer1 capture (captured), timer1
;*  overflow (clearovf) and timer1 compare (compared).
;*
;*  captured:  capture register saved to adhigh and adlow.
;*  clearovf:  0xff written to adlow only; adhigh is not written.
;*  Both continue at overflowed: timer1 is stopped and the capacitor
;*  set to discharge by writing 0!!! to PB1 and toggling it as
;*  output.
;*  NOTE(review): the charging source on PB2 is not switched off by
;*  any code in these handlers - presumably done elsewhere; confirm.
;*  
;*  compared:  outputs relax_h1 to PORTD (holds sample) and stops
;*  timer1.  relax_h1 is used as the source operand of out, so it is
;*  presumably a register alias defined elsewhere in the file.
;*
;*  Interrupts automatically disabled via i of SREG.  These routines
;*  can not be interrupted!  
;*
;*  Exit via ret not reti, so interrupts remain disabled.  atod
;*  detects the end of conversion by polling the I flag.
;*
;*  No need to push SREG as the instructions used here (in, out,
;*  ldi, cbi, sbi, rjmp, ret) do not alter it.  That is why ldi and
;*  not clr is used to zero r17.  r17 is overwritten.
;*******************************************************************
;*
captured:
	in   adlow, ICR1L	;low capture register read first! p34
	in   adhigh, ICR1H	;works, capture register < counter!
overflowed:
	ldi r17, 0x00		;r17 is nowhere sensitive -> no need to save
	out TCCR1B, r17		;stop timer1
	cbi PORTB, PB1		;Never write one.  Just toggle in/out
	sbi DDRB,  PB1		;discharge capacitor, takes only about 1.5us
	ret					;not reti: leaves interrupts disabled
clearovf:
	ldi adlow,  0xff	;maximum = 255, converted to 127 on atod
	rjmp overflowed
compared:
	out PORTD, relax_h1	;hold
	ldi r17, 0x00
	out TCCR1B, r17		;stop timer1	
	ret					;returns to code after sleep instruction
;*
;****************************************************************
;*  Sets up A/D conversion using capture and overflow interrupts. 
;*  Clears the timer interrupts ICF1, TOV1, OCF1A, TOV0 in TIFR.
;*  Sets ACIC to route comparator to capture of timer 1.
;*  Presets counter to 0xff00, so that it overflows after 256
;*  counts.
;*  Enables capture and overflow interrupts TICIE, TOIE1 in TIMSK.
;*  Toggles negative comparator pin PB1 as input.  This pin 
;*  previously had 0 written to it to discharge capacitor, so
;*  pull up not enabled on toggle.
;*  Capacitor starts charging by setting PB2.  0.015uF with
;*  68mV/150ohm source converts 4V in 120us.
;*  Last, counter is enabled at clock speed (CS10) with capture
;*  on falling edge of comparator transition.
;*
;*	Waits for conversion over (interrupt clear) before returning:
;*  the handlers exit with ret, leaving the I flag cleared, and
;*  this routine spins until it sees I = 0.
;*
;*  Returns a signed value in adlow (captured low byte - 128).
;*
;*  Uses r17.  Uses stack to depth of 1.
;*****************************************************************
;*  
atod:
	ldi r17, 0xff		;Clear all interrupt flags.  PD action sets ICF1.
	out TIFR, r17		;ICF1 can still be set by portd activity.
	out TCNT1H, r17	;high byte written first! $ff-> 8-bit overflow
	clr r17
	out TCNT1L, r17
	ldi r17, (1<<ACIC)	
	out ACSR, r17		;comparator -> capture.  16bits overflow in 13ms! 
	ldi r17, (1<<TICIE)+(1<<TOIE1) ;if no toie1 count ovfls. AVR hangs.
	out TIMSK,  r17				 ;overflow and capture interrupts
	sei								 ;enable global interrupts
	ldi r17, (1<<CS10)			 ;start counting at clock speed
;	note: (1<<CS11) here instead would count at clk/8 speed
	cbi DDRB,  PB1		;draws ~150uA PB2 current as output
	sbi PORTB, PB2		;start charging capacitor.  
	out TCCR1B, r17		;capture on falling edge.
at:	brie at				;wait for conversion over before returning
	subi adlow, 128		;converts to signed value
	cbi PORTD, PD0		;let base level equilibrate
	ret
;************************************************************
;*  Divide 256/128 Truncation and Rounding Routine
;*
;*  This routine is accessed from the fft code and hence is
;*  called at zero stack depth.  To achieve this the return
;*  address is stored in the Z register, so that the code
;*  exits with:
;*					ijmp
;*
;*  If loop(6) is set we divide temp1|temp0 by 256 (cos is
;*  computed multiplied by 128, hence this is effective division
;*  by 2).  Else we divide temp1|temp0 by 128.  If there is
;*  signed overflow set loop = 0xc0, k=0 and restart n2 loop
;*  with /256 and store.  In that case the routine leaves through
;*  rjmp outerstart and the return address in Z is not used.
;*
;*  The result is the high byte temp1, rounded up when temp0(7) is
;*  set unless temp1 is already 127.
;*
;*  Input:  	temp1|temp0, Z = return address, y = store address
;*
;*  Output: 	temp1 -> SRAM[y] (if loop = 0x80,0xc0)
;*				loop  -> 0xc0 if signed overflow on /128
;*
;*  Control:	loop =
;*					  	0x00 -> no-store loop with /128
;*						0x80 ->    store loop with /128
;*						0xc0 ->    store loop with /256	
;************************************************************
store:
	sbrc loop, 6
	rjmp round			;loop(6) set -> /256 
	lsl temp0
	rol temp1			;loop(6) clear -> /128
	brvc round			;V clear: sign of temp1 survived the shift
	ldi loop, 0xc0		;signed overflow 
	clr k
	rjmp outerstart		;loop = 0xc0 -> record and /256
round:
	cpi temp1, 0x7f
	breq str			;if temp1 = 127 do not round to avoid ovfl
	sbrc temp0, 7		;round (temp0(7)=1)->inc
	inc temp1			
str:sbrc loop, 7		;loop(7) clear -> this is a test loop
	st y, temp1
	ijmp				;back to the address the caller put in Z
;*******************************************************************
;*  Routine to perform a Fast Fourier Transform on 128 real values
;*  stored in [SRAMSTART - RAMEND].  Output array overwrites the 
;*  input.
;*
;*  SRAM[0], SRAM[1] 	= DC component,              Re and Im.  
;*  SRAM[2], SRAM[3] 	= 1/128 frequency component, Re and Im.
;*  ....   , ....
;*  SRAM[126],SRAM[127]	=63/128 frequency component, Re and Im.
;*
;*  Frequency is in units of 1/T, with T = time interval between 
;*  successive values stored in the input SRAM array.  
;*
;*  In accordance with Nyquist max frequency = 63/128 ~ 1/2. 
;*
;*  Real and imaginary parts = cosine and sine transforms
;*
;*  Routine split into three parts:
;*
;*	(outer, middle , inner) - 3 nested loops perform butterfly
;*   swap					- bit reorders output
;*   combine				- gives +ve frequency components.
;*							
;*  For real input these equal the negative frequency components, 
;*  which thus contain no additional info and are not calculated.
;*  
;*	Input  SRAM[0-127] real values
;*  Output SRAM[0-127] 64 complex values = cos tfm. + i sin tfm.
;*
;*  Routine uses registers r0-r2 and all upper registers r16-r31.
;*
;*  Entire SRAM, N=2^7, is used -> NO STACK AVAILABLE.  Routine
;*  makes no calls: it uses the macro mmul for multiplication and
;*  reaches the store routine by rjmp with the return address
;*  held in Z.
;*******************************************************************
;*
;*  Register aliases for the fft.  Several share a register with
;*  another alias whose value is not needed at the same time.
.def k 		=  r21		;butterfly index; element index in swap/combine
.def n2 	=  r22		;butterfly span, 0x20 halved down to 0
.def kn2 	=  r23		;2*(k+n2), byte offset of partner element
.def j 		=  r26 		;twiddle index, stepped by 2
.def i 		=  r27 		;inner loop counter, runs 0..n2-1
.def temp4  =  r22 		;same register as n2 (used in combine)
.def temp5  =  r23 		;same register as kn2 (used in combine)
.def cos	=  r19 		;same register as temp2
.def sin	=  r20 		;same register as temp3
.def treall =  r24 
.def trealh =  r25 
.def ftemp0 =  r30		;same register as adlow
.def ftemp1 =  r31 		;same register as adhigh
.def timagh =  r0
.def timagl =  r1
.def loop   =  r29		;pass control flags, see store


;****************************************************************
;*  fft entry: butterfly passes.  Three nested loops:
;*    outer  - one span n2 = 0x20, 0x10, ... 1.  Each span is run
;*             first as a test pass (loop = 0x00, nothing stored);
;*             the pass is then repeated with storing enabled
;*             (loop = 0x80), or restarted by store with loop = 0xc0
;*             if an overflow was detected.
;*    middle - one twiddle factor (index j) per group of n2
;*             butterflies.
;*    inner  - the n2 butterflies of a group.
;*  Results are written through the store routine, entered by rjmp
;*  with the return address in Z (no stack is used).
;****************************************************************
fft:
	ldi loop, 0x00
	ldi k,    0x00
	ldi n2,   0x20
outerstart:
	ldi j, 0x00
;****************************************************************
;*  Get cos[j], sin[j]. Using EEPROM instead of FLASH frees r0
;****************************************************************
middlestart:
	ldi i, 0x00
	cpi j, 0x00
	breq innerstart			;j = 0 needs no table look-up (see j_zero)
	ldi temp0, 0x04			;0x04 triggers carry on 6'th shift
	mov temp1, j
ib: lsr temp1				;temp0 = low 6 bits of j in reversed order
	rol temp0
	brcc ib
	ldi ftemp1, 1			;EEPROM strobe
	ldi ftemp0, cos_table
	add ftemp0, temp0
	out EEAR, ftemp0
	out EECR, ftemp1
	in cos, EEDR
	cpi temp0, 0x11
	brlo first_quad
	subi temp0, 0x10		;index >= 0x11: second look-up at index - 0x10
	rjmp get_cos
j_zero:
	ld trealh, y			;trealh=xreal(kn2)
	clr treall
	asr trealh
	ror treall				;trealh|treall = xreal(kn2)*128
	ldd timagh, y+1			;timagh=ximag(kn2)
	clr timagl
	asr timagh
	ror timagl				;timagh|timagl = ximag(kn2)*128
	rjmp butterfly
first_quad:
	neg  temp0
	subi temp0, 0xf0		;subi temp, 0xf0 = addi temp, 0x10
get_cos:
	ldi ftemp0, cos_table	;second look-up in the same table gives sin
	add ftemp0, temp0
	out EEAR, ftemp0
	out EECR, ftemp1
	in sin, EEDR
;************************************************************************
;*  Calculates butterfly:	xreal*cos+ximag*sin = treal
;*			   				ximag*cos-xreal*sin = timag
;*             				xreal[k],xreal[kn2] = xreal[k] +/- treal
;*             				ximag[k],ximag[kn2] = ximag[k] +/- timag
;************************************************************************
innerstart:
	mov kn2, k
	add kn2, n2
	lsl kn2					;since xreal, ximag interleaved
	ldi yl, SRAMSTART		;yh not used in 2313 as RAM < 256 bytes
	add yl, kn2
	cpi j, 0x00
	breq j_zero
	ld treall, y			;treall=xreal(kn2)
	mmul trealh,treall,cos
	ldd timagl, y+1			;timagl=ximag(kn2);stored in &real(kn2)+1
	mmul timagh,timagl,sin
	add treall, timagl
	adc trealh, timagh		;treal=xreal*cos+ximag*sin < 2^14.5
	ldd timagl, y+1
	mmul timagh,timagl,cos
	ld temp0, y
	mmul temp1,temp0,sin
	sub timagl, temp0
	sbc timagh, temp1		;timag=ximag*cos-xreal*sin
butterfly:
	sub yl, n2
	sub yl, n2				;yl=&xreal(k)
	ld  temp1, y			;temp1=xreal(k)
	clr temp0
	asr temp1
	ror temp0				;temp1|temp0 = xreal(k)*128
	sub temp0, treall		;t1|t0 = xreal(k)*128 - treal
	sbc temp1, trealh		;cant overflow as |B|<2^14.5				
	add yl, n2
	add yl, n2				;yl=&xreal(kn2)
	ldi zl, low(t1)			;Z = return address for store
	ldi zh,high(t1)
	rjmp store				;xreal(kn2) STORED
t1:	sub yl, n2
	sub yl, n2				;yl=&xreal(k)
	ldd temp1, y+1			;temp1=ximag(k)
	clr temp0
	asr temp1
	ror temp0				;temp1|temp0 = ximag(k)*128
	sub temp0, timagl		
	sbc temp1, timagh		;ximag(k)*128 - timag
	add yl, n2
	add yl, n2
	inc yl					;yl=&ximag(kn2)
	ldi zl, low(t2)
	ldi zh, high(t2)
	rjmp store				;ximag(kn2) STORED
t2:	sub yl, n2
	sub yl, n2				;yl=&ximag(k)
	ld  temp1, -y			;xreal(k) loaded
	clr temp0
	asr temp1
	ror temp0				
	add temp0, treall		
	adc temp1, trealh		;xreal(k)*128 + treal
	ldi zl, low(t3)
	ldi zh, high(t3)
	rjmp store				;xreal(k) STORED
t3:	ldd temp1, y+1			;ximag(k) loaded
	clr temp0
	asr temp1
	ror temp0				
	add temp0, timagl		
	adc temp1, timagh		;ximag(k)*128 + timag
	inc yl					;yl=&ximag(k)
	ldi zl, low(t4)
	ldi zh, high(t4)
	rjmp store				;ximag(k) STORED
t4:	dec yl
	inc k
	inc i
	cpse n2, i
	rjmp innerstart			;if n2 != i jump 
	subi j, 0xfe			;+0x02 == -0xfe
	add  k, n2				;skip the partner half just written
	cpi  k, 0x40
	brsh midcont
	rjmp middlestart		;continue middle loop if k < 0x40
midcont:
	clr k					;k=0
	sbrc loop, 7			;a storing pass has finished -> halve the span
	lsr n2
	subi loop, 0x80			;loop: 0x00 -> 0x80, 0x80 -> 0x00, 0xc0 -> 0x00
	andi loop, 0x80
	cpi n2, 0x00
	breq swapstart			;span exhausted -> butterflies complete
	rjmp outerstart
;************************************************************
;*  Bit reorders SRAM: (i > ibitr[i]) -> x[i] <-> x[ibitr[i]]
;*
;*  For k = 0..63 the 6-bit reversal of k is formed in temp0.
;*  Only when k < reversed(k) are the two complex elements
;*  (real and imaginary byte each) exchanged, so that every pair
;*  is swapped exactly once.  Entered with k = 0.
;************************************************************
swapstart:
	ldi temp0, 0x04		;0x04 triggers carry on 6'th shift
	mov temp1, k		;k=0 on first loop
sw: lsr temp1			;temp0 = low 6 bits of k in reversed order
	rol temp0
	brcc sw
	cp k, temp0
	brsh noswap			;swap only if k < reversed index
	ldi yl, SRAMSTART
	add yl, k
	add yl, k			;y = &xreal[k]
	ldi xl, SRAMSTART
	add xl, temp0
	add xl, temp0		;x = &xreal[reversed k]
	ld temp0, y
	ld temp1, x
	st y+, temp1		;exchange real parts
	st x+, temp0
	ld temp0, y
	ld temp1, x
	st y, temp1			;exchange imaginary parts
	st x, temp0
noswap:
	inc k
	cpi k, 0x40
	brne swapstart
;*****************************************************
;*  Transform complex to real: 
;*    fft(x[2*i]+i x[2*i+1]) = V -> fft(x[i]) = X
;*  qr=Vr[n-i]-Vr[i]
;*  qi=Vi[n-i]+Vi[i]
;*  4*Xr[i]  = Vr[i]+Vr[n-i]+qr*sin[i]+qi*cos[i]
;*  4*Xi[i]  = Vi[i]-Vi[n-i]-qi*sin[i]+qr*cos[i]
;*  4*Xr[n-i]= Vr[i]+Vr[n-i]-qr*sin[i]-qi*cos[i]
;*  4*Xi[n-i]=-Vi[i]+Vi[n-i]-qi*sin[i]+qr*cos[i]
;*
;*  y walks up from element 1, x walks down from element 63.
;*  Elements k and n-k are combined in place for k = 1..31;
;*  element 0 is zeroed and element 32 is handled at endfft.
;*  Exits by rjmp fftret.
;*****************************************************
	ldi yl,SRAMSTART		;points to xreal[0]
	ldi xl,RAMEND-1			;points to xreal[n-1], first partner of k=1
	clr ftemp0
	st y+, ftemp0			;xreal[0]==0 dont need DC
	st y+, ftemp0			;ximag[0] =0
	ldi k, 0x01
combine:
	ld temp0,  y+			;xreal[k]
	ld treall, x+			;xreal[n-k]
	asr treall
	asr temp0
	sub treall, temp0		;(xreal[n-k]-xreal[k])/2
	mov temp4, treall		;temp4=qr
	ld temp0,  y			;ximag[k]
	ld timagl, x			;ximag[n-k]
	asr timagl
	asr temp0
	add timagl, temp0		;(ximag[n-k]+ximag[k])/2
	mov temp0, timagl		;temp0=qi		
	ldi ftemp1, 1			;EEPROM strobe
	ldi ftemp0, cos2_table	;0'th element is cos[31*pi/64]
	add ftemp0, k
	out EEAR, ftemp0
	out EECR, ftemp1
	in cos, EEDR			;cos[k]
	ldi ftemp0, sin2_table
	sub ftemp0, k
	out EEAR, ftemp0
	out EECR, ftemp1
	in sin, EEDR			;sin[k]
	mmul trealh, treall, sin;qr*sin
	mmul temp1,  temp0,  cos;qi*cos
	add treall, temp0
	adc trealh, temp1		;trealh|treall=qr*sin+qi*cos=xr (no ovfl)
	mmul timagh, timagl, sin;qi*sin
	mmul temp5,  temp4,  cos;qr*cos
	sub temp4, timagl
	sbc temp5,  timagh		
	mov timagl, temp4
	mov timagh, temp5		;timagh|timagl=qr*cos-qi*sin=xi
	ld temp0,  -y			;xreal[k]
	ld temp1,  -x			;xreal[n-k]
	asr temp1
	asr temp0
	add temp1, temp0		;temp1  = (xreal[k]+xreal[n-k])/2 = pr
	clr temp0				;this doesn't affect carry
	asr temp1
	ror temp0				;temp1|temp0=128*pr
	mov temp4, temp0
	mov temp5, temp1		;			=temp5|temp4
	add temp0, treall
	adc temp1, trealh		;no ovfl brvs? 1/2 + 1/root(2) can ovfl 
	st y+, temp1			;(128*pr+xr)/256 -> xreal[k]
	sub temp4, treall
	sbc temp5, trealh
	st x+, temp5			;(128*pr-xr)/256 -> xreal[n-k]
	ld temp1,  y			;ximag[k]
	ld temp0,  x			;ximag[n-k]
	asr temp1
	asr temp0
	sub temp1, temp0		;(ximag[k]-ximag[n-k])/2 = pi
	clr temp0				;could neglect temp0 +save 3 steps		
	asr temp1
	ror temp0				;temp1|temp0=128*pi						
	mov temp4, temp0
	mov temp5, temp1		;			=temp5|temp4
	add temp0, timagl
	adc temp1, timagh			
	st y+, temp1			;(128*pi+xi)/256 -> ximag[k];
	sub timagl, temp4
	sbc timagh, temp5		
	st x , timagh			;(-128*pi+xi)/256 -> ximag[n-k]
	subi xl, 0x03			;x = &xreal[n-k-1]
	inc k
	cpi k, 0x20				;dont do 32'th element
	breq endfft
	rjmp combine
endfft:
	ld temp0, y				;temp0 = xreal[32]
	asr temp0				;as no prior addition, truncate with asr
	st y+, temp0
	ld temp0, y				;temp0 = ximag[32]
	asr temp0
	neg temp0				;imaginary part is halved and negated
	st y, temp0				;as /2 no signed ovfl. poss.	
	rjmp fftret
