1/*
2 * This m4 code has been taken from The SPARC Architecture Manual Version 8.
3 */
4
5/*
6 * Division/Remainder
7 *
8 * Input is:
9 *   dividend -- the thing being divided
10 *   divisor -- how many ways to divide it
11 * Important parameters:
12 *   N -- how many bits per iteration we try to get
13 *        as our current guess: define(N, 4) define(TWOSUPN, 16)
14 *   WORDSIZE -- how many bits altogether we're talking about:
15 *               obviously: define(WORDSIZE, 32)
16 * A derived constant:
17 *   TOPBITS -- how many bits are in the top "decade" of a number:
18 *        define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
19 * Important variables are:
20 *   Q -- the partial quotient under development -- initially 0
21 *   R -- the remainder so far -- initially == the dividend
22 *   ITER -- number of iterations of the main division loop which will
23 *           be required. Equal to CEIL( lg2(quotient)/N )
24 *           Note that this is log_base_(2ˆN) of the quotient.
25 *   V -- the current comparand -- initially divisor*2ˆ(ITER*N-1)
26 * Cost:
27 *   current estimate for non-large dividend is
28 *        CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C
29 *   a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a
30 *   different path, as the upper bits of the quotient must be developed
31 *   one bit at a time.
32 *   This uses the m4 and cpp macro preprocessors.
33 */
34
35define(dividend, `%o0')
36define(divisor,`%o1')
37define(Q, `%o2')
38define(R, `%o3')
39define(ITER, `%o4')
40define(V, `%o5')
41define(SIGN, `%g3')
42define(T, `%g1')
43define(SC,`%g2')
44/*
45 * This is the recursive definition of how we develop quotient digits.
46 * It takes three important parameters:
47 *   $1 -- the current depth, 1<=$1<=N
48 *   $2 -- the current accumulation of quotient bits
49 *   N -- max depth
50 * We add a new bit to $2 and either recurse or insert the bits in the quotient.
51 * Dynamic input:
52 *   R -- current remainder
53 *   Q -- current quotient
54 *   V -- current comparand
55 *   cc -- set on current value of R
56 * Dynamic output:
57 *   R', Q', V', cc'
58 */
59
60#include "../assembly.h"
61
62define(DEVELOP_QUOTIENT_BITS,
63`	!depth $1, accumulated bits $2
64	bl	L.$1.eval(TWOSUPN+$2)
65	srl	V,1,V
66	! remainder is nonnegative
67	subcc	R,V,R
68	ifelse( $1, N,
69	`	b	9f
70		add	Q, ($2*2+1), Q
71	',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
72	')
73L.$1.eval(TWOSUPN+$2):
74	! remainder is negative
75	addcc	R,V,R
76	ifelse( $1, N,
77	`	b	9f
78		add	Q, ($2*2-1), Q
79	',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
80	')
81	ifelse( $1, 1, `9:')
82')
83ifelse( ANSWER, `quotient', `
84.text
85	.align 32
86DEFINE_COMPILERRT_FUNCTION(__udivsi3)
87	b	divide
88	mov	0,SIGN			! result always nonnegative
89.text
90	.align 32
91DEFINE_COMPILERRT_FUNCTION(__divsi3)
92	orcc	divisor,dividend,%g0	! are either dividend or divisor negative
93	bge	divide			! if not, skip this junk
94	xor	divisor,dividend,SIGN	! record sign of result in sign of SIGN
95	tst	divisor
96	bge	2f
97	tst	dividend
98	! divisor < 0
99	bge	divide
100	neg	divisor
101	2:
102	! dividend < 0
103	neg	dividend
104	! FALL THROUGH
105',`
106.text
107	.align 32
108DEFINE_COMPILERRT_FUNCTION(__umodsi3)
109	b	divide
110	mov	0,SIGN			! result always nonnegative
111.text
112	.align 32
113DEFINE_COMPILERRT_FUNCTION(__modsi3)
114	orcc	divisor,dividend,%g0	! are either dividend or divisor negative
115	bge	divide			! if not, skip this junk
116	mov	dividend,SIGN		! record sign of result in sign of SIGN
117	tst	divisor
118	bge	2f
119	tst	dividend
120	! divisor < 0
121	bge	divide
122	neg	divisor
123	2:
124	! dividend < 0
125	neg	dividend
126	! FALL THROUGH
127')
128
129divide:
130	! Compute size of quotient, scale comparand.
131	orcc	divisor,%g0,V		! movcc divisor,V
132	te	2			! if divisor = 0
133	mov	dividend,R
134	mov	0,Q
135	sethi	%hi(1<<(WORDSIZE-TOPBITS-1)),T
136	cmp	R,T
137	blu	not_really_big
138	mov	0,ITER
139	!
140	! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here,
141	! as our usual N-at-a-shot divide step will cause overflow and havoc.
142	! The total number of bits in the result here is N*ITER+SC, where
143	! SC <= N.
144	! Compute ITER in an unorthodox manner: know we need to Shift V into
145! the top decade: so don't even bother to compare to R.
1461:
147	cmp	V,T
148	bgeu	3f
149	mov	1,SC
150	sll	V,N,V
151	b	1b
152	inc	ITER
153! Now compute SC
1542:	addcc	V,V,V
155	bcc	not_too_big
156	add	SC,1,SC
157		! We're here if the divisor overflowed when Shifting.
158		! This means that R has the high-order bit set.
159		! Restore V and subtract from R.
160		sll	T,TOPBITS,T	! high order bit
161		srl	V,1,V		! rest of V
162		add	V,T,V
163		b	do_single_div
164		dec	SC
165not_too_big:
1663:	cmp	V,R
167	blu	2b
168	nop
169	be	do_single_div
170	nop
171! V > R: went too far: back up 1 step
172!     srl V,1,V
173!      dec SC
174! do single-bit divide steps
175!
176! We have to be careful here. We know that R >= V, so we can do the
177! first divide step without thinking. BUT, the others are conditional,
178! and are only done if R >= 0. Because both R and V may have the high-
179! order bit set in the first step, just falling into the regular
180! division loop will mess up the first time around.
181! So we unroll slightly...
182do_single_div:
183	deccc	SC
184	bl	end_regular_divide
185	nop
186	sub	R,V,R
187	mov	1,Q
188	b,a	end_single_divloop
189	! EMPTY
190single_divloop:
191	sll	Q,1,Q
192	bl	1f
193	srl	V,1,V
194	! R >= 0
195		sub	R,V,R
196		b	2f
197		inc	Q
198	1:	! R < 0
199		add	R,V,R
200		dec	Q
201	2:
202	end_single_divloop:
203		deccc	SC
204		bge	single_divloop
205		tst	R
206		b,a	end_regular_divide
207		! EMPTY
208
209not_really_big:
2101:
211	sll	V,N,V
212	cmp	V,R
213	bleu	1b
214	inccc	ITER
215	be	got_result
216	dec	ITER
217do_regular_divide:
218	! Do the main division iteration
219	tst	R
220	! Fall through into divide loop
221divloop:
222	sll	Q,N,Q
223	DEVELOP_QUOTIENT_BITS( 1, 0 )
224end_regular_divide:
225	deccc	ITER
226	bge	divloop
227	tst	R
228	bl,a	got_result
229	! non-restoring fixup if remainder < 0, otherwise annulled
230ifelse( ANSWER, `quotient',
231`	dec	Q
232',`	add	R,divisor,R
233')
234
235got_result:
236	tst	SIGN
237	bl,a	1f
238	! negate for answer < 0, otherwise annulled
239ifelse( ANSWER, `quotient',
240`	neg	%o2,%o2			! Q <- -Q
241',`	neg	%o3,%o3 		! R <- -R
242')
2431:
244	retl				! leaf-routine return
245ifelse( ANSWER, `quotient',
246`	mov	%o2,%o0			! quotient <- Q
247',`	mov	%o3,%o0			! remainder <- R
248')
249