1/* SPDX-License-Identifier: GPL-2.0+ */
2/*
3 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
4 *
5 * Author: Nicolas Pitre <nico@fluxnic.net>
6 *   - contributed to gcc-3.4 on Sep 30, 2003
7 *   - adapted for the Linux kernel on Oct 2, 2003
8 */
9/*
10 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
11 */
12
13#include <linux/linkage.h>
14#include <asm/assembler.h>
15
16/*
17 * U-Boot compatibility bit: define an empty UNWIND() macro, since we
18 * do not support stack unwinding, so that all of the functions stay
19 * available without diverging from the Linux code.
20 */
21#ifdef __UBOOT__
22#define UNWIND(x...)
23#endif
24
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Shift-and-subtract core of the unsigned 32-bit division.
 *
 * Setup:  \divisor is shifted left towards the magnitude of \dividend
 *         while \curbit is shifted by the same amount, so \curbit always
 *         marks the quotient bit that the current \divisor position
 *         stands for.  \result is cleared.
 * Loop:   each pass performs four compare/subtract steps (divisor,
 *         divisor >> 1, >> 2, >> 3) and ORs the matching \curbit into
 *         \result whenever the subtraction is taken.
 *
 * On exit \result holds the quotient.  \dividend, \divisor, \curbit and
 * the condition flags are clobbered.  The call sites visible in this
 * file deal with a zero divisor, dividend <= divisor and power-of-two
 * divisors before expanding this macro.
 */
25.macro ARM_DIV_BODY dividend, divisor, result, curbit
26
27#if __LINUX_ARM_ARCH__ >= 5
28
29	clz	\curbit, \divisor		@ leading zeros of the divisor
30	clz	\result, \dividend		@ leading zeros of the dividend
31	sub	\result, \curbit, \result	@ shift that aligns the top bits
32	mov	\curbit, #1
33	mov	\divisor, \divisor, lsl \result	@ scale divisor up by that shift
34	mov	\curbit, \curbit, lsl \result	@ quotient bit for that position
35	mov	\result, #0			@ quotient accumulates from zero
36
37#else
38
39	@ Initially shift the divisor left 3 bits if possible,
40	@ set curbit accordingly.  This allows for curbit to be located
41	@ at the left end of each 4 bit nibbles in the division loop
42	@ to save one loop in most cases.
43	tst	\divisor, #0xe0000000
44	moveq	\divisor, \divisor, lsl #3
45	moveq	\curbit, #8
46	movne	\curbit, #1
47
48	@ Unless the divisor is very big, shift it up in multiples of
49	@ four bits, since this is the amount of unwinding in the main
50	@ division loop.  Continue shifting until the divisor is
51	@ larger than the dividend.
521:	cmp	\divisor, #0x10000000
53	cmplo	\divisor, \dividend
54	movlo	\divisor, \divisor, lsl #4
55	movlo	\curbit, \curbit, lsl #4
56	blo	1b
57
58	@ For very big divisors, we must shift it a bit at a time, or
59	@ we will be in danger of overflowing.
601:	cmp	\divisor, #0x80000000
61	cmplo	\divisor, \dividend
62	movlo	\divisor, \divisor, lsl #1
63	movlo	\curbit, \curbit, lsl #1
64	blo	1b
65
66	mov	\result, #0
67
68#endif
69
	@ Four compare/subtract steps per pass; each taken subtraction
	@ sets the quotient bit that matches the divisor position used.
70	@ Division loop
711:	cmp	\dividend, \divisor
72	subhs	\dividend, \dividend, \divisor
73	orrhs	\result,   \result,   \curbit
74	cmp	\dividend, \divisor,  lsr #1
75	subhs	\dividend, \dividend, \divisor, lsr #1
76	orrhs	\result,   \result,   \curbit,  lsr #1
77	cmp	\dividend, \divisor,  lsr #2
78	subhs	\dividend, \dividend, \divisor, lsr #2
79	orrhs	\result,   \result,   \curbit,  lsr #2
80	cmp	\dividend, \divisor,  lsr #3
81	subhs	\dividend, \dividend, \divisor, lsr #3
82	orrhs	\result,   \result,   \curbit,  lsr #3
83	cmp	\dividend, #0			@ Early termination?
84	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
85	movne	\divisor,  \divisor, lsr #4
86	bne	1b
87
88.endm
89
90
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in \order the bit index of the highest set bit of \divisor,
 * which the callers then use as a right-shift count.  The call sites
 * visible in this file only expand it for a power-of-two divisor.
 *
 * ARMv5 and later: \order = 31 - clz(\divisor); \divisor is preserved.
 * Older cores:      a 16/8/4-bit binary search that shifts \divisor
 *                   down as it goes, so \divisor is clobbered.
 * The condition flags are clobbered on the pre-ARMv5 path.
 */
91.macro ARM_DIV2_ORDER divisor, order
92
93#if __LINUX_ARM_ARCH__ >= 5
94
95	clz	\order, \divisor
96	rsb	\order, \order, #31		@ order = 31 - clz(divisor)
97
98#else
99
100	cmp	\divisor, #(1 << 16)		@ anything in the upper half?
101	movhs	\divisor, \divisor, lsr #16
102	movhs	\order, #16
103	movlo	\order, #0
104
105	cmp	\divisor, #(1 << 8)
106	movhs	\divisor, \divisor, lsr #8
107	addhs	\order, \order, #8
108
109	cmp	\divisor, #(1 << 4)
110	movhs	\divisor, \divisor, lsr #4
111	addhs	\order, \order, #4
112
	@ divisor is below 16 here: above 4 adds 3, otherwise
	@ divisor >> 1 (0, 1 or 2 for 1, 2 or 4) is the remaining order.
113	cmp	\divisor, #(1 << 2)
114	addhi	\order, \order, #3
115	addls	\order, \order, \divisor, lsr #1
116
117#endif
118
119.endm
120
121
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Shift-and-subtract core of the unsigned 32-bit modulo.
 *
 * Setup:  \divisor is shifted left towards the magnitude of \dividend
 *         and \order records by how many bits it was shifted.
 * Reduce: \divisor is walked back down one bit position per
 *         compare/subtract step, four steps per loop pass, followed by
 *         up to three single steps for whatever is left over.
 *
 * On exit \dividend holds the remainder.  \divisor, \order and the
 * condition flags are clobbered; \spare is only written on the ARMv5+
 * path.
 */
122.macro ARM_MOD_BODY dividend, divisor, order, spare
123
124#if __LINUX_ARM_ARCH__ >= 5
125
126	clz	\order, \divisor
127	clz	\spare, \dividend
128	sub	\order, \order, \spare		@ shift that aligns the top bits
129	mov	\divisor, \divisor, lsl \order
130
131#else
132
133	mov	\order, #0
134
135	@ Unless the divisor is very big, shift it up in multiples of
136	@ four bits, since this is the amount of unwinding in the main
137	@ division loop.  Continue shifting until the divisor is
138	@ larger than the dividend.
1391:	cmp	\divisor, #0x10000000
140	cmplo	\divisor, \dividend
141	movlo	\divisor, \divisor, lsl #4
142	addlo	\order, \order, #4
143	blo	1b
144
145	@ For very big divisors, we must shift it a bit at a time, or
146	@ we will be in danger of overflowing.
1471:	cmp	\divisor, #0x80000000
148	cmplo	\divisor, \dividend
149	movlo	\divisor, \divisor, lsl #1
150	addlo	\order, \order, #1
151	blo	1b
152
153#endif
154
155	@ Perform all needed subtractions to keep only the remainder.
156	@ Do comparisons in batch of 4 first.
157	subs	\order, \order, #3		@ yes, 3 is intended here
158	blt	2f				@ fewer than 4 steps: skip the loop
159
1601:	cmp	\dividend, \divisor
161	subhs	\dividend, \dividend, \divisor
162	cmp	\dividend, \divisor,  lsr #1
163	subhs	\dividend, \dividend, \divisor, lsr #1
164	cmp	\dividend, \divisor,  lsr #2
165	subhs	\dividend, \dividend, \divisor, lsr #2
166	cmp	\dividend, \divisor,  lsr #3
167	subhs	\dividend, \dividend, \divisor, lsr #3
168	cmp	\dividend, #1			@ anything left of the dividend?
169	mov	\divisor, \divisor, lsr #4	@ flags from the cmp are kept
170	subsge	\order, \order, #4		@ yes: account for the 4 steps done
171	bge	1b				@ and loop while 4 more are due
172
	@ Finished if no single steps remain (low two bits of order
	@ clear) or if the dividend has already dropped to zero.
173	tst	\order, #3
174	teqne	\dividend, #0
175	beq	5f
176
177	@ Either 1, 2 or 3 comparison/subtractions are left.
1782:	cmn	\order, #2			@ compare order with -2
179	blt	4f				@ below -2: one step left
180	beq	3f				@ equal to -2: two steps left
181	cmp	\dividend, \divisor
182	subhs	\dividend, \dividend, \divisor
183	mov	\divisor,  \divisor,  lsr #1
1843:	cmp	\dividend, \divisor
185	subhs	\dividend, \dividend, \divisor
186	mov	\divisor,  \divisor,  lsr #1
1874:	cmp	\dividend, \divisor
188	subhs	\dividend, \dividend, \divisor
1895:
190.endm
191
192
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.  Division by one, dividend <= divisor
 * and power-of-two divisors are answered without entering the generic
 * ARM_DIV_BODY loop.  ret/reteq are return macros that are not defined
 * in this file (presumably they come from <asm/assembler.h>).
 */
193.pushsection .text.__udivsi3, "ax"
194ENTRY(__udivsi3)
195ENTRY(__aeabi_uidiv)
196UNWIND(.fnstart)
197
198	subs	r2, r1, #1			@ r2 = divisor - 1, sets flags
199	reteq	lr				@ divisor == 1: quotient is r0 as is
200	bcc	Ldiv0				@ borrow: divisor was 0
201	cmp	r0, r1
202	bls	11f				@ dividend <= divisor: result is 0 or 1
203	tst	r1, r2				@ divisor & (divisor - 1)
204	beq	12f				@ zero: divisor is a power of 2
205
206	ARM_DIV_BODY r0, r1, r2, r3
207
208	mov	r0, r2				@ quotient was built in r2
209	ret	lr
210
	@ Flags still come from the cmp r0, r1 above.
21111:	moveq	r0, #1				@ dividend == divisor
212	movne	r0, #0				@ dividend <  divisor
213	ret	lr
214
21512:	ARM_DIV2_ORDER r1, r2
216
217	mov	r0, r0, lsr r2			@ power of 2: divide by shifting
218	ret	lr
219
220UNWIND(.fnend)
221ENDPROC(__udivsi3)
222ENDPROC(__aeabi_uidiv)
223.popsection
224
/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder
 * Uses: r1, r2, r3 and the condition flags as scratch.
 *
 * A zero divisor branches to Ldiv0.  The conditional sequence after the
 * first subs settles every easy case without a branch:
 *   - divisor == 1 or dividend == divisor  -> remainder 0
 *   - dividend < divisor                   -> remainder is the dividend
 *   - divisor is a power of two            -> dividend & (divisor - 1)
 * Only the remaining case falls through to ARM_MOD_BODY.
 */
225.pushsection .text.__umodsi3, "ax"
226ENTRY(__umodsi3)
227UNWIND(.fnstart)
228
229	subs	r2, r1, #1			@ compare divisor with 1
230	bcc	Ldiv0
231	cmpne	r0, r1				@ compare dividend with divisor
232	moveq   r0, #0				@ divisor is 1 or equals dividend
233	tsthi	r1, r2				@ see if divisor is power of 2
234	andeq	r0, r0, r2			@ mask (leaves an r0 of 0 at 0)
235	retls	lr				@ done unless hi and not power of 2
236
237	ARM_MOD_BODY r0, r1, r2, r3
238
239	ret	lr
240
241UNWIND(.fnend)
242ENDPROC(__umodsi3)
243.popsection
244
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * ip keeps dividend ^ divisor, whose sign bit is the sign of the result.
 * The division itself is done on the absolute values (r3 = |dividend|,
 * r1 = |divisor|) and the quotient is negated at the end when ip is
 * negative.  A zero divisor branches to Ldiv0.
 */
245.pushsection .text.__divsi3, "ax"
246ENTRY(__divsi3)
247ENTRY(__aeabi_idiv)
248UNWIND(.fnstart)
249
250	cmp	r1, #0
251	eor	ip, r0, r1			@ save the sign of the result.
252	beq	Ldiv0
253	rsbmi	r1, r1, #0			@ loops below use unsigned.
254	subs	r2, r1, #1			@ division by 1 or -1 ?
255	beq	10f
256	movs	r3, r0
257	rsbmi	r3, r0, #0			@ positive dividend value
258	cmp	r3, r1
259	bls	11f				@ |dividend| <= |divisor|
260	tst	r1, r2				@ divisor is power of 2 ?
261	beq	12f
262
263	ARM_DIV_BODY r3, r1, r0, r2
264
265	cmp	ip, #0
266	rsbmi	r0, r0, #0			@ negate if the signs differed
267	ret	lr
268
	@ Divisor was 1 or -1.  ip ^ r0 gives back the original divisor,
	@ so mi below means the divisor was negative.
26910:	teq	ip, r0				@ same sign ?
270	rsbmi	r0, r0, #0
271	ret	lr
272
	@ Flags still come from the cmp r3, r1 above.
27311:	movlo	r0, #0				@ |dividend| < |divisor|
274	moveq	r0, ip, asr #31			@ equal: 0 or -1 from the sign
275	orreq	r0, r0, #1			@ turned into +1 or -1
276	ret	lr
277
27812:	ARM_DIV2_ORDER r1, r2
279
280	cmp	ip, #0
281	mov	r0, r3, lsr r2			@ |dividend| >> order
282	rsbmi	r0, r0, #0
283	ret	lr
284
285UNWIND(.fnend)
286ENDPROC(__divsi3)
287ENDPROC(__aeabi_idiv)
288.popsection
289
/*
 * __modsi3 - signed 32-bit modulo.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, negated when the original dividend was negative
 * Uses: r1, r2, r3, ip and the condition flags as scratch.
 *
 * The work is done on the absolute values; ip keeps the original
 * dividend so its sign can be applied to the result.  A zero divisor
 * branches to Ldiv0.  The conditional sequence mirrors the one in
 * __umodsi3, except that the easy cases branch to the sign fix-up at
 * 10: instead of returning directly.
 */
290.pushsection .text.__modsi3, "ax"
291ENTRY(__modsi3)
292UNWIND(.fnstart)
293
294	cmp	r1, #0
295	beq	Ldiv0
296	rsbmi	r1, r1, #0			@ loops below use unsigned.
297	movs	ip, r0				@ preserve sign of dividend
298	rsbmi	r0, r0, #0			@ if negative make positive
299	subs	r2, r1, #1			@ compare divisor with 1
300	cmpne	r0, r1				@ compare dividend with divisor
301	moveq	r0, #0				@ divisor is 1 or equals dividend
302	tsthi	r1, r2				@ see if divisor is power of 2
303	andeq	r0, r0, r2			@ mask (leaves an r0 of 0 at 0)
304	bls	10f				@ easy case: only fix the sign
305
306	ARM_MOD_BODY r0, r1, r2, r3
307
30810:	cmp	ip, #0
309	rsbmi	r0, r0, #0			@ dividend was negative: negate
310	ret	lr
311
312UNWIND(.fnend)
313ENDPROC(__modsi3)
314.popsection
315
/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch, plus whatever __aeabi_uidiv clobbers.
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then
 * reconstructed as dividend - quotient * divisor.
 */
316.pushsection .text.__aeabi_uidivmod, "ax"
317ENTRY(__aeabi_uidivmod)
318UNWIND(.fnstart)
319UNWIND(.save {r0, r1, ip, lr}	)
320
321	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands and lr
322	bl	__aeabi_uidiv			@ r0 = quotient
323	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
324	mul	r3, r0, r2			@ r3 = quotient * divisor
325	sub	r1, r1, r3			@ r1 = remainder
326	ret	lr
327
328UNWIND(.fnend)
329ENDPROC(__aeabi_uidivmod)
330.popsection
331
/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 as scratch, plus whatever __aeabi_idiv clobbers.
 *
 * The quotient comes from __aeabi_idiv; the remainder is then
 * reconstructed as dividend - quotient * divisor.
 *
 * The routine lives in its own .text.__aeabi_idivmod section, following
 * the one-section-per-function layout used by the other routines here,
 * rather than sharing the section of __aeabi_uidivmod.
 */
332.pushsection .text.__aeabi_idivmod, "ax"
333ENTRY(__aeabi_idivmod)
334UNWIND(.fnstart)
335UNWIND(.save {r0, r1, ip, lr}	)
336
337	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands and lr
338	bl	__aeabi_idiv			@ r0 = quotient
339	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
340	mul	r3, r0, r2			@ r3 = quotient * divisor
341	sub	r1, r1, r3			@ r1 = remainder
342	ret	lr
343
344UNWIND(.fnend)
345ENDPROC(__aeabi_idivmod)
346.popsection
347
/*
 * Ldiv0 - common division-by-zero path for the routines in this file.
 *
 * Saves lr, calls __div0 (defined outside this file) and then returns
 * to the original caller with r0 = 0.  The stack pointer is moved by
 * 8 bytes although only lr (4 bytes) is stored, which is what the
 * .pad #4 unwind annotation describes; presumably this keeps the stack
 * 8-byte aligned across the call.
 */
348.pushsection .text.Ldiv0, "ax"
349Ldiv0:
350UNWIND(.fnstart)
351UNWIND(.pad #4)
352UNWIND(.save {lr})
353
354	str	lr, [sp, #-8]!			@ push lr, sp -= 8
355	bl	__div0
356	mov	r0, #0			@ About as wrong as it could be.
357	ldr	pc, [sp], #8			@ pop the saved lr into pc, sp += 8
358
359UNWIND(.fnend)
360ENDPROC(Ldiv0)
361.popsection
362
363/* Thumb-1 specialities */
364#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi - Thumb-1 jump-table helper, signed byte entries.
 *
 * In:   r0 = table index, lr = return address; the table is read from
 *       that address with bit 0 cleared.
 * Does: loads the signed byte at table[r0], doubles it and adds it to
 *       lr, then returns to the adjusted lr.
 * r1 is saved and restored; r0 is not modified.
 * Presumably called from compiler-generated switch code - the name
 * matches the libgcc helper.
 */
365.pushsection .text.__gnu_thumb1_case_sqi, "ax"
366ENTRY(__gnu_thumb1_case_sqi)
367	push	{r1}
368	mov	r1, lr
369	lsrs	r1, r1, #1
370	lsls	r1, r1, #1	/* r1 = lr with bit 0 cleared */
371	ldrsb	r1, [r1, r0]	/* signed byte entry for index r0 */
372	lsls	r1, r1, #1	/* entry * 2 */
373	add	lr, lr, r1
374	pop	{r1}
375	ret	lr
376ENDPROC(__gnu_thumb1_case_sqi)
377.popsection
378
/*
 * __gnu_thumb1_case_uqi - Thumb-1 jump-table helper, unsigned byte entries.
 *
 * In:   r0 = table index, lr = return address; the table is read from
 *       that address with bit 0 cleared.
 * Does: loads the unsigned byte at table[r0], doubles it and adds it to
 *       lr, then returns to the adjusted lr.
 * r1 is saved and restored; r0 is not modified.
 * Presumably called from compiler-generated switch code - the name
 * matches the libgcc helper.
 */
379.pushsection .text.__gnu_thumb1_case_uqi, "ax"
380ENTRY(__gnu_thumb1_case_uqi)
381	push	{r1}
382	mov	r1, lr
383	lsrs	r1, r1, #1
384	lsls	r1, r1, #1	/* r1 = lr with bit 0 cleared */
385	ldrb	r1, [r1, r0]	/* unsigned byte entry for index r0 */
386	lsls	r1, r1, #1	/* entry * 2 */
387	add	lr, lr, r1
388	pop	{r1}
389	ret	lr
390ENDPROC(__gnu_thumb1_case_uqi)
391.popsection
392
/*
 * __gnu_thumb1_case_shi - Thumb-1 jump-table helper, signed halfword
 * entries.
 *
 * In:   r0 = table index, lr = return address; the table is read from
 *       that address with bit 0 cleared.
 * Does: loads the signed halfword at byte offset r0 * 2 in the table,
 *       doubles it and adds it to lr, then returns to the adjusted lr.
 * r0 and r1 are saved and restored.
 * Presumably called from compiler-generated switch code - the name
 * matches the libgcc helper.
 */
393.pushsection .text.__gnu_thumb1_case_shi, "ax"
394ENTRY(__gnu_thumb1_case_shi)
395	push	{r0, r1}
396	mov	r1, lr
397	lsrs	r1, r1, #1
398	lsls	r0, r0, #1	/* index * 2: byte offset of the entry */
399	lsls	r1, r1, #1	/* r1 = lr with bit 0 cleared */
400	ldrsh	r1, [r1, r0]	/* signed halfword entry */
401	lsls	r1, r1, #1	/* entry * 2 */
402	add	lr, lr, r1
403	pop	{r0, r1}
404	ret	lr
405ENDPROC(__gnu_thumb1_case_shi)
406.popsection
407
/*
 * __gnu_thumb1_case_uhi - Thumb-1 jump-table helper, unsigned halfword
 * entries.
 *
 * In:   r0 = table index, lr = return address; the table is read from
 *       that address with bit 0 cleared.
 * Does: loads the unsigned halfword at byte offset r0 * 2 in the table,
 *       doubles it and adds it to lr, then returns to the adjusted lr.
 * r0 and r1 are saved and restored.
 * Presumably called from compiler-generated switch code - the name
 * matches the libgcc helper.
 */
408.pushsection .text.__gnu_thumb1_case_uhi, "ax"
409ENTRY(__gnu_thumb1_case_uhi)
410	push	{r0, r1}
411	mov	r1, lr
412	lsrs	r1, r1, #1
413	lsls	r0, r0, #1	/* index * 2: byte offset of the entry */
414	lsls	r1, r1, #1	/* r1 = lr with bit 0 cleared */
415	ldrh	r1, [r1, r0]	/* unsigned halfword entry */
416	lsls	r1, r1, #1	/* entry * 2 */
417	add	lr, lr, r1
418	pop	{r0, r1}
419	ret	lr
420ENDPROC(__gnu_thumb1_case_uhi)
421.popsection
422
423/* Taken and adapted from: https://github.com/gcc-mirror/gcc/blob/4f181f9c7ee3efc509d185fdfda33be9018f1611/libgcc/config/arm/lib1funcs.S#L2156 */
/*
 * __gnu_thumb1_case_si - Thumb-1 jump-table helper, 32-bit entries.
 *
 * In:   r0 = table index, lr = return address; the table base is
 *       (lr + 2) with the low two bits cleared, i.e. word aligned.
 * Does: loads the word at byte offset r0 * 4 in the table, adds the
 *       table base to it and jumps to the result through lr.
 * r0 and r1 are saved and restored; lr is overwritten with the target.
 */
424.pushsection .text.__gnu_thumb1_case_si, "ax"
425ENTRY(__gnu_thumb1_case_si)
426	push	{r0, r1}
427	mov	r1, lr
428	adds	r1, r1, #2	/* Align to word.  */
429	lsrs	r1, r1, #2
430	lsls	r0, r0, #2	/* index * 4: byte offset of the entry */
431	lsls	r1, r1, #2	/* r1 = word-aligned table base */
432	ldr	r0, [r1, r0]	/* 32-bit entry */
433	adds	r0, r0, r1	/* entry is relative to the table base */
434	mov	lr, r0
435	pop	{r0, r1}
436	mov	pc, lr		/* We know we were called from thumb code.  */
437ENDPROC(__gnu_thumb1_case_si)
438.popsection
439#endif
440