/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */
/*
 * Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * U-Boot compatibility bit: define an empty UNWIND() macro, since we do
 * not support stack unwinding, so that all of the functions below remain
 * available without diverging from the Linux code.
 */
#ifdef __UBOOT__
#define UNWIND(x...)
#endif

/*
 * ARM_DIV_BODY: core of the unsigned division used by __udivsi3/__divsi3.
 * On entry \dividend and \divisor are both non-zero and \divisor is not a
 * power of two (those cases are handled by the callers).  On exit
 * \result = \dividend / \divisor and \dividend holds the remainder.
 * \curbit is clobbered as scratch.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ ARMv5+: use CLZ to align the divisor's top bit with the
	@ dividend's top bit in one step instead of a shift loop.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four trial subtractions (one nibble) per pass.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER: compute \order = log2(\divisor) for a divisor already
 * known to be a power of two, so the callers can divide with a shift.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the highest set bit, 16/8/4 bits at a time,
	@ then resolve the final two bits arithmetically.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY: core of the unsigned modulus used by __umodsi3/__modsi3.
 * On exit \dividend holds \dividend % \divisor.  \order and \spare are
 * clobbered as scratch; \divisor is destroyed.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ ARMv5+: CLZ gives the shift count directly.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * unsigned __udivsi3(unsigned r0, unsigned r1)  [alias __aeabi_uidiv]
 * Returns r0 / r1 in r0.  Branches to Ldiv0 when r1 == 0.
 * Clobbers r2, r3 and flags.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ r2 = divisor - 1; Z: divisor == 1
	reteq	lr			@ divide by 1: result is the dividend
	bcc	Ldiv0			@ borrow => divisor was 0
	cmp	r0, r1
	bls	11f			@ dividend <= divisor: trivial answer
	tst	r1, r2			@ divisor a power of 2?
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

11:	moveq	r0, #1			@ dividend == divisor -> 1
	movne	r0, #0			@ dividend <  divisor -> 0
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2		@ power-of-2 divisor: just shift
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection

/*
 * unsigned __umodsi3(unsigned r0, unsigned r1)
 * Returns r0 % r1 in r0.  Branches to Ldiv0 when r1 == 0.
 * Clobbers r2, r3 and flags.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection

/*
 * int __divsi3(int r0, int r1)  [alias __aeabi_idiv]
 * Signed division: returns r0 / r1 in r0.  Branches to Ldiv0 when
 * r1 == 0.  Works on absolute values and fixes the sign at the end
 * (sign of the result is kept in ip).  Clobbers r2, r3, ip and flags.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1		@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0		@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0		@ negate result if signs differed
	ret	lr

10:	teq	ip, r0			@ same sign ?
	rsbmi	r0, r0, #0
	ret	lr

11:	movlo	r0, #0			@ |dividend| < divisor -> 0
	moveq	r0, ip, asr #31		@ |dividend| == divisor -> +/-1
	orreq	r0, r0, #1
	ret	lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2		@ power-of-2 divisor: shift
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection

/*
 * int __modsi3(int r0, int r1)
 * Signed modulus: returns r0 % r1 in r0 with the sign of the dividend
 * (C semantics).  Branches to Ldiv0 when r1 == 0.
 * Clobbers r2, r3, ip and flags.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ if negative make positive
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0		@ remainder takes dividend's sign
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection

/*
 * __aeabi_uidivmod: AEABI combined unsigned divide/modulus.
 * In:  r0 = numerator, r1 = denominator.
 * Out: r0 = quotient, r1 = remainder (remainder = n - q * d).
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = orig numerator, r2 = orig denominator
	mul	r3, r0, r2
	sub	r1, r1, r3		@ remainder = numerator - quotient * denominator
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection

/*
 * __aeabi_idivmod: AEABI combined signed divide/modulus.
 * In:  r0 = numerator, r1 = denominator.
 * Out: r0 = quotient, r1 = remainder.
 */
.pushsection .text.__aeabi_idivmod, "ax"	@ was .text.__aeabi_uidivmod: copy-paste slip broke per-function --gc-sections
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = orig numerator, r2 = orig denominator
	mul	r3, r0, r2
	sub	r1, r1, r3		@ remainder = numerator - quotient * denominator
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection

/*
 * Ldiv0: common division-by-zero trap.  Calls __div0 (which may not
 * return) and then returns 0 to the original caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection

/* Thumb-1 specialities */
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi: gcc switch-table helper for signed byte tables.
 * The table of signed-byte offsets follows the call site (lr points just
 * past the bl); r0 is the case index.  Jumps to lr + 2 * table[r0].
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ clear the Thumb bit to address the table
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1		@ offsets are in halfwords
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection

/*
 * __gnu_thumb1_case_uqi: as above, for unsigned byte offset tables.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection

/*
 * __gnu_thumb1_case_shi: as above, for signed halfword offset tables
 * (index is scaled by 2 to address halfword entries).
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		@ scale index to halfword entries
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection

/*
 * __gnu_thumb1_case_uhi: as above, for unsigned halfword offset tables.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	ret	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection

/* Taken and adapted from: https://github.com/gcc-mirror/gcc/blob/4f181f9c7ee3efc509d185fdfda33be9018f1611/libgcc/config/arm/lib1funcs.S#L2156 */
/*
 * __gnu_thumb1_case_si: as above, for word offset tables; entries hold
 * addresses relative to the (word-aligned) table base.
 */
.pushsection .text.__gnu_thumb1_case_si, "ax"
ENTRY(__gnu_thumb1_case_si)
	push	{r0, r1}
	mov	r1, lr
	adds	r1, r1, #2		/* Align to word. */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2		@ scale index to word entries
	lsls	r1, r1, #2
	ldr	r0, [r1, r0]
	adds	r0, r0, r1		@ entry is relative to the table base
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr			/* We know we were called from thumb code. */
ENDPROC(__gnu_thumb1_case_si)
.popsection
#endif