140874Smsmith/*- 240874Smsmith * Copyright (c) 1992, 1993 340874Smsmith * The Regents of the University of California. All rights reserved. 440874Smsmith * 540874Smsmith * This software was developed by the Computer Systems Engineering group 640874Smsmith * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 740874Smsmith * contributed to Berkeley. 840874Smsmith * 940874Smsmith * Redistribution and use in source and binary forms, with or without 1040874Smsmith * modification, are permitted provided that the following conditions 1140874Smsmith * are met: 1240874Smsmith * 1. Redistributions of source code must retain the above copyright 1340874Smsmith * notice, this list of conditions and the following disclaimer. 1440874Smsmith * 2. Redistributions in binary form must reproduce the above copyright 1540874Smsmith * notice, this list of conditions and the following disclaimer in the 1640874Smsmith * documentation and/or other materials provided with the distribution. 1740874Smsmith * 4. Neither the name of the University nor the names of its contributors 1840874Smsmith * may be used to endorse or promote products derived from this software 1940874Smsmith * without specific prior written permission. 2040874Smsmith * 2140874Smsmith * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2240874Smsmith * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2340874Smsmith * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2440874Smsmith * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2540874Smsmith * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2640874Smsmith * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2740874Smsmith * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2840874Smsmith * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2940874Smsmith * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3040874Smsmith * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3140874Smsmith * SUCH DAMAGE. 3240874Smsmith * 3340874Smsmith * From: Id: qdivrem.c,v 1.7 1997/11/07 09:20:40 phk Exp 3440874Smsmith */ 3540874Smsmith 3684221Sdillon#include <sys/cdefs.h> 3784221Sdillon__FBSDID("$FreeBSD$"); 3884221Sdillon 3940874Smsmith/* 4040874Smsmith * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 4140874Smsmith * section 4.3.1, pp. 257--259. 4240874Smsmith */ 4340874Smsmith 4440874Smsmith#include "quad.h" 4540874Smsmith 4640874Smsmith#define B (1 << HALF_BITS) /* digit base */ 4740874Smsmith 4840874Smsmith/* Combine two `digits' to make a single two-digit number. */ 49271134Semaste#define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b)) 5040874Smsmith 51271134Semaste_Static_assert(sizeof(int) / 2 == sizeof(short), 52271134Semaste "Bitwise functions in libstand are broken on this architecture\n"); 53271134Semaste 5440874Smsmith/* select a type for digits in base B: use unsigned short if they fit */ 5540874Smsmithtypedef unsigned short digit; 5640874Smsmith 5740874Smsmith/* 5840874Smsmith * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 5940874Smsmith * `fall out' the left (there never will be any such anyway). 6040874Smsmith * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 6140874Smsmith */ 6240874Smsmithstatic void 6392913Sobrienshl(digit *p, int len, int sh) 6440874Smsmith{ 6592913Sobrien int i; 6640874Smsmith 6740874Smsmith for (i = 0; i < len; i++) 6840874Smsmith p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh)); 6940874Smsmith p[i] = LHALF(p[i] << sh); 7040874Smsmith} 7140874Smsmith 7240874Smsmith/* 7340874Smsmith * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 7440874Smsmith * 7540874Smsmith * We do this in base 2-sup-HALF_BITS, so that all intermediate products 76271134Semaste * fit within u_int. As a consequence, the maximum length dividend and 7740874Smsmith * divisor are 4 `digits' in this base (they are shorter if they have 7840874Smsmith * leading zeros). 7940874Smsmith */ 8040874Smsmithu_quad_t 8140874Smsmith__qdivrem(uq, vq, arq) 8240874Smsmith u_quad_t uq, vq, *arq; 8340874Smsmith{ 8440874Smsmith union uu tmp; 8540874Smsmith digit *u, *v, *q; 8692913Sobrien digit v1, v2; 87271134Semaste u_int qhat, rhat, t; 8840874Smsmith int m, n, d, j, i; 8940874Smsmith digit uspace[5], vspace[5], qspace[5]; 9040874Smsmith 9140874Smsmith /* 9240874Smsmith * Take care of special cases: divide by zero, and u < v. 9340874Smsmith */ 9440874Smsmith if (vq == 0) { 9540874Smsmith /* divide by zero. */ 9640874Smsmith static volatile const unsigned int zero = 0; 9740874Smsmith 9840874Smsmith tmp.ul[H] = tmp.ul[L] = 1 / zero; 9940874Smsmith if (arq) 10040874Smsmith *arq = uq; 10140874Smsmith return (tmp.q); 10240874Smsmith } 10340874Smsmith if (uq < vq) { 10440874Smsmith if (arq) 10540874Smsmith *arq = uq; 10640874Smsmith return (0); 10740874Smsmith } 10840874Smsmith u = &uspace[0]; 10940874Smsmith v = &vspace[0]; 11040874Smsmith q = &qspace[0]; 11140874Smsmith 11240874Smsmith /* 11340874Smsmith * Break dividend and divisor into digits in base B, then 11440874Smsmith * count leading zeros to determine m and n. When done, we 11540874Smsmith * will have: 11640874Smsmith * u = (u[1]u[2]...u[m+n]) sub B 11740874Smsmith * v = (v[1]v[2]...v[n]) sub B 11840874Smsmith * v[1] != 0 11940874Smsmith * 1 < n <= 4 (if n = 1, we use a different division algorithm) 12040874Smsmith * m >= 0 (otherwise u < v, which we already checked) 12140874Smsmith * m + n = 4 12240874Smsmith * and thus 12340874Smsmith * m = 4 - n <= 2 12440874Smsmith */ 12540874Smsmith tmp.uq = uq; 12640874Smsmith u[0] = 0; 12740874Smsmith u[1] = HHALF(tmp.ul[H]); 12840874Smsmith u[2] = LHALF(tmp.ul[H]); 12940874Smsmith u[3] = HHALF(tmp.ul[L]); 13040874Smsmith u[4] = LHALF(tmp.ul[L]); 13140874Smsmith tmp.uq = vq; 13240874Smsmith v[1] = HHALF(tmp.ul[H]); 13340874Smsmith v[2] = LHALF(tmp.ul[H]); 13440874Smsmith v[3] = HHALF(tmp.ul[L]); 13540874Smsmith v[4] = LHALF(tmp.ul[L]); 13640874Smsmith for (n = 4; v[1] == 0; v++) { 13740874Smsmith if (--n == 1) { 138271134Semaste u_int rbj; /* r*B+u[j] (not root boy jim) */ 13940874Smsmith digit q1, q2, q3, q4; 14040874Smsmith 14140874Smsmith /* 14240874Smsmith * Change of plan, per exercise 16. 14340874Smsmith * r = 0; 14440874Smsmith * for j = 1..4: 14540874Smsmith * q[j] = floor((r*B + u[j]) / v), 14640874Smsmith * r = (r*B + u[j]) % v; 14740874Smsmith * We unroll this completely here. 14840874Smsmith */ 14940874Smsmith t = v[2]; /* nonzero, by definition */ 15040874Smsmith q1 = u[1] / t; 15140874Smsmith rbj = COMBINE(u[1] % t, u[2]); 15240874Smsmith q2 = rbj / t; 15340874Smsmith rbj = COMBINE(rbj % t, u[3]); 15440874Smsmith q3 = rbj / t; 15540874Smsmith rbj = COMBINE(rbj % t, u[4]); 15640874Smsmith q4 = rbj / t; 15740874Smsmith if (arq) 15840874Smsmith *arq = rbj % t; 15940874Smsmith tmp.ul[H] = COMBINE(q1, q2); 16040874Smsmith tmp.ul[L] = COMBINE(q3, q4); 16140874Smsmith return (tmp.q); 16240874Smsmith } 16340874Smsmith } 16440874Smsmith 16540874Smsmith /* 16640874Smsmith * By adjusting q once we determine m, we can guarantee that 16740874Smsmith * there is a complete four-digit quotient at &qspace[1] when 16840874Smsmith * we finally stop. 16940874Smsmith */ 17040874Smsmith for (m = 4 - n; u[1] == 0; u++) 17140874Smsmith m--; 17240874Smsmith for (i = 4 - m; --i >= 0;) 17340874Smsmith q[i] = 0; 17440874Smsmith q += 4 - m; 17540874Smsmith 17640874Smsmith /* 17740874Smsmith * Here we run Program D, translated from MIX to C and acquiring 17840874Smsmith * a few minor changes. 17940874Smsmith * 18040874Smsmith * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 18140874Smsmith */ 18240874Smsmith d = 0; 18340874Smsmith for (t = v[1]; t < B / 2; t <<= 1) 18440874Smsmith d++; 18540874Smsmith if (d > 0) { 18640874Smsmith shl(&u[0], m + n, d); /* u <<= d */ 18740874Smsmith shl(&v[1], n - 1, d); /* v <<= d */ 18840874Smsmith } 18940874Smsmith /* 19040874Smsmith * D2: j = 0. 19140874Smsmith */ 19240874Smsmith j = 0; 19340874Smsmith v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 19440874Smsmith v2 = v[2]; /* for D3 */ 19540874Smsmith do { 19692913Sobrien digit uj0, uj1, uj2; 19740874Smsmith 19840874Smsmith /* 19940874Smsmith * D3: Calculate qhat (\^q, in TeX notation). 20040874Smsmith * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 20140874Smsmith * let rhat = (u[j]*B + u[j+1]) mod v[1]. 20240874Smsmith * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 20340874Smsmith * decrement qhat and increase rhat correspondingly. 20440874Smsmith * Note that if rhat >= B, v[2]*qhat < rhat*B. 20540874Smsmith */ 20640874Smsmith uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 20740874Smsmith uj1 = u[j + 1]; /* for D3 only */ 20840874Smsmith uj2 = u[j + 2]; /* for D3 only */ 20940874Smsmith if (uj0 == v1) { 21040874Smsmith qhat = B; 21140874Smsmith rhat = uj1; 21240874Smsmith goto qhat_too_big; 21340874Smsmith } else { 214271134Semaste u_int nn = COMBINE(uj0, uj1); 21540874Smsmith qhat = nn / v1; 21640874Smsmith rhat = nn % v1; 21740874Smsmith } 21840874Smsmith while (v2 * qhat > COMBINE(rhat, uj2)) { 21940874Smsmith qhat_too_big: 22040874Smsmith qhat--; 22140874Smsmith if ((rhat += v1) >= B) 22240874Smsmith break; 22340874Smsmith } 22440874Smsmith /* 22540874Smsmith * D4: Multiply and subtract. 22640874Smsmith * The variable `t' holds any borrows across the loop. 22740874Smsmith * We split this up so that we do not require v[0] = 0, 22840874Smsmith * and to eliminate a final special case. 22940874Smsmith */ 23040874Smsmith for (t = 0, i = n; i > 0; i--) { 23140874Smsmith t = u[i + j] - v[i] * qhat - t; 23240874Smsmith u[i + j] = LHALF(t); 23340874Smsmith t = (B - HHALF(t)) & (B - 1); 23440874Smsmith } 23540874Smsmith t = u[j] - t; 23640874Smsmith u[j] = LHALF(t); 23740874Smsmith /* 23840874Smsmith * D5: test remainder. 23940874Smsmith * There is a borrow if and only if HHALF(t) is nonzero; 24040874Smsmith * in that (rare) case, qhat was too large (by exactly 1). 24140874Smsmith * Fix it by adding v[1..n] to u[j..j+n]. 24240874Smsmith */ 24340874Smsmith if (HHALF(t)) { 24440874Smsmith qhat--; 24540874Smsmith for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 24640874Smsmith t += u[i + j] + v[i]; 24740874Smsmith u[i + j] = LHALF(t); 24840874Smsmith t = HHALF(t); 24940874Smsmith } 25040874Smsmith u[j] = LHALF(u[j] + t); 25140874Smsmith } 25240874Smsmith q[j] = qhat; 25340874Smsmith } while (++j <= m); /* D7: loop on j. */ 25440874Smsmith 25540874Smsmith /* 25640874Smsmith * If caller wants the remainder, we have to calculate it as 25740874Smsmith * u[m..m+n] >> d (this is at most n digits and thus fits in 25840874Smsmith * u[m+1..m+n], but we may need more source digits). 25940874Smsmith */ 26040874Smsmith if (arq) { 26140874Smsmith if (d) { 26240874Smsmith for (i = m + n; i > m; --i) 26340874Smsmith u[i] = (u[i] >> d) | 26440874Smsmith LHALF(u[i - 1] << (HALF_BITS - d)); 26540874Smsmith u[i] = 0; 26640874Smsmith } 26740874Smsmith tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 26840874Smsmith tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 26940874Smsmith *arq = tmp.q; 27040874Smsmith } 27140874Smsmith 27240874Smsmith tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 27340874Smsmith tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 27440874Smsmith return (tmp.q); 27540874Smsmith} 27640874Smsmith 27740874Smsmith/* 27840874Smsmith * Divide two unsigned quads. 27940874Smsmith */ 28040874Smsmith 28140874Smsmithu_quad_t 28240874Smsmith__udivdi3(a, b) 28340874Smsmith u_quad_t a, b; 28440874Smsmith{ 28540874Smsmith 28640874Smsmith return (__qdivrem(a, b, (u_quad_t *)0)); 28740874Smsmith} 28840874Smsmith 28940874Smsmith/* 29040874Smsmith * Return remainder after dividing two unsigned quads. 29140874Smsmith */ 29240874Smsmithu_quad_t 29340874Smsmith__umoddi3(a, b) 29440874Smsmith u_quad_t a, b; 29540874Smsmith{ 29640874Smsmith u_quad_t r; 29740874Smsmith 29840874Smsmith (void)__qdivrem(a, b, &r); 29940874Smsmith return (r); 30040874Smsmith} 30196525Sphk 30296525Sphk/* 30396525Sphk * Divide two signed quads. 30496525Sphk * ??? if -1/2 should produce -1 on this machine, this code is wrong 30596525Sphk */ 30696525Sphkquad_t 30796525Sphk__divdi3(a, b) 30896525Sphk quad_t a, b; 30996525Sphk{ 31096525Sphk u_quad_t ua, ub, uq; 31196525Sphk int neg; 31296525Sphk 31396525Sphk if (a < 0) 31496525Sphk ua = -(u_quad_t)a, neg = 1; 31596525Sphk else 31696525Sphk ua = a, neg = 0; 31796525Sphk if (b < 0) 31896525Sphk ub = -(u_quad_t)b, neg ^= 1; 31996525Sphk else 32096525Sphk ub = b; 32196525Sphk uq = __qdivrem(ua, ub, (u_quad_t *)0); 32296525Sphk return (neg ? -uq : uq); 32396525Sphk} 32496525Sphk 32596525Sphk/* 32696525Sphk * Return remainder after dividing two signed quads. 32796525Sphk * 32896525Sphk * XXX 32996525Sphk * If -1/2 should produce -1 on this machine, this code is wrong. 33096525Sphk */ 33196525Sphkquad_t 33296525Sphk__moddi3(a, b) 33396525Sphk quad_t a, b; 33496525Sphk{ 33596525Sphk u_quad_t ua, ub, ur; 33696525Sphk int neg; 33796525Sphk 33896525Sphk if (a < 0) 33996525Sphk ua = -(u_quad_t)a, neg = 1; 34096525Sphk else 34196525Sphk ua = a, neg = 0; 34296525Sphk if (b < 0) 34396525Sphk ub = -(u_quad_t)b; 34496525Sphk else 34596525Sphk ub = b; 34696525Sphk (void)__qdivrem(ua, ub, &ur); 34796525Sphk return (neg ? -ur : ur); 34896525Sphk} 349