1214152Sed/* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
2214152Sed *
3214152Sed *                    The LLVM Compiler Infrastructure
4214152Sed *
5222656Sed * This file is dual licensed under the MIT and the University of Illinois Open
6222656Sed * Source Licenses. See LICENSE.TXT for details.
7214152Sed *
8214152Sed * ===----------------------------------------------------------------------===
9214152Sed *
10214152Sed * This file implements __udivmodti4 for the compiler_rt library.
11214152Sed *
12214152Sed * ===----------------------------------------------------------------------===
13214152Sed */
14214152Sed
15239138Sandrew#include "int_lib.h"
16239138Sandrew
17263763Sdim#ifdef CRT_HAS_128BIT
18214152Sed
19214152Sed/* Effects: if rem != 0, *rem = a % b
20214152Sed * Returns: a / b
21214152Sed */
22214152Sed
23214152Sed/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
24214152Sed
25214152Sedtu_int
26214152Sed__udivmodti4(tu_int a, tu_int b, tu_int* rem)
27214152Sed{
28214152Sed    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
29214152Sed    const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
30214152Sed    utwords n;
31214152Sed    n.all = a;
32214152Sed    utwords d;
33214152Sed    d.all = b;
34214152Sed    utwords q;
35214152Sed    utwords r;
36214152Sed    unsigned sr;
37214152Sed    /* special cases, X is unknown, K != 0 */
38214152Sed    if (n.s.high == 0)
39214152Sed    {
40214152Sed        if (d.s.high == 0)
41214152Sed        {
42214152Sed            /* 0 X
43214152Sed             * ---
44214152Sed             * 0 X
45214152Sed             */
46214152Sed            if (rem)
47214152Sed                *rem = n.s.low % d.s.low;
48214152Sed            return n.s.low / d.s.low;
49214152Sed        }
50214152Sed        /* 0 X
51214152Sed         * ---
52214152Sed         * K X
53214152Sed         */
54214152Sed        if (rem)
55214152Sed            *rem = n.s.low;
56214152Sed        return 0;
57214152Sed    }
58214152Sed    /* n.s.high != 0 */
59214152Sed    if (d.s.low == 0)
60214152Sed    {
61214152Sed        if (d.s.high == 0)
62214152Sed        {
63214152Sed            /* K X
64214152Sed             * ---
65214152Sed             * 0 0
66214152Sed             */
67214152Sed            if (rem)
68214152Sed                *rem = n.s.high % d.s.low;
69214152Sed            return n.s.high / d.s.low;
70214152Sed        }
71214152Sed        /* d.s.high != 0 */
72214152Sed        if (n.s.low == 0)
73214152Sed        {
74214152Sed            /* K 0
75214152Sed             * ---
76214152Sed             * K 0
77214152Sed             */
78214152Sed            if (rem)
79214152Sed            {
80214152Sed                r.s.high = n.s.high % d.s.high;
81214152Sed                r.s.low = 0;
82214152Sed                *rem = r.all;
83214152Sed            }
84214152Sed            return n.s.high / d.s.high;
85214152Sed        }
86214152Sed        /* K K
87214152Sed         * ---
88214152Sed         * K 0
89214152Sed         */
90214152Sed        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
91214152Sed        {
92214152Sed            if (rem)
93214152Sed            {
94214152Sed                r.s.low = n.s.low;
95214152Sed                r.s.high = n.s.high & (d.s.high - 1);
96214152Sed                *rem = r.all;
97214152Sed            }
98214152Sed            return n.s.high >> __builtin_ctzll(d.s.high);
99214152Sed        }
100214152Sed        /* K K
101214152Sed         * ---
102214152Sed         * K 0
103214152Sed         */
104214152Sed        sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
105214152Sed        /* 0 <= sr <= n_udword_bits - 2 or sr large */
106214152Sed        if (sr > n_udword_bits - 2)
107214152Sed        {
108214152Sed           if (rem)
109214152Sed                *rem = n.all;
110214152Sed            return 0;
111214152Sed        }
112214152Sed        ++sr;
113214152Sed        /* 1 <= sr <= n_udword_bits - 1 */
114214152Sed        /* q.all = n.all << (n_utword_bits - sr); */
115214152Sed        q.s.low = 0;
116214152Sed        q.s.high = n.s.low << (n_udword_bits - sr);
117214152Sed        /* r.all = n.all >> sr; */
118214152Sed        r.s.high = n.s.high >> sr;
119214152Sed        r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
120214152Sed    }
121214152Sed    else  /* d.s.low != 0 */
122214152Sed    {
123214152Sed        if (d.s.high == 0)
124214152Sed        {
125214152Sed            /* K X
126214152Sed             * ---
127214152Sed             * 0 K
128214152Sed             */
129214152Sed            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
130214152Sed            {
131214152Sed                if (rem)
132214152Sed                    *rem = n.s.low & (d.s.low - 1);
133214152Sed                if (d.s.low == 1)
134214152Sed                    return n.all;
135229135Sed                sr = __builtin_ctzll(d.s.low);
136214152Sed                q.s.high = n.s.high >> sr;
137214152Sed                q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
138214152Sed                return q.all;
139214152Sed            }
140214152Sed            /* K X
141214152Sed             * ---
142214152Sed             * 0 K
143214152Sed             */
144214152Sed            sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
145214152Sed                                   - __builtin_clzll(n.s.high);
146214152Sed            /* 2 <= sr <= n_utword_bits - 1
147214152Sed             * q.all = n.all << (n_utword_bits - sr);
148214152Sed             * r.all = n.all >> sr;
149214152Sed             * if (sr == n_udword_bits)
150214152Sed             * {
151214152Sed             *     q.s.low = 0;
152214152Sed             *     q.s.high = n.s.low;
153214152Sed             *     r.s.high = 0;
154214152Sed             *     r.s.low = n.s.high;
155214152Sed             * }
156214152Sed             * else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
157214152Sed             * {
158214152Sed             *     q.s.low = 0;
159214152Sed             *     q.s.high = n.s.low << (n_udword_bits - sr);
160214152Sed             *     r.s.high = n.s.high >> sr;
161214152Sed             *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
162214152Sed             * }
163214152Sed             * else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
164214152Sed             * {
165214152Sed             *     q.s.low = n.s.low << (n_utword_bits - sr);
166214152Sed             *     q.s.high = (n.s.high << (n_utword_bits - sr)) |
167214152Sed             *              (n.s.low >> (sr - n_udword_bits));
168214152Sed             *     r.s.high = 0;
169214152Sed             *     r.s.low = n.s.high >> (sr - n_udword_bits);
170214152Sed             * }
171214152Sed             */
172214152Sed            q.s.low =  (n.s.low << (n_utword_bits - sr)) &
173214152Sed                     ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
174214152Sed            q.s.high = ((n.s.low << ( n_udword_bits - sr))                        &
175214152Sed                     ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
176214152Sed                     (((n.s.high << (n_utword_bits - sr))                       |
177214152Sed                     (n.s.low >> (sr - n_udword_bits)))                         &
178214152Sed                     ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
179214152Sed            r.s.high = (n.s.high >> sr) &
180214152Sed                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
181214152Sed            r.s.low =  ((n.s.high >> (sr - n_udword_bits))                        &
182214152Sed                     ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
183214152Sed                     (((n.s.high << (n_udword_bits - sr))                       |
184214152Sed                     (n.s.low >> sr))                                           &
185214152Sed                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
186214152Sed        }
187214152Sed        else
188214152Sed        {
189214152Sed            /* K X
190214152Sed             * ---
191214152Sed             * K K
192214152Sed             */
193214152Sed            sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
194214152Sed            /*0 <= sr <= n_udword_bits - 1 or sr large */
195214152Sed            if (sr > n_udword_bits - 1)
196214152Sed            {
197214152Sed               if (rem)
198214152Sed                    *rem = n.all;
199214152Sed                return 0;
200214152Sed            }
201214152Sed            ++sr;
202214152Sed            /* 1 <= sr <= n_udword_bits */
203214152Sed            /* q.all = n.all << (n_utword_bits - sr); */
204214152Sed            q.s.low = 0;
205214152Sed            q.s.high = n.s.low << (n_udword_bits - sr);
206214152Sed            /* r.all = n.all >> sr;
207214152Sed             * if (sr < n_udword_bits)
208214152Sed             * {
209214152Sed             *     r.s.high = n.s.high >> sr;
210214152Sed             *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
211214152Sed             * }
212214152Sed             * else
213214152Sed             * {
214214152Sed             *     r.s.high = 0;
215214152Sed             *     r.s.low = n.s.high;
216214152Sed             * }
217214152Sed             */
218214152Sed            r.s.high = (n.s.high >> sr) &
219214152Sed                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
220214152Sed            r.s.low = (n.s.high << (n_udword_bits - sr)) |
221214152Sed                    ((n.s.low >> sr)                   &
222214152Sed                    ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
223214152Sed        }
224214152Sed    }
225214152Sed    /* Not a special case
226214152Sed     * q and r are initialized with:
227214152Sed     * q.all = n.all << (n_utword_bits - sr);
228214152Sed     * r.all = n.all >> sr;
229214152Sed     * 1 <= sr <= n_utword_bits - 1
230214152Sed     */
231214152Sed    su_int carry = 0;
232214152Sed    for (; sr > 0; --sr)
233214152Sed    {
234214152Sed        /* r:q = ((r:q)  << 1) | carry */
235214152Sed        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
236214152Sed        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
237214152Sed        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
238214152Sed        q.s.low  = (q.s.low  << 1) | carry;
239214152Sed        /* carry = 0;
240214152Sed         * if (r.all >= d.all)
241214152Sed         * {
242214152Sed         *     r.all -= d.all;
243214152Sed         *      carry = 1;
244214152Sed         * }
245214152Sed         */
246214152Sed        const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
247214152Sed        carry = s & 1;
248214152Sed        r.all -= d.all & s;
249214152Sed    }
250214152Sed    q.all = (q.all << 1) | carry;
251214152Sed    if (rem)
252214152Sed        *rem = r.all;
253214152Sed    return q.all;
254214152Sed}
255214152Sed
256263763Sdim#endif /* CRT_HAS_128BIT */
257