Yet Another eXchange Tool  0.9.0
xt_arithmetic_long.h
Go to the documentation of this file.
1 
12 /*
13  * Keywords:
14  * Maintainer: Jrg Behrens <behrens@dkrz.de>
15  * Moritz Hanke <hanke@dkrz.de>
16  * Thomas Jahns <jahns@dkrz.de>
17  * URL: https://doc.redmine.dkrz.de/yaxt/html/
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions are
21  * met:
22  *
23  * Redistributions of source code must retain the above copyright notice,
24  * this list of conditions and the following disclaimer.
25  *
26  * Redistributions in binary form must reproduce the above copyright
27  * notice, this list of conditions and the following disclaimer in the
28  * documentation and/or other materials provided with the distribution.
29  *
30  * Neither the name of the DKRZ GmbH nor the names of its contributors
31  * may be used to endorse or promote products derived from this software
32  * without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
35  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
36  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
37  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
38  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
39  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
40  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
41  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  */
46 #ifndef XT_ARITHMETIC_LONG_H
47 #define XT_ARITHMETIC_LONG_H
48 
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52 
53 #include <stdbool.h>
54 
55 #include "xt/xt_core.h"
56 #include "xt_arithmetic_util.h"
57 
58 enum {
59  xt_int_bits = sizeof (Xt_int) * CHAR_BIT,
60 };
61 /* since the computation of the next overlap of two stripes might
62  * overflow Xt_int (and indeed need 3*xt_int_bits to represent), what
63  * follows is an implementation of double- and triple-length integer
64  * arithmetic, depending on whether a type twice the size of Xt_int
65  * is available.
66  *
67  * The implementation follows recipes from Henry S. Warren, Jr.;
68  * Hacker's Delight, 2-16 and 8-2 (first edition)
69  */
70 #ifdef XT_LONG
71 typedef XT_LONG Xt_long;
72 typedef XT_ULONG Xt_ulong;
76 static inline int
77 xlsign(Xt_long x)
78 {
79  return (x >= 0) - (x < 0);
80 }
81 
82 static inline Xt_long
83 xlabs(Xt_long x)
84 {
85  return x < 0 ? -x : x;
86 }
87 
88 static inline Xt_uint
89 xlhi(Xt_long x)
90 {
91  return (Xt_uint)(x >> xt_int_bits);
92 }
93 
94 static inline Xt_uint
95 xllo(Xt_long x)
96 {
97  return (Xt_uint)x;
98 }
99 
100 
101 /* is a represntable as Xt_int? */
102 static inline bool
104 {
105  return (a <= XT_INT_MAX) & (a >= XT_INT_MIN);
106 }
107 
108 static inline Xt_long
109 xiimul(Xt_int a, Xt_int b)
110 {
111  return (Xt_long)a * b;
112 }
113 
114 typedef struct { Xt_uint hi; Xt_ulong midlo; } Xt_tword;
115 
116 static inline Xt_uint
117 xthi(Xt_tword x)
118 {
119  return x.hi;
120 }
121 
122 static inline Xt_uint
123 xtmid(Xt_tword x)
124 {
125  return (Xt_uint)(x.midlo >> xt_int_bits);
126 }
127 
128 static inline Xt_uint
129 xtlo(Xt_tword x)
130 {
131  return (Xt_uint)x.midlo;
132 }
133 
134 static inline Xt_tword
135 xlimulu(Xt_long a, Xt_uint b)
136 {
137  Xt_tword r = { 0U, 0U };
138 
139  Xt_ulong t = (Xt_ulong)(Xt_uint)a*b;
140  Xt_uint lo = (Xt_uint)t;
141  t = (Xt_ulong)((Xt_ulong)a >> xt_int_bits)*b + (t >> xt_int_bits);
142  Xt_uint mid = (Xt_uint)t;
143  r.hi = (Xt_uint)(t >> xt_int_bits);
144 
145  // Now r has the unsigned product. Correct by
146  // subtracting b*2**(xt_int_bits*2) if a < 0
147 
148  if (a < 0) {
149  r.hi = (Xt_uint)(r.hi - (Xt_uint)b);
150  }
151  r.midlo = ((Xt_ulong)mid << xt_int_bits) | lo;
152  return r;
153 }
154 
155 static inline Xt_tword
156 xlimul(Xt_long a, Xt_int b)
157 {
158  Xt_tword r = { 0U, 0U };
159 
160  Xt_ulong t = (Xt_ulong)(Xt_uint)a*(Xt_uint)b;
161  Xt_uint lo = (Xt_uint)t;
162  t = (Xt_ulong)((Xt_ulong)a >> xt_int_bits)*(Xt_uint)b + (t >> xt_int_bits);
163  Xt_uint mid = (Xt_uint)t;
164  r.hi = (Xt_uint)(t >> xt_int_bits);
165 
166  // Now r has the unsigned product. Correct by
167  // subtracting b*2**(xt_int_bits*2) if a < 0, and
168  // subtracting a*2**xt_int_bits if b < 0.
169 
170  if (a < 0) {
171  r.hi = (Xt_uint)(r.hi - (Xt_uint)b);
172  }
173  if (b < 0) {
174  t = (Xt_ulong)mid - (Xt_uint)a;
175  mid = (Xt_uint)t;
176  Xt_uint borrow = (Xt_uint)t >> (xt_int_bits-1);
177  r.hi = (Xt_uint)(r.hi - (Xt_uint)((Xt_ulong)a >> xt_int_bits) - borrow);
178  }
179  r.midlo = ((Xt_ulong)mid << xt_int_bits) | lo;
180  return r;
181 }
182 
183 static inline int
185 {
186  return (a.hi == b.hi) & (a.midlo == b.midlo);
187 }
188 
189 static inline int
191 {
192  return a == b;
193 }
194 
195 
196 #else
197 /* stores in one-complement form */
198 typedef struct { Xt_uint hi, lo; } Xt_long;
200 
204 static inline int
206 {
207  int sign_bit = (int)(x.hi >> (xt_int_bits - 1));
208  return (sign_bit^1) - sign_bit;
209 }
210 
211 static inline Xt_long
213 {
214  Xt_long r = { .lo = (Xt_uint)a,
215  .hi = (Xt_uint)(Xt_isign_mask(a)) };
216  return r;
217 }
218 
219 static inline Xt_long
221 {
222  Xt_uint sign_mask = (Xt_uint)(Xt_isign_mask((Xt_int)a.hi));
223  Xt_uint borrow = sign_mask & (Xt_uint)(-(Xt_int)a.lo != 0);
224  Xt_long r = { .hi = ((a.hi + sign_mask) ^ sign_mask) - borrow,
225  .lo = (a.lo + sign_mask) ^ sign_mask };
226  return r;
227 }
228 
229 static inline Xt_uint
231 {
232  return x.hi;
233 }
234 
235 static inline Xt_uint
237 {
238  return x.lo;
239 }
240 
241 static inline Xt_long
242 xlnegate(Xt_long a, bool negate)
243 {
244  Xt_uint borrow = (Xt_uint)((Xt_uint)negate & (Xt_uint)(-(Xt_int)a.lo != 0));
245  Xt_long r = { .hi = (a.hi ^ (Xt_uint)-(Xt_int)negate) + negate - borrow,
246  .lo = (a.lo ^ (Xt_uint)-(Xt_int)negate) + negate };
247  return r;
248 }
249 
250 /* is a represntable as Xt_int? */
251 static inline bool
253 {
254  return Xt_isign_mask((Xt_int)a.lo) == (Xt_int)a.hi;
255 }
256 
257 static inline Xt_long
259 {
260  Xt_uint al = (Xt_uint)a, bl = (Xt_uint)b,
261  ah = (Xt_uint)(Xt_isign_mask(a)), bh = (Xt_uint)(Xt_isign_mask(b));
262  Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
263  Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
264  return r;
265 }
266 
267 static inline Xt_long
269 {
270  Xt_uint al = a.lo, bl = (Xt_uint)b,
271  ah = a.hi, bh = (Xt_uint)(Xt_isign_mask(b));
272  Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
273  Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
274  return r;
275 }
276 
277 /* returns a incremented by value of b, i.e. zero or one */
278 static inline Xt_long
279 xlinc(Xt_long a, bool b)
280 {
281  Xt_uint al = a.lo, bl = (Xt_uint)b, ah = a.hi;
282  Xt_uint carry = bl & !~al;
283  Xt_long r = { .lo = al + bl, .hi = ah + carry };
284  return r;
285 }
286 
287 static inline Xt_long
289 {
290  Xt_uint al = a.lo, bl = b.lo, ah = a.hi, bh = b.hi;
291  Xt_uint carry = ((al & bl) | ((al | bl) & ~(al + bl))) >> (xt_int_bits-1);
292  Xt_long r = { .lo = al + bl, .hi = ah + bh + carry };
293  return r;
294 }
295 
296 static inline Xt_long
298 {
299  Xt_uint al = (Xt_uint)a, bl = (Xt_uint)b,
300  ah = (Xt_uint)(Xt_isign_mask(a)), bh = (Xt_uint)(Xt_isign_mask(b));
301  Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
302  Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
303  return r;
304 }
305 
306 static inline Xt_long
308 {
309  Xt_uint al = a.lo, bl = b.lo, ah = a.hi, bh = b.hi;
310  Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
311  Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
312  return r;
313 }
314 
315 static inline Xt_long
317 {
318  Xt_uint al = a.lo, ah = a.hi;
319  Xt_uint carry
320  = ((~al & (Xt_uint)b) | ((~(al ^ (Xt_uint)b)) & (al - (Xt_uint)b)))
321  >> (xt_int_bits-1);
322  Xt_long r = { .lo = al - (Xt_uint)b, .hi = ah - carry };
323  return r;
324 }
325 
326 static inline Xt_long
328 {
329  Xt_uint al = (Xt_uint)a, bl = b.lo,
330  ah = (Xt_uint)(Xt_isign_mask(a)), bh = b.hi;
331  Xt_uint carry = ((~al & bl) | ((~(al ^ bl)) & (al - bl))) >> (xt_int_bits-1);
332  Xt_long r = { .lo = al - bl, .hi = ah - bh - carry };
333  return r;
334 }
335 
336 enum {
338 };
339 
340 /* this is implemented following H.S.Warren Hacker's Delight mulhs (8-2) */
341 static inline Xt_long
343 {
344  const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
345  Xt_uint a_lo = (Xt_uint)a & lo_mask,
346  b_lo = (Xt_uint)b & lo_mask,
347  a_hi = (Xt_uint)(a >> xt_hint_bits),
348  b_hi = (Xt_uint)(b >> xt_hint_bits),
349  lo_prod = a_lo*b_lo;
350  Xt_int t = (Xt_int)(a_hi*b_lo + (lo_prod >> xt_hint_bits));
351  Xt_int w1 = (Xt_int)((Xt_uint)t & lo_mask),
352  w2 = t >> xt_hint_bits;
353  w1 = (Xt_int)(a_lo*b_hi + (Xt_uint)w1);
354  Xt_long r = { .hi = a_hi * b_hi + (Xt_uint)w2 + (Xt_uint)(w1 >> xt_hint_bits),
355  .lo = (Xt_uint)(a * b) };
356  return r;
357 }
358 
359 typedef struct { Xt_uint hi, mid, lo; } Xt_tword;
360 
361 static inline Xt_uint
363 {
364  return x.hi;
365 }
366 
367 static inline Xt_uint
369 {
370  return x.mid;
371 }
372 
373 static inline Xt_uint
375 {
376  return x.lo;
377 }
378 
379 static inline Xt_tword
381 {
382  const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
383  Xt_tword r;
384 
385  Xt_uint bl = (Xt_uint)b & lo_mask, bh = (Xt_uint)b >> xt_hint_bits,
386  a0 = a.lo & lo_mask, a1 = a.lo >> xt_hint_bits,
387  a2 = a.hi & lo_mask, a3 = a.hi >> xt_hint_bits;
388  Xt_uint t = a0*bl;
389  Xt_uint accum = t & lo_mask;
390  Xt_uint k;
391  t = a1*bl + (t >> xt_hint_bits);
392  r.lo = accum | (t << xt_hint_bits);
393  k = t >> xt_hint_bits;
394  t = a2*bl + k;
395  accum = t & lo_mask;
396  k = t >> xt_hint_bits;
397  t = a3*bl + k;
398  r.mid = accum | (t << xt_hint_bits);
399  r.hi = t >> xt_hint_bits;
400  t = a0*bh + (r.lo >> xt_hint_bits);
401  r.lo = (r.lo & lo_mask) | (t << xt_hint_bits);
402  k = t >> xt_hint_bits;
403  t = a1*bh + (r.mid & lo_mask) + k;
404  accum = t & lo_mask;
405  k = t >> xt_hint_bits;
406  t = a2*bh + (r.mid >> xt_hint_bits) + k;
407  r.mid = accum | (t << xt_hint_bits);
408  k = t >> xt_hint_bits;
409  r.hi += a3*bh + k;
410 
411  // Now r has the unsigned product. Correct by
412  // subtracting b*2**(xt_int_bits*2) if a < 0, and
413  // subtracting a*2**xt_int_bits if b < 0.
414 
415  if ((Xt_int)a.hi < 0) {
416  r.hi -= (Xt_uint)b;
417  }
418  if (b < 0) {
419  t = r.mid - a.lo;
420  r.mid = (Xt_uint)t;
421  Xt_uint borrow = t >> (xt_int_bits-1);
422  r.hi -= a.hi + borrow;
423  }
424  return r;
425 }
426 
427 typedef struct { Xt_int quot, rem; } Xt_idiv;
428 typedef struct { Xt_long quot, rem; } Xt_ldiv;
429 
430 /* divide long unsigned value a by b, from
431  * H.S.Warren, Hacker's Delight,
432  * this code is only valid if the result fits an Xt_uint
433  */
434 /* This version is divlu1 except with outer loop unrolled, and array un
435 changed into local variables. Several of the variables below could be
436 "short," but having them fullwords gives better code on gcc/Intel.
437 Statistics: Based on one million executions of this program, with
438 uniformly distributed random values for the dividend and divisor, the
439 number of times in each loop per execution of the program is:
440 
441 again1: 0.4060
442 again2: 0.3469
443 
444 This is the version that's in the hacker book. */
445 static inline Xt_idiv
447 {
448  const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
449  const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
450  Xt_uint un1, un0, // Norm. dividend LSD's.
451  ah = a.hi, al = a.lo,
452  vn1, vn0, // Norm. divisor digits.
453  q1, q0, // Quotient digits.
454  un32, un21, un10, // Dividend digit pairs.
455  rhat; // A remainder.
456 
457  if (ah >= b) // overflow, set remainder to impossible value
458  return (Xt_idiv){ .quot = (Xt_int)~(Xt_uint)0, .rem = (Xt_int)~(Xt_uint)0 };
459 
460  // Shift amount for norm.
461  int s = xinlz(b); // 0 <= s <= xt_int_bits.
462  enum { int_bits = sizeof (int) * CHAR_BIT };
463  b = b << s; // Normalize divisor.
464  vn1 = b >> xt_hint_bits; // Break divisor up into
465  vn0 = b & lo_mask; // two half-words
466 
467  un32 = (ah << s)
468  | ((al >> (xt_int_bits - s)) & (Xt_uint)(-s >> (int_bits-1)));
469  un10 = al << s; // Shift dividend left.
470 
471  un1 = un10 >> xt_hint_bits; // Break right half of
472  un0 = un10 & lo_mask; // dividend into two digits.
473 
474  q1 = un32/vn1; // Compute the first
475  rhat = un32 - q1*vn1; // quotient digit, q1.
476  while (q1 >= base || q1*vn0 > base*rhat + un1) {
477  q1 -= 1U;
478  rhat += vn1;
479  if (rhat >= base) break;
480  }
481 
482  un21 = un32*base + un1 - q1*b; // Multiply and subtract.
483 
484  q0 = un21/vn1; // Compute the second
485  rhat = un21 - q0*vn1; // quotient digit, q0.
486  while (q0 >= base || q0*vn0 > base*rhat + un0) {
487  q0 -= 1U;
488  rhat += vn1;
489  if (rhat >= base) break;
490  }
491 
492  return (Xt_idiv){ .quot = (Xt_int)(q1*base + q0),
493  .rem = (Xt_int)((un21*base + un0 - q0*b) >> s)};
494 }
495 
496 typedef XT_SHORT Xt_short;
497 typedef unsigned XT_SHORT Xt_ushort;
498 
499 
500 /* q[0], r[0], u[0], and v[0] contain the LEAST significant halfwords.
501 (The sequence is in little-endian order).
502 
503 This first version is a fairly precise implementation of Knuth's
504 Algorithm D, for a binary computer with base b = 2**16. The caller
505 supplies
506  1. Space q for the quotient, m - n + 1 halfwords (at least one).
507  2. Space r for the remainder (optional), n halfwords.
508  3. The dividend u, m halfwords, m >= 1.
509  4. The divisor v, n halfwords, n >= 2.
510 The most significant digit of the divisor, v[n-1], must be nonzero. The
511 dividend u may have leading zeros; this just makes the algorithm take
512 longer and makes the quotient contain more leading zeros. A value of
513 NULL may be given for the address of the remainder to signify that the
514 caller does not want the remainder.
515  The program does not alter the input parameters u and v.
516  The quotient and remainder returned may have leading zeros. The
517 function itself returns a value of 0 for success and 1 for invalid
518 parameters (e.g., division by 0).
519  For now, we must have m >= n. Knuth's Algorithm D also requires
520 that the dividend be at least as long as the divisor. (In his terms,
521 m >= 0 (unstated). Therefore m+n >= n.) */
522 static inline void
523 xdivmnu(Xt_ushort *restrict q, Xt_ushort *restrict r,
524  const Xt_ushort *restrict u, const Xt_ushort *restrict v,
525  int m, int n) {
526 
527  const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
528  const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
529  Xt_int s, t;
530 
531  if (m < n || n <= 0 || v[n-1] == 0)
532  return; // Return if invalid param.
533 
534  if (n == 1) { // Take care of
535  Xt_uint k = 0; // the case of a
536  for (int j = m - 1; j >= 0; j--) { // single-digit
537  q[j] = (Xt_ushort)((k*base + u[j])/v[0]); // divisor here.
538  k = (k*base + (Xt_int)u[j]) - (Xt_int)(q[j]*v[0]);
539  }
540  if (r != NULL) r[0] = (Xt_ushort)k;
541  return;
542  }
543 
544  // Normalize by shifting v left just enough so that
545  // its high-order bit is on, and shift u left the
546  // same amount. We may have to append a high-order
547  // digit on the dividend; we do that unconditionally.
548 
549  s = xinlz(v[n-1]) - xt_hint_bits; // 0 <= s <= 15.
550  Xt_ushort vn[n]; /* normalized v */
551  for (int i = n - 1; i > 0; i--)
552  vn[i] = (v[i] << s) | (v[i-1] >> (xt_hint_bits-s));
553  vn[0] = v[0] << s;
554 
555  Xt_ushort un[m + 1]; /* normalized u */
556  un[m] = u[m-1] >> (xt_hint_bits-s);
557  for (int i = m - 1; i > 0; i--)
558  un[i] = (u[i] << s) | (u[i-1] >> (xt_hint_bits-s));
559  un[0] = u[0] << s;
560 
561  for (int j = m - n; j >= 0; j--) { // Main loop.
562  // Compute estimated quotient digit qhat of q[j].
563  Xt_uint qhat = (un[j+n]*base + un[j+n-1])/vn[n-1],
564  // and remainder
565  rhat = (un[j+n]*base + un[j+n-1]) - qhat*vn[n-1];
566  while (qhat >= base || qhat*vn[n-2] > base*rhat + un[j+n-2]) {
567  --qhat;
568  rhat += vn[n-1];
569  if (rhat >= base) break;
570  }
571 
572  // Multiply and subtract.
573  Xt_int k = 0;
574  for (int i = 0; i < n; i++) {
575  Xt_uint p = qhat*vn[i]; // Product of two digits.
576  t = un[i+j] - k - (Xt_int)(p & lo_mask);
577  un[i+j] = (Xt_ushort)t;
578  k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
579  }
580  t = un[j+n] - k;
581  un[j+n] = (Xt_ushort)t;
582 
583  q[j] = (Xt_ushort)qhat; // Store quotient digit.
584  if (t < 0) { // If we subtracted too
585  q[j] = q[j] - 1; // much, add back.
586  k = 0;
587  for (int i = 0; i < n; i++) {
588  t = un[i+j] + vn[i] + k;
589  un[i+j] = (Xt_ushort)t;
590  k = t >> xt_hint_bits;
591  }
592  un[j+n] = (Xt_ushort)(un[j+n] + k);
593  }
594  } // End j.
595  // If the caller wants the remainder, unnormalize
596  // it and pass it back.
597  if (r != NULL) {
598  for (int i = 0; i < n; i++)
599  r[i] = (un[i] >> s) | (un[i+1] << (xt_hint_bits-s));
600  }
601 }
602 
603 static inline void
605 {
606  a[0] = (Xt_ushort)u.lo;
607  a[1] = (Xt_ushort)(u.lo >> xt_hint_bits);
608  a[2] = (Xt_ushort)u.hi;
609  a[3] = (Xt_ushort)(u.hi >> xt_hint_bits);
610 }
611 
612 static inline Xt_long
614 {
615  Xt_long u;
616  u.lo = (Xt_uint)a[0] | (Xt_uint)a[1] << xt_hint_bits;
617  u.hi = (Xt_uint)a[2] | (Xt_uint)a[3] << xt_hint_bits;
618  return u;
619 }
620 
621 static inline Xt_ldiv
623 {
624  const Xt_uint lo_mask = ((Xt_uint)1 << xt_hint_bits) - 1U;
625  const Xt_uint base = (Xt_uint)1 << xt_hint_bits; // Number base
626 
627  Xt_uint qhat; // Estimated quotient digit.
628  Xt_uint rhat; // A remainder.
629  Xt_ldiv d;
630 
631  if (v.hi == 0U) { // Take care of the case of a single-word
632  d.quot.hi = u.hi/v.lo; // divisor here.
633  Xt_uint k = u.hi - d.quot.hi*v.lo;
634  Xt_idiv dl = xlidivu((Xt_ulong){ .hi = k, .lo = u.lo }, v.lo);
635  d.quot.lo = (Xt_uint)dl.quot; // divisor here.
636  d.rem = xllsub((Xt_long){ .hi = k, .lo = u.lo },
637  xiimul((Xt_int)d.quot.lo, (Xt_int)v.lo));
638  } else if (u.hi < v.hi || (u.hi == v.hi && u.lo < v.lo)) {
639  d.quot = (Xt_long){ 0U, 0U };
640  d.rem = u;
641  } else {
642  // Normalize by shifting v left just enough so that
643  // its high-order bit is on, and shift u left the
644  // same amount. We may have to append a high-order
645  // digit on the dividend; we do that unconditionally.
646 
647  int s = xinlz(v.hi); // 0 <= s <= xt_int_bits.
648  Xt_ulong // Normalized form of u, v.
649  vn = { .hi = (v.hi << s) | (v.lo >> (xt_int_bits-s)),
650  .lo = v.lo << s };
651  Xt_tword
652  un = { .hi = u.hi >> (xt_int_bits-s),
653  .mid = (u.hi << s) | (u.lo >> (xt_int_bits-s)),
654  .lo = u.lo << s };
655  // the normalized divisor has 4, the dividend 6 half-words
656  if ((u.hi >= base) == (v.hi >= base)) {
657  // Compute estimate qhat of q[j].
658  qhat = un.hi/(vn.hi >> xt_hint_bits);
659  rhat = un.hi - qhat * (vn.hi >> xt_hint_bits);
660  while (qhat >= base || qhat*(vn.hi & lo_mask) > base*rhat + (un.mid >> xt_hint_bits)) {
661  --qhat;
662  rhat += (vn.hi >> xt_hint_bits);
663  if (rhat >= base) break;
664  }
665 
666  // Multiply and subtract.
667  Xt_uint p = qhat * (vn.lo & lo_mask);
668  Xt_int t = (Xt_int)((un.lo & lo_mask) - (p & lo_mask));
669  Xt_uint accum = (Xt_uint)t & lo_mask;
670  Xt_int k = (Xt_int)(p >> xt_hint_bits) - (Xt_int)(t >> xt_hint_bits);
671  p = qhat * (vn.lo >> xt_hint_bits);
672  t = (Xt_int)(un.lo >> xt_hint_bits) - k - (Xt_int)(p & lo_mask);
673  un.lo = accum | ((Xt_uint)t << xt_hint_bits);
674  k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
675  p = qhat * (vn.hi & lo_mask);
676  t = (Xt_int)(un.mid & lo_mask) - k - (Xt_int)(p & lo_mask);
677  accum = (Xt_uint)t & lo_mask;
678  k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
679  p = qhat * (vn.hi >> xt_hint_bits);
680  t = (Xt_int)(un.mid >> xt_hint_bits) - k - (Xt_int)(p & lo_mask);
681  un.mid = accum | ((Xt_uint)t << xt_hint_bits);
682  k = (Xt_int)(p >> xt_hint_bits) - (t >> xt_hint_bits);
683  t = (Xt_int)((un.hi & lo_mask) - (Xt_uint)k);
684  un.hi = (un.hi & ~lo_mask) + (Xt_uint)t;
685 
686  d.quot.hi = 0U;
687  d.quot.lo = qhat; // Store quotient digit.
688  if (t < 0) { // If we subtracted too
689  --d.quot.lo; // much, add back.
690  t = (Xt_int)(un.lo & lo_mask) + (Xt_int)(vn.lo & lo_mask);
691  accum = (Xt_uint)t & lo_mask;
692  k = t >> xt_hint_bits;
693  t = (Xt_int)((un.lo >> xt_hint_bits) + (vn.lo >> xt_hint_bits)
694  + (Xt_uint)k);
695  un.lo = accum | ((Xt_uint)t << xt_hint_bits);
696  k = t >> xt_hint_bits;
697  t = (Xt_int)((un.mid & lo_mask) + (vn.hi & lo_mask) + (Xt_uint)k);
698  accum = (Xt_uint)t & lo_mask;
699  k = t >> xt_hint_bits;
700  t = (Xt_int)((un.mid >> xt_hint_bits) + (un.hi >> xt_hint_bits)
701  + (Xt_uint)k);
702  un.mid = accum | ((Xt_uint)t << xt_hint_bits);
703  k = t >> xt_hint_bits;
704  un.hi += (Xt_uint)k;
705  }
706  // unnormalize the remainder and pass it back.
707  d.rem = (Xt_long){ .lo = (un.lo >> s) | (un.mid << (xt_int_bits - s)),
708  .hi = (un.mid >> s) | (un.hi << (xt_int_bits - s)) };
709  } else {
710  Xt_ushort ua[4], va[4], qa[4] = { 0U }, ra[4] = { 0U };
711  xl2a(ua, u);
712  xl2a(va, v);
713  int m = 4, n = 4;
714  while (ua[m-1] == 0U) --m;
715  while (va[n-1] == 0U) --n;
716  xdivmnu(qa, ra, ua, va, m, n);
717  d.quot = xa2l(qa);
718  d.rem = xa2l(ra);
719  }
720  }
721  return d;
722 }
723 
724 
725 /* compute a divided by b */
726 static inline Xt_ldiv
728 {
729  /* todo: implement */
730  (void)a;
731  (void)b;
732  Xt_ldiv r = { { 0, 0 }, { 0, 0 } };
733  return r;
734 }
735 
736 static inline Xt_ldiv
738 {
739  Xt_long au = xlabs(a), bu = xlabs(b);
740  Xt_ldiv r = xlldivu(au, bu);
741  bool negate = (Xt_int)(a.hi ^ b.hi) < 0;
742  r.quot = xlnegate(r.quot, negate);
743  r.rem = xlnegate(r.rem, negate);
744  return r;
745 }
746 
750 static inline int
752 {
753  Xt_uint al = a.lo, bl = (Xt_uint)b;
754  Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
755  int r = ah > bh || (ah == bh && al > bl);
756  return r;
757 }
758 
762 static inline int
764 {
765  Xt_uint al = a.lo, bl = (Xt_uint)b;
766  Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
767  int r = ah > bh || (ah == bh && al >= bl);
768  return r;
769 }
770 
774 static inline int
776 {
777  Xt_uint al = a.lo, bl = (Xt_uint)b;
778  Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
779  int r = ah < bh || (ah == bh && al <= bl);
780  return r;
781 }
782 
786 static inline int
788 {
789  Xt_uint al = a.lo, bl = (Xt_uint)b;
790  Xt_int ah = (Xt_int)a.hi, bh = Xt_isign_mask(b);
791  int r = ah < bh || (ah == bh && al < bl);
792  return r;
793 }
794 
795 static inline int
797 {
798  return ((a.hi == b.hi) & (a.lo == b.lo));
799 }
800 
801 static inline int
803 {
804  return (a.hi == b.hi) & (a.mid == b.mid) & (a.lo == b.lo);
805 }
806 
807 
808 #endif
809 
810 #endif
static Xt_ldiv xlldivu(Xt_ulong u, Xt_ulong v)
static Xt_long xlisub(Xt_long a, Xt_int b)
static Xt_uint xtlo(Xt_tword x)
static Xt_tword xlimul(Xt_long a, Xt_int b)
Xt_long Xt_ulong
XT_SHORT Xt_short
static int xlicmp_gt(Xt_long a, Xt_int b)
static Xt_long xlladd(Xt_long a, Xt_long b)
static Xt_long xiisub(Xt_int a, Xt_int b)
static Xt_long xi2l(Xt_int a)
static Xt_ldiv xlldiv(Xt_long a, Xt_long b)
static Xt_uint xlhi(Xt_long x)
static Xt_long xliadd(Xt_long a, Xt_int b)
static int xttcmp_eq(Xt_tword a, Xt_tword b)
static Xt_uint xtmid(Xt_tword x)
static Xt_long xllsub(Xt_long a, Xt_long b)
static Xt_uint xthi(Xt_tword x)
@ xt_hint_bits
static Xt_uint xllo(Xt_long x)
static void xl2a(Xt_ushort a[4], Xt_long u)
static int xllcmp_eq(Xt_long a, Xt_long b)
unsigned XT_SHORT Xt_ushort
static Xt_idiv xlidivu(Xt_ulong a, Xt_uint b)
static void xdivmnu(Xt_ushort *restrict q, Xt_ushort *restrict r, const Xt_ushort *restrict u, const Xt_ushort *restrict v, int m, int n)
static int xlicmp_lt(Xt_long a, Xt_int b)
static Xt_long xa2l(Xt_ushort a[4])
static Xt_long xiimul(Xt_int a, Xt_int b)
static bool xl_is_in_xt_int_range(Xt_long a)
static Xt_ldiv xtidiv(Xt_tword a, Xt_int b)
@ xt_int_bits
static Xt_long xlabs(Xt_long a)
static Xt_long xilsub(Xt_int a, Xt_long b)
static Xt_long xlinc(Xt_long a, bool b)
static Xt_long xlnegate(Xt_long a, bool negate)
static int xlsign(Xt_long x)
static int xlicmp_le(Xt_long a, Xt_int b)
static Xt_long xiiadd(Xt_int a, Xt_int b)
static int xlicmp_ge(Xt_long a, Xt_int b)
static Xt_int Xt_isign_mask(Xt_int x)
static int xinlz(Xt_uint v)
base definitions header file
#define XT_INT_MIN
Definition: xt_core.h:74
#define XT_INT_MAX
Definition: xt_core.h:73
XT_INT Xt_int
Definition: xt_core.h:68
unsigned XT_INT Xt_uint
Definition: xt_core.h:70