1 /** Fundamental operations for arbitrary-precision arithmetic 2 * 3 * These functions are for internal use only. 4 * 5 * Copyright: Copyright (C) 2008 Don Clugston. All rights reserved. 6 * License: BSD style: $(LICENSE) 7 * Authors: Don Clugston 8 */ 9 /* References: 10 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 11 Version 0.2, p. 26, (June 2009). 12 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 13 Max-Planck Institute fuer Informatik, (Oct 1998). 14 - G. Hanrot, M. Quercia, and P. Zimmermann, "The Middle Product Algorithm, I.", 15 INRIA 4664, (Dec 2002). 16 - M. Bodrato and A. Zanoni, "What about Toom-Cook Matrices Optimality?", 17 http://bodrato.it/papers (2006). 18 - A. Fog, "Optimizing subroutines in assembly language", 19 www.agner.org/optimize (2008). 20 - A. Fog, "The microarchitecture of Intel and AMD CPU's", 21 www.agner.org/optimize (2008). 22 - A. Fog, "Instruction tables: Lists of instruction latencies, throughputs 23 and micro-operation breakdowns for Intel and AMD CPU's.", www.agner.org/optimize (2008). 24 */ 25 module tango.math.internal.BiguintCore; 26 27 //version=TangoBignumNoAsm; /// temporal: see ticket #1878 28 29 version(GNU){ 30 // GDC is a filthy liar. It can't actually do inline asm. 31 } else version(TangoBignumNoAsm) { 32 33 } else version(D_InlineAsm_X86) { 34 version = Naked_D_InlineAsm_X86; 35 } else version(LLVM_InlineAsm_X86) { 36 version = Naked_D_InlineAsm_X86; 37 } 38 39 version(Naked_D_InlineAsm_X86) { 40 private import tango.math.internal.BignumX86; 41 } else { 42 private import tango.math.internal.BignumNoAsm; 43 } 44 version(build){// bud/build won't link properly without this. 45 static import tango.math.internal.BignumX86; 46 } 47 48 alias multibyteAddSub!('+') multibyteAdd; 49 alias multibyteAddSub!('-') multibyteSub; 50 51 // private import tango.core.Cpuid; 52 static this() 53 { 54 CACHELIMIT = 8000; // tango.core.Cpuid.datacache[0].size/2; 55 FASTDIVLIMIT = 100; 56 } 57 58 private: 59 // Limits for when to switch between algorithms. 60 const int CACHELIMIT; // Half the size of the data cache. 61 const int FASTDIVLIMIT; // crossover to recursive division 62 63 64 // These constants are used by shift operations 65 static if (BigDigit.sizeof == int.sizeof) { 66 enum { LG2BIGDIGITBITS = 5, BIGDIGITSHIFTMASK = 31 }; 67 alias ushort BIGHALFDIGIT; 68 } else static if (BigDigit.sizeof == long.sizeof) { 69 alias uint BIGHALFDIGIT; 70 enum { LG2BIGDIGITBITS = 6, BIGDIGITSHIFTMASK = 63 }; 71 } else static assert(0, "Unsupported BigDigit size"); 72 73 const BigDigit [] ZERO = [0]; 74 const BigDigit [] ONE = [1]; 75 const BigDigit [] TWO = [2]; 76 const BigDigit [] TEN = [10]; 77 78 public: 79 80 /// BigUint performs memory management and wraps the low-level calls. 81 struct BigUint { 82 private: 83 invariant() { 84 assert( data.length == 1 || data[$-1] != 0 ); 85 } 86 BigDigit [] data = ZERO; 87 static BigUint opCall(BigDigit [] x) { 88 BigUint a; 89 a.data = x; 90 return a; 91 } 92 public: // for development only, will be removed eventually 93 // Equivalent to BigUint[numbytes-$..$] 94 BigUint sliceHighestBytes(uint numbytes) { 95 BigUint x; 96 x.data = data[$ - (numbytes>>2) .. $]; 97 return x; 98 } 99 // Length in uints 100 int uintLength() { 101 static if (BigDigit.sizeof == uint.sizeof) { 102 return data.length; 103 } else static if (BigDigit.sizeof == ulong.sizeof) { 104 return data.length * 2 - 105 ((data[$-1] & 0xFFFF_FFFF_0000_0000L) ? 1 : 0); 106 } 107 } 108 int ulongLength() { 109 static if (BigDigit.sizeof == uint.sizeof) { 110 return (data.length + 1) >> 1; 111 } else static if (BigDigit.sizeof == ulong.sizeof) { 112 return data.length; 113 } 114 } 115 116 // The value at (cast(ulong[])data)[n] 117 ulong peekUlong(int n) { 118 static if (BigDigit.sizeof == int.sizeof) { 119 if (data.length == n*2 + 1) return data[n*2]; 120 version(LittleEndian) { 121 return data[n*2] + ((cast(ulong)data[n*2 + 1]) << 32 ); 122 } else { 123 return data[n*2 + 1] + ((cast(ulong)data[n*2]) << 32 ); 124 } 125 } else static if (BigDigit.sizeof == long.sizeof) { 126 return data[n]; 127 } 128 } 129 uint peekUint(int n) { 130 static if (BigDigit.sizeof == int.sizeof) { 131 return data[n]; 132 } else { 133 ulong x = data[n >> 1]; 134 return (n & 1) ? cast(uint)(x >> 32) : cast(uint)x; 135 } 136 } 137 public: 138 /// 139 void opAssign(ulong u) { 140 if (u == 0) data = ZERO; 141 else if (u == 1) data = ONE; 142 else if (u == 2) data = TWO; 143 else if (u == 10) data = TEN; 144 else { 145 uint ulo = cast(uint)(u & 0xFFFF_FFFF); 146 uint uhi = cast(uint)(u >> 32); 147 if (uhi==0) { 148 data = new BigDigit[1]; 149 data[0] = ulo; 150 } else { 151 data = new BigDigit[2]; 152 data[0] = ulo; 153 data[1] = uhi; 154 } 155 } 156 } 157 158 void opAssign(BigUint a) 159 { 160 data = a.data; 161 } 162 163 /// 164 int opCmp(BigUint y) 165 { 166 if (data.length != y.data.length) { 167 return (data.length > y.data.length) ? 1 : -1; 168 } 169 uint k = highestDifferentDigit(data, y.data); 170 if (data[k] == y.data[k]) return 0; 171 return data[k] > y.data[k] ? 1 : -1; 172 } 173 174 /// 175 int opCmp(ulong y) 176 { 177 if (data.length>2) return 1; 178 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 179 uint yhi = cast(uint)(y >> 32); 180 if (data.length == 2 && data[1] != yhi) { 181 return data[1] > yhi ? 1: -1; 182 } 183 if (data[0] == ylo) return 0; 184 return data[0] > ylo ? 1: -1; 185 } 186 187 const bool opEquals(const ref BigUint y) { 188 return y.data[] == data[]; 189 } 190 191 const bool opEquals(ulong y) { 192 if (data.length>2) return 0; 193 uint ylo = cast(uint)(y & 0xFFFF_FFFF); 194 uint yhi = cast(uint)(y >> 32); 195 if (data.length==2 && data[1]!=yhi) return 0; 196 if (data.length==1 && yhi!=0) return 0; 197 return (data[0] == ylo); 198 } 199 200 201 bool isZero() { return data.length == 1 && data[0] == 0; } 202 203 int numBytes() { 204 return data.length * BigDigit.sizeof; 205 } 206 207 // the extra bytes are added to the start of the string 208 char [] toDecimalString(int frontExtraBytes) 209 { 210 uint predictlength = 20+20*(data.length/2); // just over 19 211 char [] buff = new char[frontExtraBytes + predictlength]; 212 int sofar = biguintToDecimal(buff, data.dup); 213 return buff[sofar-frontExtraBytes..$]; 214 } 215 216 /** Convert to a hex string, printing a minimum number of digits 'minPadding', 217 * allocating an additional 'frontExtraBytes' at the start of the string. 218 * Padding is done with padChar, which may be '0' or ' '. 219 * 'separator' is a digit separation character. If non-zero, it is inserted 220 * between every 8 digits. 221 * Separator characters do not contribute to the minPadding. 222 */ 223 char [] toHexString(int frontExtraBytes, char separator = 0, int minPadding=0, char padChar = '0') 224 { 225 // Calculate number of extra padding bytes 226 size_t extraPad = (minPadding > data.length * 2 * BigDigit.sizeof) 227 ? minPadding - data.length * 2 * BigDigit.sizeof : 0; 228 229 // Length not including separator bytes 230 size_t lenBytes = data.length * 2 * BigDigit.sizeof; 231 232 // Calculate number of separator bytes 233 size_t mainSeparatorBytes = separator ? (lenBytes / 8) - 1 : 0; 234 size_t totalSeparatorBytes = separator ? ((extraPad + lenBytes + 7) / 8) - 1: 0; 235 236 char [] buff = new char[lenBytes + extraPad + totalSeparatorBytes + frontExtraBytes]; 237 biguintToHex(buff[$ - lenBytes - mainSeparatorBytes .. $], data, separator); 238 if (extraPad > 0) { 239 if (separator) { 240 size_t start = frontExtraBytes; // first index to pad 241 if (extraPad &7) { 242 // Do 1 to 7 extra zeros. 243 buff[frontExtraBytes .. frontExtraBytes + (extraPad & 7)] = padChar; 244 buff[frontExtraBytes + (extraPad & 7)] = (padChar == ' ' ? ' ' : separator); 245 start += (extraPad & 7) + 1; 246 } 247 for (int i=0; i< (extraPad >> 3); ++i) { 248 buff[start .. start + 8] = padChar; 249 buff[start + 8] = (padChar == ' ' ? ' ' : separator); 250 start += 9; 251 } 252 } else { 253 buff[frontExtraBytes .. frontExtraBytes + extraPad]=padChar; 254 } 255 } 256 int z = frontExtraBytes; 257 if (lenBytes > minPadding) { 258 // Strip leading zeros. 259 int maxStrip = lenBytes - minPadding; 260 while (z< buff.length-1 && (buff[z]=='0' || buff[z]==padChar) && maxStrip>0) { 261 ++z; --maxStrip; 262 } 263 } 264 if (padChar!='0') { 265 // Convert leading zeros into padChars. 266 for (size_t k= z; k< buff.length-1 && (buff[k]=='0' || buff[k]==padChar); ++k) { 267 if (buff[k]=='0') buff[k]=padChar; 268 } 269 } 270 return buff[z-frontExtraBytes..$]; 271 } 272 273 // return false if invalid character found 274 bool fromHexString(const(char []) s) 275 { 276 //Strip leading zeros 277 int firstNonZero = 0; 278 while ((firstNonZero < s.length - 1) && 279 (s[firstNonZero]=='0' || s[firstNonZero]=='_')) { 280 ++firstNonZero; 281 } 282 int len = (s.length - firstNonZero + 15)/4; 283 data = new BigDigit[len+1]; 284 uint part = 0; 285 uint sofar = 0; 286 uint partcount = 0; 287 assert(s.length>0); 288 for (int i = s.length - 1; i>=firstNonZero; --i) { 289 assert(i>=0); 290 char c = s[i]; 291 if (s[i]=='_') continue; 292 uint x = (c>='0' && c<='9') ? c - '0' 293 : (c>='A' && c<='F') ? c - 'A' + 10 294 : (c>='a' && c<='f') ? c - 'a' + 10 295 : 100; 296 if (x==100) return false; 297 part >>= 4; 298 part |= (x<<(32-4)); 299 ++partcount; 300 if (partcount==8) { 301 data[sofar] = part; 302 ++sofar; 303 partcount = 0; 304 part = 0; 305 } 306 } 307 if (part) { 308 for ( ; partcount != 8; ++partcount) part >>= 4; 309 data[sofar] = part; 310 ++sofar; 311 } 312 if (sofar == 0) data = ZERO; 313 else data = data[0..sofar]; 314 return true; 315 } 316 317 // return true if OK; false if erroneous characters found 318 bool fromDecimalString(const(char []) s) 319 { 320 //Strip leading zeros 321 int firstNonZero = 0; 322 while ((firstNonZero < s.length - 1) && 323 (s[firstNonZero]=='0' || s[firstNonZero]=='_')) { 324 ++firstNonZero; 325 } 326 if (firstNonZero == s.length - 1 && s.length > 1) { 327 data = ZERO; 328 return true; 329 } 330 uint predictlength = (18*2 + 2*(s.length-firstNonZero)) / 19; 331 data = new BigDigit[predictlength]; 332 uint hi = biguintFromDecimal(data, s[firstNonZero..$]); 333 data.length = hi; 334 return true; 335 } 336 337 //////////////////////// 338 // 339 // All of these member functions create a new BigUint. 340 341 // return x >> y 342 BigUint opShr(ulong y) 343 { 344 assert(y>0); 345 uint bits = cast(uint)y & BIGDIGITSHIFTMASK; 346 if ((y>>LG2BIGDIGITBITS) >= data.length) return BigUint(ZERO); 347 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 348 if (bits==0) { 349 return BigUint(data[words..$]); 350 } else { 351 uint [] result = new BigDigit[data.length - words]; 352 multibyteShr(result, data[words..$], bits); 353 if (result.length>1 && result[$-1]==0) return BigUint(result[0..$-1]); 354 else return BigUint(result); 355 } 356 } 357 358 // return x << y 359 BigUint opShl(ulong y) 360 { 361 assert(y>0); 362 if (isZero()) return this; 363 uint bits = cast(uint)y & BIGDIGITSHIFTMASK; 364 assert ((y>>LG2BIGDIGITBITS) < cast(ulong)(uint.max)); 365 uint words = cast(uint)(y >> LG2BIGDIGITBITS); 366 BigDigit [] result = new BigDigit[data.length + words+1]; 367 result[0..words] = 0; 368 if (bits==0) { 369 result[words..words+data.length] = data[]; 370 return BigUint(result[0..words+data.length]); 371 } else { 372 uint c = multibyteShl(result[words..words+data.length], data, bits); 373 if (c==0) return BigUint(result[0..words+data.length]); 374 result[$-1] = c; 375 return BigUint(result); 376 } 377 } 378 379 // If wantSub is false, return x+y, leaving sign unchanged 380 // If wantSub is true, return abs(x-y), negating sign if x<y 381 static BigUint addOrSubInt(BigUint x, ulong y, bool wantSub, bool *sign) { 382 BigUint r; 383 if (wantSub) { // perform a subtraction 384 if (x.data.length > 2) { 385 r.data = subInt(x.data, y); 386 } else { // could change sign! 387 ulong xx = x.data[0]; 388 if (x.data.length > 1) xx+= (cast(ulong)x.data[1]) << 32; 389 ulong d; 390 if (xx <= y) { 391 d = y - xx; 392 *sign = !*sign; 393 } else { 394 d = xx - y; 395 } 396 if (d==0) { 397 r = 0; 398 return r; 399 } 400 r.data = new BigDigit[ d > uint.max ? 2: 1]; 401 r.data[0] = cast(uint)(d & 0xFFFF_FFFF); 402 if (d > uint.max) r.data[1] = cast(uint)(d>>32); 403 } 404 } else { 405 r.data = addInt(x.data, y); 406 } 407 return r; 408 } 409 410 // If wantSub is false, return x + y, leaving sign unchanged. 411 // If wantSub is true, return abs(x - y), negating sign if x<y 412 static BigUint addOrSub(BigUint x, BigUint y, bool wantSub, bool *sign) { 413 BigUint r; 414 if (wantSub) { // perform a subtraction 415 r.data = sub(x.data, y.data, sign); 416 if (r.isZero()) { 417 *sign = false; 418 } 419 } else { 420 r.data = add(x.data, y.data); 421 } 422 return r; 423 } 424 425 426 // return x*y. 427 // y must not be zero. 428 static BigUint mulInt(BigUint x, ulong y) 429 { 430 if (y==0 || x == 0) return BigUint(ZERO); 431 uint hi = cast(uint)(y >>> 32); 432 uint lo = cast(uint)(y & 0xFFFF_FFFF); 433 uint [] result = new BigDigit[x.data.length+1+(hi!=0)]; 434 result[x.data.length] = multibyteMul(result[0..x.data.length], x.data, lo, 0); 435 if (hi!=0) { 436 result[x.data.length+1] = multibyteMulAdd!('+')(result[1..x.data.length+1], 437 x.data, hi, 0); 438 } 439 return BigUint(removeLeadingZeros(result)); 440 } 441 442 /* return x*y. 443 */ 444 static BigUint mul(BigUint x, BigUint y) 445 { 446 if (y==0 || x == 0) return BigUint(ZERO); 447 448 uint len = x.data.length + y.data.length; 449 BigDigit [] result = new BigDigit[len]; 450 if (y.data.length > x.data.length) { 451 mulInternal(result, y.data, x.data); 452 } else { 453 if (x.data[]==y.data[]) squareInternal(result, x.data); 454 else mulInternal(result, x.data, y.data); 455 } 456 // the highest element could be zero, 457 // in which case we need to reduce the length 458 return BigUint(removeLeadingZeros(result)); 459 } 460 461 // return x/y 462 static BigUint divInt(BigUint x, uint y) { 463 uint [] result = new BigDigit[x.data.length]; 464 if ((y&(-y))==y) { 465 assert(y!=0, "BigUint division by zero"); 466 // perfect power of 2 467 uint b = 0; 468 for (;y!=1; y>>=1) { 469 ++b; 470 } 471 multibyteShr(result, x.data, b); 472 } else { 473 result[] = x.data[]; 474 uint rem = multibyteDivAssign(result, y, 0); 475 } 476 return BigUint(removeLeadingZeros(result)); 477 } 478 479 // return x%y 480 static uint modInt(BigUint x, uint y) { 481 assert(y!=0); 482 if ((y&(-y))==y) { // perfect power of 2 483 return x.data[0]&(y-1); 484 } else { 485 // horribly inefficient - malloc, copy, & store are unnecessary. 486 uint [] wasteful = new BigDigit[x.data.length]; 487 wasteful[] = x.data[]; 488 uint rem = multibyteDivAssign(wasteful, y, 0); 489 delete wasteful; 490 return rem; 491 } 492 } 493 494 // return x/y 495 static BigUint div(BigUint x, BigUint y) 496 { 497 if (y.data.length > x.data.length) return BigUint(ZERO); 498 if (y.data.length == 1) return divInt(x, y.data[0]); 499 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 500 divModInternal(result, null, x.data, y.data); 501 return BigUint(removeLeadingZeros(result)); 502 } 503 504 // return x%y 505 static BigUint mod(BigUint x, BigUint y) 506 { 507 if (y.data.length > x.data.length) return x; 508 if (y.data.length == 1) { 509 BigDigit [] result = new BigDigit[1]; 510 result[0] = modInt(x, y.data[0]); 511 return BigUint(result); 512 } 513 BigDigit [] result = new BigDigit[x.data.length - y.data.length + 1]; 514 BigDigit [] rem = new BigDigit[y.data.length]; 515 divModInternal(result, rem, x.data, y.data); 516 return BigUint(removeLeadingZeros(rem)); 517 } 518 519 /** 520 * Return a BigUint which is x raised to the power of y. 521 * Method: Powers of 2 are removed from x, then left-to-right binary 522 * exponentiation is used. 523 * Memory allocation is minimized: at most one temporary BigUint is used. 524 */ 525 static BigUint pow(BigUint x, ulong y) 526 { 527 // Deal with the degenerate cases first. 528 if (y==0) return BigUint(ONE); 529 if (y==1) return x; 530 if (x==0 || x==1) return x; 531 532 BigUint result; 533 534 // Simplify, step 1: Remove all powers of 2. 535 uint firstnonzero = firstNonZeroDigit(x.data); 536 537 // See if x can now fit into a single digit. 538 bool singledigit = ((x.data.length - firstnonzero) == 1); 539 // If true, then x0 is that digit, and we must calculate x0 ^^ y0. 540 BigDigit x0 = x.data[firstnonzero]; 541 assert(x0 !=0); 542 size_t xlength = x.data.length; 543 ulong y0; 544 uint evenbits = 0; // number of even bits in the bottom of x 545 while (!(x0 & 1)) { x0 >>= 1; ++evenbits; } 546 547 if ((x.data.length- firstnonzero == 2)) { 548 // Check for a single digit straddling a digit boundary 549 BigDigit x1 = x.data[firstnonzero+1]; 550 if ((x1 >> evenbits) == 0) { 551 x0 |= (x1 << (BigDigit.sizeof * 8 - evenbits)); 552 singledigit = true; 553 } 554 } 555 uint evenshiftbits = 0; // Total powers of 2 to shift by, at the end 556 557 // Simplify, step 2: For singledigits, see if we can trivially reduce y 558 559 BigDigit finalMultiplier = 1; 560 561 if (singledigit) { 562 // x fits into a single digit. Raise it to the highest power we can 563 // that still fits into a single digit, then reduce the exponent accordingly. 564 // We're quite likely to have a residual multiply at the end. 565 // For example, 10^^100 = (((5^^13)^^7) * 5^^9) * 2^^100. 566 // and 5^^13 still fits into a uint. 567 evenshiftbits = cast(uint)( (evenbits * y) & BIGDIGITSHIFTMASK); 568 if (x0 == 1) { // Perfect power of 2 569 result = 1; 570 return result<< (evenbits + firstnonzero*BigDigit.sizeof)*y; 571 } else { 572 int p = highestPowerBelowUintMax(x0); 573 if (y <= p) { // Just do it with pow 574 result = intpow(x0, y); 575 if (evenshiftbits+firstnonzero == 0) return result; 576 return result<< (evenbits + firstnonzero*BigDigit.sizeof)*y; 577 } 578 y0 = y/p; 579 finalMultiplier = intpow(x0, y - y0*p); 580 x0 = intpow(x0, p); 581 } 582 xlength = 1; 583 } 584 585 // Check for overflow and allocate result buffer 586 // Single digit case: +1 is for final multiplier, + 1 is for spare evenbits. 587 ulong estimatelength = singledigit ? firstnonzero*y + y0*1 + 2 + ((evenbits*y) >> LG2BIGDIGITBITS) 588 : x.data.length * y; // estimated length in BigDigits 589 // (Estimated length can overestimate by a factor of 2, if x.data.length ~ 2). 590 if (estimatelength > uint.max/(4*BigDigit.sizeof)) assert(0, "Overflow in BigInt.pow"); 591 592 // The result buffer includes space for all the trailing zeros 593 BigDigit [] resultBuffer = new BigDigit[cast(size_t)estimatelength]; 594 595 // Do all the powers of 2! 596 size_t result_start = cast(size_t)(firstnonzero*y + singledigit? ((evenbits*y) >> LG2BIGDIGITBITS) : 0); 597 resultBuffer[0..result_start] = 0; 598 BigDigit [] t1 = resultBuffer[result_start..$]; 599 BigDigit [] r1; 600 601 if (singledigit) { 602 r1 = t1[0..1]; 603 r1[0] = x0; 604 y = y0; 605 } else { 606 // It's not worth right shifting by evenbits unless we also shrink the length after each 607 // multiply or squaring operation. That might still be worthwhile for large y. 608 r1 = t1[0..x.data.length - firstnonzero]; 609 r1[0..$] = x.data[firstnonzero..$]; 610 } 611 612 if (y>1) { // Set r1 = r1 ^^ y. 613 614 // The secondary buffer only needs space for the multiplication results 615 BigDigit [] secondaryBuffer = new BigDigit[resultBuffer.length - result_start]; 616 BigDigit [] t2 = secondaryBuffer; 617 BigDigit [] r2; 618 619 int shifts = 63; // num bits in a long 620 while(!(y & 0x8000_0000_0000_0000L)) { 621 y <<= 1; 622 --shifts; 623 } 624 y <<=1; 625 626 while(y!=0) { 627 r2 = t2[0 .. r1.length*2]; 628 squareInternal(r2, r1); 629 if (y & 0x8000_0000_0000_0000L) { 630 r1 = t1[0 .. r2.length + xlength]; 631 if (xlength == 1) { 632 r1[$-1] = multibyteMul(r1[0 .. $-1], r2, x0, 0); 633 } else { 634 mulInternal(r1, r2, x.data); 635 } 636 } else { 637 r1 = t1[0 .. r2.length]; 638 r1[] = r2[]; 639 } 640 y <<=1; 641 shifts--; 642 } 643 while (shifts>0) { 644 r2 = t2[0 .. r1.length * 2]; 645 squareInternal(r2, r1); 646 r1 = t1[0 .. r2.length]; 647 r1[] = r2[]; 648 --shifts; 649 } 650 } 651 652 if (finalMultiplier!=1) { 653 BigDigit carry = multibyteMul(r1, r1, finalMultiplier, 0); 654 if (carry) { 655 r1 = t1[0 .. r1.length + 1]; 656 r1[$-1] = carry; 657 } 658 } 659 if (evenshiftbits) { 660 BigDigit carry = multibyteShl(r1, r1, evenshiftbits); 661 if (carry!=0) { 662 r1 = t1[0 .. r1.length + 1]; 663 r1[$ - 1] = carry; 664 } 665 } 666 while(r1[$ - 1]==0) { 667 r1=r1[0 .. $ - 1]; 668 } 669 result.data = resultBuffer[0 .. result_start + r1.length]; 670 return result; 671 } 672 673 } // end BigUint 674 675 676 // Remove leading zeros from x, to restore the BigUint invariant 677 BigDigit[] removeLeadingZeros(BigDigit [] x) 678 { 679 size_t k = x.length; 680 while(k>1 && x[k - 1]==0) --k; 681 return x[0 .. k]; 682 } 683 684 debug(UnitTest) { 685 unittest { 686 // Bug 1650. 687 BigUint r = BigUint([5]); 688 BigUint t = BigUint([7]); 689 BigUint s = BigUint.mod(r, t); 690 assert(s==5); 691 } 692 } 693 694 695 696 debug (UnitTest) { 697 // Pow tests 698 unittest { 699 BigUint r, s; 700 r.fromHexString("80000000_00000001".dup); 701 s = BigUint.pow(r, 5); 702 r.fromHexString("08000000_00000000_50000000_00000001_40000000_00000002_80000000".dup 703 ~ "_00000002_80000000_00000001".dup); 704 assert(s == r); 705 s = 10; 706 s = BigUint.pow(s, 39); 707 r.fromDecimalString("1000000000000000000000000000000000000000".dup); 708 assert(s == r); 709 r.fromHexString("1_E1178E81_00000000".dup); 710 s = BigUint.pow(r, 15); // Regression test: this used to overflow array bounds 711 712 } 713 714 // Radix conversion tests 715 unittest { 716 BigUint r; 717 r.fromHexString("1_E1178E81_00000000".dup); 718 assert(r.toHexString(0, '_', 0) == "1_E1178E81_00000000"); 719 assert(r.toHexString(0, '_', 20) == "0001_E1178E81_00000000"); 720 assert(r.toHexString(0, '_', 16+8) == "00000001_E1178E81_00000000"); 721 assert(r.toHexString(0, '_', 16+9) == "0_00000001_E1178E81_00000000"); 722 assert(r.toHexString(0, '_', 16+8+8) == "00000000_00000001_E1178E81_00000000"); 723 assert(r.toHexString(0, '_', 16+8+8+1) == "0_00000000_00000001_E1178E81_00000000"); 724 assert(r.toHexString(0, '_', 16+8+8+1, ' ') == " 1_E1178E81_00000000"); 725 assert(r.toHexString(0, 0, 16+8+8+1) == "00000000000000001E1178E8100000000"); 726 r = 0; 727 assert(r.toHexString(0, '_', 0) == "0"); 728 assert(r.toHexString(0, '_', 7) == "0000000"); 729 assert(r.toHexString(0, '_', 7, ' ') == " 0"); 730 assert(r.toHexString(0, '#', 9) == "0#00000000"); 731 assert(r.toHexString(0, 0, 9) == "000000000"); 732 733 } 734 } 735 736 private: 737 738 // works for any type 739 T intpow(T)(T x, ulong n) 740 { 741 T p; 742 743 switch (n) 744 { 745 case 0: 746 p = 1; 747 break; 748 749 case 1: 750 p = x; 751 break; 752 753 case 2: 754 p = x * x; 755 break; 756 757 default: 758 p = 1; 759 while (1){ 760 if (n & 1) 761 p *= x; 762 n >>= 1; 763 if (!n) 764 break; 765 x *= x; 766 } 767 break; 768 } 769 return p; 770 } 771 772 773 // returns the maximum power of x that will fit in a uint. 774 int highestPowerBelowUintMax(uint x) 775 { 776 assert(x>1); 777 const ubyte [22] maxpwr = [31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 778 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7]; 779 if (x<24) return maxpwr[x-2]; 780 if (x<41) return 6; 781 if (x<85) return 5; 782 if (x<256) return 4; 783 if (x<1626) return 3; 784 if (x<65536) return 2; 785 return 1; 786 } 787 788 // returns the maximum power of x that will fit in a ulong. 789 int highestPowerBelowUlongMax(uint x) 790 { 791 assert(x>1); 792 const ubyte [39] maxpwr = [63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 793 17, 17, 16, 16, 15, 15, 15, 15, 14, 14, 794 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 795 12, 12, 12, 12, 12, 12, 12, 12, 12]; 796 if (x<41) return maxpwr[x-2]; 797 if (x<57) return 11; 798 if (x<85) return 10; 799 if (x<139) return 9; 800 if (x<256) return 8; 801 if (x<566) return 7; 802 if (x<1626) return 6; 803 if (x<7132) return 5; 804 if (x<65536) return 4; 805 if (x<2642246) return 3; 806 return 2; 807 } 808 809 version(UnitTest) { 810 int slowHighestPowerBelowUintMax(uint x) 811 { 812 int pwr = 1; 813 for (ulong q = x;x*q < cast(ulong)uint.max; ) { 814 q*=x; ++pwr; 815 } 816 return pwr; 817 } 818 819 unittest { 820 assert(highestPowerBelowUintMax(10)==9); 821 for (int k=82; k<88; ++k) {assert(highestPowerBelowUintMax(k)== slowHighestPowerBelowUintMax(k)); } 822 } 823 } 824 825 826 /* General unsigned subtraction routine for bigints. 827 * Sets result = x - y. If the result is negative, negative will be true. 828 */ 829 BigDigit [] sub(in BigDigit[] x, in BigDigit[] y, bool *negative) 830 { 831 if (x.length == y.length) { 832 // There's a possibility of cancellation, if x and y are almost equal. 833 int last = highestDifferentDigit(x, y); 834 BigDigit [] result = new BigDigit[last+1]; 835 if (x[last] < y[last]) { // we know result is negative 836 multibyteSub(result[0..last+1], y[0..last+1], x[0..last+1], 0); 837 *negative = true; 838 } else { // positive or zero result 839 multibyteSub(result[0..last+1], x[0..last+1], y[0..last+1], 0); 840 *negative = false; 841 } 842 while (result.length > 1 && result[$-1] == 0) { 843 result = result[0..$-1]; 844 } 845 return result; 846 } 847 // Lengths are different 848 const (BigDigit) [] large, small; 849 if (x.length < y.length) { 850 *negative = true; 851 large = y; small = x; 852 } else { 853 *negative = false; 854 large = x; small = y; 855 } 856 857 BigDigit [] result = new BigDigit[large.length]; 858 BigDigit carry = multibyteSub(result[0..small.length], large[0..small.length], small, 0); 859 result[small.length..$] = large[small.length..$]; 860 if (carry) { 861 multibyteIncrementAssign!('-')(result[small.length..$], carry); 862 } 863 while (result.length > 1 && result[$-1] == 0) { 864 result = result[0..$-1]; 865 } 866 return result; 867 } 868 869 870 // return a + b 871 BigDigit [] add(BigDigit[] a, BigDigit [] b) { 872 BigDigit [] x, y; 873 if (a.length<b.length) { x = b; y = a; } else { x = a; y = b; } 874 // now we know x.length > y.length 875 // create result. add 1 in case it overflows 876 BigDigit [] result = new BigDigit[x.length + 1]; 877 878 BigDigit carry = multibyteAdd(result[0..y.length], x[0..y.length], y, 0); 879 if (x.length != y.length){ 880 result[y.length..$-1]= x[y.length..$]; 881 carry = multibyteIncrementAssign!('+')(result[y.length..$-1], carry); 882 } 883 if (carry) { 884 result[$-1] = carry; 885 return result; 886 } else return result[0..$-1]; 887 } 888 889 /** return x + y 890 */ 891 BigDigit [] addInt(BigDigit[] x, ulong y) 892 { 893 uint hi = cast(uint)(y >>> 32); 894 uint lo = cast(uint)(y& 0xFFFF_FFFF); 895 uint len = x.length; 896 if (x.length < 2 && hi!=0) ++len; 897 BigDigit [] result = new BigDigit[len+1]; 898 result[0..x.length] = x[]; 899 if (x.length < 2 && hi!=0) { result[1]=hi; hi=0; } 900 uint carry = multibyteIncrementAssign!('+')(result[0..$-1], lo); 901 if (hi!=0) carry += multibyteIncrementAssign!('+')(result[1..$-1], hi); 902 if (carry) { 903 result[$-1] = carry; 904 return result; 905 } else return result[0..$-1]; 906 } 907 908 /** Return x - y. 909 * x must be greater than y. 910 */ 911 BigDigit [] subInt(BigDigit[] x, ulong y) 912 { 913 uint hi = cast(uint)(y >>> 32); 914 uint lo = cast(uint)(y & 0xFFFF_FFFF); 915 BigDigit [] result = new BigDigit[x.length]; 916 result[] = x[]; 917 multibyteIncrementAssign!('-')(result[], lo); 918 if (hi) multibyteIncrementAssign!('-')(result[1..$], hi); 919 if (result[$-1]==0) return result[0..$-1]; 920 else return result; 921 } 922 923 /** General unsigned multiply routine for bigints. 924 * Sets result = x * y. 925 * 926 * The length of y must not be larger than the length of x. 927 * Different algorithms are used, depending on the lengths of x and y. 928 * TODO: "Modern Computer Arithmetic" suggests the OddEvenKaratsuba algorithm for the 929 * unbalanced case. (But I doubt it would be faster in practice). 930 * 931 */ 932 void mulInternal(BigDigit[] result, in BigDigit[] x, in BigDigit[] y) 933 { 934 assert( result.length == x.length + y.length ); 935 assert( y.length > 0 ); 936 assert( x.length >= y.length); 937 if (y.length <= KARATSUBALIMIT) { 938 // Small multiplier, we'll just use the asm classic multiply. 939 if (y.length==1) { // Trivial case, no cache effects to worry about 940 result[x.length] = multibyteMul(result[0..x.length], x, y[0], 0); 941 return; 942 } 943 if (x.length + y.length < CACHELIMIT) return mulSimple(result, x, y); 944 945 // If x is so big that it won't fit into the cache, we divide it into chunks 946 // Every chunk must be greater than y.length. 947 // We make the first chunk shorter, if necessary, to ensure this. 948 949 uint chunksize = CACHELIMIT/y.length; 950 uint residual = x.length % chunksize; 951 if (residual < y.length) { chunksize -= y.length; } 952 // Use schoolbook multiply. 953 mulSimple(result[0 .. chunksize + y.length], x[0..chunksize], y); 954 uint done = chunksize; 955 956 while (done < x.length) { 957 // result[done .. done+ylength] already has a value. 958 chunksize = (done + (CACHELIMIT/y.length) < x.length) ? (CACHELIMIT/y.length) : x.length - done; 959 BigDigit [KARATSUBALIMIT] partial; 960 partial[0..y.length] = result[done..done+y.length]; 961 mulSimple(result[done..done+chunksize+y.length], x[done..done+chunksize], y); 962 addAssignSimple(result[done..done+chunksize + y.length], partial[0..y.length]); 963 done += chunksize; 964 } 965 return; 966 } 967 968 uint half = (x.length >> 1) + (x.length & 1); 969 if (2*y.length*y.length <= x.length*x.length) { 970 // UNBALANCED MULTIPLY 971 // Use school multiply to cut into quasi-squares of Karatsuba-size 972 // or larger. The ratio of the two sides of the 'square' must be 973 // between 1.414:1 and 1:1. Use Karatsuba on each chunk. 974 // 975 // For maximum performance, we want the ratio to be as close to 976 // 1:1 as possible. To achieve this, we can either pad x or y. 977 // The best choice depends on the modulus x%y. 978 uint numchunks = x.length / y.length; 979 uint chunksize = y.length; 980 uint extra = x.length % y.length; 981 uint maxchunk = chunksize + extra; 982 bool paddingY; // true = we're padding Y, false = we're padding X. 983 if (extra * extra * 2 < y.length*y.length) { 984 // The leftover bit is small enough that it should be incorporated 985 // in the existing chunks. 986 // Make all the chunks a tiny bit bigger 987 // (We're padding y with zeros) 988 chunksize += extra / cast(double)numchunks; 989 extra = x.length - chunksize*numchunks; 990 // there will probably be a few left over. 991 // Every chunk will either have size chunksize, or chunksize+1. 992 maxchunk = chunksize + 1; 993 paddingY = true; 994 assert(chunksize + extra + chunksize *(numchunks-1) == x.length ); 995 } else { 996 // the extra bit is large enough that it's worth making a new chunk. 997 // (This means we're padding x with zeros, when doing the first one). 998 maxchunk = chunksize; 999 ++numchunks; 1000 paddingY = false; 1001 assert(extra + chunksize *(numchunks-1) == x.length ); 1002 } 1003 // We make the buffer a bit bigger so we have space for the partial sums. 1004 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length]; 1005 BigDigit [] partial = scratchbuff[$ - y.length .. $]; 1006 uint done; // how much of X have we done so far? 1007 double residual = 0; 1008 if (paddingY) { 1009 // If the first chunk is bigger, do it first. We're padding y. 1010 mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )], 1011 x[0 .. chunksize + (extra>0?1:0)], y, scratchbuff); 1012 done = chunksize + (extra > 0 ? 1 : 0); 1013 if (extra) --extra; 1014 } else { // We're padding X. Begin with the extra bit. 1015 mulKaratsuba(result[0 .. y.length + extra], y, x[0..extra], scratchbuff); 1016 done = extra; 1017 extra = 0; 1018 } 1019 auto basechunksize = chunksize; 1020 while (done < x.length) { 1021 chunksize = basechunksize + (extra > 0 ? 1 : 0); 1022 if (extra) --extra; 1023 partial[] = result[done .. done+y.length]; 1024 mulKaratsuba(result[done .. done + y.length + chunksize], 1025 x[done .. done+chunksize], y, scratchbuff); 1026 addAssignSimple(result[done .. done + y.length + chunksize], partial); 1027 done += chunksize; 1028 } 1029 delete scratchbuff; 1030 } else { 1031 // Balanced. Use Karatsuba directly. 1032 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1033 mulKaratsuba(result, x, y, scratchbuff); 1034 delete scratchbuff; 1035 } 1036 } 1037 1038 /** General unsigned squaring routine for BigInts. 1039 * Sets result = x*x. 1040 * NOTE: If the highest half-digit of x is zero, the highest digit of result will 1041 * also be zero. 1042 */ 1043 void squareInternal(BigDigit[] result, BigDigit[] x) 1044 { 1045 // TODO: Squaring is potentially half a multiply, plus add the squares of 1046 // the diagonal elements. 1047 assert(result.length == 2*x.length); 1048 if (x.length <= KARATSUBASQUARELIMIT) { 1049 if (x.length==1) { 1050 result[1] = multibyteMul(result[0..1], x, x[0], 0); 1051 return; 1052 } 1053 return squareSimple(result, x); 1054 } 1055 // The nice thing about squaring is that it always stays balanced 1056 BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(x.length)]; 1057 squareKaratsuba(result, x, scratchbuff); 1058 delete scratchbuff; 1059 } 1060 1061 1062 import tango.core.BitManip : bsr; 1063 1064 /// if remainder is null, only calculate quotient. 1065 void divModInternal(BigDigit [] quotient, BigDigit[] remainder, BigDigit [] u, BigDigit [] v) 1066 { 1067 assert(quotient.length == u.length - v.length + 1); 1068 assert(remainder==null || remainder.length == v.length); 1069 assert(v.length > 1); 1070 assert(u.length >= v.length); 1071 1072 // Normalize by shifting v left just enough so that 1073 // its high-order bit is on, and shift u left the 1074 // same amount. The highest bit of u will never be set. 1075 1076 BigDigit [] vn = new BigDigit[v.length]; 1077 BigDigit [] un = new BigDigit[u.length + 1]; 1078 // How much to left shift v, so that its MSB is set. 1079 uint s = BIGDIGITSHIFTMASK - bsr(v[$-1]); 1080 if (s!=0) { 1081 multibyteShl(vn, v, s); 1082 un[$-1] = multibyteShl(un[0..$-1], u, s); 1083 } else { 1084 vn[] = v[]; 1085 un[0..$-1] = u[]; 1086 un[$-1] = 0; 1087 } 1088 if (quotient.length<FASTDIVLIMIT) { 1089 schoolbookDivMod(quotient, un, vn); 1090 } else { 1091 fastDivMod(quotient, un, vn); 1092 } 1093 1094 // Unnormalize remainder, if required. 1095 if (remainder != null) { 1096 if (s == 0) remainder[] = un[0..vn.length]; 1097 else multibyteShr(remainder, un[0..vn.length+1], s); 1098 } 1099 delete un; 1100 delete vn; 1101 } 1102 1103 debug(UnitTest) 1104 { 1105 unittest { 1106 uint [] u = [0, 0xFFFF_FFFE, 0x8000_0000]; 1107 uint [] v = [0xFFFF_FFFF, 0x8000_0000]; 1108 uint [] q = new uint[u.length - v.length + 1]; 1109 uint [] r = new uint[2]; 1110 divModInternal(q, r, u, v); 1111 assert(q[]==[0xFFFF_FFFFu, 0]); 1112 assert(r[]==[0xFFFF_FFFFu, 0x7FFF_FFFF]); 1113 u = [0, 0xFFFF_FFFE, 0x8000_0001]; 1114 v = [0xFFFF_FFFF, 0x8000_0000]; 1115 divModInternal(q, r, u, v); 1116 } 1117 } 1118 1119 private: 1120 // Converts a big uint to a hexadecimal string. 1121 // 1122 // Optionally, a separator character (eg, an underscore) may be added between 1123 // every 8 digits. 1124 // buff.length must be data.length*8 if separator is zero, 1125 // or data.length*9 if separator is non-zero. It will be completely filled. 1126 char [] biguintToHex(char [] buff, BigDigit [] data, char separator=0) 1127 { 1128 int x=0; 1129 for (int i=data.length - 1; i>=0; --i) { 1130 toHexZeroPadded(buff[x..x+8], data[i]); 1131 x+=8; 1132 if (separator) { 1133 if (i>0) buff[x] = separator; 1134 ++x; 1135 } 1136 } 1137 return buff; 1138 } 1139 1140 /** Convert a big uint into a decimal string. 1141 * 1142 * Params: 1143 * data The biguint to be converted. Will be destroyed. 1144 * buff The destination buffer for the decimal string. Must be 1145 * large enough to store the result, including leading zeros. 1146 * Will be filled backwards, starting from buff[$-1]. 1147 * 1148 * buff.length must be >= (data.length*32)/log2(10) = 9.63296 * data.length. 1149 * Returns: 1150 * the lowest index of buff which was used. 1151 */ 1152 int biguintToDecimal(char [] buff, BigDigit [] data){ 1153 int sofar = buff.length; 1154 // Might be better to divide by (10^38/2^32) since that gives 38 digits for 1155 // the price of 3 divisions and a shr; this version only gives 27 digits 1156 // for 3 divisions. 1157 while(data.length>1) { 1158 uint rem = multibyteDivAssign(data, 10_0000_0000, 0); 1159 itoaZeroPadded(buff[sofar-9 .. sofar], rem); 1160 sofar -= 9; 1161 if (data[$-1]==0 && data.length>1) { 1162 data.length = data.length - 1; 1163 } 1164 } 1165 itoaZeroPadded(buff[sofar-10 .. sofar], data[0]); 1166 sofar -= 10; 1167 // and strip off the leading zeros 1168 while(sofar!= buff.length-1 && buff[sofar] == '0') sofar++; 1169 return sofar; 1170 } 1171 1172 /** Convert a decimal string into a big uint. 1173 * 1174 * Params: 1175 * data The biguint to be receive the result. Must be large enough to 1176 * store the result. 1177 * s The decimal string. May contain 0..9, or _. Will be preserved. 1178 * 1179 * The required length for the destination buffer is slightly less than 1180 * 1 + s.length/log2(10) = 1 + s.length/3.3219. 1181 * 1182 * Returns: 1183 * the highest index of data which was used. 1184 */ 1185 int biguintFromDecimal(BigDigit [] data, const(char []) s) { 1186 // Convert to base 1e19 = 10_000_000_000_000_000_000. 1187 // (this is the largest power of 10 that will fit into a long). 1188 // The length will be less than 1 + s.length/log2(10) = 1 + s.length/3.3219. 1189 // 485 bits will only just fit into 146 decimal digits. 1190 uint lo = 0; 1191 uint x = 0; 1192 ulong y = 0; 1193 uint hi = 0; 1194 data[0] = 0; // initially number is 0. 1195 data[1] = 0; 1196 1197 for (int i= (s[0]=='-' || s[0]=='+')? 1 : 0; i<s.length; ++i) { 1198 if (s[i] == '_') continue; 1199 x *= 10; 1200 x += s[i] - '0'; 1201 ++lo; 1202 if (lo==9) { 1203 y = x; 1204 x = 0; 1205 } 1206 if (lo==18) { 1207 y *= 10_0000_0000; 1208 y += x; 1209 x = 0; 1210 } 1211 if (lo==19) { 1212 y *= 10; 1213 y += x; 1214 x = 0; 1215 // Multiply existing number by 10^19, then add y1. 1216 if (hi>0) { 1217 data[hi] = multibyteMul(data[0..hi], data[0..hi], 1220703125*2, 0); // 5^13*2 = 0x9184_E72A 1218 ++hi; 1219 data[hi] = multibyteMul(data[0..hi], data[0..hi], 15625*262144, 0); // 5^6*2^18 = 0xF424_0000 1220 ++hi; 1221 } else hi = 2; 1222 uint c = multibyteIncrementAssign!('+')(data[0..hi], cast(uint)(y&0xFFFF_FFFF)); 1223 c += multibyteIncrementAssign!('+')(data[1..hi], cast(uint)(y>>32)); 1224 if (c!=0) { 1225 data[hi]=c; 1226 ++hi; 1227 } 1228 y = 0; 1229 lo = 0; 1230 } 1231 } 1232 // Now set y = all remaining digits. 1233 if (lo>=18) { 1234 } else if (lo>=9) { 1235 for (int k=9; k<lo; ++k) y*=10; 1236 y+=x; 1237 } else { 1238 for (int k=0; k<lo; ++k) y*=10; 1239 y+=x; 1240 } 1241 if (lo!=0) { 1242 if (hi==0) { 1243 *cast(ulong *)(&data[hi]) = y; 1244 hi=2; 1245 } else { 1246 while (lo>0) { 1247 uint c = multibyteMul(data[0..hi], data[0..hi], 10, 0); 1248 if (c!=0) { data[hi]=c; ++hi; } 1249 --lo; 1250 } 1251 uint c = multibyteIncrementAssign!('+')(data[0..hi], cast(uint)(y&0xFFFF_FFFF)); 1252 if (y>0xFFFF_FFFFL) { 1253 c += multibyteIncrementAssign!('+')(data[1..hi], cast(uint)(y>>32)); 1254 } 1255 if (c!=0) { data[hi]=c; ++hi; } 1256 // hi+=2; 1257 } 1258 } 1259 if (hi>1 && data[hi-1]==0) --hi; 1260 return hi; 1261 } 1262 1263 1264 private: 1265 // ------------------------ 1266 // These in-place functions are only for internal use; they are incompatible 1267 // with COW. 1268 1269 // Classic 'schoolbook' multiplication. 1270 void mulSimple(BigDigit[] result, in BigDigit [] left, in BigDigit[] right) 1271 in { 1272 assert(result.length == left.length + right.length); 1273 assert(right.length>1); 1274 } 1275 body { 1276 result[left.length] = multibyteMul(result[0..left.length], left, right[0], 0); 1277 multibyteMultiplyAccumulate(result[1..$], left, right[1..$]); 1278 } 1279 1280 // Classic 'schoolbook' squaring 1281 void squareSimple(BigDigit[] result, BigDigit [] x) 1282 in { 1283 assert(result.length == 2*x.length); 1284 assert(x.length>1); 1285 } 1286 body { 1287 multibyteSquare(result, x); 1288 } 1289 1290 1291 // add two uints of possibly different lengths. Result must be as long 1292 // as the larger length. 1293 // Returns carry (0 or 1). 1294 uint addSimple(BigDigit [] result, BigDigit [] left, BigDigit [] right) 1295 in { 1296 assert(result.length == left.length); 1297 assert(left.length >= right.length); 1298 assert(right.length>0); 1299 } 1300 body { 1301 uint carry = multibyteAdd(result[0..right.length], 1302 left[0..right.length], right, 0); 1303 if (right.length < left.length) { 1304 result[right.length..left.length] = left[right.length .. $]; 1305 carry = multibyteIncrementAssign!('+')(result[right.length..$], carry); 1306 } 1307 return carry; 1308 } 1309 1310 // result = left - right 1311 // returns carry (0 or 1) 1312 BigDigit subSimple(BigDigit [] result, BigDigit [] left, BigDigit [] right) 1313 in { 1314 assert(result.length == left.length); 1315 assert(left.length >= right.length); 1316 assert(right.length>0); 1317 } 1318 body { 1319 BigDigit carry = multibyteSub(result[0..right.length], 1320 left[0..right.length], right, 0); 1321 if (right.length < left.length) { 1322 result[right.length..left.length] = left[right.length .. $]; 1323 carry = multibyteIncrementAssign!('-')(result[right.length..$], carry); 1324 } //else if (result.length==left.length+1) { result[$-1] = carry; carry=0; } 1325 return carry; 1326 } 1327 1328 1329 /* result = result - right 1330 * Returns carry = 1 if result was less than right. 1331 */ 1332 BigDigit subAssignSimple(BigDigit [] result, BigDigit [] right) 1333 { 1334 assert(result.length >= right.length); 1335 uint c = multibyteSub(result[0..right.length], result[0..right.length], right, 0); 1336 if (c && result.length > right.length) c = multibyteIncrementAssign!('-')(result[right.length .. $], c); 1337 return c; 1338 } 1339 1340 /* result = result + right 1341 */ 1342 BigDigit addAssignSimple(BigDigit [] result, BigDigit [] right) 1343 { 1344 assert(result.length >= right.length); 1345 uint c = multibyteAdd(result[0..right.length], result[0..right.length], right, 0); 1346 if (c && result.length > right.length) { 1347 c = multibyteIncrementAssign!('+')(result[right.length .. $], c); 1348 } 1349 return c; 1350 } 1351 1352 /* performs result += wantSub? - right : right; 1353 */ 1354 BigDigit addOrSubAssignSimple(BigDigit [] result, BigDigit [] right, bool wantSub) 1355 { 1356 if (wantSub) return subAssignSimple(result, right); 1357 else return addAssignSimple(result, right); 1358 } 1359 1360 1361 // return true if x<y, considering leading zeros 1362 bool less(in BigDigit[] x, in BigDigit[] y) 1363 { 1364 assert(x.length >= y.length); 1365 uint k = x.length-1; 1366 while(x[k]==0 && k>=y.length) --k; 1367 if (k>=y.length) return false; 1368 while (k>0 && x[k]==y[k]) --k; 1369 return x[k] < y[k]; 1370 } 1371 1372 // Set result = abs(x-y), return true if result is negative(x<y), false if x<=y. 1373 bool inplaceSub(BigDigit[] result, in BigDigit[] x, in BigDigit[] y) 1374 { 1375 assert(result.length == (x.length >= y.length) ? x.length : y.length); 1376 1377 size_t minlen; 1378 bool negative; 1379 if (x.length >= y.length) { 1380 minlen = y.length; 1381 negative = less(x, y); 1382 } else { 1383 minlen = x.length; 1384 negative = !less(y, x); 1385 } 1386 const(BigDigit)[] large, small; 1387 if (negative) { large = y; small=x; } else { large=x; small=y; } 1388 1389 BigDigit carry = multibyteSub(result[0..minlen], large[0..minlen], small[0..minlen], 0); 1390 if (x.length != y.length) { 1391 result[minlen..large.length]= large[minlen..$]; 1392 result[large.length..$] = 0; 1393 if (carry) multibyteIncrementAssign!('-')(result[minlen..$], carry); 1394 } 1395 return negative; 1396 } 1397 1398 /* Determine how much space is required for the temporaries 1399 * when performing a Karatsuba multiplication. 1400 */ 1401 uint karatsubaRequiredBuffSize(uint xlen) 1402 { 1403 return xlen <= KARATSUBALIMIT ? 0 : 2*xlen; // - KARATSUBALIMIT+2; 1404 } 1405 1406 /* Sets result = x*y, using Karatsuba multiplication. 1407 * x must be longer or equal to y. 1408 * Valid only for balanced multiplies, where x is not shorter than y. 1409 * It is superior to schoolbook multiplication if and only if 1410 * sqrt(2)*y.length > x.length > y.length. 1411 * Karatsuba multiplication is O(n^1.59), whereas schoolbook is O(n^2) 1412 * The maximum allowable length of x and y is uint.max; but better algorithms 1413 * should be used far before that length is reached. 1414 * Params: 1415 * scratchbuff An array long enough to store all the temporaries. Will be destroyed. 1416 */ 1417 void mulKaratsuba(BigDigit [] result, in BigDigit [] x, in BigDigit[] y, BigDigit [] scratchbuff) 1418 { 1419 assert(x.length >= y.length); 1420 assert(result.length < uint.max, "Operands too large"); 1421 assert(result.length == x.length + y.length); 1422 if (x.length <= KARATSUBALIMIT) { 1423 return mulSimple(result, x, y); 1424 } 1425 // Must be almost square (otherwise, a schoolbook iteration is better) 1426 assert(2L * y.length * y.length > (x.length-1) * (x.length-1), 1427 "Bigint Internal Error: Asymmetric Karatsuba"); 1428 1429 // The subtractive version of Karatsuba multiply uses the following result: 1430 // (Nx1 + x0)*(Ny1 + y0) = (N*N)*x1y1 + x0y0 + N * (x0y0 + x1y1 - mid) 1431 // where mid = (x0-x1)*(y0-y1) 1432 // requiring 3 multiplies of length N, instead of 4. 1433 // The advantage of the subtractive over the additive version is that 1434 // the mid multiply cannot exceed length N. But there are subtleties: 1435 // (x0-x1),(y0-y1) may be negative or zero. To keep it simple, we 1436 // retain all of the leading zeros in the subtractions 1437 1438 // half length, round up. 1439 uint half = (x.length >> 1) + (x.length & 1); 1440 1441 const(BigDigit) [] x0 = x[0 .. half]; 1442 const(BigDigit) [] x1 = x[half .. $]; 1443 const(BigDigit) [] y0 = y[0 .. half]; 1444 const(BigDigit) [] y1 = y[half .. $]; 1445 BigDigit [] mid = scratchbuff[0 .. half*2]; 1446 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 1447 BigDigit [] resultLow = result[0 .. 2*half]; 1448 BigDigit [] resultHigh = result[2*half .. $]; 1449 // initially use result to store temporaries 1450 BigDigit [] xdiff= result[0 .. half]; 1451 BigDigit [] ydiff = result[half .. half*2]; 1452 1453 // First, we calculate mid, and sign of mid 1454 bool midNegative = inplaceSub(xdiff, x0, x1) 1455 ^ inplaceSub(ydiff, y0, y1); 1456 mulKaratsuba(mid, xdiff, ydiff, newscratchbuff); 1457 1458 // Low half of result gets x0 * y0. High half gets x1 * y1 1459 1460 mulKaratsuba(resultLow, x0, y0, newscratchbuff); 1461 1462 if (2L * y1.length * y1.length < x1.length * x1.length) { 1463 // an asymmetric situation has been created. 1464 // Worst case is if x:y = 1.414 : 1, then x1:y1 = 2.41 : 1. 1465 // Applying one schoolbook multiply gives us two pieces each 1.2:1 1466 if (y1.length <= KARATSUBALIMIT) { 1467 mulSimple(resultHigh, x1, y1); 1468 } else { 1469 // divide x1 in two, then use schoolbook multiply on the two pieces. 1470 uint quarter = (x1.length >> 1) + (x1.length & 1); 1471 bool ysmaller = (quarter >= y1.length); 1472 mulKaratsuba(resultHigh[0..quarter+y1.length], ysmaller ? x1[0..quarter] : y1, 1473 ysmaller ? y1 : x1[0..quarter], newscratchbuff); 1474 // Save the part which will be overwritten. 1475 bool ysmaller2 = ((x1.length - quarter) >= y1.length); 1476 newscratchbuff[0..y1.length] = resultHigh[quarter..quarter + y1.length]; 1477 mulKaratsuba(resultHigh[quarter..$], ysmaller2 ? x1[quarter..$] : y1, 1478 ysmaller2 ? y1 : x1[quarter..$], newscratchbuff[y1.length..$]); 1479 1480 resultHigh[quarter..$].addAssignSimple(newscratchbuff[0..y1.length]); 1481 } 1482 } else mulKaratsuba(resultHigh, x1, y1, newscratchbuff); 1483 1484 /* We now have result = x0y0 + (N*N)*x1y1 1485 Before adding or subtracting mid, we must calculate 1486 result += N * (x0y0 + x1y1) 1487 We can do this with three half-length additions. With a = x0y0, b = x1y1: 1488 aHI aLO 1489 + aHI aLO 1490 + bHI bLO 1491 + bHI bLO 1492 = R3 R2 R1 R0 1493 R1 = aHI + bLO + aLO 1494 R2 = aHI + bLO + aHI + carry_from_R1 1495 R3 = bHi + carry_from_R2 1496 Can also do use newscratchbuff: 1497 1498 // It might actually be quicker to do it in two full-length additions: 1499 // newscratchbuff[2*half] = addSimple(newscratchbuff[0..2*half], result[0..2*half], result[2*half..$]); 1500 // addAssignSimple(result[half..$], newscratchbuff[0..2*half+1]); 1501 */ 1502 BigDigit[] R1 = result[half..half*2]; 1503 BigDigit[] R2 = result[half*2..half*3]; 1504 BigDigit[] R3 = result[half*3..$]; 1505 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 1506 BigDigit c2 = multibyteAdd(R1, R2, result[0..half], 0); // c2:R1 = R2 + R1 + R0 1507 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 1508 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 1509 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 1510 1511 // And finally we subtract mid 1512 addOrSubAssignSimple(result[half..$], mid, !midNegative); 1513 } 1514 1515 void squareKaratsuba(BigDigit [] result, BigDigit [] x, BigDigit [] scratchbuff) 1516 { 1517 // See mulKaratsuba for implementation comments. 1518 // Squaring is simpler, since it never gets asymmetric. 1519 assert(result.length < uint.max, "Operands too large"); 1520 assert(result.length == 2*x.length); 1521 if (x.length <= KARATSUBASQUARELIMIT) { 1522 return squareSimple(result, x); 1523 } 1524 // half length, round up. 1525 uint half = (x.length >> 1) + (x.length & 1); 1526 1527 BigDigit [] x0 = x[0 .. half]; 1528 BigDigit [] x1 = x[half .. $]; 1529 BigDigit [] mid = scratchbuff[0 .. half*2]; 1530 BigDigit [] newscratchbuff = scratchbuff[half*2 .. $]; 1531 // initially use result to store temporaries 1532 BigDigit [] xdiff= result[0 .. half]; 1533 BigDigit [] ydiff = result[half .. half*2]; 1534 1535 // First, we calculate mid. We don't need its sign 1536 inplaceSub(xdiff, x0, x1); 1537 squareKaratsuba(mid, xdiff, newscratchbuff); 1538 1539 // Set result = x0x0 + (N*N)*x1x1 1540 squareKaratsuba(result[0 .. 2*half], x0, newscratchbuff); 1541 squareKaratsuba(result[2*half .. $], x1, newscratchbuff); 1542 1543 /* result += N * (x0x0 + x1x1) 1544 Do this with three half-length additions. With a = x0x0, b = x1x1: 1545 R1 = aHI + bLO + aLO 1546 R2 = aHI + bLO + aHI + carry_from_R1 1547 R3 = bHi + carry_from_R2 1548 */ 1549 BigDigit[] R1 = result[half..half*2]; 1550 BigDigit[] R2 = result[half*2..half*3]; 1551 BigDigit[] R3 = result[half*3..$]; 1552 BigDigit c1 = multibyteAdd(R2, R2, R1, 0); // c1:R2 = R2 + R1 1553 BigDigit c2 = multibyteAdd(R1, R2, result[0..half], 0); // c2:R1 = R2 + R1 + R0 1554 BigDigit c3 = addAssignSimple(R2, R3); // R2 = R2 + R1 + R3 1555 if (c1+c2) multibyteIncrementAssign!('+')(result[half*2..$], c1+c2); 1556 if (c1+c3) multibyteIncrementAssign!('+')(R3, c1+c3); 1557 1558 // And finally we subtract mid, which is always positive 1559 subAssignSimple(result[half..$], mid); 1560 } 1561 1562 /* Knuth's Algorithm D, as presented in 1563 * H.S. Warren, "Hacker's Delight", Addison-Wesley Professional (2002). 1564 * Also described in "Modern Computer Arithmetic" 0.2, Exercise 1.8.18. 1565 * Given u and v, calculates quotient = u/v, u = u%v. 1566 * v must be normalized (ie, the MSB of v must be 1). 1567 * The most significant words of quotient and u may be zero. 1568 * u[0..v.length] holds the remainder. 1569 */ 1570 void schoolbookDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 1571 { 1572 assert(quotient.length == u.length - v.length); 1573 assert(v.length > 1); 1574 assert(u.length >= v.length); 1575 assert((v[$-1]&0x8000_0000)!=0); 1576 assert(u[$-1] < v[$-1]); 1577 // BUG: This code only works if BigDigit is uint. 1578 uint vhi = v[$-1]; 1579 uint vlo = v[$-2]; 1580 1581 for (int j = u.length - v.length - 1; j >= 0; j--) { 1582 // Compute estimate of quotient[j], 1583 // qhat = (three most significant words of u)/(two most sig words of v). 1584 uint qhat; 1585 if (u[j + v.length] == vhi) { 1586 // uu/vhi could exceed uint.max (it will be 0x8000_0000 or 0x8000_0001) 1587 qhat = uint.max; 1588 } else { 1589 uint ulo = u[j + v.length - 2]; 1590 version(Naked_D_InlineAsm_X86) { 1591 // Note: On DMD, this is only ~10% faster than the non-asm code. 1592 uint *p = &u[j + v.length - 1]; 1593 asm { 1594 mov EAX, p; 1595 mov EDX, [EAX+4]; 1596 mov EAX, [EAX]; 1597 div dword ptr [vhi]; 1598 mov qhat, EAX; 1599 mov ECX, EDX; 1600 div3by2correction: 1601 mul dword ptr [vlo]; // EDX:EAX = qhat * vlo 1602 sub EAX, ulo; 1603 sbb EDX, ECX; 1604 jbe div3by2done; 1605 mov EAX, qhat; 1606 dec EAX; 1607 mov qhat, EAX; 1608 add ECX, dword ptr [vhi]; 1609 jnc div3by2correction; 1610 div3by2done: ; 1611 } 1612 } else { // version(InlineAsm) 1613 ulong uu = (cast(ulong)(u[j+v.length]) << 32) | u[j+v.length-1]; 1614 ulong bigqhat = uu / vhi; 1615 ulong rhat = uu - bigqhat * vhi; 1616 qhat = cast(uint)bigqhat; 1617 again: 1618 if (cast(ulong)qhat*vlo > ((rhat<<32) + ulo)) { 1619 --qhat; 1620 rhat += vhi; 1621 if (!(rhat & 0xFFFF_FFFF_0000_0000L)) goto again; 1622 } 1623 } // version(InlineAsm) 1624 } 1625 // Multiply and subtract. 1626 uint carry = multibyteMulAdd!('-')(u[j..j+v.length], v, qhat, 0); 1627 1628 if (u[j+v.length] < carry) { 1629 // If we subtracted too much, add back 1630 --qhat; 1631 carry -= multibyteAdd(u[j..j+v.length],u[j..j+v.length], v, 0); 1632 } 1633 quotient[j] = qhat; 1634 u[j + v.length] = u[j + v.length] - carry; 1635 } 1636 } 1637 1638 private: 1639 // TODO: Replace with a library call 1640 void itoaZeroPadded(char[] output, uint value, int radix = 10) { 1641 int x = output.length - 1; 1642 for( ; x>=0; --x) { 1643 output[x]= cast(char)(value % radix + '0'); 1644 value /= radix; 1645 } 1646 } 1647 1648 void toHexZeroPadded(char[] output, uint value) { 1649 int x = output.length - 1; 1650 const char [] hexDigits = "0123456789ABCDEF"; 1651 for( ; x>=0; --x) { 1652 output[x] = hexDigits[value & 0xF]; 1653 value >>= 4; 1654 } 1655 } 1656 1657 private: 1658 1659 // Returns the highest value of i for which left[i]!=right[i], 1660 // or 0 if left[]==right[] 1661 int highestDifferentDigit(in BigDigit [] left, in BigDigit [] right) 1662 { 1663 assert(left.length == right.length); 1664 for (int i=left.length-1; i>0; --i) { 1665 if (left[i]!=right[i]) return i; 1666 } 1667 return 0; 1668 } 1669 1670 // Returns the lowest value of i for which x[i]!=0. 1671 int firstNonZeroDigit(BigDigit[] x) 1672 { 1673 int k = 0; 1674 while (x[k]==0) { 1675 ++k; 1676 assert(k<x.length); 1677 } 1678 return k; 1679 } 1680 1681 /* Calculate quotient and remainder of u / v using fast recursive division. 1682 v must be normalised, and must be at least half as long as u. 1683 Given u and v, v normalised, calculates quotient = u/v, u = u%v. 1684 Algorithm is described in 1685 - C. Burkinel and J. Ziegler, "Fast Recursive Division", MPI-I-98-1-022, 1686 Max-Planck Institute fuer Informatik, (Oct 1998). 1687 - R.P. Brent and P. Zimmermann, "Modern Computer Arithmetic", 1688 Version 0.2, p. 26, (June 2008). 1689 Returns: 1690 u[0..v.length] is the remainder. u[v.length..$] is corrupted. 1691 scratch is temporary storage space, must be at least as long as quotient. 1692 */ 1693 void recursiveDivMod(BigDigit[] quotient, BigDigit[] u, in BigDigit[] v, 1694 BigDigit[] scratch) 1695 in { 1696 assert(quotient.length == u.length - v.length); 1697 assert(u.length <= 2 * v.length, "Asymmetric division"); // use base-case division to get it to this situation 1698 assert(v.length > 1); 1699 assert(u.length >= v.length); 1700 assert((v[$ - 1] & 0x8000_0000) != 0); 1701 assert(scratch.length >= quotient.length); 1702 1703 } 1704 body { 1705 if(quotient.length < FASTDIVLIMIT) { 1706 return schoolbookDivMod(quotient, u, v); 1707 } 1708 uint k = quotient.length >> 1; 1709 uint h = k + v.length; 1710 1711 recursiveDivMod(quotient[k .. $], u[2 * k .. $], v[k .. $], scratch); 1712 adjustRemainder(quotient[k .. $], u[k .. h], v, k, 1713 scratch[0 .. quotient.length]); 1714 recursiveDivMod(quotient[0 .. k], u[k .. h], v[k .. $], scratch); 1715 adjustRemainder(quotient[0 .. k], u[0 .. v.length], v, k, 1716 scratch[0 .. 2 * k]); 1717 } 1718 1719 // rem -= quot * v[0..k]. 1720 // If would make rem negative, decrease quot until rem is >=0. 1721 // Needs (quot.length * k) scratch space to store the result of the multiply. 1722 void adjustRemainder(BigDigit[] quot, BigDigit[] rem, in BigDigit[] v, int k, 1723 BigDigit[] scratch) 1724 { 1725 assert(rem.length == v.length); 1726 mulInternal(scratch, quot, v[0 .. k]); 1727 uint carry = subAssignSimple(rem, scratch); 1728 while(carry) { 1729 multibyteIncrementAssign!('-')(quot, 1); // quot-- 1730 carry -= multibyteAdd(rem, rem, v, 0); 1731 } 1732 } 1733 1734 // Cope with unbalanced division by performing block schoolbook division. 1735 void fastDivMod(BigDigit [] quotient, BigDigit [] u, in BigDigit [] v) 1736 { 1737 assert(quotient.length == u.length - v.length); 1738 assert(v.length > 1); 1739 assert(u.length >= v.length); 1740 assert((v[$-1] & 0x8000_0000)!=0); 1741 BigDigit [] scratch = new BigDigit[v.length]; 1742 1743 // Perform block schoolbook division, with 'v.length' blocks. 1744 uint m = u.length - v.length; 1745 while (m > v.length) { 1746 recursiveDivMod(quotient[m-v.length..m], 1747 u[m - v.length..m + v.length], v, scratch); 1748 m -= v.length; 1749 } 1750 recursiveDivMod(quotient[0..m], u[0..m + v.length], v, scratch); 1751 delete scratch; 1752 } 1753 1754 debug(UnitTest) 1755 { 1756 import tango.stdc.stdio; 1757 1758 void printBiguint(uint [] data) 1759 { 1760 char [] buff = new char[data.length*9]; 1761 printf("%.*s\n", biguintToHex(buff, data, '_')); 1762 } 1763 1764 void printDecimalBigUint(BigUint data) 1765 { 1766 printf("%.*s\n", data.toDecimalString(0)); 1767 } 1768 1769 unittest{ 1770 uint [] a, b; 1771 a = new uint[43]; 1772 b = new uint[179]; 1773 for (int i=0; i<a.length; ++i) a[i] = 0x1234_B6E9 + i; 1774 for (int i=0; i<b.length; ++i) b[i] = 0x1BCD_8763 - i*546; 1775 1776 a[$-1] |= 0x8000_0000; 1777 uint [] r = new uint[a.length]; 1778 uint [] q = new uint[b.length-a.length+1]; 1779 1780 divModInternal(q, r, b, a); 1781 q = q[0..$-1]; 1782 uint [] r1 = r.dup; 1783 uint [] q1 = q.dup; 1784 fastDivMod(q, b, a); 1785 r = b[0..a.length]; 1786 assert(r[]==r1[]); 1787 assert(q[]==q1[]); 1788 } 1789 } 1790