/*******************************************************************************

        copyright:      Copyright (c) 2004 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Initial release: Nov 2005

        author:         Kris

        A set of functions for converting between string and integer
        values.

        Applying the D "import alias" mechanism to this module is highly
        recommended, in order to limit namespace pollution:
        ---
        import Integer = tango.text.convert.Integer;

        auto i = Integer.parse ("32767");
        ---

*******************************************************************************/

module tango.text.convert.Integer;

private import tango.core.Exception;
private import tango.core.Octal;

/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        The string is inspected for a sign and an optional radix
        prefix. A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.

        The text is converted via toLong() and the result is then
        range-checked against both int.min and int.max.

        Throws: IllegalArgumentException where the input text is not
        parsable in its entirety, or where the parsed value does not
        fit within an int.

        See_also: the low level functions parse() and convert()

******************************************************************************/
int toInt(T) (const(T[]) digits, uint radix=0)
{
        auto x = toLong (digits, radix);

        // reject values on either side of the int range; checking only the
        // upper bound would let large negative values be silently truncated
        // by the cast below
        if (x > int.max || x < int.min)
            throw new IllegalArgumentException ("Integer.toInt :: integer overflow");
        return cast(int) x;
}

/******************************************************************************

        Parse an integer value from the provided 'digits' string.

        The string is inspected for a sign and an optional radix
        prefix. A radix may be provided as an argument instead,
        whereupon it must match the prefix (where present). When
        radix is set to zero, conversion will default to decimal.
60 61 Throws: IllegalArgumentException where the input text is not parsable 62 in its entirety. 63 64 See_also: the low level functions parse() and convert() 65 66 ******************************************************************************/ 67 long toLong(T) (const(T[]) digits, uint radix=0) 68 { 69 size_t len; 70 71 auto x = parse (digits, radix, &len); 72 if (len < digits.length) 73 throw new IllegalArgumentException ("Integer.toLong :: invalid literal"); 74 return x; 75 } 76 77 /****************************************************************************** 78 79 Parse an unsignedinteger value from the provided 'digits' string. 80 81 The string is inspected for an optional radix prefix. A 82 radix may be provided as an argument instead, whereupon 83 it must match the prefix (where present). When radix is 84 set to zero, conversion will default to decimal. 85 86 Throws: IllegalArgumentException where the input text is not parsable 87 in its entirety. 88 89 See_also: the low level functions parse() and convert() 90 91 ******************************************************************************/ 92 ulong toUlong(T) (const(T[]) digits, uint radix=0) 93 { 94 bool sign = false; 95 96 auto eaten = trim (digits, sign, radix); 97 if (sign) 98 throw new IllegalArgumentException ("Integer.toUlong :: invalid literal"); 99 100 size_t len = 0; 101 auto value = convert (digits[eaten..$], radix, &len); 102 if (len == 0 || eaten + len < digits.length) 103 throw new IllegalArgumentException ("Integer.toUlong :: invalid literal"); 104 105 return value; 106 } 107 108 /****************************************************************************** 109 110 Wrapper to make life simpler. Returns a text version 111 of the provided value. 
112 113 See format() for details 114 115 ******************************************************************************/ 116 117 char[] toString (long i, const(char[]) fmt = null) 118 { 119 char[66] tmp = void; 120 return format (tmp, i, fmt).dup; 121 } 122 123 /****************************************************************************** 124 125 Wrapper to make life simpler. Returns a text version 126 of the provided value. 127 128 See format() for details 129 130 ******************************************************************************/ 131 132 wchar[] toString16 (long i, const(wchar[]) fmt = null) 133 { 134 wchar[66] tmp = void; 135 return format (tmp, i, fmt).dup; 136 } 137 138 /****************************************************************************** 139 140 Wrapper to make life simpler. Returns a text version 141 of the provided value. 142 143 See format() for details 144 145 ******************************************************************************/ 146 147 dchar[] toString32 (long i, const(dchar[]) fmt = null) 148 { 149 dchar[66] tmp = void; 150 return format (tmp, i, fmt).dup; 151 } 152 153 /******************************************************************************* 154 155 Supports format specifications via an array, where format follows 156 the notation given below: 157 --- 158 type width prefix 159 --- 160 161 Type is one of [d, g, u, b, x, o] or uppercase equivalent, and 162 dictates the conversion radix or other semantics. 163 164 Width is optional and indicates a minimum width for zero-padding, 165 while the optional prefix is one of ['#', ' ', '+'] and indicates 166 what variety of prefix should be placed in the output. e.g. 
167 --- 168 "d" => integer 169 "u" => unsigned 170 "o" => octal 171 "b" => binary 172 "x" => hexadecimal 173 "X" => hexadecimal uppercase 174 175 "d+" => integer prefixed with "+" 176 "b#" => binary prefixed with "0b" 177 "x#" => hexadecimal prefixed with "0x" 178 "X#" => hexadecimal prefixed with "0X" 179 180 "d8" => decimal padded to 8 places as required 181 "b8" => binary padded to 8 places as required 182 "b8#" => binary padded to 8 places and prefixed with "0b" 183 --- 184 185 Note that the specified width is exclusive of the prefix, though 186 the width padding will be shrunk as necessary in order to ensure 187 a requested prefix can be inserted into the provided output. 188 189 *******************************************************************************/ 190 T[] format(T) (T[] dst, long i, const(T[]) fmt = null) 191 { 192 char pre, 193 type; 194 int width; 195 196 decode (fmt, type, pre, width); 197 return formatter (dst, i, type, pre, width); 198 } 199 200 private void decode(T) (in T[] fmt, ref char type, out char pre, out int width) 201 { 202 if (fmt.length is 0) 203 type = 'd'; 204 else 205 { 206 type = cast(char)fmt[0]; 207 if (fmt.length > 1) 208 { 209 auto p = &fmt[1]; 210 for (int j=1; j < fmt.length; ++j, ++p) 211 if (*p >= '0' && *p <= '9') 212 width = width * 10 + (*p - '0'); 213 else 214 pre = cast(char)*p; 215 } 216 } 217 } 218 private struct _FormatterInfo(T) 219 { 220 uint radix; 221 immutable(T)[] prefix; 222 immutable(T)[] numbers; 223 } 224 225 T[] formatter(T) (T[] dst, long i, char type, char pre, int width) 226 { 227 __gshared immutable immutable(T)[] lower = "0123456789abcdef"; 228 __gshared immutable immutable(T)[] upper = "0123456789ABCDEF"; 229 230 alias _FormatterInfo!(T) Info; 231 232 __gshared immutable Info[] formats = 233 [ 234 {10, null, lower}, 235 {10, "-", lower}, 236 {10, " ", lower}, 237 {10, "+", lower}, 238 { 2, "0b", lower}, 239 { 8, "0o", lower}, 240 {16, "0x", lower}, 241 {16, "0X", upper}, 242 ]; 243 244 ubyte 
index; 245 int len = cast(int)dst.length; 246 247 if (len) 248 { 249 switch (type) 250 { 251 case 'd': 252 case 'D': 253 case 'g': 254 case 'G': 255 if (i < 0) 256 { 257 index = 1; 258 i = -i; 259 } 260 else 261 if (pre is ' ') 262 index = 2; 263 else 264 if (pre is '+') 265 index = 3; 266 goto case 'U'; 267 case 'u': 268 case 'U': 269 pre = '#'; 270 break; 271 272 case 'b': 273 case 'B': 274 index = 4; 275 break; 276 277 case 'o': 278 case 'O': 279 index = 5; 280 break; 281 282 case 'x': 283 index = 6; 284 break; 285 286 case 'X': 287 index = 7; 288 break; 289 290 default: 291 return cast(T[])"{unknown format '".dup~cast(T)type~cast(T[])"'}".dup; 292 } 293 294 auto info = &formats[index]; 295 auto numbers = info.numbers; 296 auto radix = info.radix; 297 298 // convert number to text 299 auto p = dst.ptr + len; 300 if (uint.max >= cast(ulong) i) 301 { 302 auto v = cast (uint) i; 303 do { 304 *--p = numbers [v % radix]; 305 } while ((v /= radix) && --len); 306 } 307 else 308 { 309 auto v = cast (ulong) i; 310 do { 311 *--p = numbers [cast(uint) (v % radix)]; 312 } while ((v /= radix) && --len); 313 } 314 315 auto prefix = (pre is '#') ? info.prefix : null; 316 if (len > prefix.length) 317 { 318 len -= prefix.length + 1; 319 320 // prefix number with zeros? 321 if (width) 322 { 323 width = cast(int)dst.length - width - cast(int)prefix.length; 324 while (len > width && len > 0) 325 { 326 *--p = '0'; 327 --len; 328 } 329 } 330 // write optional prefix string ... 331 dst [len .. len + prefix.length] = prefix[]; 332 333 // return slice of provided output buffer 334 return dst [len .. $]; 335 } 336 } 337 338 return cast(T[])"{output width too small}".dup; 339 } 340 341 342 /****************************************************************************** 343 344 Parse an integer value from the provided 'digits' string. 345 346 The string is inspected for a sign and an optional radix 347 prefix. 
A radix may be provided as an argument instead, 348 whereupon it must match the prefix (where present). When 349 radix is set to zero, conversion will default to decimal. 350 351 A non-null 'ate' will return the number of characters used 352 to construct the returned value. 353 354 Throws: none. The 'ate' param should be checked for valid input. 355 356 ******************************************************************************/ 357 long parse(T) (T[] digits, uint radix=0, size_t* ate=null) 358 { 359 bool sign; 360 361 auto eaten = trim (digits, sign, radix); 362 auto value = convert (digits[eaten..$], radix, ate); 363 364 // check *ate > 0 to make sure we don't parse "-" as 0. 365 if (ate && *ate > 0) 366 *ate += eaten; 367 368 return cast(long) (sign ? -value : value); 369 } 370 371 /****************************************************************************** 372 373 Convert the provided 'digits' into an integer value, 374 without checking for a sign or radix. The radix defaults 375 to decimal (10). 376 377 Returns the value and updates 'ate' with the number of 378 characters consumed. 379 380 Throws: none. The 'ate' param should be checked for valid input. 381 382 ******************************************************************************/ 383 ulong convert(T) (const(T[]) digits, uint radix=10, size_t* ate=null) 384 { 385 uint eaten; 386 ulong value; 387 388 foreach (c; cast(T[])digits) 389 { 390 if (c >= '0' && c <= '9') 391 {} 392 else 393 if (c >= 'a' && c <= 'z') 394 c -= 39; 395 else 396 if (c >= 'A' && c <= 'Z') 397 c -= 7; 398 else 399 break; 400 401 if ((c -= '0') < radix) 402 { 403 value = value * radix + c; 404 ++eaten; 405 } 406 else 407 break; 408 } 409 410 if (ate) 411 *ate = eaten; 412 413 return value; 414 } 415 416 417 /****************************************************************************** 418 419 Strip leading whitespace, extract an optional +/- sign, 420 and an optional radix prefix. 
If the radix value matches 421 an optional prefix, or the radix is zero, the prefix will 422 be consumed and assigned. Where the radix is non zero and 423 does not match an explicit prefix, the latter will remain 424 unconsumed. Otherwise, radix will default to 10. 425 426 Returns the number of characters consumed. 427 428 ******************************************************************************/ 429 size_t trim(T) (const(T[]) digits, ref bool sign, ref uint radix) 430 { 431 T c; 432 const (T)* p = digits.ptr; 433 auto len = digits.length; 434 435 if (len) 436 { 437 // strip off whitespace and sign characters 438 for (c = *p; len; c = *++p, --len) 439 if (c is ' ' || c is '\t') 440 {} 441 else 442 if (c is '-') 443 sign = true; 444 else 445 if (c is '+') 446 sign = false; 447 else 448 break; 449 450 // strip off a radix specifier also? 451 auto r = radix; 452 if (c is '0' && len > 1) 453 switch (*++p) 454 { 455 case 'x': 456 case 'X': 457 ++p; 458 r = 16; 459 break; 460 461 case 'b': 462 case 'B': 463 ++p; 464 r = 2; 465 break; 466 467 case 'o': 468 case 'O': 469 ++p; 470 r = 8; 471 break; 472 473 default: 474 --p; 475 break; 476 } 477 478 // default the radix to 10 479 if (r is 0) 480 radix = 10; 481 else 482 // explicit radix must match (optional) prefix 483 if (radix != r) 484 { 485 if (radix) 486 p -= 2; 487 else 488 radix = r; 489 } 490 } 491 492 // return number of characters eaten 493 return (p - digits.ptr); 494 } 495 496 497 /****************************************************************************** 498 499 quick & dirty text-to-unsigned int converter. Use only when you 500 know what the content is, or use parse() or convert() instead. 
501 502 Return the parsed uint 503 504 ******************************************************************************/ 505 506 uint atoi(T) (T[] s, int radix = 10) 507 { 508 uint value; 509 510 foreach (c; s) 511 if (c >= '0' && c <= '9') 512 value = value * radix + (c - '0'); 513 else 514 break; 515 return value; 516 } 517 518 519 /****************************************************************************** 520 521 quick & dirty unsigned to text converter, where the provided output 522 must be large enough to house the result (10 digits in the largest 523 case). For mainstream use, consider utilizing format() instead. 524 525 Returns a populated slice of the provided output 526 527 ******************************************************************************/ 528 T[] itoa(T) (T[] output, uint value, int radix = 10) 529 { 530 T* p = output.ptr + output.length; 531 532 do { 533 *--p = cast(T)(value % radix + '0'); 534 } while (value /= radix); 535 return output[cast(size_t) (p-output.ptr) .. $]; 536 } 537 538 539 /****************************************************************************** 540 541 Consume a number from the input without converting it. Argument 542 'fp' enables floating-point consumption. Supports hex input for 543 numbers which are prefixed appropriately 544 545 Since version 0.99.9 546 547 ******************************************************************************/ 548 549 T[] consume(T) (T[] src, bool fp=false) 550 { 551 T c; 552 bool sign; 553 uint radix; 554 555 // remove leading space, and sign 556 auto e = src.ptr + src.length; 557 auto p = src.ptr + trim (src, sign, radix); 558 auto b = p; 559 560 // bail out if the string is empty 561 if (src.length is 0 || p > &src[$-1]) 562 return null; 563 564 // read leading digits 565 for (c=*p; p < e && ((c >= '0' && c <= '9') || 566 (radix is 16 && ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))));) 567 c = *++p; 568 569 if (fp) 570 { 571 // gobble up a point 572 if (c is '.' 
&& p < e) 573 c = *++p; 574 575 // read fractional digits 576 while (c >= '0' && c <= '9' && p < e) 577 c = *++p; 578 579 // did we consume anything? 580 if (p > b) 581 { 582 // consume exponent? 583 if ((c is 'e' || c is 'E') && p < e ) 584 { 585 c = *++p; 586 if (c is '+' || c is '-') 587 c = *++p; 588 while (c >= '0' && c <= '9' && p < e) 589 c = *++p; 590 } 591 } 592 } 593 return src [0 .. p-src.ptr]; 594 } 595 596 597 /****************************************************************************** 598 599 ******************************************************************************/ 600 601 debug (UnitTest) 602 { 603 604 unittest 605 { 606 char[64] tmp; 607 608 assert (toInt("1") is 1); 609 assert (toLong("1") is 1); 610 assert (toInt("1", 10) is 1); 611 assert (toLong("1", 10) is 1); 612 assert (toUlong("1", 10) is 1); 613 assert (toUlong("18446744073709551615") is ulong.max); 614 615 assert (atoi ("12345") is 12345); 616 assert (itoa (tmp, 12345) == "12345"); 617 618 assert(parse( "0"w ) == 0 ); 619 assert(parse( "1"w ) == 1 ); 620 assert(parse( "-1"w ) == -1 ); 621 assert(parse( "+1"w ) == 1 ); 622 623 // numerical limits 624 assert(parse( "-2147483648" ) == int.min ); 625 assert(parse( "2147483647" ) == int.max ); 626 assert(parse( "4294967295" ) == uint.max ); 627 628 assert(parse( "-9223372036854775808" ) == long.min ); 629 assert(parse( "9223372036854775807" ) == long.max ); 630 assert(parse( "18446744073709551615" ) == ulong.max ); 631 632 // hex 633 assert(parse( "a", 16) == 0x0A ); 634 assert(parse( "b", 16) == 0x0B ); 635 assert(parse( "c", 16) == 0x0C ); 636 assert(parse( "d", 16) == 0x0D ); 637 assert(parse( "e", 16) == 0x0E ); 638 assert(parse( "f", 16) == 0x0F ); 639 assert(parse( "A", 16) == 0x0A ); 640 assert(parse( "B", 16) == 0x0B ); 641 assert(parse( "C", 16) == 0x0C ); 642 assert(parse( "D", 16) == 0x0D ); 643 assert(parse( "E", 16) == 0x0E ); 644 assert(parse( "F", 16) == 0x0F ); 645 assert(parse( "FFFF", 16) == ushort.max ); 646 
assert(parse( "ffffFFFF", 16) == uint.max ); 647 assert(parse( "ffffFFFFffffFFFF", 16u ) == ulong.max ); 648 // oct 649 assert(parse( "55", 8) == octal!(55) ); 650 assert(parse( "100", 8) == octal!(100) ); 651 // bin 652 assert(parse( "10000", 2) == 0x10 ); 653 // trim 654 assert(parse( " \t20") == 20 ); 655 assert(parse( " \t-20") == -20 ); 656 assert(parse( "- \t 20") == -20 ); 657 // recognise radix prefix 658 assert(parse( "0xFFFF" ) == ushort.max ); 659 assert(parse( "0XffffFFFF" ) == uint.max ); 660 assert(parse( "0o55") == octal!(55) ); 661 assert(parse( "0O55" ) == octal!(55) ); 662 assert(parse( "0b10000") == 0x10 ); 663 assert(parse( "0B10000") == 0x10 ); 664 665 // prefix tests 666 auto str = "0x"; 667 assert(parse( str[0..1] ) == 0 ); 668 assert(parse("0x10", 10) == 0); 669 assert(parse("0b10", 10) == 0); 670 assert(parse("0o10", 10) == 0); 671 assert(parse("0b10") == 0b10); 672 assert(parse("0o10") == octal!(10)); 673 assert(parse("0b10", 2) == 0b10); 674 assert(parse("0o10", 8) == octal!(10)); 675 676 // revised tests 677 assert (format(tmp, 10, "d") == "10"); 678 assert (format(tmp, -10, "d") == "-10"); 679 680 assert (format(tmp, 10L, "u") == "10"); 681 assert (format(tmp, 10L, "U") == "10"); 682 assert (format(tmp, 10L, "g") == "10"); 683 assert (format(tmp, 10L, "G") == "10"); 684 assert (format(tmp, 10L, "o") == "12"); 685 assert (format(tmp, 10L, "O") == "12"); 686 assert (format(tmp, 10L, "b") == "1010"); 687 assert (format(tmp, 10L, "B") == "1010"); 688 assert (format(tmp, 10L, "x") == "a"); 689 assert (format(tmp, 10L, "X") == "A"); 690 691 assert (format(tmp, 10L, "d+") == "+10"); 692 assert (format(tmp, 10L, "d ") == " 10"); 693 assert (format(tmp, 10L, "d#") == "10"); 694 assert (format(tmp, 10L, "x#") == "0xa"); 695 assert (format(tmp, 10L, "X#") == "0XA"); 696 assert (format(tmp, 10L, "b#") == "0b1010"); 697 assert (format(tmp, 10L, "o#") == "0o12"); 698 699 assert (format(tmp, 10L, "d1") == "10"); 700 assert (format(tmp, 10L, "d8") == 
"00000010"); 701 assert (format(tmp, 10L, "x8") == "0000000a"); 702 assert (format(tmp, 10L, "X8") == "0000000A"); 703 assert (format(tmp, 10L, "b8") == "00001010"); 704 assert (format(tmp, 10L, "o8") == "00000012"); 705 706 assert (format(tmp, 10L, "d1#") == "10"); 707 assert (format(tmp, 10L, "d6#") == "000010"); 708 assert (format(tmp, 10L, "x6#") == "0x00000a"); 709 assert (format(tmp, 10L, "X6#") == "0X00000A"); 710 711 char[8] tmp1; 712 assert (format(tmp1, 10L, "b12#") == "0b001010"); 713 assert (format(tmp1, 10L, "o12#") == "0o000012"); 714 } 715 } 716 717 /****************************************************************************** 718 719 ******************************************************************************/ 720 721 debug (Integer) 722 { 723 import tango.io.Stdout; 724 725 void main() 726 { 727 char[8] tmp; 728 729 Stdout.formatln ("d '{}'", format(tmp, 10)); 730 Stdout.formatln ("d '{}'", format(tmp, -10)); 731 732 Stdout.formatln ("u '{}'", format(tmp, 10L, "u")); 733 Stdout.formatln ("U '{}'", format(tmp, 10L, "U")); 734 Stdout.formatln ("g '{}'", format(tmp, 10L, "g")); 735 Stdout.formatln ("G '{}'", format(tmp, 10L, "G")); 736 Stdout.formatln ("o '{}'", format(tmp, 10L, "o")); 737 Stdout.formatln ("O '{}'", format(tmp, 10L, "O")); 738 Stdout.formatln ("b '{}'", format(tmp, 10L, "b")); 739 Stdout.formatln ("B '{}'", format(tmp, 10L, "B")); 740 Stdout.formatln ("x '{}'", format(tmp, 10L, "x")); 741 Stdout.formatln ("X '{}'", format(tmp, 10L, "X")); 742 743 Stdout.formatln ("d+ '{}'", format(tmp, 10L, "d+")); 744 Stdout.formatln ("ds '{}'", format(tmp, 10L, "d ")); 745 Stdout.formatln ("d# '{}'", format(tmp, 10L, "d#")); 746 Stdout.formatln ("x# '{}'", format(tmp, 10L, "x#")); 747 Stdout.formatln ("X# '{}'", format(tmp, 10L, "X#")); 748 Stdout.formatln ("b# '{}'", format(tmp, 10L, "b#")); 749 Stdout.formatln ("o# '{}'", format(tmp, 10L, "o#")); 750 751 Stdout.formatln ("d1 '{}'", format(tmp, 10L, "d1")); 752 Stdout.formatln ("d8 '{}'", 
format(tmp, 10L, "d8")); 753 Stdout.formatln ("x8 '{}'", format(tmp, 10L, "x8")); 754 Stdout.formatln ("X8 '{}'", format(tmp, 10L, "X8")); 755 Stdout.formatln ("b8 '{}'", format(tmp, 10L, "b8")); 756 Stdout.formatln ("o8 '{}'", format(tmp, 10L, "o8")); 757 758 Stdout.formatln ("d1# '{}'", format(tmp, 10L, "d1#")); 759 Stdout.formatln ("d6# '{}'", format(tmp, 10L, "d6#")); 760 Stdout.formatln ("x6# '{}'", format(tmp, 10L, "x6#")); 761 Stdout.formatln ("X6# '{}'", format(tmp, 10L, "X6#")); 762 763 Stdout.formatln ("b12# '{}'", format(tmp, 10L, "b12#")); 764 Stdout.formatln ("o12# '{}'", format(tmp, 10L, "o12#")).newline; 765 766 Stdout.formatln (consume("10")); 767 Stdout.formatln (consume("0x1f")); 768 Stdout.formatln (consume("0.123")); 769 Stdout.formatln (consume("0.123", true)); 770 Stdout.formatln (consume("0.123e-10", true)).newline; 771 772 Stdout.formatln (consume("10 s")); 773 Stdout.formatln (consume("0x1f s")); 774 Stdout.formatln (consume("0.123 s")); 775 Stdout.formatln (consume("0.123 s", true)); 776 Stdout.formatln (consume("0.123e-10 s", true)).newline; 777 } 778 } 779 780 781