/**
 * Part of the D programming language runtime library.
 */

/*
 *  Copyright (C) 2004-2006 by Digital Mars, www.digitalmars.com
 *  Written by Walter Bright
 *
 *  This software is provided 'as-is', without any express or implied
 *  warranty. In no event will the authors be held liable for any damages
 *  arising from the use of this software.
 *
 *  Permission is granted to anyone to use this software for any purpose,
 *  including commercial applications, and to alter it and redistribute it
 *  freely, in both source and binary form, subject to the following
 *  restrictions:
 *
 *  o  The origin of this software must not be misrepresented; you must not
 *     claim that you wrote the original software. If you use this software
 *     in a product, an acknowledgment in the product documentation would be
 *     appreciated but is not required.
 *  o  Altered source versions must be plainly marked as such, and must not
 *     be misrepresented as being the original software.
 *  o  This notice may not be removed or altered from any source
 *     distribution.
 */

/*
 *  Modified by Sean Kelly <sean@f4.ca> for use with Tango.
 */

/* This code handles decoding UTF strings for foreach_reverse loops.
 * There are 6 combinations of conversions between char, wchar,
 * and dchar, and 2 of each of those.
 */
module rt.aApplyR;
private import rt.compiler.util.utf;

/**********************************************/
/* Shared reverse decoders */

/**
 * Decodes the UTF-8 sequence whose LAST byte is aa[*pi], scanning towards
 * the start of the array.
 *
 * Params:
 *  aa = UTF-8 code units being iterated in reverse
 *  pi = in: index of the last byte of the sequence;
 *       out: index of the first (lead) byte of the sequence
 *
 * Returns: the decoded code point.
 *
 * The structure of the sequence is validated: every trailing byte must be
 * of the form 10xxxxxx, there may be at most three of them, and the lead
 * byte must announce exactly the number of trailing bytes that were read.
 * Overlong encodings and the numeric range of the result are NOT checked.
 * Malformed input is reported through onUnicodeError(), which is relied
 * upon not to return (presumably it throws -- it is declared in another
 * module).
 */
private dchar reverseDecodeUTF8(in char[] aa, size_t* pi)
{
    size_t i = *pi;
    char c = aa[i];

    if ((c & 0x80) == 0)
        return c;                       // ASCII: one byte, index unchanged

    dchar d = 0;
    uint shift = 0;                     // bit position of the next 6-bit group
    uint mask = 0x3F;                   // payload bits of the lead byte

    // Collect trailing bytes until a lead byte (11xxxxxx) is reached.
    while ((c & 0xC0) != 0xC0)
    {
        // Fail if we ran off the front of the array, hit a byte that is
        // not a trailing byte (e.g. ASCII), or already consumed the
        // maximum of three trailing bytes.
        if (i == 0 || (c & 0xC0) != 0x80 || shift == 18)
            onUnicodeError("Invalid UTF-8 sequence", 0);
        i--;
        d |= (c & 0x3F) << shift;
        shift += 6;
        mask >>= 1;
        c = aa[i];
    }

    // The bits of the lead byte above its payload must be
    //   110 (mask 0x1F), 1110 (mask 0x0F) or 11110 (mask 0x07).
    // For a lead byte with no trailing bytes (mask still 0x3F) the
    // expected prefix evaluates to 0x80, which a lead byte never matches,
    // so that case is rejected as well.
    if ((c & ~mask) != ((~mask << 1) & 0xFF))
        onUnicodeError("Invalid UTF-8 sequence", 0);

    d |= (c & mask) << shift;
    *pi = i;
    return d;
}

/**
 * Decodes the UTF-16 sequence whose LAST code unit is aa[*pi], scanning
 * towards the start of the array.
 *
 * Params:
 *  aa = UTF-16 code units being iterated in reverse
 *  pi = in: index of the last code unit of the sequence;
 *       out: index of the first code unit of the sequence
 *
 * Returns: the decoded code point.
 *
 * A low surrogate must be preceded by a high surrogate, and a high
 * surrogate may not appear on its own; violations are reported through
 * onUnicodeError(), which is relied upon not to return.
 */
private dchar reverseDecodeUTF16(in wchar[] aa, size_t* pi)
{
    size_t i = *pi;
    dchar d = aa[i];

    if (d >= 0xDC00 && d <= 0xDFFF)
    {
        // Low surrogate: the unit before it must be the high surrogate.
        if (i == 0)
            onUnicodeError("Invalid UTF-16 sequence", 0);
        i--;
        wchar lead = aa[i];
        if (lead < 0xD800 || lead > 0xDBFF)
            onUnicodeError("Invalid UTF-16 sequence", 0);
        // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the +0x10000 offset
        // of the supplementary planes into the high-surrogate term.
        d = ((lead - 0xD7C0) << 10) + (d - 0xDC00);
        *pi = i;
    }
    else if (d >= 0xD800 && d <= 0xDBFF)
    {
        // High surrogate that is not followed by a low surrogate.
        onUnicodeError("Invalid UTF-16 sequence", 0);
    }
    return d;
}

/**********************************************/
/* 1 argument versions */

// dg is D, but _aApplyRcd() is C
extern (D) typedef int delegate(void *) dg_t;

/**
 * Reverse iteration over a char[] yielding dchar values.
 * dg receives a pointer to each decoded code point; a non-zero return
 * from dg stops the iteration and is returned to the caller.
 */
extern (C) int _aApplyRcd1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF8(aa, &i);
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcd1.unittest\n");

    char[] s = "hello"c;
    int i;

    foreach_reverse(dchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(dchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == '\U00100456'); break;
            case 2:     assert(d == '\u1234'); break;
            case 3:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Reverse iteration over a wchar[] yielding dchar values.
 * dg receives a pointer to each decoded code point; a non-zero return
 * from dg stops the iteration and is returned to the caller.
 */
extern (C) int _aApplyRwd1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF16(aa, &i);
        result = dg(cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwd1.unittest\n");

    wchar[] s = "hello"w;
    int i;

    foreach_reverse(dchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(dchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == '\U00100456'); break;
            case 2:     assert(d == '\u1234'); break;
            case 3:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Reverse iteration over a char[] yielding wchar values.
 * Code points above 0xFFFF are delivered as two calls to dg: first the
 * high surrogate, then the low surrogate. A non-zero return from dg stops
 * the iteration and is returned to the caller.
 */
extern (C) int _aApplyRcw1(in char[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        wchar w;

        i--;
        dchar d = reverseDecodeUTF8(aa, &i);
        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcw1.unittest\n");

    char[] s = "hello"c;
    int i;

    foreach_reverse(wchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(wchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == 0xDBC1); break;
            case 2:     assert(d == 0xDC56); break;
            case 3:     assert(d == 0x1234); break;
            case 4:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}

/*****************************/

/**
 * Reverse iteration over a wchar[] yielding char values.
 * Non-ASCII code points are encoded with toUTF8() and their bytes are
 * delivered to dg in encoding (lead byte first) order. A non-zero return
 * from dg stops the iteration and is returned to the caller.
 */
extern (C) int _aApplyRwc1(in wchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF16(aa, &i);

        if (d & ~0x7F)
        {
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwc1.unittest\n");

    wchar[] s = "hello"w;
    int i;

    foreach_reverse(char d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(char d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == 0xF4); break;
            case 2:     assert(d == 0x80); break;
            case 3:     assert(d == 0x91); break;
            case 4:     assert(d == 0x96); break;
            case 5:     assert(d == 0xE1); break;
            case 6:     assert(d == 0x88); break;
            case 7:     assert(d == 0xB4); break;
            case 8:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Reverse iteration over a dchar[] yielding char values.
 * Non-ASCII code points are encoded with toUTF8() and their bytes are
 * delivered to dg in encoding (lead byte first) order. A non-zero return
 * from dg stops the iteration and is returned to the caller.
 */
extern (C) int _aApplyRdc1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdc1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];

        if (d & ~0x7F)
        {
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(cast(void *)&c2);
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdc1.unittest\n");

    dchar[] s = "hello"d;
    int i;

    foreach_reverse(char d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(char d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == 0xF4); break;
            case 2:     assert(d == 0x80); break;
            case 3:     assert(d == 0x91); break;
            case 4:     assert(d == 0x96); break;
            case 5:     assert(d == 0xE1); break;
            case 6:     assert(d == 0x88); break;
            case 7:     assert(d == 0xB4); break;
            case 8:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Reverse iteration over a dchar[] yielding wchar values.
 * Code points above 0xFFFF are delivered as two calls to dg: first the
 * high surrogate, then the low surrogate. A non-zero return from dg stops
 * the iteration and is returned to the caller.
 */
extern (C) int _aApplyRdw1(in dchar[] aa, dg_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdw1(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdw1.unittest\n");

    dchar[] s = "hello"d;
    int i;

    foreach_reverse(wchar d; s)
    {
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(wchar d; s)
    {
        //printf("i = %d, d = %x\n", i, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); break;
            case 1:     assert(d == 0xDBC1); break;
            case 2:     assert(d == 0xDC56); break;
            case 3:     assert(d == 0x1234); break;
            case 4:     assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}


/****************************************************************************/
/* 2 argument versions */

// dg is D, but _aApplyRcd2() is C
extern (D) typedef int delegate(void *, void *) dg2_t;

/**
 * Reverse iteration over a char[] yielding (index, dchar) pairs.
 * The index handed to dg is that of the first byte of the decoded
 * sequence. A non-zero return from dg stops the iteration and is
 * returned to the caller.
 */
extern (C) int _aApplyRcd2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF8(aa, &i);
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcd2.unittest\n");

    char[] s = "hello"c;
    int i;

    foreach_reverse(k, dchar d; s)
    {
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(d == 'b'); assert(k == 8); break;
            case 1:     assert(d == '\U00100456'); assert(k == 4); break;
            case 2:     assert(d == '\u1234'); assert(k == 1); break;
            case 3:     assert(d == 'a'); assert(k == 0); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Reverse iteration over a wchar[] yielding (index, dchar) pairs.
 * The index handed to dg is that of the first code unit of the decoded
 * sequence. A non-zero return from dg stops the iteration and is
 * returned to the caller.
 */
extern (C) int _aApplyRwd2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwd2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF16(aa, &i);
        result = dg(&i, cast(void *)&d);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwd2.unittest\n");

    wchar[] s = "hello"w;
    int i;

    foreach_reverse(k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, dchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 4); assert(d == 'b'); break;
            case 1:     assert(k == 2); assert(d == '\U00100456'); break;
            case 2:     assert(k == 1); assert(d == '\u1234'); break;
            case 3:     assert(k == 0); assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 4);
}

/*****************************/

/**
 * Reverse iteration over a char[] yielding (index, wchar) pairs.
 * Code points above 0xFFFF are delivered as two calls to dg (high
 * surrogate, then low surrogate), both with the index of the first byte
 * of the source sequence. A non-zero return from dg stops the iteration
 * and is returned to the caller.
 */
extern (C) int _aApplyRcw2(in char[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRcw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        wchar w;

        i--;
        dchar d = reverseDecodeUTF8(aa, &i);
        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(&i, cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRcw2.unittest\n");

    char[] s = "hello"c;
    int i;

    foreach_reverse(k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 8); assert(d == 'b'); break;
            case 1:     assert(k == 4); assert(d == 0xDBC1); break;
            case 2:     assert(k == 4); assert(d == 0xDC56); break;
            case 3:     assert(k == 1); assert(d == 0x1234); break;
            case 4:     assert(k == 0); assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}

/*****************************/

/**
 * Reverse iteration over a wchar[] yielding (index, char) pairs.
 * Non-ASCII code points are encoded with toUTF8() and their bytes are
 * delivered in encoding order, each with the index of the first code unit
 * of the source sequence. A non-zero return from dg stops the iteration
 * and is returned to the caller.
 */
extern (C) int _aApplyRwc2(in wchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRwc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        i--;
        dchar d = reverseDecodeUTF16(aa, &i);

        if (d & ~0x7F)
        {
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRwc2.unittest\n");

    wchar[] s = "hello"w;
    int i;

    foreach_reverse(k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 4); assert(d == 'b'); break;
            case 1:     assert(k == 2); assert(d == 0xF4); break;
            case 2:     assert(k == 2); assert(d == 0x80); break;
            case 3:     assert(k == 2); assert(d == 0x91); break;
            case 4:     assert(k == 2); assert(d == 0x96); break;
            case 5:     assert(k == 1); assert(d == 0xE1); break;
            case 6:     assert(k == 1); assert(d == 0x88); break;
            case 7:     assert(k == 1); assert(d == 0xB4); break;
            case 8:     assert(k == 0); assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Reverse iteration over a dchar[] yielding (index, char) pairs.
 * Non-ASCII code points are encoded with toUTF8() and their bytes are
 * delivered in encoding order, each with the index of the source element.
 * A non-zero return from dg stops the iteration and is returned to the
 * caller.
 */
extern (C) int _aApplyRdc2(in dchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdc2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];

        if (d & ~0x7F)
        {
            char[4] buf;

            auto b = toUTF8(buf, d);
            foreach (char c2; b)
            {
                result = dg(&i, cast(void *)&c2);
                if (result)
                    return result;
            }
        }
        else
        {
            char c = cast(char)d;
            result = dg(&i, cast(void *)&c);
            if (result)
                break;
        }
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdc2.unittest\n");

    dchar[] s = "hello"d;
    int i;

    foreach_reverse(k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, char d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 3); assert(d == 'b'); break;
            case 1:     assert(k == 2); assert(d == 0xF4); break;
            case 2:     assert(k == 2); assert(d == 0x80); break;
            case 3:     assert(k == 2); assert(d == 0x91); break;
            case 4:     assert(k == 2); assert(d == 0x96); break;
            case 5:     assert(k == 1); assert(d == 0xE1); break;
            case 6:     assert(k == 1); assert(d == 0x88); break;
            case 7:     assert(k == 1); assert(d == 0xB4); break;
            case 8:     assert(k == 0); assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 9);
}

/*****************************/

/**
 * Reverse iteration over a dchar[] yielding (index, wchar) pairs.
 * Code points above 0xFFFF are delivered as two calls to dg (high
 * surrogate, then low surrogate), both with the index of the source
 * element. A non-zero return from dg stops the iteration and is returned
 * to the caller.
 */
extern (C) int _aApplyRdw2(in dchar[] aa, dg2_t dg)
{
    int result;

    debug(apply) printf("_aApplyRdw2(), len = %d\n", aa.length);
    for (size_t i = aa.length; i != 0; )
    {
        dchar d = aa[--i];
        wchar w;

        if (d <= 0xFFFF)
            w = cast(wchar) d;
        else
        {
            w = cast(wchar) ((((d - 0x10000) >> 10) & 0x3FF) + 0xD800);
            result = dg(&i, cast(void *)&w);
            if (result)
                break;
            w = cast(wchar) (((d - 0x10000) & 0x3FF) + 0xDC00);
        }
        result = dg(&i, cast(void *)&w);
        if (result)
            break;
    }
    return result;
}

unittest
{
    debug(apply) printf("_aApplyRdw2.unittest\n");

    dchar[] s = "hello"d;
    int i;

    foreach_reverse(k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        assert(k == 4 - i);
        switch (i)
        {
            case 0:     assert(d == 'o'); break;
            case 1:     assert(d == 'l'); break;
            case 2:     assert(d == 'l'); break;
            case 3:     assert(d == 'e'); break;
            case 4:     assert(d == 'h'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);

    s = "a\u1234\U00100456b";
    i = 0;
    foreach_reverse(k, wchar d; s)
    {
        //printf("i = %d, k = %d, d = %x\n", i, k, d);
        switch (i)
        {
            case 0:     assert(k == 3); assert(d == 'b'); break;
            case 1:     assert(k == 2); assert(d == 0xDBC1); break;
            case 2:     assert(k == 2); assert(d == 0xDC56); break;
            case 3:     assert(k == 1); assert(d == 0x1234); break;
            case 4:     assert(k == 0); assert(d == 'a'); break;
            default:    assert(0);
        }
        i++;
    }
    assert(i == 5);
}