1 /******************************************************************************* 2 3 copyright: Copyright (c) 2007 Peter Triller. All rights reserved 4 5 license: BSD style: $(LICENSE) 6 7 version: Initial release: Sept 2007 8 9 authors: Peter 10 11 Provides case mapping Functions for Unicode Strings. As of now it is 12 only 99 % complete, because it does not take into account Conditional 13 case mappings. This means the Greek Letter Sigma will not be correctly 14 case mapped at the end of a Word, and the Locales Lithuanian, Turkish 15 and Azeri are not taken into account during Case Mappings. This means 16 all in all around 12 Characters will not be mapped correctly under 17 some circumstances. 18 19 ICU4j also does not handle these cases at the moment. 20 21 Unittests are written against output from ICU4j 22 23 This Module tries to minimize Memory allocation and usage. You can 24 always pass the output buffer that should be used to the case mapping 25 function, which will be resized if necessary. 26 27 *******************************************************************************/ 28 29 module tango.text.Unicode; 30 31 private import tango.text.UnicodeData; 32 private import tango.text.convert.Utf; 33 34 35 36 /** 37 * Converts an Utf8 String to Upper case 38 * 39 * Params: 40 * input = String to be case mapped 41 * output = this output buffer will be used unless too small 42 * Returns: the case mapped string 43 */ 44 /+deprecated char[] blockToUpper(char[] input, char[] output = null, dchar[] working = null) { 45 46 // ?? How much preallocation ?? 
This is worst case allocation 47 if (working is null) 48 working.length = input.length; 49 50 uint produced = 0; 51 size_t ate; 52 uint oprod = 0; 53 foreach(dchar ch; input) { 54 // TODO Conditional Case Mapping 55 UnicodeData *d = getUnicodeData(ch); 56 if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) { 57 SpecialCaseData *s = getSpecialCaseData(ch); 58 debug { 59 assert(s !is null); 60 } 61 if(s.upperCaseMapping !is null) { 62 // To speed up, use worst case for memory prealocation 63 // since the length of an UpperCaseMapping list is at most 4 64 // Make sure no relocation is made in the toString Method 65 // better allocation algorithm ? 66 auto len = s.upperCaseMapping.length; 67 if(produced + len >= working.length) 68 working.length = working.length + working.length / 2 + len; 69 oprod = produced; 70 produced += len; 71 working[oprod..produced] = s.upperCaseMapping; 72 continue; 73 } 74 } 75 // Make sure no relocation is made in the toString Method 76 if(produced + 1 >= output.length) 77 working.length = working.length + working.length / 2 + 1; 78 working[produced++] = d is null ? 
ch:d.simpleUpperCaseMapping;
    }
    return toString(working[0..produced],output);
}+/



/**
 * Converts an Utf8 String to Upper case
 *
 * Each code point is replaced by its multi character upper case mapping
 * when the special case data provides one, otherwise by its simple upper
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
char[] toUpper(const(char)[] input, char[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            debug {
                assert(s !is null);
            }
            if(s.upperCaseMapping !is null) {
                // To speed up, use worst case for memory preallocation:
                // reserve 4 chars per mapped code point so that
                // no relocation is made in the toString Method
                if(produced + s.upperCaseMapping.length * 4 >= output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length * 4;
                char[] res = toString(s.upperCaseMapping, output[produced..output.length], &ate);
                debug {
                    assert(ate == s.upperCaseMapping.length);
                    assert(res.ptr == output[produced..output.length].ptr);
                }
                produced += res.length;
                continue;
            }
            // no special upper case mapping: use the simple mapping below
        }
        // Make sure no relocation is made in the toString Method
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        buf[0] = d is null ? ch:d.simpleUpperCaseMapping;
        char[] res = toString(buf, output[produced..output.length], &ate);
        debug {
            assert(ate == 1);
            assert(res.ptr == output[produced..output.length].ptr);
        }
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf16 String to Upper case
 *
 * Each code point is replaced by its multi character upper case mapping
 * when the special case data provides one, otherwise by its simple upper
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
wchar[] toUpper(const(wchar)[] input, wchar[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            debug {
                assert(s !is null);
            }
            if(s.upperCaseMapping !is null) {
                // To speed up, use worst case for memory preallocation
                // Make sure no relocation is made in the toString16 Method
                if(produced + s.upperCaseMapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length * 3;
                wchar[] res = toString16(s.upperCaseMapping, output[produced..output.length], &ate);
                debug {
                    assert(ate == s.upperCaseMapping.length);
                    assert(res.ptr == output[produced..output.length].ptr);
                }
                produced += res.length;
                continue;
            }
            // no special upper case mapping: use the simple mapping below
        }
        // Make sure no relocation is made in the toString16 Method
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        buf[0] = d is null ? ch:d.simpleUpperCaseMapping;
        wchar[] res = toString16(buf, output[produced..output.length], &ate);
        debug {
            assert(ate == 1);
            assert(res.ptr == output[produced..output.length].ptr);
        }
        produced += res.length;
    }
    return output[0..produced];
}

/**
 * Converts an Utf32 String to Upper case
 *
 * Each code point is replaced by its multi character upper case mapping
 * when the special case data provides one, otherwise by its simple upper
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
dchar[] toUpper(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            debug {
                assert(s !is null);
            }
            if(s.upperCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.upperCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length;
                foreach(ch; s.upperCaseMapping) {
                    output[produced++] = ch;
                }
                continue;
            }
            // A special case entry without an upper case mapping must not
            // swallow the character: fall through to the simple mapping,
            // exactly like the Utf8 and Utf16 overloads do.
        }
        if(produced >= output.length)
            output.length = output.length + output.length / 2;
        output[produced++] = d is null ? orig:d.simpleUpperCaseMapping;
    }
    return output[0..produced];
}


/**
 * Converts an Utf8 String to Lower case
 *
 * Each code point is replaced by its multi character lower case mapping
 * when the special case data provides one, otherwise by its simple lower
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
char[] toLower(const(char)[] input, char[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            debug {
                assert(s !is null);
            }
            if(s.lowerCaseMapping !is null) {
                // To speed up, use worst case for memory preallocation:
                // reserve 4 chars per mapped code point so that
                // no relocation is made in the toString Method
                if(produced + s.lowerCaseMapping.length * 4 >= output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length * 4;
                char[] res = toString(s.lowerCaseMapping, output[produced..output.length], &ate);
                debug {
                    assert(ate == s.lowerCaseMapping.length);
                    assert(res.ptr == output[produced..output.length].ptr);
                }
                produced += res.length;
                continue;
            }
            // no special lower case mapping: use the simple mapping below
        }
        // Make sure no relocation is made in the toString Method
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;
        buf[0] = d is null ? ch:d.simpleLowerCaseMapping;
        char[] res = toString(buf, output[produced..output.length], &ate);
        debug {
            assert(ate == 1);
            assert(res.ptr == output[produced..output.length].ptr);
        }
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf16 String to Lower case
 *
 * Each code point is replaced by its multi character lower case mapping
 * when the special case data provides one, otherwise by its simple lower
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
wchar[] toLower(const(wchar)[] input, wchar[] output = null) {

    dchar[1] buf;
    // assume most common case: String stays the same length
    if (output.length < input.length)
        output.length = input.length;

    size_t produced = 0;
    size_t ate;
    foreach(dchar ch; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(ch);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(ch);
            debug {
                assert(s !is null);
            }
            if(s.lowerCaseMapping !is null) {
                // To speed up, use worst case for memory preallocation
                // Make sure no relocation is made in the toString16 Method
                if(produced + s.lowerCaseMapping.length * 2 >= output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length * 3;
                wchar[] res = toString16(s.lowerCaseMapping, output[produced..output.length], &ate);
                debug {
                    assert(ate == s.lowerCaseMapping.length);
                    assert(res.ptr == output[produced..output.length].ptr);
                }
                produced += res.length;
                continue;
            }
            // no special lower case mapping: use the simple mapping below
        }
        // Make sure no relocation is made in the toString16 Method
        if(produced + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;
        buf[0] = d is null ? ch:d.simpleLowerCaseMapping;
        wchar[] res = toString16(buf, output[produced..output.length], &ate);
        debug {
            assert(ate == 1);
            assert(res.ptr == output[produced..output.length].ptr);
        }
        produced += res.length;
    }
    return output[0..produced];
}


/**
 * Converts an Utf32 String to Lower case
 *
 * Each code point is replaced by its multi character lower case mapping
 * when the special case data provides one, otherwise by its simple lower
 * case mapping. Code points without Unicode data are copied unchanged.
 * Conditional case mappings are not applied.
 *
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized)
 *          output buffer
 */
dchar[] toLower(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            debug {
                assert(s !is null);
            }
            if(s.lowerCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.lowerCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length;
                foreach(ch; s.lowerCaseMapping) {
                    output[produced++] = ch;
                }
                continue;
            }
            // A special case entry without a lower case mapping must not
            // swallow the character: fall through to the simple mapping,
            // exactly like the Utf8 and Utf16 overloads do.
        }
        if(produced >= output.length)
            output.length = output.length + output.length / 2;
        output[produced++] = d is null ? orig:d.simpleLowerCaseMapping;
    }
    return output[0..produced];
}

/**
 * Converts an Utf8 String to Folding case
 * Folding case is used for case insensitive comparisons.
*
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
char[] toFold(const(char)[] input, char[] output = null) {

    // Start from the common case: the folded text is as long as the input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach(dchar codePoint; input) {
        FoldingCaseData *fold = getFoldingCaseData(codePoint);
        char[] encoded;

        if(fold is null) {
            // No folding entry: the code point is re-encoded as it is.
            // Keep enough room so that toString does not relocate.
            if(written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 4;
            single[0] = codePoint;
            encoded = toString(single, output[written..output.length], &consumed);
            debug {
                assert(consumed == 1);
                assert(encoded.ptr == output[written..output.length].ptr);
            }
        } else {
            // Worst case preallocation: 4 chars per mapped code point,
            // so that toString does not relocate.
            if(written + fold.mapping.length * 4 >= output.length)
                output.length = output.length + output.length / 2 + fold.mapping.length * 4;
            encoded = toString(fold.mapping, output[written..output.length], &consumed);
            debug {
                assert(consumed == fold.mapping.length);
                assert(encoded.ptr == output[written..output.length].ptr);
            }
        }
        written += encoded.length;
    }
    return output[0..written];
}

/**
 * Converts an Utf16 String to Folding case
 * Folding case is used for case insensitive comparisons.
*
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
wchar[] toFold(const(wchar)[] input, wchar[] output = null) {

    // Start from the common case: the folded text is as long as the input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;
    size_t consumed;
    auto written = 0;

    foreach(dchar codePoint; input) {
        FoldingCaseData *fold = getFoldingCaseData(codePoint);
        wchar[] encoded;

        if(fold is null) {
            // No folding entry: the code point is re-encoded as it is.
            // Keep enough room so that toString16 does not relocate.
            if(written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 3;
            single[0] = codePoint;
            encoded = toString16(single, output[written..output.length], &consumed);
            debug {
                assert(consumed == 1);
                assert(encoded.ptr == output[written..output.length].ptr);
            }
        } else {
            // Worst case preallocation, so that toString16 does not relocate.
            if(written + fold.mapping.length * 2 >= output.length)
                output.length = output.length + output.length / 2 + fold.mapping.length * 3;
            encoded = toString16(fold.mapping, output[written..output.length], &consumed);
            debug {
                assert(consumed == fold.mapping.length);
                assert(encoded.ptr == output[written..output.length].ptr);
            }
        }
        written += encoded.length;
    }
    return output[0..written];
}

/**
 * Converts an Utf32 String to Folding case
 * Folding case is used for case insensitive comparisons.
*
 * Params:
 *     input  = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string
 */
dchar[] toFold(const(dchar)[] input, dchar[] output = null) {

    // Start from the common case: the folded text is as long as the input.
    if (output.length < input.length)
        output.length = input.length;

    uint written = 0;
    foreach(dchar codePoint; input) {
        FoldingCaseData *fold = getFoldingCaseData(codePoint);
        if(fold is null) {
            // No folding entry: copy the code point unchanged.
            if(written >= output.length)
                output.length = output.length + output.length / 2;
            output[written++] = codePoint;
        } else {
            // Grow only when the whole mapping would not fit any more.
            if(written + fold.mapping.length > output.length)
                output.length = output.length + output.length / 2 + fold.mapping.length;
            foreach(mapped; fold.mapping) {
                output[written++] = mapped;
            }
        }
    }
    return output[0..written];
}


/**
 * Determines if a character is a digit. It returns true for decimal
 * digits only (general category Nd).
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is Nd
 */
bool isDigit(dchar ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
}


/**
 * Determines if a character is a letter (general category
 * Lu, Ll, Lt, Lm or Lo).
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and it is in one of the
 *          letter categories
 */
bool isLetter(int ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo)) != 0;
}

/**
 * Determines if a character is a letter or a
 * decimal digit.
*
 * Params:
 *     ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is one of Lu, Ll, Lt, Lm, Lo or Nd
 */
bool isLetterOrDigit(int ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory &
        ( UnicodeData.GeneralCategory.Lu
        | UnicodeData.GeneralCategory.Ll
        | UnicodeData.GeneralCategory.Lt
        | UnicodeData.GeneralCategory.Lm
        | UnicodeData.GeneralCategory.Lo
        | UnicodeData.GeneralCategory.Nd)) != 0;
}

/**
 * Determines if a character is a lower case letter
 * (general category Ll).
 * Params:
 *     ch = the character to be inspected
 */
bool isLower(dchar ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
}

/**
 * Determines if a character is a title case letter
 * (general category Lt).
 * In case of combined letters, only the first is upper and the second is lower.
 * Some of these special characters can be found in the croatian and greek language.
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 * Params:
 *     ch = the character to be inspected
 */
bool isTitle(dchar ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
}

/**
 * Determines if a character is an upper case letter
 * (general category Lu).
 * Params:
 *     ch = the character to be inspected
 */
bool isUpper(dchar ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
}

/**
 * Determines if a character is a Whitespace character.
 * Whitespace characters are characters in the
 * General Categories Zs, Zl, Zp without the No Break
 * spaces plus the control characters out of the ASCII
 * range, that are used as spaces:
 * TAB VT LF FF CR FS GS RS US NL
 *
 * WARNING: look at isSpace, maybe that function does
 * more what you expect.
*
 * Params:
 *     ch = the character to be inspected
 * Returns: true for the control characters 0x0009-0x000D and
 *          0x001C-0x001F, and for characters of the categories Zs, Zl
 *          and Zp other than 0x00A0, 0x202F and 0xFEFF
 */
bool isWhitespace(dchar ch) {
    // control characters that are used as spaces
    if(ch >= 0x0009 && ch <= 0x000D)
        return true;
    if(ch >= 0x001C && ch <= 0x001F)
        return true;

    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;

    // only separators qualify ...
    if((info.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp)) == 0)
        return false;

    // ... and the no break spaces are excluded
    if(ch == 0x00A0)    // NBSP
        return false;
    if(ch == 0x202F)    // NARROW NBSP
        return false;
    return ch != 0xFEFF; // ZERO WIDTH NBSP
}

/**
 * Determines if a character is a Space character as
 * specified in the Unicode Standard, i.e. one of the
 * general categories Zs, Zl or Zp.
 *
 * WARNING: look at isWhitespace, maybe that function does
 * more what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 */
bool isSpace(dchar ch) {
    UnicodeData *info = getUnicodeData(ch);
    if(info is null)
        return false;
    return (info.generalCategory &
        ( UnicodeData.GeneralCategory.Zs
        | UnicodeData.GeneralCategory.Zl
        | UnicodeData.GeneralCategory.Zp)) != 0;
}


/**
 * Determines if a character is a printable character as
 * specified in the Unicode Standard.
*
 * Params:
 *     ch = the character to be inspected
 * Returns: true if Unicode data exists for ch and its general category
 *          is none of Cn, Cc, Cf, Co, Cs
 */
bool isPrintable(dchar ch) {
    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && !(d.generalCategory &
        ( UnicodeData.GeneralCategory.Cn
        | UnicodeData.GeneralCategory.Cc
        | UnicodeData.GeneralCategory.Cf
        | UnicodeData.GeneralCategory.Co
        | UnicodeData.GeneralCategory.Cs));
}

// Everything below this label is only compiled with debug=UnicodeTest.
debug ( UnicodeTest ):
    void main() {}

debug (UnitTest) {

// toUpper: all three encodings, with and without a caller supplied buffer
unittest {


    // 1) No Buffer passed, no resize, no SpecialCase

    const(char)[] testString1utf8 = "\u00E4\u00F6\u00FC";
    const(wchar)[] testString1utf16 = "\u00E4\u00F6\u00FC";
    const(dchar)[] testString1utf32 = "\u00E4\u00F6\u00FC";
    const(char)[] refString1utf8 = "\u00C4\u00D6\u00DC";
    const(wchar)[] refString1utf16 = "\u00C4\u00D6\u00DC";
    const(dchar)[] refString1utf32 = "\u00C4\u00D6\u00DC";
    char[] resultString1utf8 = toUpper(testString1utf8);
    assert(resultString1utf8 == refString1utf8);
    wchar[] resultString1utf16 = toUpper(testString1utf16);
    assert(resultString1utf16 == refString1utf16);
    dchar[] resultString1utf32 = toUpper(testString1utf32);
    assert(resultString1utf32 == refString1utf32);

    // 2) Buffer passed, no resize, no SpecialCase
    char[60] buffer1utf8;
    wchar[30] buffer1utf16;
    dchar[30] buffer1utf32;
    resultString1utf8 = toUpper(testString1utf8,buffer1utf8);
    assert(resultString1utf8.ptr == buffer1utf8.ptr);
    assert(resultString1utf8 == refString1utf8);
    resultString1utf16 = toUpper(testString1utf16,buffer1utf16);
    assert(resultString1utf16.ptr == buffer1utf16.ptr);
    assert(resultString1utf16 == refString1utf16);
    resultString1utf32 = toUpper(testString1utf32,buffer1utf32);
    assert(resultString1utf32.ptr == buffer1utf32.ptr);
    assert(resultString1utf32 == refString1utf32);

    // 3) Buffer passed, resize necessary, no Special case

    char[5] buffer2utf8;
    wchar[2] buffer2utf16;
    dchar[2] buffer2utf32;
    resultString1utf8 = toUpper(testString1utf8,buffer2utf8);
    assert(resultString1utf8.ptr != buffer2utf8.ptr);
    assert(resultString1utf8 == refString1utf8);
    resultString1utf16 = toUpper(testString1utf16,buffer2utf16);
    assert(resultString1utf16.ptr != buffer2utf16.ptr);
    assert(resultString1utf16 == refString1utf16);
    resultString1utf32 = toUpper(testString1utf32,buffer2utf32);
    assert(resultString1utf32.ptr != buffer2utf32.ptr);
    assert(resultString1utf32 == refString1utf32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase


    const(char)[] testString2utf8 = "\uFB03\uFB04\uFB05";
    const(wchar)[] testString2utf16 = "\uFB03\uFB04\uFB05";
    const(dchar)[] testString2utf32 = "\uFB03\uFB04\uFB05";
    const(char)[] refString2utf8 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    const(wchar)[] refString2utf16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    const(dchar)[] refString2utf32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    resultString1utf8 = toUpper(testString2utf8,buffer2utf8);
    assert(resultString1utf8.ptr != buffer2utf8.ptr);
    assert(resultString1utf8 == refString2utf8);
    resultString1utf16 = toUpper(testString2utf16,buffer2utf16);
    assert(resultString1utf16.ptr != buffer2utf16.ptr);
    assert(resultString1utf16 == refString2utf16);
    resultString1utf32 = toUpper(testString2utf32,buffer2utf32);
    assert(resultString1utf32.ptr != buffer2utf32.ptr);
    assert(resultString1utf32 == refString2utf32);

}


// toLower: all three encodings, with and without a caller supplied buffer
unittest {


    // 1) No Buffer passed, no resize, no SpecialCase

    const(char)[] testString1utf8 = "\u00C4\u00D6\u00DC";
    const(wchar)[] testString1utf16 = "\u00C4\u00D6\u00DC";
    const(dchar)[] testString1utf32 = "\u00C4\u00D6\u00DC";
    const(char)[] refString1utf8 = "\u00E4\u00F6\u00FC";
    const(wchar)[] refString1utf16 = "\u00E4\u00F6\u00FC";
    const(dchar)[] refString1utf32 = "\u00E4\u00F6\u00FC";
    const(char)[] resultString1utf8 = toLower(testString1utf8);
    assert(resultString1utf8 == refString1utf8);
    const(wchar)[] resultString1utf16 = toLower(testString1utf16);
    assert(resultString1utf16 == refString1utf16);
    const(dchar)[] resultString1utf32 = toLower(testString1utf32);
    assert(resultString1utf32 == refString1utf32);

    // 2) Buffer passed, no resize, no SpecialCase
    char[60] buffer1utf8;
    wchar[30] buffer1utf16;
    dchar[30] buffer1utf32;
    resultString1utf8 = toLower(testString1utf8,buffer1utf8);
    assert(resultString1utf8.ptr == buffer1utf8.ptr);
    assert(resultString1utf8 == refString1utf8);
    resultString1utf16 = toLower(testString1utf16,buffer1utf16);
    assert(resultString1utf16.ptr == buffer1utf16.ptr);
    assert(resultString1utf16 == refString1utf16);
    resultString1utf32 = toLower(testString1utf32,buffer1utf32);
    assert(resultString1utf32.ptr == buffer1utf32.ptr);
    assert(resultString1utf32 == refString1utf32);

    // 3) Buffer passed, resize necessary, no Special case

    char[5] buffer2utf8;
    wchar[2] buffer2utf16;
    dchar[2] buffer2utf32;
    resultString1utf8 = toLower(testString1utf8,buffer2utf8);
    assert(resultString1utf8.ptr != buffer2utf8.ptr);
    assert(resultString1utf8 == refString1utf8);
    resultString1utf16 = toLower(testString1utf16,buffer2utf16);
    assert(resultString1utf16.ptr != buffer2utf16.ptr);
    assert(resultString1utf16 == refString1utf16);
    resultString1utf32 = toLower(testString1utf32,buffer2utf32);
    assert(resultString1utf32.ptr != buffer2utf32.ptr);
    assert(resultString1utf32 == refString1utf32);

    // 4) Buffer passed, resize necessary, extensive SpecialCase

    const(char)[] testString2utf8 = "\u0130\u0130\u0130";
    const(wchar)[] testString2utf16 = "\u0130\u0130\u0130";
    const(dchar)[] testString2utf32 = "\u0130\u0130\u0130";
    const(char)[] refString2utf8 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    const(wchar)[] refString2utf16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    const(dchar)[] refString2utf32 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    resultString1utf8 = toLower(testString2utf8,buffer2utf8);
    assert(resultString1utf8.ptr != buffer2utf8.ptr);
    assert(resultString1utf8 == refString2utf8);
    resultString1utf16 = toLower(testString2utf16,buffer2utf16);
    assert(resultString1utf16.ptr != buffer2utf16.ptr);
    assert(resultString1utf16 == refString2utf16);
    resultString1utf32 = toLower(testString2utf32,buffer2utf32);
    assert(resultString1utf32.ptr != buffer2utf32.ptr);
    assert(resultString1utf32 == refString2utf32);
}

// toFold: strings differing only in case must fold to the same text
unittest {
    const(char)[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    const(char)[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf8) == toFold(testString2utf8));
    const(wchar)[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    const(wchar)[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf16) == toFold(testString2utf16));
    // These must be dchar strings: declared as wchar they would only
    // repeat the Utf16 check and never reach the Utf32 overload.
    const(dchar)[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    const(dchar)[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf32) == toFold(testString2utf32));
}

}