tango.text.Unicode source code

1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2007 Peter Triller. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Initial release: Sept 2007
8 
9         authors:        Peter
10 
11         Provides case mapping Functions for Unicode Strings. As of now it is
12         only 99 % complete, because it does not take into account Conditional
13         case mappings. This means the Greek Letter Sigma will not be correctly
14         case mapped at the end of a Word, and the Locales Lithuanian, Turkish
15         and Azeri are not taken into account during Case Mappings. This means
16         all in all around 12 Characters will not be mapped correctly under
17         some circumstances.
18 
19         ICU4j also does not handle these cases at the moment.
20 
21         Unittests are written against output from ICU4j
22 
23         This Module tries to minimize Memory allocation and usage. You can
24         always pass the output buffer that should be used to the case mapping
25         function, which will be resized if necessary.
26 
27 *******************************************************************************/
28 
29 module tango.text.Unicode;
30 
31 private import tango.text.UnicodeData;
32 private import tango.text.convert.Utf;
33 
34 
35 
36 /**
37  * Converts an Utf8 String to Upper case
38  *
39  * Params:
40  *     input = String to be case mapped
41  *     output = this output buffer will be used unless too small
42  * Returns: the case mapped string
43  */
44 /+deprecated char[] blockToUpper(char[] input, char[] output = null, dchar[] working = null) {
45 
46     // ?? How much preallocation ?? This is worst case allocation
47     if (working is null)
48         working.length = input.length;
49 
50     uint produced = 0;
51     size_t ate;
52     uint oprod = 0;
53     foreach(dchar ch; input) {
54         // TODO Conditional Case Mapping
55         UnicodeData *d = getUnicodeData(ch);
56         if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
57             SpecialCaseData *s = getSpecialCaseData(ch);
58             debug {
59                 assert(s !is null);
60             }
61             if(s.upperCaseMapping !is null) {
62                 // To speed up, use worst case for memory prealocation
63                 // since the length of an UpperCaseMapping list is at most 4
64                 // Make sure no relocation is made in the toString Method
65                 // better allocation algorithm ?
66                 auto len = s.upperCaseMapping.length;
67                 if(produced + len >= working.length)
68                     working.length = working.length + working.length / 2 +  len;
69                 oprod = produced;
70                 produced += len;
71                 working[oprod..produced] = s.upperCaseMapping;
72                 continue;
73             }
74         }
75         // Make sure no relocation is made in the toString Method
76         if(produced + 1 >= output.length)
77             working.length = working.length + working.length / 2 + 1;
78         working[produced++] =  d is null ? ch:d.simpleUpperCaseMapping;
79     }
80     return toString(working[0..produced],output);
81 }+/
82 
83 
84 
/**
 * Upper-cases a UTF-8 string.
 *
 * Every code point of the input is looked up in the Unicode tables. When it
 * carries a special (possibly multi-character) upper case mapping that
 * mapping is emitted, otherwise its simple upper case mapping is used. Code
 * points without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
char[] toUpper(const(char)[] input, char[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one simply-mapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-8 code units emitted so far

    foreach (dchar c; input) {
        // TODO Conditional Case Mapping
        UnicodeData* info = getUnicodeData(c);

        if (info !is null && (info.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* special = getSpecialCaseData(c);
            debug assert(special !is null);

            if (special.upperCaseMapping !is null) {
                // Reserve four code units per mapped code point before
                // encoding, so the encoder can write straight into the slice
                // (checked by the debug asserts below).
                auto worst = special.upperCaseMapping.length * 4;
                if (written + worst >= output.length)
                    output.length = output.length + output.length / 2 + worst;

                char[] expanded = toString(special.upperCaseMapping, output[written .. $], &consumed);
                debug {
                    assert(consumed == special.upperCaseMapping.length);
                    assert(expanded.ptr == output[written .. $].ptr);
                }
                written += expanded.length;
                continue;
            }
        }

        // Simple mapping: make sure four more code units fit.
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;

        if (info is null)
            single[0] = c;
        else
            single[0] = info.simpleUpperCaseMapping;

        char[] encoded = toString(single, output[written .. $], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output[written .. $].ptr);
        }
        written += encoded.length;
    }

    return output[0 .. written];
}
139 
140 
/**
 * Upper-cases a UTF-16 string.
 *
 * Every code point of the input is looked up in the Unicode tables. When it
 * carries a special (possibly multi-character) upper case mapping that
 * mapping is emitted, otherwise its simple upper case mapping is used. Code
 * points without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
wchar[] toUpper(const(wchar)[] input, wchar[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one simply-mapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-16 code units emitted so far

    foreach (dchar c; input) {
        // TODO Conditional Case Mapping
        UnicodeData* info = getUnicodeData(c);

        if (info !is null && (info.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* special = getSpecialCaseData(c);
            debug assert(special !is null);

            if (special.upperCaseMapping !is null) {
                // Grow ahead of the encoder so it can write straight into the
                // slice (checked by the debug asserts below).
                auto count = special.upperCaseMapping.length;
                if (written + count * 2 >= output.length)
                    output.length = output.length + output.length / 2 + count * 3;

                wchar[] expanded = toString16(special.upperCaseMapping, output[written .. $], &consumed);
                debug {
                    assert(consumed == count);
                    assert(expanded.ptr == output[written .. $].ptr);
                }
                written += expanded.length;
                continue;
            }
        }

        // Simple mapping: keep some slack behind the write position.
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;

        if (info is null)
            single[0] = c;
        else
            single[0] = info.simpleUpperCaseMapping;

        wchar[] encoded = toString16(single, output[written .. $], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output[written .. $].ptr);
        }
        written += encoded.length;
    }

    return output[0 .. written];
}
194 
/**
 * Converts an Utf32 String to Upper case
 *
 * Every code point is looked up in the Unicode tables. When it carries a
 * special (possibly multi-character) upper case mapping that mapping is
 * emitted, otherwise its simple upper case mapping is used. Code points
 * without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
dchar[] toUpper(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            debug {
                assert(s !is null);
            }
            if(s.upperCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.upperCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.upperCaseMapping.length;
                foreach(ch; s.upperCaseMapping) {
                    output[produced++] = ch;
                }
                // Skip the simple mapping only when a special mapping was
                // actually written. A character flagged SpecialMapping that
                // has no special upper case form must fall through, as in the
                // char and wchar overloads, or it would vanish from the result.
                continue;
            }
        }
        // "+ 1" guarantees the buffer really grows even when length / 2 is 0
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:d.simpleUpperCaseMapping;
    }
    return output[0..produced];
}
235 
236 
/**
 * Lower-cases a UTF-8 string.
 *
 * Every code point of the input is looked up in the Unicode tables. When it
 * carries a special (possibly multi-character) lower case mapping that
 * mapping is emitted, otherwise its simple lower case mapping is used. Code
 * points without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
char[] toLower(const(char)[] input, char[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one simply-mapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-8 code units emitted so far

    foreach (dchar c; input) {
        // TODO Conditional Case Mapping
        UnicodeData* info = getUnicodeData(c);

        if (info !is null && (info.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* special = getSpecialCaseData(c);
            debug assert(special !is null);

            if (special.lowerCaseMapping !is null) {
                // Reserve four code units per mapped code point before
                // encoding, so the encoder can write straight into the slice
                // (checked by the debug asserts below).
                auto worst = special.lowerCaseMapping.length * 4;
                if (written + worst >= output.length)
                    output.length = output.length + output.length / 2 + worst;

                char[] expanded = toString(special.lowerCaseMapping, output[written .. $], &consumed);
                debug {
                    assert(consumed == special.lowerCaseMapping.length);
                    assert(expanded.ptr == output[written .. $].ptr);
                }
                written += expanded.length;
                continue;
            }
        }

        // Simple mapping: make sure four more code units fit.
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 4;

        if (info is null)
            single[0] = c;
        else
            single[0] = info.simpleLowerCaseMapping;

        char[] encoded = toString(single, output[written .. $], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output[written .. $].ptr);
        }
        written += encoded.length;
    }

    return output[0 .. written];
}
291 
292 
/**
 * Lower-cases a UTF-16 string.
 *
 * Every code point of the input is looked up in the Unicode tables. When it
 * carries a special (possibly multi-character) lower case mapping that
 * mapping is emitted, otherwise its simple lower case mapping is used. Code
 * points without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
wchar[] toLower(const(wchar)[] input, wchar[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one simply-mapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-16 code units emitted so far

    foreach (dchar c; input) {
        // TODO Conditional Case Mapping
        UnicodeData* info = getUnicodeData(c);

        if (info !is null && (info.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData* special = getSpecialCaseData(c);
            debug assert(special !is null);

            if (special.lowerCaseMapping !is null) {
                // Grow ahead of the encoder so it can write straight into the
                // slice (checked by the debug asserts below).
                auto count = special.lowerCaseMapping.length;
                if (written + count * 2 >= output.length)
                    output.length = output.length + output.length / 2 + count * 3;

                wchar[] expanded = toString16(special.lowerCaseMapping, output[written .. $], &consumed);
                debug {
                    assert(consumed == count);
                    assert(expanded.ptr == output[written .. $].ptr);
                }
                written += expanded.length;
                continue;
            }
        }

        // Simple mapping: keep some slack behind the write position.
        if (written + 4 >= output.length)
            output.length = output.length + output.length / 2 + 3;

        if (info is null)
            single[0] = c;
        else
            single[0] = info.simpleLowerCaseMapping;

        wchar[] encoded = toString16(single, output[written .. $], &consumed);
        debug {
            assert(consumed == 1);
            assert(encoded.ptr == output[written .. $].ptr);
        }
        written += encoded.length;
    }

    return output[0 .. written];
}
346 
347 
/**
 * Converts an Utf32 String to Lower case
 *
 * Every code point is looked up in the Unicode tables. When it carries a
 * special (possibly multi-character) lower case mapping that mapping is
 * emitted, otherwise its simple lower case mapping is used. Code points
 * without table data are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = this output buffer will be used unless too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
dchar[] toLower(const(dchar)[] input, dchar[] output = null) {

    // assume most common case: String stays the same length
    if (input.length > output.length)
        output.length = input.length;

    size_t produced = 0;
    foreach(dchar orig; input) {
        // TODO Conditional Case Mapping
        UnicodeData *d = getUnicodeData(orig);
        if(d !is null && (d.generalCategory & UnicodeData.GeneralCategory.SpecialMapping)) {
            SpecialCaseData *s = getSpecialCaseData(orig);
            debug {
                assert(s !is null);
            }
            if(s.lowerCaseMapping !is null) {
                // Better resize strategy ???
                if(produced + s.lowerCaseMapping.length > output.length)
                    output.length = output.length + output.length / 2 + s.lowerCaseMapping.length;
                foreach(ch; s.lowerCaseMapping) {
                    output[produced++] = ch;
                }
                // Skip the simple mapping only when a special mapping was
                // actually written. A character flagged SpecialMapping that
                // has no special lower case form must fall through, as in the
                // char and wchar overloads, or it would vanish from the result.
                continue;
            }
        }
        // "+ 1" guarantees the buffer really grows even when length / 2 is 0
        if(produced >= output.length)
            output.length = output.length + output.length / 2 + 1;
        output[produced++] = d is null ? orig:d.simpleLowerCaseMapping;
    }
    return output[0..produced];
}
388 
/**
 * Case-folds a UTF-8 string.
 * The folded form is meant for case insensitive comparisons.
 *
 * Code points that have folding data are replaced by their folding mapping;
 * all others are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
char[] toFold(const(char)[] input, char[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one unmapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-8 code units emitted so far

    foreach (dchar c; input) {
        FoldingCaseData* fold = getFoldingCaseData(c);

        if (fold is null) {
            // No folding data: re-encode the code point as it is, after
            // making sure four more code units fit.
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 4;

            single[0] = c;
            char[] encoded = toString(single, output[written .. $], &consumed);
            debug {
                assert(consumed == 1);
                assert(encoded.ptr == output[written .. $].ptr);
            }
            written += encoded.length;
        } else {
            // Reserve four code units per mapped code point before encoding,
            // so the encoder can write straight into the slice (checked by
            // the debug asserts below).
            auto worst = fold.mapping.length * 4;
            if (written + worst >= output.length)
                output.length = output.length + output.length / 2 + worst;

            char[] encoded = toString(fold.mapping, output[written .. $], &consumed);
            debug {
                assert(consumed == fold.mapping.length);
                assert(encoded.ptr == output[written .. $].ptr);
            }
            written += encoded.length;
        }
    }

    return output[0 .. written];
}
437 
/**
 * Case-folds a UTF-16 string.
 * The folded form is meant for case insensitive comparisons.
 *
 * Code points that have folding data are replaced by their folding mapping;
 * all others are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
wchar[] toFold(const(wchar)[] input, wchar[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (input.length > output.length)
        output.length = input.length;

    dchar[1] single;    // one unmapped code point, handed to the encoder
    size_t consumed;    // number of code points the encoder reports as handled
    auto written = 0;   // UTF-16 code units emitted so far

    foreach (dchar c; input) {
        FoldingCaseData* fold = getFoldingCaseData(c);

        if (fold is null) {
            // No folding data: re-encode the code point as it is, keeping
            // some slack behind the write position.
            if (written + 4 >= output.length)
                output.length = output.length + output.length / 2 + 3;

            single[0] = c;
            wchar[] encoded = toString16(single, output[written .. $], &consumed);
            debug {
                assert(consumed == 1);
                assert(encoded.ptr == output[written .. $].ptr);
            }
            written += encoded.length;
        } else {
            // Grow ahead of the encoder so it can write straight into the
            // slice (checked by the debug asserts below).
            auto count = fold.mapping.length;
            if (written + count * 2 >= output.length)
                output.length = output.length + output.length / 2 + count * 3;

            wchar[] encoded = toString16(fold.mapping, output[written .. $], &consumed);
            debug {
                assert(consumed == count);
                assert(encoded.ptr == output[written .. $].ptr);
            }
            written += encoded.length;
        }
    }

    return output[0 .. written];
}
485 
/**
 * Case-folds a UTF-32 string.
 * The folded form is meant for case insensitive comparisons.
 *
 * Code points that have folding data are replaced by their folding mapping;
 * all others are copied unchanged.
 *
 * Params:
 *     input = String to be case mapped
 *     output = buffer to write into; it is resized when it is too small
 * Returns: the case mapped string, a slice of the (possibly resized) buffer
 */
dchar[] toFold(const(dchar)[] input, dchar[] output = null) {

    // Most strings keep their length, so start with room for the whole input.
    if (output.length < input.length)
        output.length = input.length;

    uint count = 0;     // code points emitted so far

    foreach (dchar c; input) {
        FoldingCaseData* fold = getFoldingCaseData(c);

        if (fold is null) {
            // No folding data: copy the code point through.
            if (count >= output.length)
                output.length = output.length + output.length / 2;
            output[count++] = c;
        } else {
            // Better resize strategy ???
            if (count + fold.mapping.length > output.length)
                output.length = output.length + output.length / 2 + fold.mapping.length;
            foreach (mapped; fold.mapping)
                output[count++] = mapped;
        }
    }

    return output[0 .. count];
}
520 
521 
/**
 * Tells whether a character is a decimal digit, i.e. whether its general
 * category is Nd. Characters without Unicode data are never digits.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true for decimal digits only
 */
bool isDigit(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Nd) != 0;
}
533 
534 
/**
 * Tells whether a character is a letter, i.e. whether its general category
 * is one of Lu, Ll, Lt, Lm or Lo. Characters without Unicode data are never
 * letters.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a letter
 */
bool isLetter(int ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;

    auto letters = UnicodeData.GeneralCategory.Lu
                 | UnicodeData.GeneralCategory.Ll
                 | UnicodeData.GeneralCategory.Lt
                 | UnicodeData.GeneralCategory.Lm
                 | UnicodeData.GeneralCategory.Lo;
    return (info.generalCategory & letters) != 0;
}
550 
/**
 * Tells whether a character is a letter or a decimal digit, i.e. whether its
 * general category is one of Lu, Ll, Lt, Lm, Lo or Nd. Characters without
 * Unicode data are neither.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a letter or a decimal digit
 */
bool isLetterOrDigit(int ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;

    auto lettersAndDigits = UnicodeData.GeneralCategory.Lu
                          | UnicodeData.GeneralCategory.Ll
                          | UnicodeData.GeneralCategory.Lt
                          | UnicodeData.GeneralCategory.Lm
                          | UnicodeData.GeneralCategory.Lo
                          | UnicodeData.GeneralCategory.Nd;
    return (info.generalCategory & lettersAndDigits) != 0;
}
568 
/**
 * Tells whether a character is a lower case letter (general category Ll).
 * Characters without Unicode data are never lower case.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a lower case letter
 */
bool isLower(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Ll) != 0;
}
578 
/**
 * Tells whether a character is a title case letter (general category Lt).
 * In a title case combined letter only the first part is upper case and the
 * second is lower case. Some of these special characters occur in the
 * Croatian and Greek languages.
 * Characters without Unicode data are never title case.
 *
 * See_Also: http://en.wikipedia.org/wiki/Capitalization
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a title case letter
 */
bool isTitle(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Lt) != 0;
}
591 
/**
 * Tells whether a character is an upper case letter (general category Lu).
 * Characters without Unicode data are never upper case.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is an upper case letter
 */
bool isUpper(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;
    return (info.generalCategory & UnicodeData.GeneralCategory.Lu) != 0;
}
601 
/**
 * Determines if a character is a Whitespace character.
 * Whitespace characters are characters in the
 * General Categories Zs, Zl, Zp without the No Break
 * spaces (U+00A0, U+2007, U+202F, U+FEFF), plus the
 * ASCII control characters that are used as spaces:
 * TAB LF VT FF CR (U+0009 .. U+000D) and
 * FS GS RS US (U+001C .. U+001F).
 *
 * Note that U+0085 (NEXT LINE) lies in neither of these
 * ranges and is therefore not special-cased here.
 *
 * WARNING: look at isSpace, maybe that function does
 *          more what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character counts as whitespace
 */
bool isWhitespace(dchar ch) {
    // ASCII control characters that act as separators
    if((ch >= 0x0009 && ch <= 0x000D) || (ch >= 0x001C && ch <= 0x001F))
        return true;

    // No-break spaces never count as whitespace; reject them before the
    // table lookup.
    if(ch == 0x00A0     // NBSP
    || ch == 0x2007     // FIGURE SPACE (no-break)
    || ch == 0x202F     // NARROW NBSP
    || ch == 0xFEFF)    // ZERO WIDTH NBSP
        return false;

    UnicodeData *d = getUnicodeData(ch);
    return (d !is null) && (d.generalCategory &
            ( UnicodeData.GeneralCategory.Zs
            | UnicodeData.GeneralCategory.Zl
            | UnicodeData.GeneralCategory.Zp));
}
628 
/**
 * Tells whether a character is a space character in the sense of the Unicode
 * Standard, i.e. whether its general category is Zs, Zl or Zp. Characters
 * without Unicode data are never spaces.
 *
 * WARNING: look at isWhitespace, maybe that function does
 *          more what you expect.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is a separator of category Zs, Zl or Zp
 */
bool isSpace(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;

    auto separators = UnicodeData.GeneralCategory.Zs
                    | UnicodeData.GeneralCategory.Zl
                    | UnicodeData.GeneralCategory.Zp;
    return (info.generalCategory & separators) != 0;
}
646 
647 
/**
 * Tells whether a character is printable: it must have Unicode data and its
 * general category must be none of Cn, Cc, Cf, Co or Cs.
 *
 * Params:
 *     ch = the character to be inspected
 * Returns: true if the character is printable
 */
bool isPrintable(dchar ch) {
    UnicodeData* info = getUnicodeData(ch);
    if (info is null)
        return false;

    auto nonPrinting = UnicodeData.GeneralCategory.Cn
                     | UnicodeData.GeneralCategory.Cc
                     | UnicodeData.GeneralCategory.Cf
                     | UnicodeData.GeneralCategory.Co
                     | UnicodeData.GeneralCategory.Cs;
    return (info.generalCategory & nonPrinting) == 0;
}
664 
// The colon form of the debug condition applies to every declaration that
// follows it up to the end of the module, so both the empty main() below and
// the unittest section after it are compiled only when the UnicodeTest debug
// identifier is set.
debug ( UnicodeTest ):
    // Empty entry point so the module can be built stand-alone for testing.
    void main() {}
667 
668 debug (UnitTest) {
669 
unittest {

    // Fixtures: three lower case umlauts with their upper case form, and
    // three ligatures whose upper case form expands to several letters.
    const(char)[]  umlauts8  = "\u00E4\u00F6\u00FC";
    const(wchar)[] umlauts16 = "\u00E4\u00F6\u00FC";
    const(dchar)[] umlauts32 = "\u00E4\u00F6\u00FC";
    const(char)[]  umlautsUpper8  = "\u00C4\u00D6\u00DC";
    const(wchar)[] umlautsUpper16 = "\u00C4\u00D6\u00DC";
    const(dchar)[] umlautsUpper32 = "\u00C4\u00D6\u00DC";

    const(char)[]  ligatures8  = "\uFB03\uFB04\uFB05";
    const(wchar)[] ligatures16 = "\uFB03\uFB04\uFB05";
    const(dchar)[] ligatures32 = "\uFB03\uFB04\uFB05";
    const(char)[]  ligaturesUpper8  = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    const(wchar)[] ligaturesUpper16 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";
    const(dchar)[] ligaturesUpper32 = "\u0046\u0046\u0049\u0046\u0046\u004C\u0053\u0054";

    // Buffers that are large enough ...
    char[60]  roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    // ... and buffers that are too small, forcing a reallocation.
    char[5]  tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No buffer passed, no resize, no special case
    char[] got8 = toUpper(umlauts8);
    assert(got8 == umlautsUpper8);
    wchar[] got16 = toUpper(umlauts16);
    assert(got16 == umlautsUpper16);
    dchar[] got32 = toUpper(umlauts32);
    assert(got32 == umlautsUpper32);

    // 2) Buffer passed, no resize, no special case: the result must live
    //    in the caller's buffer
    got8 = toUpper(umlauts8, roomy8);
    assert(got8.ptr == roomy8.ptr);
    assert(got8 == umlautsUpper8);
    got16 = toUpper(umlauts16, roomy16);
    assert(got16.ptr == roomy16.ptr);
    assert(got16 == umlautsUpper16);
    got32 = toUpper(umlauts32, roomy32);
    assert(got32.ptr == roomy32.ptr);
    assert(got32 == umlautsUpper32);

    // 3) Buffer passed, resize necessary, no special case: the result must
    //    have moved out of the caller's buffer
    got8 = toUpper(umlauts8, tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == umlautsUpper8);
    got16 = toUpper(umlauts16, tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == umlautsUpper16);
    got32 = toUpper(umlauts32, tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == umlautsUpper32);

    // 4) Buffer passed, resize necessary, extensive special casing
    got8 = toUpper(ligatures8, tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == ligaturesUpper8);
    got16 = toUpper(ligatures16, tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == ligaturesUpper16);
    got32 = toUpper(ligatures32, tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == ligaturesUpper32);

}
737 
738 
unittest {

    // Fixtures: three upper case umlauts with their lower case form, and
    // U+0130, whose lower case form expands to two code points.
    const(char)[]  umlauts8  = "\u00C4\u00D6\u00DC";
    const(wchar)[] umlauts16 = "\u00C4\u00D6\u00DC";
    const(dchar)[] umlauts32 = "\u00C4\u00D6\u00DC";
    const(char)[]  umlautsLower8  = "\u00E4\u00F6\u00FC";
    const(wchar)[] umlautsLower16 = "\u00E4\u00F6\u00FC";
    const(dchar)[] umlautsLower32 = "\u00E4\u00F6\u00FC";

    const(char)[]  dotted8  = "\u0130\u0130\u0130";
    const(wchar)[] dotted16 = "\u0130\u0130\u0130";
    const(dchar)[] dotted32 = "\u0130\u0130\u0130";
    const(char)[]  dottedLower8  = "\u0069\u0307\u0069\u0307\u0069\u0307";
    const(wchar)[] dottedLower16 = "\u0069\u0307\u0069\u0307\u0069\u0307";
    const(dchar)[] dottedLower32 = "\u0069\u0307\u0069\u0307\u0069\u0307";

    // Buffers that are large enough ...
    char[60]  roomy8;
    wchar[30] roomy16;
    dchar[30] roomy32;
    // ... and buffers that are too small, forcing a reallocation.
    char[5]  tight8;
    wchar[2] tight16;
    dchar[2] tight32;

    // 1) No buffer passed, no resize, no special case
    const(char)[] got8 = toLower(umlauts8);
    assert(got8 == umlautsLower8);
    const(wchar)[] got16 = toLower(umlauts16);
    assert(got16 == umlautsLower16);
    const(dchar)[] got32 = toLower(umlauts32);
    assert(got32 == umlautsLower32);

    // 2) Buffer passed, no resize, no special case: the result must live
    //    in the caller's buffer
    got8 = toLower(umlauts8, roomy8);
    assert(got8.ptr == roomy8.ptr);
    assert(got8 == umlautsLower8);
    got16 = toLower(umlauts16, roomy16);
    assert(got16.ptr == roomy16.ptr);
    assert(got16 == umlautsLower16);
    got32 = toLower(umlauts32, roomy32);
    assert(got32.ptr == roomy32.ptr);
    assert(got32 == umlautsLower32);

    // 3) Buffer passed, resize necessary, no special case: the result must
    //    have moved out of the caller's buffer
    got8 = toLower(umlauts8, tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == umlautsLower8);
    got16 = toLower(umlauts16, tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == umlautsLower16);
    got32 = toLower(umlauts32, tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == umlautsLower32);

    // 4) Buffer passed, resize necessary, extensive special casing
    got8 = toLower(dotted8, tight8);
    assert(got8.ptr != tight8.ptr);
    assert(got8 == dottedLower8);
    got16 = toLower(dotted16, tight16);
    assert(got16.ptr != tight16.ptr);
    assert(got16 == dottedLower16);
    got32 = toLower(dotted32, tight32);
    assert(got32.ptr != tight32.ptr);
    assert(got32 == dottedLower32);
}
804 
unittest {
    // Two spellings that differ only in case (and in U+0390 being written
    // precomposed versus as U+03B9 U+0308 U+0301) must fold to the same
    // string in every encoding.
    const(char)[] testString1utf8 = "?!Mädchen \u0390\u0390,;";
    const(char)[] testString2utf8 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf8) == toFold(testString2utf8));
    const(wchar)[] testString1utf16 = "?!Mädchen \u0390\u0390,;";
    const(wchar)[] testString2utf16 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf16) == toFold(testString2utf16));
    // These must be dchar strings: declared as wchar they would select the
    // UTF-16 overload a second time and leave the UTF-32 overload untested.
    const(dchar)[] testString1utf32 = "?!Mädchen \u0390\u0390,;";
    const(dchar)[] testString2utf32 = "?!MÄDCHEN \u03B9\u0308\u0301\u03B9\u0308\u0301,;";
    assert(toFold(testString1utf32) == toFold(testString2utf32));
}
816 
817 }