1 //_ adi.d
2 
3 /**
4  * Part of the D programming language runtime library.
5  * Dynamic array property support routines
6  */
7 
8 /*
9  *  Copyright (C) 2000-2006 by Digital Mars, www.digitalmars.com
10  *  Written by Walter Bright
11  *
12  *  This software is provided 'as-is', without any express or implied
13  *  warranty. In no event will the authors be held liable for any damages
14  *  arising from the use of this software.
15  *
16  *  Permission is granted to anyone to use this software for any purpose,
17  *  including commercial applications, and to alter it and redistribute it
18  *  freely, in both source and binary form, subject to the following
19  *  restrictions:
20  *
21  *  o  The origin of this software must not be misrepresented; you must not
22  *     claim that you wrote the original software. If you use this software
23  *     in a product, an acknowledgment in the product documentation would be
24  *     appreciated but is not required.
25  *  o  Altered source versions must be plainly marked as such, and must not
26  *     be misrepresented as being the original software.
27  *  o  This notice may not be removed or altered from any source
28  *     distribution.
29  */
30 
31 /*
32  *  Modified by Sean Kelly <sean@f4.ca> for use with Tango.
33  */
34 
35 module rt.adi;
36 //debug=adi;            // uncomment to turn on debugging printf's
37 
38 private
39 {
40     import tango.stdc.string : memcpy, memmove, memcmp;
41     import tango.stdc.stdio : printf;
42     import rt.compiler.util.utf;
43 
44     enum BlkAttr : uint
45     {
46         FINALIZE = 0b0000_0001,
47         NO_SCAN  = 0b0000_0010,
48         NO_MOVE  = 0b0000_0100,
49         ALL_BITS = 0b1111_1111
50     }
51 
52     extern (C) void*  gc_malloc( size_t sz, uint ba = 0, PointerMap bitMask = PointerMap.init);
53     extern (C) void*  gc_calloc( size_t sz, uint ba = 0, PointerMap bitMask = PointerMap.init);
54     extern (C) void  gc_free( void* p );
55 }
56 
57 
58 /**********************************************
59  * Reverse array of chars.
60  * Handled separately because embedded multibyte encodings should not be
61  * reversed.
62  */
63 
64 extern (C) char[] _adReverseChar(char[] a)
65 {
66     bool hadErrors = false;
67     if (a.length > 1)
68     {
69         char[6] tmp;
70         char[6] tmplo;
71         char* lo = a.ptr;
72         char* hi = &(a[a.length - 1]);
73 
74         while (lo < hi)
75         {   auto clo = *lo;
76             auto chi = *hi;
77 
78 	    debug(adi) printf("lo = %d, hi = %d\n", lo, hi);
79             if (clo <= 0x7F && chi <= 0x7F)
80             {
81 		debug(adi) printf("\tascii\n");
82                 *lo = chi;
83                 *hi = clo;
84                 lo++;
85                 hi--;
86                 continue;
87             }
88 
89             int stridelo = UTF8stride[clo];
90             if (stridelo > 6) { // invalid UTF-8 0xFF 
91                 stridelo = 1; 
92                 hadErrors=true;
93             }
94 
95             int stridehi = 1;
96             while ((chi & 0xC0) == 0x80 && hi >= lo)
97             {
98                 chi = *--hi;
99                 stridehi++;
100             }
101             if (lo+stridelo> hi) {
102                 if (lo != hi) {
103                     hadErrors = true;
104                 }
105                 break;
106             }
107             if (stridehi > 6) {
108                 hadErrors = true;
109                 stridehi = 6;
110             }
111 
112 	    debug(adi) printf("\tstridelo = %d, stridehi = %d\n", stridelo, stridehi);
113             if (stridelo == stridehi)
114             {
115                 memcpy(tmp.ptr, lo, stridelo);
116                 memcpy(lo, hi, stridelo);
117                 memcpy(hi, tmp.ptr, stridelo);
118                 lo += stridelo;
119                 hi--;
120                 continue;
121             }
122 
123             /* Shift the whole array. This is woefully inefficient
124              */
125             memcpy(tmp.ptr, hi, stridehi);
126             memcpy(tmplo.ptr, lo, stridelo);
127             memmove(lo + stridehi, lo + stridelo , cast(size_t)(hi - lo) - stridelo);
128             memcpy(lo, tmp.ptr, stridehi);
129             memcpy(hi + stridehi - stridelo, tmplo.ptr, stridelo);
130 
131             lo += stridehi;
132             hi = hi - 1 + cast(ptrdiff_t)(stridehi - stridelo);
133         }
134     }
135     if (hadErrors)
136         throw new Exception("invalid UTF-8 sequence",__FILE__,__LINE__);
137     return a;
138 }
139 
140 unittest
141 {
142     char[] a = "abcd"c;
143 
144     char[] r = a.dup.reverse;
145     //writefln(r);
146     assert(r == "dcba");
147     r=r.reverse;
148     assert(r==a);
149 
150     a = "a\u1235\u1234c";
151     //writefln(a);
152     r = a.dup.reverse;
153     //writefln(r);
154     assert(r == "c\u1234\u1235a");
155     r=r.reverse;
156     assert(r==a);
157 
158     a = "ab\u1234c";
159     //writefln(a);
160     r = a.dup.reverse;
161     //writefln(r);
162     assert(r == "c\u1234ba");
163     r=r.reverse;
164     assert(r==a);
165 
166     a = "\u3026\u2021\u3061\n";
167     r = a.dup.reverse;
168     assert(r == "\n\u3061\u2021\u3026");
169     r=r.reverse;
170     assert(r==a);
171 }
172 
173 
174 /**********************************************
175  * Reverse array of wchars.
176  * Handled separately because embedded multiword encodings should not be
177  * reversed.
178  */
179 
180 extern (C) wchar[] _adReverseWchar(wchar[] a)
181 {
182     bool hadErrors = false;
183     if (a.length > 1)
184     {
185         wchar[2] tmp;
186         wchar* lo = a.ptr;
187         wchar* hi = &a[length - 1];
188 
189         while (lo < hi)
190         {   auto clo = *lo;
191             auto chi = *hi;
192 
193             if ((clo < 0xD800 || clo > 0xDFFF) &&
194                 (chi < 0xD800 || chi > 0xDFFF))
195             {
196                 *lo = chi;
197                 *hi = clo;
198                 lo++;
199                 hi--;
200                 continue;
201             }
202 
203             int stridelo = 1 + (clo >= 0xD800 && clo <= 0xDBFF);
204 
205             int stridehi = 1;
206             if (chi >= 0xDC00 && chi <= 0xDFFF)
207             {
208                 chi = *--hi;
209                 stridehi++;
210             }
211             if (lo >= hi) {
212                 if (lo > hi) {
213                     hadErrors = true;
214                 }
215                 break;
216             }
217 
218             if (stridelo == stridehi)
219             {   int stmp;
220 
221                 assert(stridelo == 2);
222                 assert(stmp.sizeof == 2 * (*lo).sizeof);
223                 stmp = *cast(int*)lo;
224                 *cast(int*)lo = *cast(int*)hi;
225                 *cast(int*)hi = stmp;
226                 lo += stridelo;
227                 hi--;
228                 continue;
229             }
230 
231             /* Shift the whole array. This is woefully inefficient
232              */
233             memcpy(tmp.ptr, hi, stridehi * wchar.sizeof);
234             memcpy(hi + stridehi - stridelo, lo, stridelo * wchar.sizeof);
235             memmove(lo + stridehi, lo + stridelo , (hi - (lo + stridelo)) * wchar.sizeof);
236             memcpy(lo, tmp.ptr, stridehi * wchar.sizeof);
237 
238             lo += stridehi;
239             hi = hi - 1 + (stridehi - stridelo);
240         }
241     }
242     if (hadErrors)
243         throw new Exception("invalid UTF-8 sequence",__FILE__,__LINE__);
244     return a;
245 }
246 
247 unittest
248 {
249     wchar[] a = "abcd";
250     wchar[] r;
251 
252     r = a.dup.reverse;
253     assert(r == "dcba");
254 
255     a = "a\U00012356\U00012346c";
256     r = a.dup.reverse;
257     assert(r == "c\U00012346\U00012356a");
258 
259     a = "ab\U00012345c";
260     r = a.dup.reverse;
261     assert(r == "c\U00012345ba");
262 }
263 
264 
265 /**********************************************
266  * Support for array.reverse property.
267  * The actual type is painted on the return value by the frontend
268  * Given and returned length are number of elements
269  */
270 
271 extern (C) void[] _adReverse(void[] a, size_t szelem)
272     out (result)
273     {
274         assert(result.ptr is a.ptr);
275     }
276     body
277     {
278         if (a.length >= 2)
279         {
280             byte*    tmp;
281             byte[16] buffer;
282 
283             void* lo = a.ptr;
284             void* hi = a.ptr + (a.length - 1) * szelem;
285 
286             tmp = buffer.ptr;
287             if (szelem > 16)
288             {
289                 //version (Win32)
290                     //tmp = cast(byte*) alloca(szelem);
291                 //else
292                     tmp = cast(byte*) gc_malloc(szelem);
293             }
294 
295             for (; lo < hi; lo += szelem, hi -= szelem)
296             {
297                 memcpy(tmp, lo,  szelem);
298                 memcpy(lo,  hi,  szelem);
299                 memcpy(hi,  tmp, szelem);
300             }
301 
302             version (Win32)
303             {
304             }
305             else
306             {
307                 //if (szelem > 16)
308                     // BUG: bad code is generate for delete pointer, tries
309                     // to call delclass.
310                     //gc_free(tmp);
311             }
312         }
313         return a.ptr[0 .. a.length];
314     }
315 
316 unittest
317 {
318     debug(adi) printf("array.reverse.unittest\n");
319 
320     int[] a = new int[5];
321     int[] b;
322     size_t i;
323 
324     for (i = 0; i < 5; i++)
325         a[i] = cast(int)i;
326     b = a.reverse;
327     assert(b is a);
328     for (i = 0; i < 5; i++)
329         assert(a[i] == 4 - i);
330 
331     struct X20
332     {   // More than 16 bytes in size
333         int a;
334         int b, c, d, e;
335     }
336 
337     X20[] c = new X20[5];
338     X20[] d;
339 
340     for (i = 0; i < 5; i++)
341     {   c[i].a = cast(int)i;
342         c[i].e = 10;
343     }
344     d = c.reverse;
345     assert(d is c);
346     for (i = 0; i < 5; i++)
347     {
348         assert(c[i].a == 4 - i);
349         assert(c[i].e == 10);
350     }
351 }
352 
353 /**********************************************
354  * Sort array of chars.
355  */
356 
357 extern (C) char[] _adSortChar(char[] a)
358 {
359     if (a.length > 1)
360     {
361 	dchar[] da = toUTF32(a);
362         da.sort;
363         size_t i = 0;
364         foreach (dchar d; da)
365         {   char[4] buf;
366             auto t = toUTF8(buf, d);
367             a[i .. i + t.length] = t[];
368             i += t.length;
369         }
370         delete da;
371     }
372     return a;
373 }
374 
375 /**********************************************
376  * Sort array of wchars.
377  */
378 
379 extern (C) wchar[] _adSortWchar(wchar[] a)
380 {
381     if (a.length > 1)
382     {
383 	dchar[] da = toUTF32(a);
384         da.sort;
385         size_t i = 0;
386         foreach (dchar d; da)
387         {   wchar[2] buf;
388 	    auto t = toUTF16(buf, d);
389             a[i .. i + t.length] = t[];
390             i += t.length;
391         }
392         delete da;
393     }
394     return a;
395 }
396 
397 /***************************************
398  * Support for array equality test.
399  * The actual type is painted on the return value by the frontend
400  * Given lengths are number of elements
401  */
402 
403 extern (C) int _adEq(void[] a1, void[] a2, TypeInfo ti)
404 {
405     debug(adi) printf("_adEq(a1.length = %d, a2.length = %d)\n", a1.length, a2.length);
406 
407     if (a1.length != a2.length)
408         return 0;               // not equal
409     else if (a1.ptr == a2.ptr)
410         return 1;               // equal
411 
412     // let typeinfo decide
413     return ti.equals(&a1, &a2);
414 }
415 
416 unittest
417 {
418     debug(adi) printf("array.Eq unittest\n");
419 
420     char[] a = "hello"c;
421 
422     assert(a != "hel");
423     assert(a != "helloo");
424     assert(a != "betty");
425     assert(a == "hello");
426     assert(a != "hxxxx");
427 }
428 
429 /***************************************
430  * Support for array compare test.
431  * The actual type is painted on the return value by the frontend
432  * Given lengths are number of elements
433  */
434 
435 extern (C) int _adCmp(void[] a1, void[] a2, TypeInfo ti)
436 {
437     debug(adi) printf("adCmp()\n");
438 
439     if (a1.ptr == a2.ptr &&
440         a1.length == a2.length)
441         return 0;
442 
443     auto len = a1.length;
444     if (a2.length < len)
445         len = a2.length;
446 
447     // let typeinfo decide
448     return ti.compare(&a1, &a2);
449 }
450 
451 unittest
452 {
453     debug(adi) printf("array.Cmp unittest\n");
454 
455     char[] a = "hello"c;
456 
457     assert(a >  "hel");
458     assert(a >= "hel");
459     assert(a <  "helloo");
460     assert(a <= "helloo");
461     assert(a >  "betty");
462     assert(a >= "betty");
463     assert(a == "hello");
464     assert(a <= "hello");
465     assert(a >= "hello");
466 }
467 
468 /***************************************
469  * Support for array compare test.
470  * The actual type is painted on the return value by the frontend
471  * Given lengths are number of elements
472  */
473 
474 extern (C) int _adCmpChar(void[] a1, void[] a2)
475 {
476   version(D_InlineAsm_X86)
477   {
478   //version = Asm86;
479   }
480   version (Asm86)
481   {
482     asm
483     {   naked                   ;
484 
485         push    EDI             ;
486         push    ESI             ;
487 
488         mov    ESI,a1+4[4+ESP]  ;
489         mov    EDI,a2+4[4+ESP]  ;
490 
491         mov    ECX,a1[4+ESP]    ;
492         mov    EDX,a2[4+ESP]    ;
493 
494         cmp     ECX,EDX         ;
495         jb      GotLength       ;
496 
497         mov     ECX,EDX         ;
498 
499 GotLength:
500         cmp    ECX,4            ;
501         jb    DoBytes           ;
502 
503         // Do alignment if neither is dword aligned
504         test    ESI,3           ;
505         jz    Aligned           ;
506 
507         test    EDI,3           ;
508         jz    Aligned           ;
509 DoAlign:
510         mov    AL,[ESI]         ; //align ESI to dword bounds
511         mov    DL,[EDI]         ;
512 
513         cmp    AL,DL            ;
514         jnz    Unequal          ;
515 
516         inc    ESI              ;
517         inc    EDI              ;
518 
519         test    ESI,3           ;
520 
521         lea    ECX,[ECX-1]      ;
522         jnz    DoAlign          ;
523 Aligned:
524         mov    EAX,ECX          ;
525 
526         // do multiple of 4 bytes at a time
527 
528         shr    ECX,2            ;
529         jz    TryOdd            ;
530 
531         repe                    ;
532         cmpsd                   ;
533 
534         jnz    UnequalQuad      ;
535 
536 TryOdd:
537         mov    ECX,EAX          ;
538 DoBytes:
539         // if still equal and not end of string, do up to 3 bytes slightly
540         // slower.
541 
542         and    ECX,3            ;
543         jz    Equal             ;
544 
545         repe                    ;
546         cmpsb                   ;
547 
548         jnz    Unequal          ;
549 Equal:
550         mov    EAX,a1[4+ESP]    ;
551         mov    EDX,a2[4+ESP]    ;
552 
553         sub    EAX,EDX          ;
554         pop    ESI              ;
555 
556         pop    EDI              ;
557         ret                     ;
558 
559 UnequalQuad:
560         mov    EDX,[EDI-4]      ;
561         mov    EAX,[ESI-4]      ;
562 
563         cmp    AL,DL            ;
564         jnz    Unequal          ;
565 
566         cmp    AH,DH            ;
567         jnz    Unequal          ;
568 
569         shr    EAX,16           ;
570 
571         shr    EDX,16           ;
572 
573         cmp    AL,DL            ;
574         jnz    Unequal          ;
575 
576         cmp    AH,DH            ;
577 Unequal:
578         sbb    EAX,EAX          ;
579         pop    ESI              ;
580 
581         or     EAX,1            ;
582         pop    EDI              ;
583 
584         ret                     ;
585     }
586   }
587   else
588   {
589     int len;
590     int c;
591 
592     debug(adi) printf("adCmpChar()\n");
593     len = cast(int)a1.length;
594     if (a2.length < len)
595         len = cast(int)a2.length;
596     c = memcmp(cast(char *)a1.ptr, cast(char *)a2.ptr, len);
597     if (!c)
598         c = cast(int)a1.length - cast(int)a2.length;
599     return c;
600   }
601 }
602 
603 unittest
604 {
605     debug(adi) printf("array.CmpChar unittest\n");
606 
607     char[] a = "hello"c;
608 
609     assert(a >  "hel");
610     assert(a >= "hel");
611     assert(a <  "helloo");
612     assert(a <= "helloo");
613     assert(a >  "betty");
614     assert(a >= "betty");
615     assert(a == "hello");
616     assert(a <= "hello");
617     assert(a >= "hello");
618 }