1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2008 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Apr 2008: Initial release
8                         Jan 2009: Added GCChunk allocator
9 
10         authors:        Kris, schveiguy
11 
12         Since:          0.99.7
13 
14 *******************************************************************************/
15 
16 module tango.util.container.Container;
17 
18 private import tango.core.Memory;
19 
20 private import tango.stdc.stdlib;
21 private import tango.stdc.string;
22 
23 /*******************************************************************************
24 
25         Utility functions and constants
26 
27 *******************************************************************************/
28 
29 struct Container
30 {
31         /***********************************************************************
32         
33                default initial number of buckets of a non-empty hashmap
34 
35         ***********************************************************************/
36         
37         enum size_t defaultInitialBuckets = 31;
38 
39         /***********************************************************************
40 
41                 default load factor for a non-empty hashmap. The hash 
42                 table is resized when the proportion of elements per 
43                 buckets exceeds this limit
44         
45         ***********************************************************************/
46         
47         enum float defaultLoadFactor = 0.75f;
48 
49         /***********************************************************************
50         
51                 generic value reaper, which does nothing
52 
53         ***********************************************************************/
54         
55         static void reap(V) (V v) {}
56 
57         /***********************************************************************
58         
59                 generic key/value reaper, which does nothing
60 
61         ***********************************************************************/
62         
63         static void reap(K, V) (K k, V v) {}
64 
65         /***********************************************************************
66 
67                 generic hash function, using the default hashing. Thanks
68                 to 'mwarning' for the optimization suggestion
69 
70         ***********************************************************************/
71 
72         static size_t hash(K) (K k, size_t length)
73         {
74                 static if (is(K : int)   || is(K : uint)   || 
75                            is(K : long)  || is(K : ulong)  || 
76                            is(K : short) || is(K : ushort) ||
77                            is(K : byte)  || is(K : ubyte)  ||
78                            is(K : char)  || is(K : wchar)  || is (K : dchar))
79                            return cast(size_t) (k % length);
80                 else
81                    return (typeid(K).getHash(&k) & 0x7FFFFFFF) % length;
82         }
83 
84 
85         /***********************************************************************
86         
87                 GC Chunk allocator
88 
89                 Can save approximately 30% memory for small elements (tested 
90                 with integer elements and a chunk size of 1000), and is at 
91                 least twice as fast at adding elements when compared to the 
92                 generic allocator (approximately 50x faster with LinkedList)
93         
94                 Operates safely with GC managed entities
95 
96         ***********************************************************************/
97         
98         struct ChunkGC(T)
99         {
100                 static assert (T.sizeof >= (T*).sizeof, "The ChunkGC allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");
101 
102                 private struct Cache {Cache* next;}
103 
104                 private Cache*  cache;
105                 private T[][]   lists;
106                 private size_t  chunks = 256;
107 
108                 /***************************************************************
109         
110                         allocate a T-sized memory chunk
111                         
112                 ***************************************************************/
113 
114                 T* allocate ()
115                 {
116                         if (cache is null)
117                             newlist();
118                         auto p = cache;
119                         cache = p.next;
120                         return cast(T*) p;
121                 }
122         
123                 /***************************************************************
124         
125                         allocate an array of T* sized memory chunks
126                         
127                 ***************************************************************/
128         
129                 T*[] allocate (size_t count)
130                 {
131                         auto p = (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
132                         GC.addRange (cast(void*) p, count * (T*).sizeof);
133                         return p;
134                 }
135         
136                 /***************************************************************
137         
138                         Invoked when a specific T*[] is discarded
139                         
140                 ***************************************************************/
141         
142                 void collect (T*[] t)
143                 {      
144                         if (t.ptr)
145                            {
146                            GC.removeRange (t.ptr);
147                            free (t.ptr);
148                            }
149                 }
150 
151                 /***************************************************************
152         
153                         Invoked when a specific T is discarded
154                         
155                 ***************************************************************/
156         
157                 void collect (T* p)
158                 {      
159                         assert (p);
160                         auto d = cast(Cache*) p;
161                         //*p = T.init;
162                         d.next = cache;
163                         cache = d;
164                 }
165         
166                 /***************************************************************
167         
168                         Invoked when clear/reset is called on the host. 
169                         This is a shortcut to clear everything allocated.
170         
171                         Should return true if supported, or false otherwise. 
172                         False return will cause a series of discrete collect
173                         calls
174                         
175                 ***************************************************************/
176         
177                 bool collect (bool all = true)
178                 {
179                         if (all)
180                            {
181                            foreach (ref list; lists)
182                                    {
183                                    GC.removeRange (list.ptr);
184                                    free (list.ptr);
185                                    list = null;
186                                    }
187                            cache = null;
188                            lists = null;
189                            return true;
190                            }
191                         return false;
192                 }
193               
194                 /***************************************************************
195         
196                         set the chunk size and prepopulate with nodes
197                         
198                 ***************************************************************/
199         
200                 void config (size_t chunks, size_t allocate=0)
201                 {
202                         this.chunks = chunks;
203                         if (allocate)
204                             for (size_t i=allocate/chunks+1; i--;)
205                                  newlist();
206                 }
207         
208                 /***************************************************************
209         
210                         list manager
211                         
212                 ***************************************************************/
213         
214                 private void newlist ()
215                 {
216                         lists.length = lists.length + 1;
217                         auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
218                         lists[$-1] = p;
219                         GC.addRange (p.ptr, T.sizeof * chunks);
220                         auto head = cache;
221                         foreach (ref node; p)
222                                 {
223                                 auto d = cast(Cache*) &node;
224                                 d.next = head;
225                                 head = d;
226                                 }
227                         cache = head;
228                 }
229         }        
230 
231 
232         /***********************************************************************
233         
234                 Chunk allocator (non GC)
235 
236                 Can save approximately 30% memory for small elements (tested 
237                 with integer elements and a chunk size of 1000), and is at 
238                 least twice as fast at adding elements when compared to the 
239                 default allocator (approximately 50x faster with LinkedList)
240         
241                 Note that, due to GC behaviour, you should not configure
242                 a custom allocator for containers holding anything managed
243                 by the GC. For example, you cannot use a MallocAllocator
244                 to manage a container of classes or strings where those 
245                 were allocated by the GC. Once something is owned by a GC
246                 then it's lifetime must be managed by GC-managed entities
247                 (otherwise the GC may think there are no live references
248                 and prematurely collect container contents).
249         
250                 You can explicity manage the collection of keys and values
251                 yourself by providing a reaper delegate. For example, if 
252                 you use a MallocAllocator to manage key/value pairs which
253                 are themselves allocated via malloc, then you should also
254                 provide a reaper delegate to collect those as required.
255 
256                 The primary benefit of this allocator is to avoid the GC
257                 scanning the date-structures involved. Use ChunkGC where
258                 that option is unwarranted, or if you have GC-managed data
259                 instead
260               
261         ***********************************************************************/
262         
263         struct Chunk(T)
264         {
265                 static assert (T.sizeof >= (T*).sizeof, "The Chunk allocator can only be used for data sizes of at least " ~ ((T*).sizeof).stringof[0..$-1] ~ " bytes!");
266 
267                 private struct Cache {Cache* next;}
268 
269                 private Cache*  cache;
270                 private T[][]   lists;
271                 private size_t  chunks = 256;
272 
273                 /***************************************************************
274         
275                         allocate a T-sized memory chunk
276                         
277                 ***************************************************************/
278 
279                 T* allocate ()
280                 {
281                         if (cache is null)
282                             newlist();
283                         auto p = cache;
284                         cache = p.next;
285                         return cast(T*) p;
286                 }
287         
288                 /***************************************************************
289         
290                         allocate an array of T* sized memory chunks
291                         
292                 ***************************************************************/
293         
294                 T*[] allocate (size_t count)
295                 {
296                         return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
297                 }
298         
299                 /***************************************************************
300         
301                         Invoked when a specific T*[] is discarded
302                         
303                 ***************************************************************/
304         
305                 void collect (T*[] t)
306                 {      
307                         if (t.ptr)
308                             free (t.ptr);
309                 }
310 
311                 /***************************************************************
312         
313                         Invoked when a specific T is discarded
314                         
315                 ***************************************************************/
316         
317                 void collect (T* p)
318                 {      
319                         assert (p);
320                         auto d = cast(Cache*) p;
321                         d.next = cache;
322                         cache = d;
323                 }
324         
325                 /***************************************************************
326         
327                         Invoked when clear/reset is called on the host. 
328                         This is a shortcut to clear everything allocated.
329         
330                         Should return true if supported, or false otherwise. 
331                         False return will cause a series of discrete collect
332                         calls
333                         
334                 ***************************************************************/
335         
336                 bool collect (bool all = true)
337                 {
338                         if (all)
339                            {
340                            foreach (ref list; lists)
341                                    {
342                                    free (list.ptr);
343                                    list = null;
344                                    }
345                            cache = null;
346                            lists = null;
347                            return true;
348                            }
349                         return false;
350                 }
351               
352                 /***************************************************************
353         
354                         set the chunk size and prepopulate with nodes
355                         
356                 ***************************************************************/
357         
358                 void config (size_t chunks, int allocate=0)
359                 {
360                         this.chunks = chunks;
361                         if (allocate)
362                             for (int i=allocate/chunks+1; i--;)
363                                  newlist();
364                 }
365         
366                 /***************************************************************
367         
368                         list manager
369                         
370                 ***************************************************************/
371         
372                 private void newlist ()
373                 {
374                         lists.length = lists.length + 1;
375                         auto p = (cast(T*) calloc (chunks, T.sizeof)) [0 .. chunks];
376                         lists[$-1] = p;
377                         auto head = cache;
378                         foreach (ref node; p)
379                                 {
380                                 auto d = cast(Cache*) &node;
381                                 d.next = head;
382                                 head = d;
383                                 }
384                         cache = head;
385                 }
386         }        
387 
388 
389         /***********************************************************************
390         
391                 generic GC allocation manager
392 
393                 Slow and expensive in memory costs
394                 
395         ***********************************************************************/
396         
397         struct Collect(T)
398         {
399                 /***************************************************************
400         
401                         allocate a T sized memory chunk
402                         
403                 ***************************************************************/
404         
405                 T* allocate ()
406                 {       
407                         return cast(T*) GC.calloc (T.sizeof);
408                 }
409         
410                 /***************************************************************
411         
412                         allocate an array of T sized memory chunks
413                         
414                 ***************************************************************/
415         
416                 T*[] allocate (size_t count)
417                 {
418                         return new T*[count];
419                 }
420         
421                 /***************************************************************
422         
423                         Invoked when a specific T[] is discarded
424                         
425                 ***************************************************************/
426         
427                 void collect (T* p)
428                 {
429                         if (p)
430                             delete p;
431                 }
432         
433                 /***************************************************************
434         
435                         Invoked when a specific T[] is discarded
436                         
437                 ***************************************************************/
438         
439                 void collect (T*[] t)
440                 {      
441                         if (t)
442                             delete t;
443                 }
444 
445                 /***************************************************************
446         
447                         Invoked when clear/reset is called on the host. 
448                         This is a shortcut to clear everything allocated.
449         
450                         Should return true if supported, or false otherwise. 
451                         False return will cause a series of discrete collect
452                         calls
453 
454                 ***************************************************************/
455         
456                 bool collect (bool all = true)
457                 {
458                         return false;
459                 }
460 
461                 /***************************************************************
462         
463                         set the chunk size and prepopulate with nodes
464                         
465                 ***************************************************************/
466         
467                 void config (size_t chunks, int allocate=0)
468                 {
469                 }
470         }        
471         
472                 
473         /***********************************************************************
474         
475                 Malloc allocation manager.
476 
477                 Note that, due to GC behaviour, you should not configure
478                 a custom allocator for containers holding anything managed
479                 by the GC. For example, you cannot use a MallocAllocator
480                 to manage a container of classes or strings where those 
481                 were allocated by the GC. Once something is owned by a GC
482                 then it's lifetime must be managed by GC-managed entities
483                 (otherwise the GC may think there are no live references
484                 and prematurely collect container contents).
485         
486                 You can explicity manage the collection of keys and values
487                 yourself by providing a reaper delegate. For example, if 
488                 you use a MallocAllocator to manage key/value pairs which
489                 are themselves allocated via malloc, then you should also
490                 provide a reaper delegate to collect those as required.      
491                 
492         ***********************************************************************/
493         
494         struct Malloc(T)
495         {
496                 /***************************************************************
497         
498                         allocate an array of T sized memory chunks
499                         
500                 ***************************************************************/
501         
502                 T* allocate ()
503                 {
504                         return cast(T*) calloc (1, T.sizeof);
505                 }
506         
507                 /***************************************************************
508         
509                         allocate an array of T sized memory chunks
510                         
511                 ***************************************************************/
512         
513                 T*[] allocate (size_t count)
514                 {
515                         return (cast(T**) calloc(count, (T*).sizeof)) [0 .. count];
516                 }
517         
518                 /***************************************************************
519         
520                         Invoked when a specific T[] is discarded
521                         
522                 ***************************************************************/
523         
524                 void collect (T*[] t)
525                 {      
526                         if (t.length)
527                             free (t.ptr);
528                 }
529 
530                 /***************************************************************
531         
532                         Invoked when a specific T[] is discarded
533                         
534                 ***************************************************************/
535         
536                 void collect (T* p)
537                 {       
538                         if (p)
539                             free (p);
540                 }
541         
542                 /***************************************************************
543         
544                         Invoked when clear/reset is called on the host. 
545                         This is a shortcut to clear everything allocated.
546         
547                         Should return true if supported, or false otherwise. 
548                         False return will cause a series of discrete collect
549                         calls
550                         
551                 ***************************************************************/
552         
553                 bool collect (bool all = true)
554                 {
555                         return false;
556                 }
557 
558                 /***************************************************************
559         
560                         set the chunk size and prepopulate with nodes
561                         
562                 ***************************************************************/
563         
564                 void config (size_t chunks, int allocate=0)
565                 {
566                 }
567         }        
568         
569         
570 version (prior_allocator)
571 {
572         /***********************************************************************
573         
574                 GCChunk allocator
575 
576                 Like the Chunk allocator, this allocates elements in chunks,
577                 but allows you to allocate elements that can have GC pointers.
578 
579                 Tests have shown about a 60% speedup when using the GC chunk
580                 allocator for a Hashmap!(int, int).
581         
582         ***********************************************************************/
583 
584         struct GCChunk(T, uint chunkSize)
585         {
586             static if(T.sizeof < (void*).sizeof)
587             {
588                 static assert(false, "Error, allocator for " ~ T.stringof ~ " failed to instantiate");
589             }
590 
591             /**
592              * This is the form used to link recyclable elements together.
593              */
594             struct element
595             {
596                 element *next;
597             }
598 
599             /**
600              * A chunk of elements
601              */
602             struct chunk
603             {
604                 /**
605                  * The next chunk in the chain
606                  */
607                 chunk *next;
608 
609                 /**
610                  * The previous chunk in the chain.  Required for O(1) removal
611                  * from the chain.
612                  */
613                 chunk *prev;
614 
615                 /**
616                  * The linked list of free elements in the chunk.  This list is
617                  * amended each time an element in this chunk is freed.
618                  */
619                 element *freeList;
620 
621                 /**
622                  * The number of free elements in the freeList.  Used to determine
623                  * whether this chunk can be given back to the GC
624                  */
625                 uint numFree;
626 
627                 /**
628                  * The elements in the chunk.
629                  */
630                 T[chunkSize] elems;
631 
632                 /**
633                  * Allocate a T* from the free list.
634                  */
635                 T *allocateFromFree()
636                 {
637                     element *x = freeList;
638                     freeList = x.next;
639                     //
640                     // clear the pointer, this clears the element as if it was
641                     // newly allocated
642                     //
643                     x.next = null;
644                     numFree--;
645                     return cast(T*)x;
646                 }
647 
648                 /**
649                  * deallocate a T*, send it to the free list
650                  *
651                  * returns true if this chunk no longer has any used elements.
652                  */
653                 bool deallocate(T *t)
654                 {
655                     //
656                     // clear the element so the GC does not interpret the element
657                     // as pointing to anything else.
658                     //
659                     memset(t, 0, (T).sizeof);
660                     element *x = cast(element *)t;
661                     x.next = freeList;
662                     freeList = x;
663                     return (++numFree == chunkSize);
664                 }
665             }
666 
667             /**
668              * The chain of used chunks.  Used chunks have had all their elements
669              * allocated at least once.
670              */
671             chunk *used;
672 
673             /**
674              * The fresh chunk.  This is only used if no elements are available in
675              * the used chain.
676              */
677             chunk *fresh;
678 
679             /**
680              * The next element in the fresh chunk.  Because we don't worry about
681              * the free list in the fresh chunk, we need to keep track of the next
682              * fresh element to use.
683              */
684             uint nextFresh;
685 
686             /**
687              * Allocate a T*
688              */
689             T* allocate()
690             {
691                 if(used !is null && used.numFree > 0)
692                 {
693                     //
694                     // allocate one element of the used list
695                     //
696                     T* result = used.allocateFromFree();
697                     if(used.numFree == 0)
698                         //
699                         // move used to the end of the list
700                         //
701                         used = used.next;
702                     return result;
703                 }
704 
705                 //
706                 // no used elements are available, allocate out of the fresh
707                 // elements
708                 //
709                 if(fresh is null)
710                 {
711                     fresh = new chunk;
712                     nextFresh = 0;
713                 }
714 
715                 T* result = &fresh.elems[nextFresh];
716                 if(++nextFresh == chunkSize)
717                 {
718                     if(used is null)
719                     {
720                         used = fresh;
721                         fresh.next = fresh;
722                         fresh.prev = fresh;
723                     }
724                     else
725                     {
726                         //
727                         // insert fresh into the used chain
728                         //
729                         fresh.prev = used.prev;
730                         fresh.next = used;
731                         fresh.prev.next = fresh;
732                         fresh.next.prev = fresh;
733                         if(fresh.numFree != 0)
734                         {
735                             //
736                             // can recycle elements from fresh
737                             //
738                             used = fresh;
739                         }
740                     }
741                     fresh = null;
742                 }
743                 return result;
744             }
745 
746             T*[] allocate(uint count)
747             {
748                 return new T*[count];
749             }
750 
751 
752             /**
753              * free a T*
754              */
755             void collect(T* t)
756             {
757                 //
758                 // need to figure out which chunk t is in
759                 //
760                 chunk *cur = cast(chunk *)GC.addrOf(t);
761 
762                 if(cur !is fresh && cur.numFree == 0)
763                 {
764                     //
765                     // move cur to the front of the used list, it has free nodes
766                     // to be used.
767                     //
768                     if(cur !is used)
769                     {
770                         if(used.numFree != 0)
771                         {
772                             //
773                             // first, unlink cur from its current location
774                             //
775                             cur.prev.next = cur.next;
776                             cur.next.prev = cur.prev;
777 
778                             //
779                             // now, insert cur before used.
780                             //
781                             cur.prev = used.prev;
782                             cur.next = used;
783                             used.prev = cur;
784                             cur.prev.next = cur;
785                         }
786                         used = cur;
787                     }
788                 }
789 
790                 if(cur.deallocate(t))
791                 {
792                     //
793                     // cur no longer has any elements in use, it can be deleted.
794                     //
795                     if(cur.next is cur)
796                     {
797                         //
798                         // only one element, don't free it.
799                         //
800                     }
801                     else
802                     {
803                         //
804                         // remove cur from list
805                         //
806                         if(used is cur)
807                         {
808                             //
809                             // update used pointer
810                             //
811                             used = used.next;
812                         }
813                         cur.next.prev = cur.prev;
814                         cur.prev.next = cur.next;
815                         delete cur;
816                     }
817                 }
818             }
819 
820             void collect(T*[] t)
821             {
822                 if(t)
823                     delete t;
824             }
825 
826             /**
827              * Deallocate all chunks used by this allocator.  Depends on the GC to do
828              * the actual collection
829              */
830             bool collect(bool all = true)
831             {
832                 used = null;
833 
834                 //
835                 // keep fresh around
836                 //
837                 if(fresh !is null)
838                 {
839                     nextFresh = 0;
840                     fresh.freeList = null;
841                 }
842 
843                 return true;
844             }
845 
846             void config (size_t chunks, int allocate=0)
847             {
848             }
849         }
850 
851         /***********************************************************************
852 
853                 aliases to the correct Default allocator depending on how big
854                 the type is.  It makes less sense to use a GCChunk allocator
855                 if the type is going to be larger than a page (currently there
856                 is no way to get the page size from the GC, so we assume 4096
857                 bytes).  If not more than one unit can fit into a page, then
858                 we use the default GC allocator.
859 
860         ***********************************************************************/
861         template DefaultCollect(T)
862         {
863             static if((T).sizeof + ((void*).sizeof * 3) + uint.sizeof >= 4095 / 2)
864             {
865                 alias Collect!(T) DefaultCollect;
866             }
867             else
868             {
869                 alias GCChunk!(T, (4095 - ((void *).sizeof * 3) - uint.sizeof) / (T).sizeof) DefaultCollect;
870             }
871             // TODO: see if we can automatically figure out whether a type has
872             // any pointers in it, this would allow automatic usage of the
873             // Chunk allocator for added speed.
874         }
875 }
876 else
877    template DefaultCollect(T) {alias ChunkGC!(T) DefaultCollect;}
878 
879 }
880 
881