1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2008 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Apr 2008: Initial release
8 
9         authors:        Kris
10 
11         Since:          0.99.7
12 
13         Based upon Doug Lea's Java collection package
14 
15 *******************************************************************************/
16 
17 module tango.util.container.HashSet;
18 
19 private import  tango.util.container.Slink;
20 
21 public  import  tango.util.container.Container;
22 
23 private import tango.util.container.model.IContainer;
24 
25 /*******************************************************************************
26 
27         Hash table implementation of a Set
28 
29         ---
30         Iterator iterator ()
31         int opApply (scope int delegate(ref V value) dg)
32 
33         bool add (V element)
34         bool contains (V element)
35         bool take (ref V element)
36         bool remove (V element)
37         size_t remove (IContainer!(V) e)
38         bool replace (V oldElement, V newElement)
39 
40         size_t size ()
41         bool isEmpty ()
42         V[] toArray (V[] dst)
43         HashSet dup ()
44         HashSet clear ()
45         HashSet reset ()
46 
47         size_t buckets ()
48         void buckets (size_t cap)
49         float threshold ()
50         void threshold (float desired)
51         ---
52 
53 *******************************************************************************/
54 
55 class HashSet (V, alias Hash = Container.hash, 
56                   alias Reap = Container.reap, 
57                   alias Heap = Container.DefaultCollect) 
58                   : IContainer!(V)
59 {
60         // use this type for Allocator configuration
61         public alias Slink!(V)  Type;
62         
63         private alias Type      *Ref;
64 
65         private alias Heap!(Type) Alloc;
66 
67         // Each table entry is a list - null if no table allocated
68         private Ref[]             table;
69         
70         // number of elements contained
71         private size_t          count;
72 
73         // the threshold load factor
74         private float           loadFactor;
75 
76         // configured heap manager
77         private Alloc           heap;
78         
79         // mutation tag updates on each change
80         private size_t            mutation;
81 
82         /***********************************************************************
83 
84                 Construct a HashSet instance
85 
86         ***********************************************************************/
87 
88         this (float f = Container.defaultLoadFactor)
89         {
90                 loadFactor = f;
91         }
92 
93         /***********************************************************************
94 
95                 Clean up when deleted
96 
97         ***********************************************************************/
98 
99         ~this ()
100         {
101                 reset;
102         }
103 
104         /***********************************************************************
105 
106                 Return a generic iterator for contained elements
107                 
108         ***********************************************************************/
109 
110         final Iterator iterator ()
111         {
112                 Iterator i = void;
113                 i.mutation = mutation;
114                 i.table = table;
115                 i.owner = this;
116                 i.cell = null;
117                 i.row = 0;
118                 return i;
119         }
120 
121         /***********************************************************************
122 
123 
124         ***********************************************************************/
125 
126         final int opApply (scope int delegate(ref V value) dg)
127         {
128                 return iterator.opApply (dg);
129         }
130 
131         /***********************************************************************
132 
133                 Return the number of elements contained
134                 
135         ***********************************************************************/
136 
137         @property final const size_t size ()
138         {
139                 return count;
140         }
141         
142         /***********************************************************************
143 
144                 Add a new element to the set. Does not add if there is an
145                 equivalent already present. Returns true where an element
146                 is added, false where it already exists
147                 
148                 Time complexity: O(1) average; O(n) worst.
149                 
150         ***********************************************************************/
151 
152         final bool add (V element)
153         {
154                 if (table is null)
155                     resize (Container.defaultInitialBuckets);
156 
157                 auto h = Hash  (element, table.length);
158                 auto hd = table[h];
159 
160                 if (hd && hd.find (element))
161                     return false;
162 
163                 table[h] = allocate.set (element, hd);
164                 increment;
165 
166                 // only check if bin was nonempty                    
167                 if (hd)
168                     checkLoad; 
169                 return true;
170         }
171 
172         /***********************************************************************
173 
174                 Does this set contain the given element?
175         
176                 Time complexity: O(1) average; O(n) worst
177                 
178         ***********************************************************************/
179 
180         final bool contains (V element)
181         {
182                 if (count)
183                    {
184                    auto p = table[Hash (element, table.length)];
185                    if (p && p.find (element))
186                        return true;
187                    }
188                 return false;
189         }
190 
191         /***********************************************************************
192 
193                 Make an independent copy of the container. Does not clone
194                 elements
195                 
196                 Time complexity: O(n)
197                 
198         ***********************************************************************/
199 
200         @property final HashSet dup ()
201         {
202                 auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor);
203                 
204                 if (count)
205                    {
206                    clone.buckets (buckets);
207 
208                    foreach (value; iterator)
209                             clone.add (value);
210                    }
211                 return clone;
212         }
213 
214         /***********************************************************************
215 
216                 Remove the provided element. Returns true if found, false
217                 otherwise
218                 
219                 Time complexity: O(1) average; O(n) worst
220 
221         ***********************************************************************/
222 
223         final size_t remove (V element, bool all)
224         {
225                 return remove(element) ? 1 : 0;
226         }
227 
228         /***********************************************************************
229 
230                 Remove the provided element. Returns true if found, false
231                 otherwise
232                 
233                 Time complexity: O(1) average; O(n) worst
234 
235         ***********************************************************************/
236 
237         final bool remove (V element)
238         {
239                 if (count)
240                    {
241                    auto h = Hash (element, table.length);
242                    auto hd = table[h];
243                    auto trail = hd;
244                    auto p = hd;
245 
246                    while (p)
247                          {
248                          auto n = p.next;
249                          if (element == p.value)
250                             {
251                             decrement (p);
252                             if (p is table[h])
253                                {
254                                table[h] = n;
255                                trail = n;
256                                }
257                             else
258                                trail.next = n;
259                             return true;
260                             } 
261                          else
262                             {
263                             trail = p;
264                             p = n;
265                             }
266                          }
267                    }
268                 return false;
269         }
270 
271         /***********************************************************************
272 
273                 Replace the first instance of oldElement with newElement.
274                 Returns true if oldElement was found and replaced, false
275                 otherwise.
276                 
277         ***********************************************************************/
278 
279         final size_t replace (V oldElement, V newElement, bool all)
280         {
281                 return replace (oldElement, newElement) ? 1 : 0;
282         }
283 
284         /***********************************************************************
285 
286                 Replace the first instance of oldElement with newElement.
287                 Returns true if oldElement was found and replaced, false
288                 otherwise.
289                 
290         ***********************************************************************/
291 
292         final bool replace (V oldElement, V newElement)
293         {
294 
295                 if (count && oldElement != newElement)
296                    if (contains (oldElement))
297                       {
298                       remove (oldElement);
299                       add (newElement);
300                       return true;
301                       }
302                 return false;
303         }
304 
305         /***********************************************************************
306 
307                 Remove and expose the first element. Returns false when no
308                 more elements are contained
309         
310                 Time complexity: O(n)
311 
312         ***********************************************************************/
313 
314         final bool take (ref V element)
315         {
316                 if (count)
317                     foreach (ref list; table)
318                              if (list)
319                                 {
320                                 auto p = list;
321                                 element = p.value;
322                                 list = p.next;
323                                 decrement (p);
324                                 return true;
325                                 }
326                 return false;
327         }
328 
329         /***********************************************************************
330 
331         ************************************************************************/
332 
333         public void add (IContainer!(V) e)
334         {
335                 foreach (value; e)
336                          add (value);
337         }
338 
339         /***********************************************************************
340 
341         ************************************************************************/
342 
343         public size_t remove (IContainer!(V) e)
344         {
345                 size_t c;
346                 foreach (value; e)
347                          if (remove (value))
348                              ++c;
349                 return c;
350         }
351 
352         /***********************************************************************
353 
354                 Clears the HashMap contents. Various attributes are
355                 retained, such as the internal table itself. Invoke
356                 reset() to drop everything.
357 
358                 Time complexity: O(n)
359                 
360         ***********************************************************************/
361 
362         final HashSet clear ()
363         {
364                 return clear (false);
365         }
366 
367         /***********************************************************************
368 
369                 Reset the HashSet contents and optionally configure a new
370                 heap manager. This releases more memory than clear() does
371 
372                 Time complexity: O(1)
373                 
374         ***********************************************************************/
375 
376         final HashSet reset ()
377         {
378                 clear (true);
379                 heap.collect (table);
380                 table = null;
381                 return this;
382         }
383 
384         /***********************************************************************
385 
386                 Return the number of buckets
387 
388                 Time complexity: O(1)
389 
390         ***********************************************************************/
391 
392         final size_t buckets ()
393         {
394                 return table ? table.length : 0;
395         }
396 
397         /***********************************************************************
398 
399                 Set the number of buckets and resize as required
400                 
401                 Time complexity: O(n)
402 
403         ***********************************************************************/
404 
405         final HashSet buckets (size_t cap)
406         {
407                 if (cap < Container.defaultInitialBuckets)
408                     cap = Container.defaultInitialBuckets;
409 
410                 if (cap !is buckets)
411                     resize (cap);
412                 return this;
413         }
414 
415         /***********************************************************************
416 
417                 Return the resize threshold
418                 
419                 Time complexity: O(1)
420 
421         ***********************************************************************/
422 
423         final const float threshold ()
424         {
425                 return loadFactor;
426         }
427 
428         /***********************************************************************
429 
430                 Set the resize threshold, and resize as required
431                 
432                 Time complexity: O(n)
433                 
434         ***********************************************************************/
435 
436         final void threshold (float desired)
437         {
438                 assert (desired > 0.0);
439                 loadFactor = desired;
440                 if (table)
441                     checkLoad;
442         }
443 
444         /***********************************************************************
445 
446                 Configure the assigned allocator with the size of each
447                 allocation block (number of nodes allocated at one time)
448                 and the number of nodes to pre-populate the cache with.
449                 
450                 Time complexity: O(n)
451 
452         ***********************************************************************/
453 
454         final HashSet cache (size_t chunk, size_t count=0)
455         {
456                 heap.config (chunk, count);
457                 return this;
458         }
459 
460         /***********************************************************************
461 
462                 Copy and return the contained set of values in an array, 
463                 using the optional dst as a recipient (which is resized 
464                 as necessary).
465 
466                 Returns a slice of dst representing the container values.
467                 
468                 Time complexity: O(n)
469                 
470         ***********************************************************************/
471 
472         final V[] toArray (V[] dst = null)
473         {
474                 if (dst.length < count)
475                     dst.length = count;
476 
477                 size_t i = 0;
478                 foreach (v; this)
479                          dst[i++] = v;
480                 return dst [0 .. count];                        
481         }
482 
483         /***********************************************************************
484 
485                 Is this container empty?
486                 
487                 Time complexity: O(1)
488                 
489         ***********************************************************************/
490 
491         final const bool isEmpty ()
492         {
493                 return count is 0;
494         }
495 
496         /***********************************************************************
497 
498                 Sanity check
499                  
500         ***********************************************************************/
501 
502         final HashSet check()
503         {
504                 assert(!(table is null && count !is 0));
505                 assert((table is null || table.length > 0));
506                 assert(loadFactor > 0.0f);
507 
508                 if (table)
509                    {
510                    size_t c = 0;
511                    for (size_t i = 0; i < table.length; ++i)
512                        {
513                        for (auto p = table[i]; p; p = p.next)
514                            {
515                            ++c;
516                            assert(contains(p.value));
517                            assert(Hash (p.value, table.length) is i);
518                            }
519                        }
520                    assert(c is count);
521                    }
522                 return this;
523         }
524 
525         /***********************************************************************
526 
527                 Allocate a node instance. This is used as the default allocator
528                  
529         ***********************************************************************/
530 
531         private Ref allocate ()
532         {
533                 return heap.allocate;
534         }
535         
536         /***********************************************************************
537 
538                  Check to see if we are past load factor threshold. If so,
539                  resize so that we are at half of the desired threshold.
540                  
541         ***********************************************************************/
542 
543         private void checkLoad ()
544         {
545                 float fc = count;
546                 float ft = table.length;
547                 if (fc / ft > loadFactor)
548                     resize (2 * cast(size_t)(fc / loadFactor) + 1);
549         }
550 
551         /***********************************************************************
552 
553                 resize table to new capacity, rehashing all elements
554                 
555         ***********************************************************************/
556 
557         private void resize (size_t newCap)
558         {
559                 //Stdout.formatln ("resize {}", newCap);
560                 auto newtab = heap.allocate (newCap);
561                 mutate;
562 
563                 foreach (bucket; table)
564                          while (bucket)
565                                {
566                                auto n = bucket.next;
567                                auto h = Hash (bucket.value, newCap);
568                                bucket.next = newtab[h];
569                                newtab[h] = bucket;
570                                bucket = n;
571                                }
572 
573                 // release the prior table and assign new one
574                 heap.collect (table);
575                 table = newtab;
576         }
577 
578         /***********************************************************************
579 
580                 Remove the indicated node. We need to traverse buckets
581                 for this, since we're singly-linked only. Better to save
582                 the per-node memory than to gain a little on each remove
583 
584                 Used by iterators only
585                  
586         ***********************************************************************/
587 
588         private bool remove (Ref node, size_t row)
589         {
590                 auto hd = table[row];
591                 auto trail = hd;
592                 auto p = hd;
593 
594                 while (p)
595                       {
596                       auto n = p.next;
597                       if (p is node)
598                          {
599                          decrement (p);
600                          if (p is hd)
601                              table[row] = n;
602                          else
603                             trail.next = n;
604                          return true;
605                          } 
606                       else
607                          {
608                          trail = p;
609                          p = n;
610                          }
611                       }
612                 return false;
613         }
614 
615         /***********************************************************************
616 
617                 Clears the HashSet contents. Various attributes are
618                 retained, such as the internal table itself. Invoke
619                 reset() to drop everything.
620 
621                 Time complexity: O(n)
622                 
623         ***********************************************************************/
624 
625         private HashSet clear (bool all)
626         {
627                 mutate;
628 
629                 // collect each node if we can't collect all at once
630                 if (heap.collect(all) is false)
631                     foreach (ref v; table)
632                              while (v)
633                                    {
634                                    auto n = v.next;
635                                    decrement (v);
636                                    v = n;
637                                    }
638 
639                 // retain table, but remove bucket chains
640                 foreach (ref v; table)
641                          v = null;
642 
643                 count = 0;
644                 return this;
645         }
646 
647         /***********************************************************************
648 
649                 new element was added
650                 
651         ***********************************************************************/
652 
653         private void increment()
654         {
655                 ++mutation;
656                 ++count;
657         }
658         
659         /***********************************************************************
660 
661                 element was removed
662                 
663         ***********************************************************************/
664 
665         private void decrement (Ref p)
666         {
667                 Reap (p.value);
668                 heap.collect (p);
669                 ++mutation;
670                 --count;
671         }
672         
673         /***********************************************************************
674 
675                 set was changed
676                 
677         ***********************************************************************/
678 
679         private void mutate()
680         {
681                 ++mutation;
682         }
683 
684         /***********************************************************************
685 
686                 Iterator with no filtering
687 
688         ***********************************************************************/
689 
690         private struct Iterator
691         {
692                 size_t  row;
693                 Ref     cell,
694                         prior;
695                 Ref[]   table;
696                 HashSet owner;
697                 size_t  mutation;
698 
699                 /***************************************************************
700 
701                         Did the container change underneath us?
702 
703                 ***************************************************************/
704 
705                 bool valid ()
706                 {
707                         return owner.mutation is mutation;
708                 }               
709 
710                 /***************************************************************
711 
712                         Accesses the next value, and returns false when
713                         there are no further values to traverse
714 
715                 ***************************************************************/
716 
717                 bool next (ref V v)
718                 {
719                         auto n = next;
720                         return (n) ? v = *n, true : false;
721                 }
722                 
723                 /***************************************************************
724 
725                         Return a pointer to the next value, or null when
726                         there are no further values to traverse
727 
728                 ***************************************************************/
729 
730                 V* next ()
731                 {
732                         while (cell is null)
733                                if (row < table.length)
734                                    cell = table [row++];
735                                else
736                                   return null;
737   
738                         prior = cell;
739                         cell = cell.next;
740                         return &prior.value;
741                 }
742 
743                 /***************************************************************
744 
745                         Foreach support
746 
747                 ***************************************************************/
748 
749                 int opApply (scope int delegate(ref V value) dg)
750                 {
751                         int result;
752 
753                         auto c = cell;
754                         loop: while (true)
755                               {
756                               while (c is null)
757                                      if (row < table.length)
758                                          c = table [row++];
759                                      else
760                                         break loop;
761   
762                               prior = c;
763                               c = c.next;
764                               if ((result = dg(prior.value)) != 0)
765                                    break loop;
766                               }
767 
768                         cell = c;
769                         return result;
770                 }                               
771 
772                 /***************************************************************
773 
774                         Remove value at the current iterator location
775 
776                 ***************************************************************/
777 
778                 bool remove ()
779                 {
780                         if (prior)
781                             if (owner.remove (prior, row-1))
782                                {
783                                // ignore this change
784                                ++mutation;
785                                return true;
786                                }
787 
788                         prior = null;
789                         return false;
790                 }
791         }
792 }
793 
794 
795 
796 /*******************************************************************************
797 
798 *******************************************************************************/
799 
800 debug (HashSet)
801 {
802         import tango.io.Stdout;
803         import tango.core.Thread;
804         import tango.time.StopWatch;
805        
806         void main()
807         {
808                 // usage examples ...
809                 auto set = new HashSet!(char[]);
810                 set.add ("foo");
811                 set.add ("bar");
812                 set.add ("wumpus");
813 
814                 // implicit generic iteration
815                 foreach (value; set)
816                          Stdout (value).newline;
817 
818                 // explicit generic iteration
819                 foreach (value; set.iterator)
820                          Stdout (value).newline;
821 
822                 // generic iteration with optional remove
823                 auto s = set.iterator;
824                 foreach (value; s)
825                         {} // s.remove;
826 
827                 // incremental iteration, with optional remove
828                 char[] v;
829                 auto iterator = set.iterator;
830                 while (iterator.next(v))
831                       {} //iterator.remove;
832                 
833                 // incremental iteration, with optional failfast
834                 auto it = set.iterator;
835                 while (it.valid && it.next(v))
836                       {}
837 
838                 // remove specific element
839                 set.remove ("wumpus");
840 
841                 // remove first element ...
842                 while (set.take(v))
843                        Stdout.formatln ("taking {}, {} left", v, set.size);
844                 
845                 
846                 // setup for benchmark, with a set of integers. We
847                 // use a chunk allocator, and presize the bucket[]
848                 auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk);
849                 test.cache (1000, 1_000_000);
850                 test.buckets = 1_500_000;
851                 const count = 1_000_000;
852                 StopWatch w;
853 
854                 // benchmark adding
855                 w.start;
856                 for (int i=count; i--;)
857                      test.add(i);
858                 Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop);
859 
860                 // benchmark reading
861                 w.start;
862                 for (int i=count; i--;)
863                      test.contains(i);
864                 Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop);
865 
866                 // benchmark adding without allocation overhead
867                 test.clear;
868                 w.start;
869                 for (int i=count; i--;)
870                      test.add(i);
871                 Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop);
872 
873                 // benchmark duplication
874                 w.start;
875                 auto dup = test.dup;
876                 Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop);
877 
878                 // benchmark iteration
879                 w.start;
880                 foreach (value; test) {}
881                 Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop);
882 
883                 test.check;
884         }
885 }