1 /******************************************************************************* 2 3 copyright: Copyright (c) 2008 Kris Bell. All rights reserved 4 5 license: BSD style: $(LICENSE) 6 7 version: Apr 2008: Initial release 8 9 authors: Kris 10 11 Since: 0.99.7 12 13 Based upon Doug Lea's Java collection package 14 15 *******************************************************************************/ 16 17 module tango.util.container.HashSet; 18 19 private import tango.util.container.Slink; 20 21 public import tango.util.container.Container; 22 23 private import tango.util.container.model.IContainer; 24 25 /******************************************************************************* 26 27 Hash table implementation of a Set 28 29 --- 30 Iterator iterator () 31 int opApply (scope int delegate(ref V value) dg) 32 33 bool add (V element) 34 bool contains (V element) 35 bool take (ref V element) 36 bool remove (V element) 37 size_t remove (IContainer!(V) e) 38 bool replace (V oldElement, V newElement) 39 40 size_t size () 41 bool isEmpty () 42 V[] toArray (V[] dst) 43 HashSet dup () 44 HashSet clear () 45 HashSet reset () 46 47 size_t buckets () 48 void buckets (size_t cap) 49 float threshold () 50 void threshold (float desired) 51 --- 52 53 *******************************************************************************/ 54 55 class HashSet (V, alias Hash = Container.hash, 56 alias Reap = Container.reap, 57 alias Heap = Container.DefaultCollect) 58 : IContainer!(V) 59 { 60 // use this type for Allocator configuration 61 public alias Slink!(V) Type; 62 63 private alias Type *Ref; 64 65 private alias Heap!(Type) Alloc; 66 67 // Each table entry is a list - null if no table allocated 68 private Ref[] table; 69 70 // number of elements contained 71 private size_t count; 72 73 // the threshold load factor 74 private float loadFactor; 75 76 // configured heap manager 77 private Alloc heap; 78 79 // mutation tag updates on each change 80 private size_t mutation; 81 82 /*********************************************************************** 83 84 Construct a HashSet instance 85 86 ***********************************************************************/ 87 88 this (float f = Container.defaultLoadFactor) 89 { 90 loadFactor = f; 91 } 92 93 /*********************************************************************** 94 95 Clean up when deleted 96 97 ***********************************************************************/ 98 99 ~this () 100 { 101 reset; 102 } 103 104 /*********************************************************************** 105 106 Return a generic iterator for contained elements 107 108 ***********************************************************************/ 109 110 final Iterator iterator () 111 { 112 Iterator i = void; 113 i.mutation = mutation; 114 i.table = table; 115 i.owner = this; 116 i.cell = null; 117 i.row = 0; 118 return i; 119 } 120 121 /*********************************************************************** 122 123 124 ***********************************************************************/ 125 126 final int opApply (scope int delegate(ref V value) dg) 127 { 128 return iterator.opApply (dg); 129 } 130 131 /*********************************************************************** 132 133 Return the number of elements contained 134 135 ***********************************************************************/ 136 137 @property final const size_t size () 138 { 139 return count; 140 } 141 142 /*********************************************************************** 143 144 Add a new element to the set. Does not add if there is an 145 equivalent already present. Returns true where an element 146 is added, false where it already exists 147 148 Time complexity: O(1) average; O(n) worst. 149 150 ***********************************************************************/ 151 152 final bool add (V element) 153 { 154 if (table is null) 155 resize (Container.defaultInitialBuckets); 156 157 auto h = Hash (element, table.length); 158 auto hd = table[h]; 159 160 if (hd && hd.find (element)) 161 return false; 162 163 table[h] = allocate.set (element, hd); 164 increment; 165 166 // only check if bin was nonempty 167 if (hd) 168 checkLoad; 169 return true; 170 } 171 172 /*********************************************************************** 173 174 Does this set contain the given element? 175 176 Time complexity: O(1) average; O(n) worst 177 178 ***********************************************************************/ 179 180 final bool contains (V element) 181 { 182 if (count) 183 { 184 auto p = table[Hash (element, table.length)]; 185 if (p && p.find (element)) 186 return true; 187 } 188 return false; 189 } 190 191 /*********************************************************************** 192 193 Make an independent copy of the container. Does not clone 194 elements 195 196 Time complexity: O(n) 197 198 ***********************************************************************/ 199 200 @property final HashSet dup () 201 { 202 auto clone = new HashSet!(V, Hash, Reap, Heap) (loadFactor); 203 204 if (count) 205 { 206 clone.buckets (buckets); 207 208 foreach (value; iterator) 209 clone.add (value); 210 } 211 return clone; 212 } 213 214 /*********************************************************************** 215 216 Remove the provided element. Returns true if found, false 217 otherwise 218 219 Time complexity: O(1) average; O(n) worst 220 221 ***********************************************************************/ 222 223 final size_t remove (V element, bool all) 224 { 225 return remove(element) ? 1 : 0; 226 } 227 228 /*********************************************************************** 229 230 Remove the provided element. Returns true if found, false 231 otherwise 232 233 Time complexity: O(1) average; O(n) worst 234 235 ***********************************************************************/ 236 237 final bool remove (V element) 238 { 239 if (count) 240 { 241 auto h = Hash (element, table.length); 242 auto hd = table[h]; 243 auto trail = hd; 244 auto p = hd; 245 246 while (p) 247 { 248 auto n = p.next; 249 if (element == p.value) 250 { 251 decrement (p); 252 if (p is table[h]) 253 { 254 table[h] = n; 255 trail = n; 256 } 257 else 258 trail.next = n; 259 return true; 260 } 261 else 262 { 263 trail = p; 264 p = n; 265 } 266 } 267 } 268 return false; 269 } 270 271 /*********************************************************************** 272 273 Replace the first instance of oldElement with newElement. 274 Returns true if oldElement was found and replaced, false 275 otherwise. 276 277 ***********************************************************************/ 278 279 final size_t replace (V oldElement, V newElement, bool all) 280 { 281 return replace (oldElement, newElement) ? 1 : 0; 282 } 283 284 /*********************************************************************** 285 286 Replace the first instance of oldElement with newElement. 287 Returns true if oldElement was found and replaced, false 288 otherwise. 289 290 ***********************************************************************/ 291 292 final bool replace (V oldElement, V newElement) 293 { 294 295 if (count && oldElement != newElement) 296 if (contains (oldElement)) 297 { 298 remove (oldElement); 299 add (newElement); 300 return true; 301 } 302 return false; 303 } 304 305 /*********************************************************************** 306 307 Remove and expose the first element. Returns false when no 308 more elements are contained 309 310 Time complexity: O(n) 311 312 ***********************************************************************/ 313 314 final bool take (ref V element) 315 { 316 if (count) 317 foreach (ref list; table) 318 if (list) 319 { 320 auto p = list; 321 element = p.value; 322 list = p.next; 323 decrement (p); 324 return true; 325 } 326 return false; 327 } 328 329 /*********************************************************************** 330 331 ************************************************************************/ 332 333 public void add (IContainer!(V) e) 334 { 335 foreach (value; e) 336 add (value); 337 } 338 339 /*********************************************************************** 340 341 ************************************************************************/ 342 343 public size_t remove (IContainer!(V) e) 344 { 345 size_t c; 346 foreach (value; e) 347 if (remove (value)) 348 ++c; 349 return c; 350 } 351 352 /*********************************************************************** 353 354 Clears the HashMap contents. Various attributes are 355 retained, such as the internal table itself. Invoke 356 reset() to drop everything. 357 358 Time complexity: O(n) 359 360 ***********************************************************************/ 361 362 final HashSet clear () 363 { 364 return clear (false); 365 } 366 367 /*********************************************************************** 368 369 Reset the HashSet contents and optionally configure a new 370 heap manager. This releases more memory than clear() does 371 372 Time complexity: O(1) 373 374 ***********************************************************************/ 375 376 final HashSet reset () 377 { 378 clear (true); 379 heap.collect (table); 380 table = null; 381 return this; 382 } 383 384 /*********************************************************************** 385 386 Return the number of buckets 387 388 Time complexity: O(1) 389 390 ***********************************************************************/ 391 392 final size_t buckets () 393 { 394 return table ? table.length : 0; 395 } 396 397 /*********************************************************************** 398 399 Set the number of buckets and resize as required 400 401 Time complexity: O(n) 402 403 ***********************************************************************/ 404 405 final HashSet buckets (size_t cap) 406 { 407 if (cap < Container.defaultInitialBuckets) 408 cap = Container.defaultInitialBuckets; 409 410 if (cap !is buckets) 411 resize (cap); 412 return this; 413 } 414 415 /*********************************************************************** 416 417 Return the resize threshold 418 419 Time complexity: O(1) 420 421 ***********************************************************************/ 422 423 final const float threshold () 424 { 425 return loadFactor; 426 } 427 428 /*********************************************************************** 429 430 Set the resize threshold, and resize as required 431 432 Time complexity: O(n) 433 434 ***********************************************************************/ 435 436 final void threshold (float desired) 437 { 438 assert (desired > 0.0); 439 loadFactor = desired; 440 if (table) 441 checkLoad; 442 } 443 444 /*********************************************************************** 445 446 Configure the assigned allocator with the size of each 447 allocation block (number of nodes allocated at one time) 448 and the number of nodes to pre-populate the cache with. 449 450 Time complexity: O(n) 451 452 ***********************************************************************/ 453 454 final HashSet cache (size_t chunk, size_t count=0) 455 { 456 heap.config (chunk, count); 457 return this; 458 } 459 460 /*********************************************************************** 461 462 Copy and return the contained set of values in an array, 463 using the optional dst as a recipient (which is resized 464 as necessary). 465 466 Returns a slice of dst representing the container values. 467 468 Time complexity: O(n) 469 470 ***********************************************************************/ 471 472 final V[] toArray (V[] dst = null) 473 { 474 if (dst.length < count) 475 dst.length = count; 476 477 size_t i = 0; 478 foreach (v; this) 479 dst[i++] = v; 480 return dst [0 .. count]; 481 } 482 483 /*********************************************************************** 484 485 Is this container empty? 486 487 Time complexity: O(1) 488 489 ***********************************************************************/ 490 491 final const bool isEmpty () 492 { 493 return count is 0; 494 } 495 496 /*********************************************************************** 497 498 Sanity check 499 500 ***********************************************************************/ 501 502 final HashSet check() 503 { 504 assert(!(table is null && count !is 0)); 505 assert((table is null || table.length > 0)); 506 assert(loadFactor > 0.0f); 507 508 if (table) 509 { 510 size_t c = 0; 511 for (size_t i = 0; i < table.length; ++i) 512 { 513 for (auto p = table[i]; p; p = p.next) 514 { 515 ++c; 516 assert(contains(p.value)); 517 assert(Hash (p.value, table.length) is i); 518 } 519 } 520 assert(c is count); 521 } 522 return this; 523 } 524 525 /*********************************************************************** 526 527 Allocate a node instance. This is used as the default allocator 528 529 ***********************************************************************/ 530 531 private Ref allocate () 532 { 533 return heap.allocate; 534 } 535 536 /*********************************************************************** 537 538 Check to see if we are past load factor threshold. If so, 539 resize so that we are at half of the desired threshold. 540 541 ***********************************************************************/ 542 543 private void checkLoad () 544 { 545 float fc = count; 546 float ft = table.length; 547 if (fc / ft > loadFactor) 548 resize (2 * cast(size_t)(fc / loadFactor) + 1); 549 } 550 551 /*********************************************************************** 552 553 resize table to new capacity, rehashing all elements 554 555 ***********************************************************************/ 556 557 private void resize (size_t newCap) 558 { 559 //Stdout.formatln ("resize {}", newCap); 560 auto newtab = heap.allocate (newCap); 561 mutate; 562 563 foreach (bucket; table) 564 while (bucket) 565 { 566 auto n = bucket.next; 567 auto h = Hash (bucket.value, newCap); 568 bucket.next = newtab[h]; 569 newtab[h] = bucket; 570 bucket = n; 571 } 572 573 // release the prior table and assign new one 574 heap.collect (table); 575 table = newtab; 576 } 577 578 /*********************************************************************** 579 580 Remove the indicated node. We need to traverse buckets 581 for this, since we're singly-linked only. Better to save 582 the per-node memory than to gain a little on each remove 583 584 Used by iterators only 585 586 ***********************************************************************/ 587 588 private bool remove (Ref node, size_t row) 589 { 590 auto hd = table[row]; 591 auto trail = hd; 592 auto p = hd; 593 594 while (p) 595 { 596 auto n = p.next; 597 if (p is node) 598 { 599 decrement (p); 600 if (p is hd) 601 table[row] = n; 602 else 603 trail.next = n; 604 return true; 605 } 606 else 607 { 608 trail = p; 609 p = n; 610 } 611 } 612 return false; 613 } 614 615 /*********************************************************************** 616 617 Clears the HashSet contents. Various attributes are 618 retained, such as the internal table itself. Invoke 619 reset() to drop everything. 620 621 Time complexity: O(n) 622 623 ***********************************************************************/ 624 625 private HashSet clear (bool all) 626 { 627 mutate; 628 629 // collect each node if we can't collect all at once 630 if (heap.collect(all) is false) 631 foreach (ref v; table) 632 while (v) 633 { 634 auto n = v.next; 635 decrement (v); 636 v = n; 637 } 638 639 // retain table, but remove bucket chains 640 foreach (ref v; table) 641 v = null; 642 643 count = 0; 644 return this; 645 } 646 647 /*********************************************************************** 648 649 new element was added 650 651 ***********************************************************************/ 652 653 private void increment() 654 { 655 ++mutation; 656 ++count; 657 } 658 659 /*********************************************************************** 660 661 element was removed 662 663 ***********************************************************************/ 664 665 private void decrement (Ref p) 666 { 667 Reap (p.value); 668 heap.collect (p); 669 ++mutation; 670 --count; 671 } 672 673 /*********************************************************************** 674 675 set was changed 676 677 ***********************************************************************/ 678 679 private void mutate() 680 { 681 ++mutation; 682 } 683 684 /*********************************************************************** 685 686 Iterator with no filtering 687 688 ***********************************************************************/ 689 690 private struct Iterator 691 { 692 size_t row; 693 Ref cell, 694 prior; 695 Ref[] table; 696 HashSet owner; 697 size_t mutation; 698 699 /*************************************************************** 700 701 Did the container change underneath us? 702 703 ***************************************************************/ 704 705 bool valid () 706 { 707 return owner.mutation is mutation; 708 } 709 710 /*************************************************************** 711 712 Accesses the next value, and returns false when 713 there are no further values to traverse 714 715 ***************************************************************/ 716 717 bool next (ref V v) 718 { 719 auto n = next; 720 return (n) ? v = *n, true : false; 721 } 722 723 /*************************************************************** 724 725 Return a pointer to the next value, or null when 726 there are no further values to traverse 727 728 ***************************************************************/ 729 730 V* next () 731 { 732 while (cell is null) 733 if (row < table.length) 734 cell = table [row++]; 735 else 736 return null; 737 738 prior = cell; 739 cell = cell.next; 740 return &prior.value; 741 } 742 743 /*************************************************************** 744 745 Foreach support 746 747 ***************************************************************/ 748 749 int opApply (scope int delegate(ref V value) dg) 750 { 751 int result; 752 753 auto c = cell; 754 loop: while (true) 755 { 756 while (c is null) 757 if (row < table.length) 758 c = table [row++]; 759 else 760 break loop; 761 762 prior = c; 763 c = c.next; 764 if ((result = dg(prior.value)) != 0) 765 break loop; 766 } 767 768 cell = c; 769 return result; 770 } 771 772 /*************************************************************** 773 774 Remove value at the current iterator location 775 776 ***************************************************************/ 777 778 bool remove () 779 { 780 if (prior) 781 if (owner.remove (prior, row-1)) 782 { 783 // ignore this change 784 ++mutation; 785 return true; 786 } 787 788 prior = null; 789 return false; 790 } 791 } 792 } 793 794 795 796 /******************************************************************************* 797 798 *******************************************************************************/ 799 800 debug (HashSet) 801 { 802 import tango.io.Stdout; 803 import tango.core.Thread; 804 import tango.time.StopWatch; 805 806 void main() 807 { 808 // usage examples ... 809 auto set = new HashSet!(char[]); 810 set.add ("foo"); 811 set.add ("bar"); 812 set.add ("wumpus"); 813 814 // implicit generic iteration 815 foreach (value; set) 816 Stdout (value).newline; 817 818 // explicit generic iteration 819 foreach (value; set.iterator) 820 Stdout (value).newline; 821 822 // generic iteration with optional remove 823 auto s = set.iterator; 824 foreach (value; s) 825 {} // s.remove; 826 827 // incremental iteration, with optional remove 828 char[] v; 829 auto iterator = set.iterator; 830 while (iterator.next(v)) 831 {} //iterator.remove; 832 833 // incremental iteration, with optional failfast 834 auto it = set.iterator; 835 while (it.valid && it.next(v)) 836 {} 837 838 // remove specific element 839 set.remove ("wumpus"); 840 841 // remove first element ... 842 while (set.take(v)) 843 Stdout.formatln ("taking {}, {} left", v, set.size); 844 845 846 // setup for benchmark, with a set of integers. We 847 // use a chunk allocator, and presize the bucket[] 848 auto test = new HashSet!(int, Container.hash, Container.reap, Container.Chunk); 849 test.cache (1000, 1_000_000); 850 test.buckets = 1_500_000; 851 const count = 1_000_000; 852 StopWatch w; 853 854 // benchmark adding 855 w.start; 856 for (int i=count; i--;) 857 test.add(i); 858 Stdout.formatln ("{} adds: {}/s", test.size, test.size/w.stop); 859 860 // benchmark reading 861 w.start; 862 for (int i=count; i--;) 863 test.contains(i); 864 Stdout.formatln ("{} lookups: {}/s", test.size, test.size/w.stop); 865 866 // benchmark adding without allocation overhead 867 test.clear; 868 w.start; 869 for (int i=count; i--;) 870 test.add(i); 871 Stdout.formatln ("{} adds (after clear): {}/s", test.size, test.size/w.stop); 872 873 // benchmark duplication 874 w.start; 875 auto dup = test.dup; 876 Stdout.formatln ("{} element dup: {}/s", dup.size, dup.size/w.stop); 877 878 // benchmark iteration 879 w.start; 880 foreach (value; test) {} 881 Stdout.formatln ("{} element iteration: {}/s", test.size, test.size/w.stop); 882 883 test.check; 884 } 885 }