module tango.util.container.more.HashFile;

private import tango.io.device.FileMap : MappedFile;

/******************************************************************************

        HashFile implements a simple mechanism to store and recover a
        large quantity of data for the duration of the hosting process.
        It is intended to act as a local-cache for a remote data-source,
        or as a spillover area for large in-memory cache instances.

        Note that any and all stored data is rendered invalid the moment
        a HashFile object is garbage-collected.

        The implementation follows a fixed-capacity record scheme, where
        content can be rewritten in-place until said capacity is reached.
        At such time, the altered content is moved to a larger capacity
        record at end-of-file, and a hole remains at the prior location.
        These holes are not collected, since the lifespan of a HashFile
        is limited to that of the host process.

        All index keys must be unique. Writing to the HashFile with an
        existing key will overwrite any previous content. What follows
        is a contrived example:

        ---
        alias HashFile!(char[], char[]) Bucket;

        auto bucket = new Bucket ("bucket.bin", HashFile.HalfK);

        // insert some data, and retrieve it again
        auto text = "this is a test";
        bucket.put ("a key", text);
        auto b = cast(char[]) bucket.get ("a key");

        assert (b == text);
        bucket.close;
        ---

******************************************************************************/

class HashFile(K, V)
{
        /**********************************************************************

                Define the capacity (block-size) of each record

        **********************************************************************/

        struct BlockSize
        {
                int capacity;
        }

        // backing storage
        private MappedFile      file;

        // memory-mapped content (a live view into 'file')
        private ubyte[]         heap;

        // basic capacity for each record
        private BlockSize       block;

        // pointers to file records
        private Record[K]       map;

        // current file size
        private ulong           fileSize;

        // current file usage
        private ulong           waterLine;

        // supported block sizes. Each capacity is a power-of-two minus
        // one, so (bytes + capacity) & ~capacity rounds up to a whole
        // number of blocks in createBucket()
        public enum BlockSize   EighthK    = {128-1},
                                QuarterK   = {256-1},
                                HalfK      = {512-1},
                                OneK       = {1024*1-1},
                                TwoK       = {1024*2-1},
                                FourK      = {1024*4-1},
                                EightK     = {1024*8-1},
                                SixteenK   = {1024*16-1},
                                ThirtyTwoK = {1024*32-1},
                                SixtyFourK = {1024*64-1};


        /**********************************************************************

                Construct a HashFile with the provided path, record-size,
                and initial record count. The latter causes records to be
                pre-allocated, saving a certain amount of growth activity.
                Selecting a record size that roughly matches the serialized
                content will limit 'thrashing'.

        **********************************************************************/

        this (const(char[]) path, BlockSize block, uint initialRecords = 100)
        {
                this.block = block;

                // open a storage file
                file = new MappedFile (path);

                // set initial file size (cannot be zero). Widen to ulong
                // before the multiply so that a large initialRecords or
                // block size cannot overflow 32-bit int arithmetic
                fileSize = cast(ulong) initialRecords * (block.capacity + 1);

                // map the file content
                heap = file.resize (fileSize);
        }

        /**********************************************************************

                Return where the HashFile is located

        **********************************************************************/

        final const(char[]) path ()
        {
                return file.path;
        }

        /**********************************************************************

                Return the currently populated size of this HashFile

        **********************************************************************/

        final const ulong length ()
        {
                return waterLine;
        }

        /**********************************************************************

                Return the serialized data for the provided key. Returns
                null if the key was not found.

                Note that the returned value is a view into the mapped
                file, not a copy; it remains valid only until the file
                is remapped by a subsequent put(), or closed.

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final V get (K key, bool clear = false)
        {
                auto p = key in map;

                if (p)
                    return p.read (this, clear);
                return V.init;
        }

        /**********************************************************************

                Remove the provided key from this HashFile. Leaves a
                hole in the backing file

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void remove (K key)
        {
                map.remove (key);
        }

        /**********************************************************************

                Write a serialized block of data, and associate it with
                the provided key. All keys must be unique, and it is the
                responsibility of the programmer to ensure this. Reusing
                an existing key will overwrite previous data.

                Note that data is allowed to grow within the occupied
                bucket until it becomes larger than the allocated space.
                When this happens, the data is moved to a larger bucket
                at the file tail.

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void put (K key, V data, K function(K) retain = null)
        {
                auto r = key in map;

                if (r)
                    r.write (this, data, block);
                else
                   {
                   Record rr;
                   rr.write (this, data, block);
                   // allow the caller to substitute a durable copy of
                   // the key before it is stored in the map
                   if (retain)
                       key = retain (key);
                   map [key] = rr;
                   }
        }

        /**********************************************************************

                Close this HashFile -- all content is lost.

        **********************************************************************/

        final void close ()
        {
                if (file)
                   {
                   file.close;
                   file = null;
                   map = null;
                   // drop the stale view into the (now unmapped) file
                   // so no dangling slice can escape via read()
                   heap = null;
                   }
        }

        /**********************************************************************

                Each Record takes up a number of 'pages' within the file.
                The size of these pages is determined by the BlockSize
                provided during HashFile construction. Additional space
                at the end of each block is potentially wasted, but enables
                content to grow in size without creating a myriad of holes.

        **********************************************************************/

        private struct Record
        {
                private ulong           offset;
                private int             used,
                                        capacity = -1;  // -1 ==> no bucket assigned yet

                /**************************************************************

                        Return a view over this record's content, or
                        V.init when the record is empty. Optionally
                        marks the record as consumed (used = 0).

                        This should be protected from thread-contention
                        at a higher level.

                **************************************************************/

                V read (HashFile bucket, bool clear)
                {
                        if (used)
                           {
                           auto ret = cast(V) bucket.heap [cast(size_t) offset .. cast(size_t) (offset + used)];
                           if (clear)
                               used = 0;
                           return ret;
                           }
                        return V.init;
                }

                /**************************************************************

                        Copy 'data' into this record, relocating it to
                        a fresh, larger bucket at the file tail when the
                        content no longer fits the current capacity.

                        This should be protected from thread-contention
                        at a higher level.

                **************************************************************/

                void write (HashFile bucket, V data, BlockSize block)
                {
                        // explicit narrowing: data.length is size_t, and an
                        // implicit size_t -> int conversion is rejected by
                        // 64-bit D compilers
                        this.used = cast(int) data.length;

                        // create new slot if we exceed capacity
                        if (this.used > this.capacity)
                            createBucket (bucket, this.used, block);

                        bucket.heap [cast(size_t) offset .. cast(size_t) (offset + used)] = cast(ubyte[]) data;
                }

                /**************************************************************

                        Assign this record a fresh bucket at the current
                        water-line, rounding 'bytes' up to a whole number
                        of blocks, and grow + remap the backing file when
                        the water-line passes the current file size.

                **************************************************************/

                void createBucket (HashFile bucket, int bytes, BlockSize block)
                {
                        this.offset = bucket.waterLine;
                        // round up to a block multiple (capacity is 2^n - 1)
                        this.capacity = (bytes + block.capacity) & ~block.capacity;

                        bucket.waterLine += this.capacity;
                        if (bucket.waterLine > bucket.fileSize)
                           {
                           // doubling keeps growth activity amortized
                           auto target = bucket.waterLine * 2;
                           debug(HashFile)
                                 printf ("growing file from %lld, %lld, to %lld\n",
                                          bucket.fileSize, bucket.waterLine, target);

                           // expand the physical file size and remap the heap
                           bucket.heap = bucket.file.resize (bucket.fileSize = target);
                           }
                }
        }
}


/******************************************************************************

******************************************************************************/

debug (HashFile)
{
        extern(C) int printf (const(char)*, ...);

        import tango.io.Path;
        import tango.io.Stdout;
        import tango.text.convert.Integer;

        void main()
        {
                alias HashFile!(char[], char[]) Bucket;

                auto file = new Bucket ("foo.map", Bucket.QuarterK, 1);

                char[16] tmp;
                for (int i=1; i < 1024; ++i)
                     file.put (format(tmp, i).dup, "blah");

                auto s = file.get ("1", true);
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                s = file.get ("1");
                if (s.length)
                    Stdout.formatln ("result '{}'", s);
                file.close;
                remove ("foo.map");
        }
}