1 module tango.util.container.more.HashFile;
2 
3 private import tango.io.device.FileMap : MappedFile;
4 
5 /******************************************************************************
6 
7         HashFile implements a simple mechanism to store and recover a 
8         large quantity of data for the duration of the hosting process.
9         It is intended to act as a local-cache for a remote data-source, 
10         or as a spillover area for large in-memory cache instances. 
11         
12         Note that any and all stored data is rendered invalid the moment
13         a HashFile object is garbage-collected.
14 
15         The implementation follows a fixed-capacity record scheme, where
16         content can be rewritten in-place until said capacity is reached.
17         At such time, the altered content is moved to a larger capacity
18         record at end-of-file, and a hole remains at the prior location.
19         These holes are not collected, since the lifespan of a HashFile
20         is limited to that of the host process.
21 
22         All index keys must be unique. Writing to the HashFile with an
23         existing key will overwrite any previous content. What follows
24         is a contrived example:
25         
26         ---
27         alias HashFile!(char[], char[]) Bucket;
28 
29         auto bucket = new Bucket ("bucket.bin", HashFile.HalfK);
30 
31         // insert some data, and retrieve it again
32         auto text = "this is a test";
33         bucket.put ("a key", text);
34         auto b = cast(char[]) bucket.get ("a key");
35 
36         assert (b == text);
37         bucket.close;
38         ---
39 
40 ******************************************************************************/
41 
class HashFile(K, V)
{
        /**********************************************************************

                Define the capacity (block-size) of each record

        **********************************************************************/

        struct BlockSize
        {
                int capacity;   // usable bytes per block, stored as 2^n - 1
                                // so it doubles as an alignment mask (see
                                // Record.createBucket)
        }

        // backing storage
        private MappedFile              file;

        // memory-mapped content
        private ubyte[]                 heap;

        // basic capacity for each record
        private BlockSize               block;

        // pointers to file records
        private Record[K]               map;

        // current file size
        private ulong                   fileSize;

        // current file usage
        private ulong                   waterLine;

        // supported block sizes -- all values are 2^n - 1, a requirement
        // of the round-up arithmetic in Record.createBucket
        public enum BlockSize   EighthK  = {128-1},
                                QuarterK = {256-1},
                                HalfK    = {512-1},
                                OneK     = {1024*1-1},
                                TwoK     = {1024*2-1},
                                FourK    = {1024*4-1},
                                EightK   = {1024*8-1},
                                SixteenK = {1024*16-1},
                                ThirtyTwoK = {1024*32-1},
                                SixtyFourK = {1024*64-1};


        /**********************************************************************

                Construct a HashFile with the provided path, record-size,
                and initial record count. The latter causes records to be 
                pre-allocated, saving a certain amount of growth activity.
                Selecting a record size that roughly matches the serialized 
                content will limit 'thrashing'. 

        **********************************************************************/

        this (const(char[]) path, BlockSize block, uint initialRecords = 100)
        {
                this.block = block;

                // open a storage file
                file = new MappedFile (path);

                // set initial file size (cannot be zero). Widen before
                // multiplying: doing this in uint/int space can overflow
                // for large record counts and/or block sizes
                fileSize = cast(ulong) initialRecords * (block.capacity + 1);

                // map the file content
                heap = file.resize (fileSize);
        }

        /**********************************************************************
        
                Return where the HashFile is located

        **********************************************************************/

        final const(char[]) path ()
        {
                return file.path;
        }

        /**********************************************************************

                Return the currently populated size of this HashFile,
                in bytes (the file 'water line' -- holes included)

        **********************************************************************/

        final const ulong length ()
        {
                return waterLine;
        }

        /**********************************************************************

                Return the serialized data for the provided key. Returns
                null if the key was not found.

                When 'clear' is set, the record is marked empty (its file
                space is retained for reuse by a subsequent put).

                Note that the returned content is a slice directly into
                the memory-mapped file, and may be invalidated by a later
                put() which grows the file.

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final V get (K key, bool clear = false)
        {
                auto p = key in map;

                if (p)
                    return p.read (this, clear);
                return V.init;
        }

        /**********************************************************************

                Remove the provided key from this HashFile. Leaves a 
                hole in the backing file

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void remove (K key)
        {
                map.remove (key);
        }

        /**********************************************************************

                Write a serialized block of data, and associate it with
                the provided key. All keys must be unique, and it is the
                responsibility of the programmer to ensure this. Reusing 
                an existing key will overwrite previous data. 

                Note that data is allowed to grow within the occupied 
                bucket until it becomes larger than the allocated space.
                When this happens, the data is moved to a larger bucket
                at the file tail.

                The optional 'retain' callback allows the caller to dup
                or otherwise take ownership of a key before it is stored
                in the index (applied only when the key is new).

                Be sure to synchronize access by multiple threads

        **********************************************************************/

        final void put (K key, V data, K function(K) retain = null)
        {
                auto r = key in map;
                
                if (r)
                    r.write (this, data, block);
                else
                   {
                   Record rr;
                   rr.write (this, data, block);
                   if (retain)
                       key = retain (key);
                   map [key] = rr;
                   }
        }

        /**********************************************************************

                Close this HashFile -- all content is lost.

        **********************************************************************/

        final void close ()
        {
                if (file)
                   {
                   file.close;
                   file = null;
                   map = null;
                   }
        }

        /**********************************************************************

                Each Record takes up a number of 'pages' within the file. 
                The size of these pages is determined by the BlockSize 
                provided during HashFile construction. Additional space
                at the end of each block is potentially wasted, but enables 
                content to grow in size without creating a myriad of holes.

        **********************************************************************/

        private struct Record
        {
                private ulong           offset;         // byte offset into the mapped heap
                private int             used,           // bytes currently occupied
                                        capacity = -1;  // allocated bytes; -1 => no bucket yet

                /**************************************************************

                        Return a slice over this record's content, or
                        V.init when the record is empty. Optionally mark
                        the record as cleared (used = 0).

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                V read (HashFile bucket, bool clear)
                {
                        if (used)
                           {
                           // slice directly into the memory-mapped content
                           auto ret = cast(V) bucket.heap [cast(size_t)offset .. cast(size_t)(offset + used)];
                           if (clear)
                               used = 0;
                           return ret;
                           }
                        return V.init;
                }

                /**************************************************************

                        Copy 'data' into this record, relocating to a new
                        (larger) bucket at the file tail when the content
                        no longer fits the current allocation.

                        This should be protected from thread-contention at
                        a higher level.

                **************************************************************/

                void write (HashFile bucket, V data, BlockSize block)
                {
                        // explicit narrowing: size_t (ulong on 64-bit
                        // targets) does not implicitly convert to int
                        this.used = cast(int) data.length;

                        // create new slot if we exceed capacity
                        if (this.used > this.capacity)
                            createBucket (bucket, this.used, block);

                        bucket.heap [cast(size_t)offset .. cast(size_t)(offset+used)] = cast(ubyte[]) data;
                }

                /**************************************************************

                        Allocate a fresh bucket at the current water line,
                        rounding the capacity up to a whole number of
                        blocks, and grow the backing file when the water
                        line passes the current file size.

                **************************************************************/

                void createBucket (HashFile bucket, int bytes, BlockSize block)
                {
                        this.offset = bucket.waterLine;
                        // round up to the next block multiple; relies on
                        // block.capacity being of the form 2^n - 1
                        this.capacity = (bytes + block.capacity) & ~block.capacity;
                        
                        bucket.waterLine += this.capacity;
                        if (bucket.waterLine > bucket.fileSize)
                           {
                           // grow geometrically to amortize resize cost
                           auto target = bucket.waterLine * 2;
                           debug(HashFile) 
                                 printf ("growing file from %lld, %lld, to %lld\n", 
                                          bucket.fileSize, bucket.waterLine, target);

                           // expand the physical file size and remap the heap
                           bucket.heap = bucket.file.resize (bucket.fileSize = target);
                           }
                }
        }
}
288 
289 
290 /******************************************************************************
291 
292 ******************************************************************************/
293 
debug (HashFile)
{
        extern(C) int printf (const(char)*, ...);

        import tango.io.Path;
        import tango.io.Stdout;
        import tango.text.convert.Integer;

        void main()
        {
                alias HashFile!(char[], char[]) Bucket;

                // deliberately tiny initial allocation, to exercise growth
                auto bucket = new Bucket ("foo.map", Bucket.QuarterK, 1);
        
                // populate with a thousand-odd keyed entries
                char[16] buffer;
                foreach (n; 1 .. 1024)
                         bucket.put (format(buffer, n).dup, "blah");

                // fetch-and-clear, then confirm the entry reads back empty
                auto content = bucket.get ("1", true);
                if (content.length)
                    Stdout.formatln ("result '{}'", content);
                content = bucket.get ("1");
                if (content.length)
                    Stdout.formatln ("result '{}'", content);
                bucket.close;
                remove ("foo.map");
        }
}