1 /**
2  * A UUID is a Universally Unique Identifier.
3  * It is a 128-bit number generated either randomly or according to some
4  * inscrutable algorithm, depending on the UUID version used.
5  *
6  * Here, we implement a data structure for holding and formatting UUIDs.
7  * To generate a UUID, use one of the other modules in the UUID package.
8  * You can also create a UUID by parsing a string containing a textual
9  * representation of a UUID, or by providing the constituent bytes.
10  */
11 module tango.util.uuid.Uuid;
12 
13 import tango.core.Exception;
14 import Integer = tango.text.convert.Integer;
15 
16 private union UuidData
17 {
18         uint[4] ui;
19         ubyte[16] ub;
20 } 
21 
22 /** This struct represents a UUID. It offers static members for creating and
23  * parsing UUIDs.
24  * 
25  * This struct treats a UUID as an opaque type. The specification has fields
26  * for time, version, client MAC address, and several other data points, but
27  * these are meaningless for most applications and means of generating a UUID.
28  *
29  * There are versions of UUID generation involving the system time and MAC 
30  * address. These are not used for several reasons:
31  *      - One version contains identifying information, which is undesirable.
32  *      - Ensuring uniqueness between processes requires inter-process 
33  *              communication. This would be unreasonably slow and complex.
34  *      - Obtaining the MAC address is a system-dependent operation and beyond
35  *              the scope of this module.
36  *      - Using Java and .NET as a guide, they only implement randomized creation
37  *              of UUIDs, not the MAC address/time based generation.
38  *
39  * When generating a random UUID, use a carefully seeded random number 
40  * generator. A poorly chosen seed may produce undesirably consistent results.
41  */
42 struct Uuid
43 {
44         private UuidData _data;
45 
46         /** Copy the givent bytes into a UUID. If you supply more or fewer than
47           * 16 bytes, throws an IllegalArgumentException. */
48         public static Uuid opCall(const(ubyte[]) data)
49         {
50                 if (data.length != 16)
51                 {
52                         throw new IllegalArgumentException("A UUID is 16 bytes long.");
53                 }
54                 Uuid u;
55                 u._data.ub[] = data[];
56                 return u;
57         }
58 
59         /** Attempt to parse the representation of a UUID given in value. If the
60           * value is not in the correct format, throw IllegalArgumentException.
61           * If the value is in the correct format, return a UUID representing the
62           * given value. 
63           *
64           * The following is an example of a UUID in the expected format:
65           *     67e55044-10b1-426f-9247-bb680e5fe0c8 
66           */
67         public static Uuid parse(const(char[]) value)
68         {
69                 Uuid u;
70                 if (!tryParse(value, u))
71                         throw new IllegalArgumentException("'" ~ value.idup ~ "' is not in the correct format for a UUID");
72                 return u;
73         }
74 
75         /** Attempt to parse the representation of a UUID given in value. If the
76           * value is not in the correct format, return false rather than throwing
77           * an exception. If the value is in the correct format, set uuid to 
78           * represent the given value. 
79           *
80           * The following is an example of a UUID in the expected format:
81           *     67e55044-10b1-426f-9247-bb680e5fe0c8 
82           */
83         public static bool tryParse(const(char[]) value, out Uuid uuid)
84         {
85                 if (value.length != 36 ||
86                         value[8] != '-' ||
87                         value[13] != '-' ||
88                         value[18] != '-' ||
89                         value[23] != '-')
90                 {
91                         return false;
92                 }
93                 int hyphens = 0;
94                 foreach (i, v; value)
95                 {
96                         if ('a' <= v && 'f' >= v) continue;
97                         if ('A' <= v && 'F' >= v) continue;
98                         if ('0' <= v && '9' >= v) continue;
99                         if (v == '-') 
100                         {
101                                 hyphens++;
102                                 continue;
103                         }
104                         // illegal character
105                         return false;
106                 }
107                 if (hyphens != 4) 
108                 {
109                         return false;
110                 }
111 
112                 with (uuid._data)
113                 {
114                         // This is verbose, but it's simple, and it gets around endian
115                         // issues if you try parsing an integer at a time.
116                         ub[0] = cast(ubyte) Integer.parse(value[0..2], 16);
117                         ub[1] = cast(ubyte) Integer.parse(value[2..4], 16);
118                         ub[2] = cast(ubyte) Integer.parse(value[4..6], 16);
119                         ub[3] = cast(ubyte) Integer.parse(value[6..8], 16);
120 
121                         ub[4] = cast(ubyte) Integer.parse(value[9..11], 16);
122                         ub[5] = cast(ubyte) Integer.parse(value[11..13], 16);
123 
124                         ub[6] = cast(ubyte) Integer.parse(value[14..16], 16);
125                         ub[7] = cast(ubyte) Integer.parse(value[16..18], 16);
126 
127                         ub[8] = cast(ubyte) Integer.parse(value[19..21], 16);
128                         ub[9] = cast(ubyte) Integer.parse(value[21..23], 16);
129 
130                         ub[10] = cast(ubyte) Integer.parse(value[24..26], 16);
131                         ub[11] = cast(ubyte) Integer.parse(value[26..28], 16);
132                         ub[12] = cast(ubyte) Integer.parse(value[28..30], 16);
133                         ub[13] = cast(ubyte) Integer.parse(value[30..32], 16);
134                         ub[14] = cast(ubyte) Integer.parse(value[32..34], 16);
135                         ub[15] = cast(ubyte) Integer.parse(value[34..36], 16);
136                 }
137 
138                 return true;
139         }
140         
141         /** Generate a UUID based on the given random number generator.
142           * The generator must have a method 'uint natural()' that returns
143           * a random number. The generated UUID conforms to version 4 of the
144           * specification. */
145         public static Uuid random(Random)(Random generator)
146         {
147                 Uuid u;
148                 with (u)
149                 {
150                         _data.ui[0] = generator.natural();
151                         _data.ui[1] = generator.natural();
152                         _data.ui[2] = generator.natural();
153                         _data.ui[3] = generator.natural();
154 
155                         // v4: 7th bytes' first half is 0b0100: 4 in hex
156                         _data.ub[6] &= 0b01001111;
157                         _data.ub[6] |= 0b01000000;
158 
159                         // v4: 9th byte's 1st half is 0b1000 to 0b1011: 8, 9, A, B in hex
160                         _data.ub[8] &= 0b10111111;
161                         _data.ub[8] |= 0b10000000;
162                 }
163                 return u;
164         }
165 
166         /* Generate a UUID based on the given namespace and name. This conforms to 
167          * versions 3 and 5 of the standard -- version 3 if you use MD5, or version
168          * 5 if you use SHA1.
169          *
170          * You should pass 3 as the value for uuidVersion if you are using the
171          * MD5 hash, and 5 if you are using the SHA1 hash. To do otherwise is an
172          * Abomination Unto Nuggan.
173          *
174          * This method is exposed mainly for the convenience methods in 
175          * tango.util.uuid.*. You can use this method directly if you prefer.
176          */
177         public static Uuid byName(Digest)(Uuid namespace, const(char[]) name, Digest digest,
178                                                                               ubyte uuidVersion)
179         {
180                 /* o  Compute the hash of the name space ID concatenated with the name.
181                    o  Set octets zero through 15 to octets zero through 15 of the hash.
182                    o  Set the four most significant bits (bits 12 through 15) of octet
183                           6 to the appropriate 4-bit version number from Section 4.1.3.
184                    o  Set the two most significant bits (bits 6 and 7) of octet 8 to 
185                           zero and one, respectively.  */
186                 auto nameBytes = namespace.toBytes();
187                 nameBytes ~= cast(ubyte[])name;
188                 digest.update(nameBytes);
189                 nameBytes = digest.binaryDigest();
190                 nameBytes[6] = cast(ubyte)((uuidVersion << 4) | (nameBytes[6] & 0b1111));
191                 nameBytes[8] |= 0b1000_0000;
192                 nameBytes[8] &= 0b1011_1111;
193                 return Uuid(nameBytes[0..16]);
194         }
195 
196         /** Return an empty UUID (with all bits set to 0). This doesn't conform
197           * to any particular version of the specification. It's equivalent to
198           * using an uninitialized UUID. This method is provided for clarity. */
199         @property public static Uuid empty()
200         {
201                 Uuid uuid;
202                 uuid._data.ui[] = 0;
203                 return uuid;
204         }
205 
206         /** Get a copy of this UUID's value as an array of unsigned bytes. */
207         public const ubyte[] toBytes()
208         {
209                 return _data.ub.dup;
210         }
211 
212         /** Gets the version of this UUID. 
213           * RFC 4122 defines five types of UUIDs:
214           *     -       Version 1 is based on the system's MAC address and the current time.
215           *     -       Version 2 uses the current user's userid and user domain in 
216           *                     addition to the time and MAC address.
217           * -   Version 3 is namespace-based, as generated by the NamespaceGenV3
218           *                     module. It uses MD5 as a hash algorithm. RFC 4122 states that
219           *                     version 5 is preferred over version 3.
220           * -   Version 4 is generated randomly.
221           * -   Version 5 is like version 3, but uses SHA-1 rather than MD5. Use
222           *                     the NamespaceGenV5 module to create UUIDs like this.
223           *
224           * The following additional versions exist:
225           * -   Version 0 is reserved for backwards compatibility.
226           * -   Version 6 is a non-standard Microsoft extension.
227           * -   Version 7 is reserved for future use.
228           */
229         public const ubyte format()
230         {
231                 return cast(ubyte) (_data.ub[6] >> 4);
232         }
233 
234         /** Get the canonical string representation of a UUID.
235           * The canonical representation is in hexidecimal, with hyphens inserted
236           * after the eighth, twelfth, sixteenth, and twentieth digits. For example:
237           *     67e55044-10b1-426f-9247-bb680e5fe0c8
238           * This is the format used by the parsing functions.
239           */
240         public const(char[]) toString()
241         {
242                 // Look, only one allocation.
243                 char[] buf = new char[36];
244                 buf[8] = '-';
245                 buf[13] = '-';
246                 buf[18] = '-';
247                 buf[23] = '-';
248                 with (_data)
249                 {
250                         // See above with tryParse: this ignores endianness.
251                         // Technically, it's sufficient that the conversion to string
252                         // matches the conversion from string and from byte array. But
253                         // this is the simplest way to make sure of that. Plus you can
254                         // serialize and deserialize on machines with different endianness
255                         // without a bunch of strange conversions, and with consistent
256                         // string representations.
257                         Integer.format(buf[0..2], ub[0], "x2");
258                         Integer.format(buf[2..4], ub[1], "x2");
259                         Integer.format(buf[4..6], ub[2], "x2");
260                         Integer.format(buf[6..8], ub[3], "x2");
261                         Integer.format(buf[9..11], ub[4], "x2");
262                         Integer.format(buf[11..13], ub[5], "x2");
263                         Integer.format(buf[14..16], ub[6], "x2");
264                         Integer.format(buf[16..18], ub[7], "x2");
265                         Integer.format(buf[19..21], ub[8], "x2");
266                         Integer.format(buf[21..23], ub[9], "x2");
267                         Integer.format(buf[24..26], ub[10], "x2");
268                         Integer.format(buf[26..28], ub[11], "x2");
269                         Integer.format(buf[28..30], ub[12], "x2");
270                         Integer.format(buf[30..32], ub[13], "x2");
271                         Integer.format(buf[32..34], ub[14], "x2");
272                         Integer.format(buf[34..36], ub[15], "x2");
273                 }
274                 return buf;
275         }
276 
277         /** Determines if this UUID has the same value as another. */
278         public const bool opEquals(ref const(Uuid) other)
279         {
280                 return 
281                         _data.ui[0] == other._data.ui[0] &&
282                         _data.ui[1] == other._data.ui[1] &&
283                         _data.ui[2] == other._data.ui[2] &&
284                         _data.ui[3] == other._data.ui[3];
285         }
286 
287         /** Get a hash code representing this UUID. */
288         public const hash_t toHash() nothrow @safe
289         {
290                 with (_data)
291                 {
292                         // 29 is just a convenient prime number
293                         return (((((ui[0] * 29) ^ ui[1]) * 29) ^ ui[2]) * 29) ^ ui[3];
294                 }
295         }
296 }
297 
298 
299 debug (UnitTest)
300 {
301         import tango.math.random.Kiss;
302         unittest
303         {
304                 // Generate them in the correct format
305                 for (int i = 0; i < 20; i++)
306                 {
307                         auto uu = Uuid.random(&Kiss.instance).toString();
308                         auto c = uu[19];
309                         assert (c == '9' || c == '8' || c == 'a' || c == 'b', uu);
310                         auto d = uu[14];
311                         assert (d == '4', uu);
312                 }
313 
314                 // empty
315                 assert (Uuid.empty.toString() == "00000000-0000-0000-0000-000000000000", Uuid.empty.toString());
316 
317                 ubyte[] bytes = [0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x11, 0xd1, 
318                                           0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4, 0x30, 0xc8];
319                 Uuid u = Uuid(bytes.dup);
320                 auto str = "64f2ad82-5182-4c6a-ade5-59728ca0567b";
321                 auto u2 = Uuid.parse(str);
322 
323                 // toString
324                 assert (Uuid(bytes) == u);
325                 assert (u2 != u);
326 
327                 assert (u2.format() == 4);
328 
329                 // tryParse
330                 Uuid u3;
331                 assert (Uuid.tryParse(str, u3));
332                 assert (u3 == u2);
333         }
334 
335         unittest
336         {
337                 Uuid fail;
338                 // contains 'r'
339                 assert (!Uuid.tryParse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7", fail));
340                 // too short
341                 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba", fail));
342                 // hyphens matter
343                 assert (!Uuid.tryParse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7", fail));
344                 // hyphens matter (2)
345                 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7", fail));
346                 // hyphens matter (3)
347                 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7", fail));
348         }
349 
350         unittest
351         {
352                 // contains 'r'
353                 try 
354                 {
355                         Uuid.parse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
356                 }
357                 catch (IllegalArgumentException) {}
358 
359                 // too short
360                 try 
361                 {
362                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba"); assert (false);
363                 }
364                 catch (IllegalArgumentException) {}
365 
366                 // hyphens matter
367                 try 
368                 {
369                         Uuid.parse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7"); assert (false);
370                 }
371                 catch (IllegalArgumentException) {}
372 
373                 // hyphens matter (2)
374                 try 
375                 {
376                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7"); assert (false);
377                 }
378                 catch (IllegalArgumentException) {}
379 
380                 // hyphens matter (3)
381                 try 
382                 {
383                         Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7"); assert (false);
384                 }
385                 catch (IllegalArgumentException) {}
386         }
387 
388         import tango.util.digest.Sha1;
389         unittest
390         {
391                 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
392                 auto name = "hello";
393                 // This was generated with the uuid utility on linux/amd64. It might have different results on
394                 // a ppc processor -- the spec says something about network byte order, but it's using an array
395                 // of bytes at that point, so converting to NBO is a noop...
396                 auto expected = Uuid.parse("2b1c6704-a43f-5d43-9abb-b13310b4458a");
397                 auto generated = Uuid.byName(namespace, name, new Sha1, cast(ubyte)5);
398                 assert (generated == expected, "\nexpected: " ~ expected.toString() ~ "\nbut was:  " ~ generated.toString());
399         }
400         
401         import tango.util.digest.Md5;
402         unittest
403         {
404                 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41");
405                 auto name = "hello";
406                 auto expected = Uuid.parse("31a2b702-85a8-349a-9b0e-213b1bd753b8");
407                 auto generated = Uuid.byName(namespace, name, new Md5, cast(ubyte)3);
408                 assert (generated == expected, "\nexpected: " ~ expected.toString() ~ "\nbut was:  " ~ generated.toString());
409         }
410         //void main(){}
411 }
412 
413 /** A base interface for any UUID generator for UUIDs. That is,
414   * this interface is specified so that you write your code dependent on a
415   * UUID generator that takes an arbitrary random source, and easily switch
416   * to a different random source. Since the default uses KISS, if you find
417   * yourself needing more secure random numbers, you could trivially switch 
418   * your code to use the Mersenne twister, or some other PRNG.
419   *
420   * You could also, if you wish, use this to switch to deterministic UUID
421   * generation, if your needs require it.
422   */
423 interface UuidGen
424 {
425         Uuid next();
426 }
427 
428 /** Given a random number generator conforming to Tango's standard random
429   * interface, this will generate random UUIDs according to version 4 of
430   * RFC 4122. */
431 class RandomGen(TRandom) : UuidGen
432 {
433         TRandom random;
434         this (TRandom random)
435         {
436                 this.random = random;
437         }
438 
439         Uuid next()
440         {
441                 return Uuid.random(random);
442         }
443 }
444