1 /** 2 * A UUID is a Universally Unique Identifier. 3 * It is a 128-bit number generated either randomly or according to some 4 * inscrutable algorithm, depending on the UUID version used. 5 * 6 * Here, we implement a data structure for holding and formatting UUIDs. 7 * To generate a UUID, use one of the other modules in the UUID package. 8 * You can also create a UUID by parsing a string containing a textual 9 * representation of a UUID, or by providing the constituent bytes. 10 */ 11 module tango.util.uuid.Uuid; 12 13 import tango.core.Exception; 14 import Integer = tango.text.convert.Integer; 15 16 private union UuidData 17 { 18 uint[4] ui; 19 ubyte[16] ub; 20 } 21 22 /** This struct represents a UUID. It offers static members for creating and 23 * parsing UUIDs. 24 * 25 * This struct treats a UUID as an opaque type. The specification has fields 26 * for time, version, client MAC address, and several other data points, but 27 * these are meaningless for most applications and means of generating a UUID. 28 * 29 * There are versions of UUID generation involving the system time and MAC 30 * address. These are not used for several reasons: 31 * - One version contains identifying information, which is undesirable. 32 * - Ensuring uniqueness between processes requires inter-process 33 * communication. This would be unreasonably slow and complex. 34 * - Obtaining the MAC address is a system-dependent operation and beyond 35 * the scope of this module. 36 * - Using Java and .NET as a guide, they only implement randomized creation 37 * of UUIDs, not the MAC address/time based generation. 38 * 39 * When generating a random UUID, use a carefully seeded random number 40 * generator. A poorly chosen seed may produce undesirably consistent results. 41 */ 42 struct Uuid 43 { 44 private UuidData _data; 45 46 /** Copy the givent bytes into a UUID. If you supply more or fewer than 47 * 16 bytes, throws an IllegalArgumentException. */ 48 public static Uuid opCall(const(ubyte[]) data) 49 { 50 if (data.length != 16) 51 { 52 throw new IllegalArgumentException("A UUID is 16 bytes long."); 53 } 54 Uuid u; 55 u._data.ub[] = data[]; 56 return u; 57 } 58 59 /** Attempt to parse the representation of a UUID given in value. If the 60 * value is not in the correct format, throw IllegalArgumentException. 61 * If the value is in the correct format, return a UUID representing the 62 * given value. 63 * 64 * The following is an example of a UUID in the expected format: 65 * 67e55044-10b1-426f-9247-bb680e5fe0c8 66 */ 67 public static Uuid parse(const(char[]) value) 68 { 69 Uuid u; 70 if (!tryParse(value, u)) 71 throw new IllegalArgumentException("'" ~ value.idup ~ "' is not in the correct format for a UUID"); 72 return u; 73 } 74 75 /** Attempt to parse the representation of a UUID given in value. If the 76 * value is not in the correct format, return false rather than throwing 77 * an exception. If the value is in the correct format, set uuid to 78 * represent the given value. 79 * 80 * The following is an example of a UUID in the expected format: 81 * 67e55044-10b1-426f-9247-bb680e5fe0c8 82 */ 83 public static bool tryParse(const(char[]) value, out Uuid uuid) 84 { 85 if (value.length != 36 || 86 value[8] != '-' || 87 value[13] != '-' || 88 value[18] != '-' || 89 value[23] != '-') 90 { 91 return false; 92 } 93 int hyphens = 0; 94 foreach (i, v; value) 95 { 96 if ('a' <= v && 'f' >= v) continue; 97 if ('A' <= v && 'F' >= v) continue; 98 if ('0' <= v && '9' >= v) continue; 99 if (v == '-') 100 { 101 hyphens++; 102 continue; 103 } 104 // illegal character 105 return false; 106 } 107 if (hyphens != 4) 108 { 109 return false; 110 } 111 112 with (uuid._data) 113 { 114 // This is verbose, but it's simple, and it gets around endian 115 // issues if you try parsing an integer at a time. 116 ub[0] = cast(ubyte) Integer.parse(value[0..2], 16); 117 ub[1] = cast(ubyte) Integer.parse(value[2..4], 16); 118 ub[2] = cast(ubyte) Integer.parse(value[4..6], 16); 119 ub[3] = cast(ubyte) Integer.parse(value[6..8], 16); 120 121 ub[4] = cast(ubyte) Integer.parse(value[9..11], 16); 122 ub[5] = cast(ubyte) Integer.parse(value[11..13], 16); 123 124 ub[6] = cast(ubyte) Integer.parse(value[14..16], 16); 125 ub[7] = cast(ubyte) Integer.parse(value[16..18], 16); 126 127 ub[8] = cast(ubyte) Integer.parse(value[19..21], 16); 128 ub[9] = cast(ubyte) Integer.parse(value[21..23], 16); 129 130 ub[10] = cast(ubyte) Integer.parse(value[24..26], 16); 131 ub[11] = cast(ubyte) Integer.parse(value[26..28], 16); 132 ub[12] = cast(ubyte) Integer.parse(value[28..30], 16); 133 ub[13] = cast(ubyte) Integer.parse(value[30..32], 16); 134 ub[14] = cast(ubyte) Integer.parse(value[32..34], 16); 135 ub[15] = cast(ubyte) Integer.parse(value[34..36], 16); 136 } 137 138 return true; 139 } 140 141 /** Generate a UUID based on the given random number generator. 142 * The generator must have a method 'uint natural()' that returns 143 * a random number. The generated UUID conforms to version 4 of the 144 * specification. */ 145 public static Uuid random(Random)(Random generator) 146 { 147 Uuid u; 148 with (u) 149 { 150 _data.ui[0] = generator.natural(); 151 _data.ui[1] = generator.natural(); 152 _data.ui[2] = generator.natural(); 153 _data.ui[3] = generator.natural(); 154 155 // v4: 7th bytes' first half is 0b0100: 4 in hex 156 _data.ub[6] &= 0b01001111; 157 _data.ub[6] |= 0b01000000; 158 159 // v4: 9th byte's 1st half is 0b1000 to 0b1011: 8, 9, A, B in hex 160 _data.ub[8] &= 0b10111111; 161 _data.ub[8] |= 0b10000000; 162 } 163 return u; 164 } 165 166 /* Generate a UUID based on the given namespace and name. This conforms to 167 * versions 3 and 5 of the standard -- version 3 if you use MD5, or version 168 * 5 if you use SHA1. 169 * 170 * You should pass 3 as the value for uuidVersion if you are using the 171 * MD5 hash, and 5 if you are using the SHA1 hash. To do otherwise is an 172 * Abomination Unto Nuggan. 173 * 174 * This method is exposed mainly for the convenience methods in 175 * tango.util.uuid.*. You can use this method directly if you prefer. 176 */ 177 public static Uuid byName(Digest)(Uuid namespace, const(char[]) name, Digest digest, 178 ubyte uuidVersion) 179 { 180 /* o Compute the hash of the name space ID concatenated with the name. 181 o Set octets zero through 15 to octets zero through 15 of the hash. 182 o Set the four most significant bits (bits 12 through 15) of octet 183 6 to the appropriate 4-bit version number from Section 4.1.3. 184 o Set the two most significant bits (bits 6 and 7) of octet 8 to 185 zero and one, respectively. */ 186 auto nameBytes = namespace.toBytes(); 187 nameBytes ~= cast(ubyte[])name; 188 digest.update(nameBytes); 189 nameBytes = digest.binaryDigest(); 190 nameBytes[6] = cast(ubyte)((uuidVersion << 4) | (nameBytes[6] & 0b1111)); 191 nameBytes[8] |= 0b1000_0000; 192 nameBytes[8] &= 0b1011_1111; 193 return Uuid(nameBytes[0..16]); 194 } 195 196 /** Return an empty UUID (with all bits set to 0). This doesn't conform 197 * to any particular version of the specification. It's equivalent to 198 * using an uninitialized UUID. This method is provided for clarity. */ 199 @property public static Uuid empty() 200 { 201 Uuid uuid; 202 uuid._data.ui[] = 0; 203 return uuid; 204 } 205 206 /** Get a copy of this UUID's value as an array of unsigned bytes. */ 207 public const ubyte[] toBytes() 208 { 209 return _data.ub.dup; 210 } 211 212 /** Gets the version of this UUID. 213 * RFC 4122 defines five types of UUIDs: 214 * - Version 1 is based on the system's MAC address and the current time. 215 * - Version 2 uses the current user's userid and user domain in 216 * addition to the time and MAC address. 217 * - Version 3 is namespace-based, as generated by the NamespaceGenV3 218 * module. It uses MD5 as a hash algorithm. RFC 4122 states that 219 * version 5 is preferred over version 3. 220 * - Version 4 is generated randomly. 221 * - Version 5 is like version 3, but uses SHA-1 rather than MD5. Use 222 * the NamespaceGenV5 module to create UUIDs like this. 223 * 224 * The following additional versions exist: 225 * - Version 0 is reserved for backwards compatibility. 226 * - Version 6 is a non-standard Microsoft extension. 227 * - Version 7 is reserved for future use. 228 */ 229 public const ubyte format() 230 { 231 return cast(ubyte) (_data.ub[6] >> 4); 232 } 233 234 /** Get the canonical string representation of a UUID. 235 * The canonical representation is in hexidecimal, with hyphens inserted 236 * after the eighth, twelfth, sixteenth, and twentieth digits. For example: 237 * 67e55044-10b1-426f-9247-bb680e5fe0c8 238 * This is the format used by the parsing functions. 239 */ 240 public const(char[]) toString() 241 { 242 // Look, only one allocation. 243 char[] buf = new char[36]; 244 buf[8] = '-'; 245 buf[13] = '-'; 246 buf[18] = '-'; 247 buf[23] = '-'; 248 with (_data) 249 { 250 // See above with tryParse: this ignores endianness. 251 // Technically, it's sufficient that the conversion to string 252 // matches the conversion from string and from byte array. But 253 // this is the simplest way to make sure of that. Plus you can 254 // serialize and deserialize on machines with different endianness 255 // without a bunch of strange conversions, and with consistent 256 // string representations. 257 Integer.format(buf[0..2], ub[0], "x2"); 258 Integer.format(buf[2..4], ub[1], "x2"); 259 Integer.format(buf[4..6], ub[2], "x2"); 260 Integer.format(buf[6..8], ub[3], "x2"); 261 Integer.format(buf[9..11], ub[4], "x2"); 262 Integer.format(buf[11..13], ub[5], "x2"); 263 Integer.format(buf[14..16], ub[6], "x2"); 264 Integer.format(buf[16..18], ub[7], "x2"); 265 Integer.format(buf[19..21], ub[8], "x2"); 266 Integer.format(buf[21..23], ub[9], "x2"); 267 Integer.format(buf[24..26], ub[10], "x2"); 268 Integer.format(buf[26..28], ub[11], "x2"); 269 Integer.format(buf[28..30], ub[12], "x2"); 270 Integer.format(buf[30..32], ub[13], "x2"); 271 Integer.format(buf[32..34], ub[14], "x2"); 272 Integer.format(buf[34..36], ub[15], "x2"); 273 } 274 return buf; 275 } 276 277 /** Determines if this UUID has the same value as another. */ 278 public const bool opEquals(ref const(Uuid) other) 279 { 280 return 281 _data.ui[0] == other._data.ui[0] && 282 _data.ui[1] == other._data.ui[1] && 283 _data.ui[2] == other._data.ui[2] && 284 _data.ui[3] == other._data.ui[3]; 285 } 286 287 /** Get a hash code representing this UUID. */ 288 public const hash_t toHash() nothrow @safe 289 { 290 with (_data) 291 { 292 // 29 is just a convenient prime number 293 return (((((ui[0] * 29) ^ ui[1]) * 29) ^ ui[2]) * 29) ^ ui[3]; 294 } 295 } 296 } 297 298 299 debug (UnitTest) 300 { 301 import tango.math.random.Kiss; 302 unittest 303 { 304 // Generate them in the correct format 305 for (int i = 0; i < 20; i++) 306 { 307 auto uu = Uuid.random(&Kiss.instance).toString(); 308 auto c = uu[19]; 309 assert (c == '9' || c == '8' || c == 'a' || c == 'b', uu); 310 auto d = uu[14]; 311 assert (d == '4', uu); 312 } 313 314 // empty 315 assert (Uuid.empty.toString() == "00000000-0000-0000-0000-000000000000", Uuid.empty.toString()); 316 317 ubyte[] bytes = [0x6b, 0xa7, 0xb8, 0x10, 0x9d, 0xad, 0x11, 0xd1, 318 0x80, 0xb4, 0x00, 0xc0, 0x4f, 0xd4, 0x30, 0xc8]; 319 Uuid u = Uuid(bytes.dup); 320 auto str = "64f2ad82-5182-4c6a-ade5-59728ca0567b"; 321 auto u2 = Uuid.parse(str); 322 323 // toString 324 assert (Uuid(bytes) == u); 325 assert (u2 != u); 326 327 assert (u2.format() == 4); 328 329 // tryParse 330 Uuid u3; 331 assert (Uuid.tryParse(str, u3)); 332 assert (u3 == u2); 333 } 334 335 unittest 336 { 337 Uuid fail; 338 // contains 'r' 339 assert (!Uuid.tryParse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7", fail)); 340 // too short 341 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba", fail)); 342 // hyphens matter 343 assert (!Uuid.tryParse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7", fail)); 344 // hyphens matter (2) 345 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7", fail)); 346 // hyphens matter (3) 347 assert (!Uuid.tryParse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7", fail)); 348 } 349 350 unittest 351 { 352 // contains 'r' 353 try 354 { 355 Uuid.parse("fecr0a9b-4d5a-439e-8e4b-9d087ff49ba7"); assert (false); 356 } 357 catch (IllegalArgumentException) {} 358 359 // too short 360 try 361 { 362 Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d087ff49ba"); assert (false); 363 } 364 catch (IllegalArgumentException) {} 365 366 // hyphens matter 367 try 368 { 369 Uuid.parse("fec70a9b 4d5a-439e-8e4b-9d087ff49ba7"); assert (false); 370 } 371 catch (IllegalArgumentException) {} 372 373 // hyphens matter (2) 374 try 375 { 376 Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-7ff49ba7"); assert (false); 377 } 378 catch (IllegalArgumentException) {} 379 380 // hyphens matter (3) 381 try 382 { 383 Uuid.parse("fec70a9b-4d5a-439e-8e4b-9d08-ff49ba7"); assert (false); 384 } 385 catch (IllegalArgumentException) {} 386 } 387 388 import tango.util.digest.Sha1; 389 unittest 390 { 391 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41"); 392 auto name = "hello"; 393 // This was generated with the uuid utility on linux/amd64. It might have different results on 394 // a ppc processor -- the spec says something about network byte order, but it's using an array 395 // of bytes at that point, so converting to NBO is a noop... 396 auto expected = Uuid.parse("2b1c6704-a43f-5d43-9abb-b13310b4458a"); 397 auto generated = Uuid.byName(namespace, name, new Sha1, cast(ubyte)5); 398 assert (generated == expected, "\nexpected: " ~ expected.toString() ~ "\nbut was: " ~ generated.toString()); 399 } 400 401 import tango.util.digest.Md5; 402 unittest 403 { 404 auto namespace = Uuid.parse("15288517-c402-4057-9fc5-05711726df41"); 405 auto name = "hello"; 406 auto expected = Uuid.parse("31a2b702-85a8-349a-9b0e-213b1bd753b8"); 407 auto generated = Uuid.byName(namespace, name, new Md5, cast(ubyte)3); 408 assert (generated == expected, "\nexpected: " ~ expected.toString() ~ "\nbut was: " ~ generated.toString()); 409 } 410 //void main(){} 411 } 412 413 /** A base interface for any UUID generator for UUIDs. That is, 414 * this interface is specified so that you write your code dependent on a 415 * UUID generator that takes an arbitrary random source, and easily switch 416 * to a different random source. Since the default uses KISS, if you find 417 * yourself needing more secure random numbers, you could trivially switch 418 * your code to use the Mersenne twister, or some other PRNG. 419 * 420 * You could also, if you wish, use this to switch to deterministic UUID 421 * generation, if your needs require it. 422 */ 423 interface UuidGen 424 { 425 Uuid next(); 426 } 427 428 /** Given a random number generator conforming to Tango's standard random 429 * interface, this will generate random UUIDs according to version 4 of 430 * RFC 4122. */ 431 class RandomGen(TRandom) : UuidGen 432 { 433 TRandom random; 434 this (TRandom random) 435 { 436 this.random = random; 437 } 438 439 Uuid next() 440 { 441 return Uuid.random(random); 442 } 443 } 444