/*******************************************************************************

        Copyright:      Copyright (C) 2008 Aaron Craelius & Kris Bell.
                        All rights reserved.

        License:        BSD style: $(LICENSE)

        version:        Initial release: July 2008

        Authors:        Aaron, Kris

*******************************************************************************/

module tango.text.json.JsonParser;

private import tango.util.container.more.Stack;

/*******************************************************************************

        Pull-style JSON tokenizer over an in-memory text buffer.

        Each call to next() advances to the following token; type() and
        value() then describe it. Values are returned as slices of the
        original input: nothing is copied and escape sequences are not
        decoded.

        The parser is lenient: the ',' between members/elements is treated
        as optional, and numbers accept a leading '+'.

*******************************************************************************/

class JsonParser(T)
{
        /// Kind of token most recently produced by next() or reset()
        public enum Token
        {
                Empty, Name, String, Number, BeginObject, EndObject,
                BeginArray, EndArray, True, False, Null
        }

        /// The kind of container currently being traversed
        private enum State {Object, Array}

        /// Cursor over the input text: [ptr, end) is the unread remainder
        private struct Iterator
        {
                const(T)*       ptr;
                const(T)*       end;
                const(T)[]      text;

                void reset (const(T)[] text)
                {
                        this.text = text;
                        this.ptr = text.ptr;
                        this.end = ptr + text.length;
                }
        }

        protected Iterator              str;
        // enclosing container states; the meaning of the second template
        // argument is defined by tango's Stack (presumably a capacity -
        // TODO confirm)
        private Stack!(State, 16)       state;
        private const(T)*               curLoc;
        private size_t                  curLen;
        private State                   curState;
        protected Token                 curType;

        /***********************************************************************

                Construct a parser, optionally priming it with some text
                (see reset).

        ***********************************************************************/

        this (const(T)[] text = null)
        {
                reset (text);
        }

        /***********************************************************************

                Advance to the next token.

                Returns false when the input is exhausted (or was never
                set), or when a value could not be recognised. Throws an
                Exception on malformed or truncated input.

        ***********************************************************************/

        @property final bool next ()
        {
                if (str.ptr is null || str.end is null)
                    return false;

                auto p = str.ptr;
                auto e = str.end;

                // test the bound before dereferencing, so that trailing
                // whitespace never causes a read at `e`
                while (p < e && *p <= 32)
                       ++p;

                if ((str.ptr = p) >= e)
                     return false;

                if (curState is State.Array)
                    return parseArrayValue();

                // within an object: a name must be followed by its value,
                // anything else is followed by a name (or by '}')
                if (curType is Token.Name)
                    return parseMemberValue();

                return parseMemberName();
        }

        /***********************************************************************

                The type of the current token.

        ***********************************************************************/

        @property final Token type ()
        {
                return curType;
        }

        /***********************************************************************

                The text of the current token, as a slice of the input.

                For Name and String tokens the surrounding quotes are
                excluded; for Begin/End tokens the slice is empty.

        ***********************************************************************/

        @property final const(T)[] value ()
        {
                return curLoc [0 .. curLen];
        }

        /***********************************************************************

                Restart the parser on the given text.

                Returns true when the text begins (after whitespace) with
                '{' or '[', in which case that token becomes current.
                Returns false for empty or all-whitespace text. Throws an
                Exception when the first significant character is anything
                else.

        ***********************************************************************/

        bool reset (const(T)[] json = null)
        {
                state.clear();
                str.reset (json);
                curType = Token.Empty;
                curState = State.Object;

                if (json.length)
                   {
                   auto p = str.ptr;
                   auto e = str.end;

                   // bound check first: never dereference `e`
                   while (p < e && *p <= 32)
                          ++p;

                   if (p < e)
                       return start (*(str.ptr = p));
                   }
                return false;
        }

        /***********************************************************************

                Throw an Exception with the message "expected " ~ token.

        ***********************************************************************/

        protected final void expected (immutable(char)[] token)
        {
                throw new Exception ("expected " ~ token);
        }

        /***********************************************************************

                As above, additionally reporting the offset of `point`
                relative to the start of the input text.

        ***********************************************************************/

        protected final void expected (immutable(char)[] token, const(T)* point)
        {
                // render `i` as decimal digits, right-aligned within `buf`
                static char[] itoa (char[] buf, size_t i)
                {
                        auto p = buf.ptr+buf.length;
                        do {
                           *--p = cast(char) ('0' + i % 10);
                           } while (i /= 10);
                        return p[0..(buf.ptr+buf.length)-p];
                }

                char[32] tmp = void;
                expected (token ~ " @input[" ~ itoa(tmp, point-str.text.ptr).idup~"]");
        }

        /***********************************************************************

                Throw an Exception reporting that the input ended early.

        ***********************************************************************/

        private void unexpectedEOF (immutable(char)[] msg)
        {
                throw new Exception ("unexpected end-of-input: " ~ msg);
        }

        /***********************************************************************

                Consume the opening '{' or '[' of the document.

        ***********************************************************************/

        private bool start (T c)
        {
                if (c is '{')
                    return push (Token.BeginObject, State.Object);

                if (c is '[')
                    return push (Token.BeginArray, State.Array);

                expected ("'{' or '[' at start of document");
                assert(0);
        }

        /***********************************************************************

                Parse a quoted member name, or the '}' closing the current
                object. A leading ',' is skipped when present.

                The caller guarantees str.ptr < str.end on entry.

        ***********************************************************************/

        private bool parseMemberName ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is '}')
                    return pop (Token.EndObject);

                if (*p is ',')
                    ++p;

                while (p < e && *p <= 32)
                       ++p;

                // the input may end right after the ',' or whitespace
                if (p >= e)
                    unexpectedEOF ("before attribute-name");

                if (*p != '"')
                   {
                   if (*p == '}')
                       expected ("an attribute-name after (a potentially trailing) ','", p);
                   else
                      expected ("'\"' before attribute-name", p);
                   }

                curLoc = p+1;
                curType = Token.Name;

                // scan for the closing, unescaped quote
                while (++p < e)
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in attribute-name");

                str.ptr = p + 1;
                return true;
        }

        /***********************************************************************

                Parse the ':' following a member name, and then the value.

                The caller guarantees str.ptr < str.end on entry.

        ***********************************************************************/

        private bool parseMemberValue ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p != ':')
                    expected ("':' before attribute-value", p);

                while (++p < e && *p <= 32) {}

                str.ptr = p;

                // nothing but whitespace after the ':'
                if (p >= e)
                    unexpectedEOF ("before attribute-value");

                return parseValue (*p);
        }

        /***********************************************************************

                Dispatch on the first character of a value. Anything that
                is not a container, string or literal is tried as a number.

        ***********************************************************************/

        private bool parseValue (T c)
        {
                switch (c)
                       {
                       case '{':
                            return push (Token.BeginObject, State.Object);

                       case '[':
                            return push (Token.BeginArray, State.Array);

                       case '"':
                            return doString();

                       case 'n':
                            if (match ("null", Token.Null))
                                return true;
                            expected ("'null'", str.ptr);
                            break;

                       case 't':
                            if (match ("true", Token.True))
                                return true;
                            expected ("'true'", str.ptr);
                            break;

                       case 'f':
                            if (match ("false", Token.False))
                                return true;
                            expected ("'false'", str.ptr);
                            break;

                       default:
                            break;
                       }

                return parseNumber();
        }

        /***********************************************************************

                Parse a quoted string value; str.ptr addresses the opening
                quote on entry.

        ***********************************************************************/

        private bool doString ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p+1;
                curType = Token.String;

                // scan for the closing, unescaped quote
                while (++p < e)
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in string");

                str.ptr = p + 1;
                return true;
        }

        /***********************************************************************

                Parse a number: optional sign, digits, optional fraction,
                and an optional exponent with its own optional sign.

                Returns false when no characters were consumed. A number
                may not be the last thing in the input (a closing bracket
                must follow), hence the EOF error.

        ***********************************************************************/

        private bool parseNumber ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p;
                curType = Token.Number;

                // every dereference below is guarded by p < e

                if (p < e && (*p is '-' || *p is '+'))
                    ++p;

                while (p < e && *p >= '0' && *p <= '9')
                       ++p;

                if (p < e && *p is '.')
                    while (++p < e && *p >= '0' && *p <= '9') {}

                if (p < e && (*p is 'e' || *p is 'E'))
                   {
                   ++p;

                   // JSON exponents may carry a sign: 1e-5, 2E+10
                   if (p < e && (*p is '-' || *p is '+'))
                       ++p;

                   while (p < e && *p >= '0' && *p <= '9')
                          ++p;
                   }

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("after number");

                str.ptr = p;
                return curLen > 0;
        }

        /***********************************************************************

                Test whether the input at str.ptr begins with `name`; when
                it does, consume it and make it the current token.

        ***********************************************************************/

        private bool match (const(T)[] name, Token token)
        {
                auto i = name.length;

                // make sure `i` elements remain before slicing
                if (cast(size_t) (str.end - str.ptr) >= i && str.ptr[0 .. i] == name)
                   {
                   curLoc = str.ptr;
                   curType = token;
                   str.ptr += i;
                   curLen = i;
                   return true;
                   }
                return false;
        }

        /***********************************************************************

                Enter a nested container: consume its opening character,
                save the enclosing state and switch to `next`.

        ***********************************************************************/

        private bool push (Token token, State next)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                state.push (curState);
                curState = next;
                return true;
        }

        /***********************************************************************

                Leave the current container: consume its closing character
                and restore the enclosing state.

        ***********************************************************************/

        private bool pop (Token token)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                curState = state.pop();
                return true;
        }

        /***********************************************************************

                Parse an array element, or the ']' closing the current
                array. A leading ',' is skipped when present.

                The caller guarantees str.ptr < str.end on entry.

        ***********************************************************************/

        private bool parseArrayValue ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is ']')
                    return pop (Token.EndArray);

                if (*p is ',')
                    ++p;

                while (p < e && *p <= 32)
                       ++p;

                str.ptr = p;

                // the input may end right after the ',' or whitespace
                if (p >= e)
                    unexpectedEOF ("in array");

                return parseValue (*p);
        }

        /***********************************************************************

                Returns 1 when the character at `p` is preceded by an odd
                number of backslashes (i.e. it is escaped), 0 otherwise.

                Only called from within a quoted token, where the opening
                quote terminates the backward scan.

        ***********************************************************************/

        private int escaped (const(T)* p)
        {
                int i;

                while (*--p is '\\')
                       ++i;
                return i & 1;
        }
}



debug(UnitTest)
{
        // NOTE: the members following "ANumber" are not separated by ',';
        // the parser treats the separator as optional, so this still parses
        immutable(char)[] json =
                "{"
                ~ "\"glossary\": {"
                ~ "\"title\": \"example glossary\","
                ~ "\"GlossDiv\": {"
                ~ " \"title\": \"S\","
                ~ " \"GlossList\": {"
                ~ " \"GlossEntry\": {"
                ~ " \"ID\": \"SGML\","
                ~ " \"SortAs\": \"SGML\","
                ~ " \"GlossTerm\": \"Standard Generalized Markup Language\","
                ~ " \"Acronym\": \"SGML\","
                ~ " \"Abbrev\": \"ISO 8879:1986\","
                ~ " \"GlossDef\": {"
                ~ " \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\","
                ~ " \"GlossSeeAlso\": [\"GML\", \"XML\"]"
                ~ " },"
                ~ " \"GlossSee\": \"markup\","
                ~ " \"ANumber\": 12345.6e7"
                ~ " \"True\": true"
                ~ " \"False\": false"
                ~ " \"Null\": null"
                ~ " }"
                ~ " }"
                ~ "}"
                ~ "}"
                ~ "}";

        unittest
        {
                auto p = new JsonParser!(char)(json);
                assert(p);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "glossary", p.value);
                assert(p.next);
                assert(p.value == "", p.value);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "title", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "example glossary", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossDiv", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "title", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "S", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossList", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossEntry", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "ID", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "SortAs", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossTerm", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "Standard Generalized Markup Language", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Acronym", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Abbrev", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "ISO 8879:1986", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossDef", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "para", p.value);
                assert(p.next);

                assert(p.type == p.Token.String);
                assert(p.value == "A meta-markup language, used to create markup languages such as DocBook.", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossSeeAlso", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginArray);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "GML", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "XML", p.value);
                assert(p.next);
                assert(p.type == p.Token.EndArray);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossSee", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "markup", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "ANumber", p.value);
                assert(p.next);
                assert(p.type == p.Token.Number);
                assert(p.value == "12345.6e7", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "True", p.value);
                assert(p.next);
                assert(p.type == p.Token.True);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "False", p.value);
                assert(p.next);
                assert(p.type == p.Token.False);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Null", p.value);
                assert(p.next);
                assert(p.type == p.Token.Null);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(!p.next);

                assert(p.state.size == 0);

        }

        unittest
        {
                // a signed exponent belongs to the number token
                auto p = new JsonParser!(char)("[1.5e-3, -2E+10]");
                assert(p.type == p.Token.BeginArray);
                assert(p.next);
                assert(p.type == p.Token.Number);
                assert(p.value == "1.5e-3", p.value);
                assert(p.next);
                assert(p.type == p.Token.Number);
                assert(p.value == "-2E+10", p.value);
                assert(p.next);
                assert(p.type == p.Token.EndArray);
                assert(!p.next);

                // truncated input is reported instead of being read past
                auto q = new JsonParser!(char)("[1,");
                assert(q.next);
                assert(q.value == "1", q.value);

                bool threw = false;
                try {
                    auto more = q.next;
                    }
                catch (Exception x)
                       threw = true;
                assert(threw);
        }

}


debug (JsonParser)
{
        void main()
        {
                auto json = new JsonParser!(char);
        }
}