/*******************************************************************************

        Copyright:      Copyright (C) 2008 Aaron Craelius & Kris Bell.
                        All rights reserved.

        License:        BSD style: $(LICENSE)

        version:        Initial release: July 2008

        Authors:        Aaron, Kris

*******************************************************************************/

module tango.text.json.JsonParser;

private import tango.util.container.more.Stack;

/*******************************************************************************

        Pull-style JSON tokenizer.

        reset() primes the parser on a document, next() advances to the
        following token, and type()/value() expose the current token.
        value() returns a slice of the input text: nothing is copied and
        escape sequences are not decoded. For Name and String tokens the
        slice excludes the surrounding quotes.

        All scanning is bounded by the end of the supplied slice; the
        input does not need to be followed by a terminator.

*******************************************************************************/

class JsonParser(T)
{
        /// Kinds of token reported by type()
        public enum Token
        {
                Empty, Name, String, Number, BeginObject, EndObject,
                BeginArray, EndArray, True, False, Null
        }

        /// Kind of container currently being traversed
        private enum State {Object, Array}

        /// Cursor over the input text
        private struct Iterator
        {
                const(T)*       ptr;            // next unread element
                const(T)*       end;            // one past the last element
                const(T)[]      text;           // the whole input

                void reset (const(T)[] text)
                {
                        this.text = text;
                        this.ptr = text.ptr;
                        this.end = ptr + text.length;
                }
        }

        protected Iterator              str;
        private Stack!(State, 16)       state;          // enclosing containers
        private const(T)*               curLoc;         // start of current token
        private size_t                  curLen;         // length of current token
        private State                   curState;       // innermost container
        protected Token                 curType;        // type of current token

        /***********************************************************************

                Construct a parser, optionally primed with a document
                (see reset)

        ***********************************************************************/

        this (const(T)[] text = null)
        {
                reset (text);
        }

        /***********************************************************************

                Advance to the next token.

                Returns false when the input is exhausted, or when a
                value could not be recognized. Throws Exception on
                malformed input or premature end-of-input.

        ***********************************************************************/

        @property final bool next ()
        {
                if (str.ptr is null || str.end is null)
                    return false;

                auto p = str.ptr;
                auto e = str.end;

                // bounds test first, so we never read beyond the input
                while (p < e && *p <= 32)
                       ++p;

                if ((str.ptr = p) >= e)
                     return false;

                if (curState is State.Array)
                    return parseArrayValue();

                // within an object: a name is followed by its value
                if (curType is Token.Name)
                    return parseMemberValue();

                return parseMemberName();
        }

        /***********************************************************************

                Return the type of the current token

        ***********************************************************************/

        @property final Token type ()
        {
                return curType;
        }

        /***********************************************************************

                Return the text of the current token, as a slice of the
                input. This is empty for the Begin/End tokens

        ***********************************************************************/

        @property final const(T)[] value ()
        {
                return curLoc [0 .. curLen];
        }

        /***********************************************************************

                Discard all parsing state and start over on the given
                document.

                Returns true when the document opens with '{' or '[',
                which becomes the current token. Returns false when the
                document is empty or holds only whitespace. Throws
                Exception when it begins with anything else.

        ***********************************************************************/

        bool reset (const(T)[] json = null)
        {
                state.clear();
                str.reset (json);
                curType = Token.Empty;
                curState = State.Object;

                // don't leave value() referring to a previous document
                curLoc = null;
                curLen = 0;

                auto p = str.ptr;
                auto e = str.end;

                while (p < e && *p <= 32)
                       ++p;

                if (p < e)
                    return start (*(str.ptr = p));

                return false;
        }

        /***********************************************************************

                Throw an Exception stating what was expected

        ***********************************************************************/

        protected final void expected (immutable(char)[] token)
        {
                throw new Exception ("expected " ~ token);
        }

        /***********************************************************************

                Throw an Exception stating what was expected, along with
                the offset of 'point' within the input text

        ***********************************************************************/

        protected final void expected (immutable(char)[] token, const(T)* point)
        {
                // render i as decimal digits into the tail of buf
                static char[] itoa (char[] buf, size_t i)
                {
                        auto p = buf.ptr+buf.length;
                        do {
                           *--p = cast(char) ('0' + i % 10);
                           } while (i /= 10);
                        return p[0..(buf.ptr+buf.length)-p];
                }

                char[32] tmp = void;
                auto offset = cast(size_t) (point - str.text.ptr);
                expected (token ~ " @input[" ~ itoa(tmp, offset).idup ~ "]");
        }

        /***********************************************************************

                Throw an Exception noting that the input ended early

        ***********************************************************************/

        private void unexpectedEOF (immutable(char)[] msg)
        {
                throw new Exception ("unexpected end-of-input: " ~ msg);
        }

        /***********************************************************************

                Handle the first significant character of a document,
                which must open an object or an array

        ***********************************************************************/

        private bool start (T c)
        {
                if (c is '{')
                    return push (Token.BeginObject, State.Object);

                if (c is '[')
                    return push (Token.BeginArray, State.Array);

                expected ("'{' or '[' at start of document");
                assert(0);
        }

        /***********************************************************************

                Parse an attribute name, or the '}' closing the current
                object. A ',' before the name is skipped when present,
                but is not required.

                next() guarantees str.ptr is within the input on entry

        ***********************************************************************/

        private bool parseMemberName ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is '}')
                    return pop (Token.EndObject);

                if (*p is ',')
                    ++p;

                while (p < e && *p <= 32)
                       ++p;

                if (p >= e)
                    unexpectedEOF ("before attribute-name");

                if (*p != '"')
                   {
                   if (*p == '}')
                       expected ("an attribute-name after (a potentially trailing) ','", p);
                   else
                      expected ("'\"' before attribute-name", p);
                   }

                curLoc = p+1;
                curType = Token.Name;

                // locate the closing (unescaped) quote
                while (++p < e)
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in attribute-name");

                str.ptr = p + 1;
                return true;
        }

        /***********************************************************************

                Parse the ':' separator and the attribute value which
                follows it

        ***********************************************************************/

        private bool parseMemberValue ()
        {
                auto p = str.ptr;

                if (*p != ':')
                    expected ("':' before attribute-value", p);

                auto e = str.end;
                while (++p < e && *p <= 32) {}

                str.ptr = p;
                if (p >= e)
                    unexpectedEOF ("before attribute-value");

                return parseValue (*p);
        }

        /***********************************************************************

                Dispatch on the first character of a value. Anything not
                recognized here is handed to parseNumber

        ***********************************************************************/

        private bool parseValue (T c)
        {
                switch (c)
                       {
                       case '{':
                            return push (Token.BeginObject, State.Object);

                       case '[':
                            return push (Token.BeginArray, State.Array);

                       case '"':
                            return doString();

                       case 'n':
                            if (match ("null", Token.Null))
                                return true;
                            expected ("'null'", str.ptr);
                            break;

                       case 't':
                            if (match ("true", Token.True))
                                return true;
                            expected ("'true'", str.ptr);
                            break;

                       case 'f':
                            if (match ("false", Token.False))
                                return true;
                            expected ("'false'", str.ptr);
                            break;

                       default:
                            break;
                       }

                return parseNumber();
        }

        /***********************************************************************

                Parse a quoted string value; str.ptr is at the opening
                quote on entry

        ***********************************************************************/

        private bool doString ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p+1;
                curType = Token.String;

                // locate the closing (unescaped) quote
                while (++p < e)
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("in string");

                str.ptr = p + 1;
                return true;
        }

        /***********************************************************************

                Parse a number: an optional sign, digits, an optional
                fraction, and an optional exponent which may carry its
                own sign.

                Returns false (consuming nothing) when no number is
                present. Throws when the number runs up to the end of
                the input, since the enclosing container cannot have
                been closed

        ***********************************************************************/

        private bool parseNumber ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p;
                curType = Token.Number;

                if (p < e && (*p is '-' || *p is '+'))
                    ++p;

                while (p < e && *p >= '0' && *p <= '9')
                       ++p;

                if (p < e && *p is '.')
                    do {
                       ++p;
                       } while (p < e && *p >= '0' && *p <= '9');

                if (p < e && (*p is 'e' || *p is 'E'))
                   {
                   ++p;

                   // exponents may be signed: 1e-5, 1E+3
                   if (p < e && (*p is '-' || *p is '+'))
                       ++p;

                   while (p < e && *p >= '0' && *p <= '9')
                          ++p;
                   }

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("after number");

                str.ptr = p;
                return curLen > 0;
        }

        /***********************************************************************

                Test whether the input at str.ptr begins with 'name'. If
                so, consume it and make it the current token

        ***********************************************************************/

        private bool match (const(T)[] name, Token token)
        {
                auto i = name.length;

                // ensure enough input remains before comparing
                if (cast(size_t) (str.end - str.ptr) >= i && str.ptr[0 .. i] == name)
                   {
                   curLoc = str.ptr;
                   curType = token;
                   str.ptr += i;
                   curLen = i;
                   return true;
                   }
                return false;
        }

        /***********************************************************************

                Enter a nested container: consume the opening character
                and remember the container we were in

        ***********************************************************************/

        private bool push (Token token, State next)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                state.push (curState);
                curState = next;
                return true;
        }

        /***********************************************************************

                Leave the current container: consume the closing
                character and restore the enclosing container

        ***********************************************************************/

        private bool pop (Token token)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                curState = state.pop();
                return true;
        }

        /***********************************************************************

                Parse an array element, or the ']' closing the current
                array. A ',' before the element is skipped when present,
                but is not required.

                next() guarantees str.ptr is within the input on entry

        ***********************************************************************/

        private bool parseArrayValue ()
        {
                auto p = str.ptr;
                if (*p is ']')
                    return pop (Token.EndArray);

                if (*p is ',')
                    ++p;

                auto e = str.end;
                while (p < e && *p <= 32)
                       ++p;

                str.ptr = p;
                if (p >= e)
                    unexpectedEOF ("in array");

                return parseValue (*p);
        }

        /***********************************************************************

                Return 1 when the element at p is preceded by an odd
                number of backslashes (and is therefore escaped), or 0
                otherwise. The backward scan stops at the start of the
                input text

        ***********************************************************************/

        private int escaped (const(T)* p)
        {
                int i;
                auto first = str.text.ptr;

                while (--p >= first && *p is '\\')
                       ++i;
                return i & 1;
        }
}



debug(UnitTest)
{
        // note: the separating ',' is missing after the last few members.
        // The parser skips a ',' where one is present but does not require
        // it, so this document is accepted as written
        immutable(char)[] json =
        `{
        "glossary": {
            "title": "example glossary",
            "GlossDiv": {
                "title": "S",
                "GlossList": {
                    "GlossEntry": {
                        "ID": "SGML",
                        "SortAs": "SGML",
                        "GlossTerm": "Standard Generalized Markup Language",
                        "Acronym": "SGML",
                        "Abbrev": "ISO 8879:1986",
                        "GlossDef": {
                            "para": "A meta-markup language, used to create markup languages such as DocBook.",
                            "GlossSeeAlso": ["GML", "XML"]
                        },
                        "GlossSee": "markup",
                        "ANumber": 12345.6e7
                        "True": true
                        "False": false
                        "Null": null
                    }
                }
            }
        }
        }`;

        unittest
        {
                auto p = new JsonParser!(char)(json);
                assert(p);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "glossary", p.value);
                assert(p.next);
                assert(p.value == "", p.value);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "title", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "example glossary", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossDiv", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "title", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "S", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossList", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossEntry", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "ID", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "SortAs", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossTerm", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "Standard Generalized Markup Language", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Acronym", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "SGML", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Abbrev", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "ISO 8879:1986", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossDef", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "para", p.value);
                assert(p.next);

                assert(p.type == p.Token.String);
                assert(p.value == "A meta-markup language, used to create markup languages such as DocBook.", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossSeeAlso", p.value);
                assert(p.next);
                assert(p.type == p.Token.BeginArray);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "GML", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "XML", p.value);
                assert(p.next);
                assert(p.type == p.Token.EndArray);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "GlossSee", p.value);
                assert(p.next);
                assert(p.type == p.Token.String);
                assert(p.value == "markup", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "ANumber", p.value);
                assert(p.next);
                assert(p.type == p.Token.Number);
                assert(p.value == "12345.6e7", p.value);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "True", p.value);
                assert(p.next);
                assert(p.type == p.Token.True);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "False", p.value);
                assert(p.next);
                assert(p.type == p.Token.False);
                assert(p.next);
                assert(p.type == p.Token.Name);
                assert(p.value == "Null", p.value);
                assert(p.next);
                assert(p.type == p.Token.Null);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(p.next);
                assert(p.type == p.Token.EndObject);
                assert(!p.next);

                assert(p.state.size == 0);

        }

}


debug (JsonParser)
{
        void main()
        {
                auto json = new JsonParser!(char);
        }
}