1 /*******************************************************************************
2 
3         Copyright: Copyright (C) 2008 Aaron Craelius & Kris Bell.  
4                    All rights reserved.
5 
6         License:   BSD style: $(LICENSE)
7 
8         version:   Initial release: July 2008      
9 
10         Authors:   Aaron, Kris
11 
12 *******************************************************************************/
13 
14 module tango.text.json.JsonParser;
15 
16 private import tango.util.container.more.Stack;
17 
/*******************************************************************************

        Pull-style JSON tokenizer operating directly on an in-memory text
        whose elements are of type T.

        reset() (also invoked by the constructor) consumes the opening '{'
        or '[' of the document; each subsequent call to next() advances to
        the following token. The current token is exposed via type() and
        value(); value() is a slice of the original input, so no text is
        copied and escape sequences inside strings are left undecoded.

        The parser is lenient: a ',' between members or array elements is
        skipped when present but is not required.

*******************************************************************************/

class JsonParser(T)
{
        /// kind of token most recently produced by reset() or next()
        public enum Token
               {
               Empty, Name, String, Number, BeginObject, EndObject, 
               BeginArray, EndArray, True, False, Null
               }

        /// kind of container the parser is currently positioned inside
        private enum State {Object, Array}

        /// read cursor over the input text
        private struct Iterator
        {
                const(T)*      ptr;     // next element to examine
                const(T)*      end;     // one past the last element
                const(T)[]     text;    // whole input; used for error offsets

                void reset (const(T)[] text)
                {
                        this.text = text;
                        this.ptr = text.ptr;
                        this.end = ptr + text.length;
                }
        }

        protected Iterator              str;
        private Stack!(State, 16)       state;          // enclosing containers
        private const(T)*               curLoc;         // start of current token
        private size_t                  curLen;         // length of current token
        private State                   curState;       // innermost container
        protected Token                 curType;        // kind of current token
        
        /***********************************************************************
        
                Construct a parser and, when text is non-empty, position it
                on the opening token of the document (see reset).

        ***********************************************************************/
        
        this (const(T)[] text = null)
        {
                reset (text);
        }
        
        /***********************************************************************
        
                Advance to the next token.

                Returns: true when a token was produced; false when the
                         input is exhausted, when no input was supplied, or
                         when a value could not be recognized (an empty
                         number).

                Throws:  Exception on malformed input or premature
                         end-of-input.

        ***********************************************************************/
        
        @property final bool next ()
        {
                if (str.ptr is null || str.end is null)
                    return false;

                auto p = str.ptr;
                auto e = str.end;

                // test the bound before dereferencing, so we never read
                // the element past the end of the input
                while (p < e && *p <= 32) 
                       ++p; 

                if ((str.ptr = p) >= e) 
                     return false;

                if (curState is State.Array) 
                    return parseArrayValue();

                // inside an object: a name is followed by its value,
                // anything else is followed by the next name (or '}')
                if (curType is Token.Name)
                    return parseMemberValue();

                return parseMemberName();
        }
        
        /***********************************************************************
        
                Returns: the kind of the current token.

        ***********************************************************************/
        
        @property final Token type ()
        {
                return curType;
        }
        
        /***********************************************************************
        
                Returns: the text of the current token as a slice of the
                         input. Names and strings exclude the surrounding
                         quotes; container begin/end tokens yield an empty
                         slice.

        ***********************************************************************/
        
        @property final const(T)[] value ()
        {
                return curLoc [0 .. curLen];
        }
        
        /***********************************************************************
        
                Discard all parsing state and start over on a new text.

                Returns: true when json is non-blank and begins with '{' or
                         '['; false when json is empty or contains only
                         whitespace.

                Throws:  Exception when the first non-whitespace element is
                         neither '{' nor '['.

        ***********************************************************************/
        
        bool reset (const(T)[] json = null)
        {
                state.clear();
                str.reset (json);
                curType = Token.Empty;
                curState = State.Object;

                // do not let value() expose a slice of the previous input
                curLoc = null;
                curLen = 0;

                if (json.length)
                   {
                   auto p = str.ptr;
                   auto e = str.end;

                   // bound first, then dereference
                   while (p < e && *p <= 32) 
                          ++p; 
                   if (p < e)
                       return start (*(str.ptr = p));
                   }
                return false;
        }

        /***********************************************************************
        
                Throws: Exception with the message "expected " ~ token.

        ***********************************************************************/
        
        protected final void expected (immutable(char)[] token)
        {
                throw new Exception ("expected " ~ token);
        }
        
        /***********************************************************************
        
                As expected(token), with the offset of point relative to the
                start of the input appended as " @input[offset]".

        ***********************************************************************/
        
        protected final void expected (immutable(char)[] token, const(T)* point)
        {
                // render i in decimal, filling buf from its end backwards
                static char[] itoa (char[] buf, size_t i)
                {
                        auto p = buf.ptr+buf.length;
                        do {
                           *--p = cast(char) ('0' + i % 10);
                           } while (i /= 10);
                        return p[0..(buf.ptr+buf.length)-p];
                }
                char[32] tmp = void;
                expected (token ~ " @input[" ~ itoa(tmp, point-str.text.ptr).idup~"]");
        }
        
        /***********************************************************************
        
                Throws: Exception with the message
                        "unexpected end-of-input: " ~ msg.

        ***********************************************************************/
        
        private void unexpectedEOF (immutable(char)[] msg)
        {
                throw new Exception ("unexpected end-of-input: " ~ msg);
        }
                
        /***********************************************************************
        
                Consume the opening element of the document, which must be
                '{' or '['.

        ***********************************************************************/
        
        private bool start (T c)
        {
                if (c is '{') 
                    return push (Token.BeginObject, State.Object);

                if (c is '[') 
                    return push (Token.BeginArray, State.Array);

                expected ("'{' or '[' at start of document");
                assert(0);
        }

        /***********************************************************************
        
                Parse either the '}' closing the current object, or the
                quoted name of the next member (after an optional ',').

                Expects str.ptr to address a valid element, which next()
                guarantees.

        ***********************************************************************/
        
        private bool parseMemberName ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is '}') 
                    return pop (Token.EndObject);
                
                if (*p is ',') 
                    ++p;
                
                while (p < e && *p <= 32) 
                       ++p;

                if (p >= e)
                    unexpectedEOF ("before attribute-name");

                if (*p != '"')
                   {
                   if (*p == '}')
                       expected ("an attribute-name after (a potentially trailing) ','", p);
                   else
                      expected ("'\"' before attribute-name", p);
                   }

                str.ptr = p;
                return scanString (Token.Name, "in attribute-name");
        }
        
        /***********************************************************************
        
                Parse the ':' separator and the value which follows a member
                name.

        ***********************************************************************/
        
        private bool parseMemberValue ()
        {
                auto p = str.ptr;

                if (*p != ':') 
                    expected ("':' before attribute-value", p);

                auto e = str.end;
                while (++p < e && *p <= 32) {}

                if (p >= e)
                    unexpectedEOF ("before attribute-value");

                return parseValue (*(str.ptr = p));
        }
        
        /***********************************************************************
        
                Dispatch on the first element c of a value. Anything that is
                not a container, string or literal is handed to
                parseNumber().

        ***********************************************************************/
        
        private bool parseValue (T c)
        {                       
                switch (c)
                       {
                       case '{':
                            return push (Token.BeginObject, State.Object);
         
                       case '[':
                            return push (Token.BeginArray, State.Array);
        
                       case '"':
                            return doString();
        
                       case 'n':
                            if (match ("null", Token.Null))
                                return true;
                            expected ("'null'", str.ptr);
                            break;

                       case 't':
                            if (match ("true", Token.True))
                                return true;
                            expected ("'true'", str.ptr);
                            break;

                       case 'f':
                            if (match ("false", Token.False))
                                return true;
                            expected ("'false'", str.ptr);
                            break;

                       default:
                            break;
                       }

                return parseNumber();
        }
        
        /***********************************************************************
        
                Parse a quoted string value; str.ptr addresses the opening
                quote.

        ***********************************************************************/
        
        private bool doString ()
        {
                return scanString (Token.String, "in string");
        }

        /***********************************************************************
        
                Shared scanner for quoted text. str.ptr must address the
                opening quote. On success the current token becomes the
                text between the quotes, tagged as type, and str.ptr is
                left just past the closing quote.

                Throws: Exception (unexpected end-of-input, qualified by
                        where) when no unescaped closing quote is found.

        ***********************************************************************/
        
        private bool scanString (Token type, immutable(char)[] where)
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p+1;
                curType = type;
                
                while (++p < e) 
                       if (*p is '"' && !escaped(p))
                           break;

                if (p < e) 
                    curLen = p - curLoc;
                else
                   unexpectedEOF (where);

                str.ptr = p + 1;
                return true;
        }
        
        /***********************************************************************
        
                Returns: the first position in [p, e) which does not hold a
                         decimal digit, or e when there is none.

        ***********************************************************************/
        
        private static const(T)* skipDigits (const(T)* p, const(T)* e)
        {
                while (p < e && *p >= '0' && *p <= '9')
                       ++p;
                return p;
        }

        /***********************************************************************
        
                Scan a number: optional sign, digits, optional fraction and
                optional exponent (itself optionally signed). The text is
                not converted; value() returns the raw slice.

                Returns: false when no element was consumed, i.e. the input
                         at this point is not a number at all.

                Throws:  Exception when the number runs up to the end of the
                         input; a number is always followed by something
                         within a well-formed document.

        ***********************************************************************/
        
        private bool parseNumber ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p;
                curType = Token.Number;

                if (p < e && (*p is '-' || *p is '+'))
                    ++p;

                p = skipDigits (p, e);

                if (p < e && *p is '.')
                    p = skipDigits (p + 1, e);

                if (p < e && (*p is 'e' || *p is 'E'))
                   {
                   ++p;

                   // the exponent may carry its own sign, as in 1e-5
                   if (p < e && (*p is '-' || *p is '+'))
                       ++p;

                   p = skipDigits (p, e);
                   }

                if (p < e) 
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("after number");

                str.ptr = p;
                return curLen > 0;
        }
        
        /***********************************************************************
        
                Test whether the input at str.ptr begins with name; if so,
                make it the current token (tagged as token) and step past
                it.

        ***********************************************************************/
        
        private bool match (const(T)[] name, Token token)
        {
                auto i = name.length;

                // never compare beyond the end of the input
                if (cast(size_t) (str.end - str.ptr) >= i && str.ptr[0 .. i] == name)
                   {
                   curLoc = str.ptr;
                   curType = token;
                   str.ptr += i;
                   curLen = i;
                   return true;
                   }
                return false;
        }
        
        /***********************************************************************
        
                Enter a container: consume its opening element, remember the
                enclosing container kind and make next the current one.

        ***********************************************************************/
        
        private bool push (Token token, State next)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                state.push (curState);
                curState = next;
                return true;
        }
        
        /***********************************************************************
        
                Leave a container: consume its closing element and restore
                the enclosing container kind.

        ***********************************************************************/
        
        private bool pop (Token token)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                curState = state.pop();
                return true;
        }

        /***********************************************************************
        
                Parse either the ']' closing the current array, or the next
                element (after an optional ',').

                Expects str.ptr to address a valid element, which next()
                guarantees.

        ***********************************************************************/
        
        private bool parseArrayValue ()
        {
                auto p = str.ptr;
                if (*p is ']') 
                    return pop (Token.EndArray);
                
                if (*p is ',') 
                    ++p;

                auto e = str.end;
                while (p < e && *p <= 32) 
                       ++p;

                if (p >= e)
                    unexpectedEOF ("in array");

                return parseValue (*(str.ptr = p));
        }

        /***********************************************************************
        
                Returns: 1 when the element at p is preceded by an odd
                         number of backslashes (and is thus escaped), else
                         0.

                The backwards walk stops at the first element which is not
                a backslash; when called from the string scanner this is,
                at the latest, the opening quote.

        ***********************************************************************/
        
        private int escaped (const(T)* p)
        {
                int i;

                while (*--p is '\\')
                       ++i;
                return i & 1;
        }
}
432 
433 
434 
debug(UnitTest)
{       
                // Fixture document: nested objects, an array, a number and
                // the three literals. Members are comma-separated so that
                // the text is well-formed JSON.
                immutable(char)[] json = 
                `{
                "glossary": {
                "title": "example glossary",
                "GlossDiv": {
                   "title": "S",
                   "GlossList": {
                       "GlossEntry": {
                           "ID": "SGML",
                           "SortAs": "SGML",
                           "GlossTerm": "Standard Generalized Markup Language",
                           "Acronym": "SGML",
                           "Abbrev": "ISO 8879:1986",
                           "GlossDef": {
                                "para": "A meta-markup language, used to create markup languages such as DocBook.",
                            "GlossSeeAlso": ["GML", "XML"]
                           },
                          "GlossSee": "markup",
                          "ANumber": 12345.6e7,
                          "True": true,
                          "False": false,
                          "Null": null
                        }
                    }
                }
                }
                }`;
       
// Walks the fixture token by token, checking the kind and text of each one
unittest
{
        auto p = new JsonParser!(char)(json);
        assert(p);

        // the constructor has already consumed the outermost '{'
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "glossary", p.value);
        assert(p.next);
        assert(p.value == "", p.value);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "title", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "example glossary", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossDiv", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "title", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "S", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossList", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossEntry", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "ID", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "SortAs", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossTerm", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "Standard Generalized Markup Language", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Acronym", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "SGML", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Abbrev", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "ISO 8879:1986", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossDef", p.value);
        assert(p.next);
        assert(p.type == p.Token.BeginObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "para", p.value);
        assert(p.next);

        assert(p.type == p.Token.String);
        assert(p.value == "A meta-markup language, used to create markup languages such as DocBook.", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossSeeAlso", p.value);
        assert(p.next);

        // array elements
        assert(p.type == p.Token.BeginArray);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "GML", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "XML", p.value);
        assert(p.next);
        assert(p.type == p.Token.EndArray);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "GlossSee", p.value);
        assert(p.next);
        assert(p.type == p.Token.String);
        assert(p.value == "markup", p.value);
        assert(p.next);

        // number and literals
        assert(p.type == p.Token.Name);
        assert(p.value == "ANumber", p.value);
        assert(p.next);
        assert(p.type == p.Token.Number);
        assert(p.value == "12345.6e7", p.value);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "True", p.value);
        assert(p.next);
        assert(p.type == p.Token.True);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "False", p.value);
        assert(p.next);
        assert(p.type == p.Token.False);
        assert(p.next);
        assert(p.type == p.Token.Name);
        assert(p.value == "Null", p.value);
        assert(p.next);
        assert(p.type == p.Token.Null);
        assert(p.next);

        // unwind the five nested objects
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(p.next);
        assert(p.type == p.Token.EndObject);
        assert(!p.next);

        // every container entered has been left again
        assert(p.state.size == 0);

}

}
604 
605 
debug (JsonParser)
{
        // Stand-alone entry point for debug builds: instantiates the
        // char parser with no input and does nothing further.
        void main ()
        {
                auto parser = new JsonParser!(char) ();
        }
}
613