1 /*******************************************************************************
2 
3         Copyright: Copyright (C) 2008 Aaron Craelius & Kris Bell.  
4                    All rights reserved.
5 
6         License:   BSD style: $(LICENSE)
7 
8         version:   Initial release: July 2008      
9 
10         Authors:   Aaron, Kris
11 
12 *******************************************************************************/
13 
14 module tango.text.json.JsonParser;
15 
16 private import tango.util.container.more.Stack;
17 
18 /*******************************************************************************
19 
20 *******************************************************************************/
21 
/*******************************************************************************

        Pull-style JSON tokenizer over an in-memory array of T.

        The parser never copies or decodes the input: every token value
        is a slice of the text handed to the constructor or to reset(),
        so escape sequences inside strings are returned exactly as they
        were written. Call next() repeatedly and inspect type() and
        value() after each successful step.

        The parser is lenient: separators between members and elements
        are optional, and any element with a value of 32 or less is
        treated as whitespace.

        Malformed input is reported by throwing Exception.

*******************************************************************************/

class JsonParser(T)
{
        /***********************************************************************

                Kinds of token reported by type(). Empty is the state
                before any token has been recognised.

        ***********************************************************************/

        public enum Token
               {
               Empty, Name, String, Number, BeginObject, EndObject,
               BeginArray, EndArray, True, False, Null
               }

        /// the kind of container currently being traversed
        private enum State {Object, Array}

        /***********************************************************************

                Cursor over the input: ptr is the next unread element,
                end is one past the last element, and text is the whole
                input (used to compute offsets for error messages).

        ***********************************************************************/

        private struct Iterator
        {
                const(T)*      ptr;
                const(T)*      end;
                const(T)[]     text;

                void reset (const(T)[] text)
                {
                        this.text = text;
                        this.ptr = text.ptr;
                        this.end = ptr + text.length;
                }
        }

        protected Iterator              str;
        // NOTE(review): the second template argument (16) presumably bounds
        // the nesting depth - confirm against tango.util.container.more.Stack
        private Stack!(State, 16)       state;
        private const(T)*               curLoc;         // start of current token
        private size_t                  curLen;         // length of current token
        private State                   curState;       // enclosing container kind
        protected Token                 curType;        // kind of current token

        /***********************************************************************

                Construct a parser and, when text is provided, position
                it on the opening '{' or '[' (see reset).

        ***********************************************************************/

        this (const(T)[] text = null)
        {
                reset (text);
        }

        /***********************************************************************

                Advance to the next token.

                Returns true when a token was recognised. Returns false
                when the input is exhausted, when no text has been set,
                or when a value position holds nothing the number
                scanner can consume.

                Throws Exception on malformed or truncated input.

        ***********************************************************************/

        @property final bool next ()
        {
                if (str.ptr is null || str.end is null)
                    return false;

                auto e = str.end;
                auto p = skipSpace (str.ptr, e);

                if ((str.ptr = p) >= e)
                     return false;

                if (curState is State.Array)
                    return parseArrayValue();

                // inside an object: a name must be followed by its value,
                // anything else is followed by the next name (or by '}')
                if (curType is Token.Name)
                    return parseMemberValue();

                return parseMemberName();
        }

        /***********************************************************************

                Returns the kind of the current token.

        ***********************************************************************/

        @property final Token type ()
        {
                return curType;
        }

        /***********************************************************************

                Returns the text of the current token as a slice of the
                input. Names and strings exclude the surrounding quotes;
                Begin/End tokens have a zero-length value.

        ***********************************************************************/

        @property final const(T)[] value ()
        {
                return curLoc [0 .. curLen];
        }

        /***********************************************************************

                Discard all state and start over on the provided text.

                Returns true when the first non-whitespace element is
                '{' or '[', which becomes the current token. Returns
                false when the text is empty or holds only whitespace.

                Throws Exception when the text starts with anything else.

        ***********************************************************************/

        bool reset (const(T)[] json = null)
        {
                state.clear();
                str.reset (json);
                curType = Token.Empty;
                curState = State.Object;

                // do not let value() expose a slice of the previous text
                curLoc = str.ptr;
                curLen = 0;

                if (json.length)
                   {
                   auto e = str.end;
                   auto p = skipSpace (str.ptr, e);

                   if (p < e)
                       return start (*(str.ptr = p));
                   }
                return false;
        }

        /***********************************************************************

                Throws Exception with the message "expected " ~ token.

        ***********************************************************************/

        protected final void expected (immutable(char)[] token)
        {
                throw new Exception ("expected " ~ token);
        }

        /***********************************************************************

                Throws Exception as above, with the offset of point
                (relative to the start of the input) appended to the
                message as " @input[offset]".

        ***********************************************************************/

        protected final void expected (immutable(char)[] token, const(T)* point)
        {
                // format i as decimal into the tail of buf, return the digits
                static char[] itoa (char[] buf, size_t i)
                {
                        auto p = buf.ptr+buf.length;
                        do {
                           *--p = cast(char) ('0' + i % 10);
                           } while (i /= 10);
                        return p[0..(buf.ptr+buf.length)-p];
                }
                char[32] tmp = void;
                expected (token ~ " @input[" ~ itoa(tmp, point-str.text.ptr).idup~"]");
        }

        /***********************************************************************

                Throws Exception with the message
                "unexpected end-of-input: " ~ msg.

        ***********************************************************************/

        private void unexpectedEOF (immutable(char)[] msg)
        {
                throw new Exception ("unexpected end-of-input: " ~ msg);
        }

        /***********************************************************************

                Returns p advanced past every element with a value of 32
                or less, never moving beyond e. The bound is tested
                before the element is read, so e itself is never
                dereferenced.

        ***********************************************************************/

        private static const(T)* skipSpace (const(T)* p, const(T)* e)
        {
                while (p < e && *p <= 32)
                       ++p;
                return p;
        }

        /***********************************************************************

                Handle the first significant element of a document: it
                has to open an object or an array.

        ***********************************************************************/

        private bool start (T c)
        {
                if (c is '{')
                    return push (Token.BeginObject, State.Object);

                if (c is '[')
                    return push (Token.BeginArray, State.Array);

                expected ("'{' or '[' at start of document");
                assert(0);
        }

        /***********************************************************************

                Parse the name of an object member, or the '}' closing
                the object. A single leading ',' is skipped when
                present. The caller guarantees str.ptr < str.end.

        ***********************************************************************/

        private bool parseMemberName ()
        {
                auto p = str.ptr;
                auto e = str.end;

                if (*p is '}')
                    return pop (Token.EndObject);

                if (*p is ',')
                    ++p;

                p = skipSpace (p, e);

                // input ended right after the separator
                if (p >= e)
                    unexpectedEOF ("before attribute-name");

                if (*p != '"')
                {
                    if (*p == '}')
                        expected ("an attribute-name after (a potentially trailing) ','", p);
                    else
                       expected ("'\"' before attribute-name", p);
                }

                return scanQuoted (p, Token.Name, "in attribute-name");
        }

        /***********************************************************************

                Parse the ':' separator and the value following a member
                name. The caller guarantees str.ptr < str.end.

        ***********************************************************************/

        private bool parseMemberValue ()
        {
                auto p = str.ptr;

                if (*p != ':')
                   expected ("':' before attribute-value", p);

                auto e = str.end;
                p = skipSpace (p + 1, e);
                str.ptr = p;

                // nothing but whitespace after the ':'
                if (p >= e)
                    unexpectedEOF ("before attribute-value");

                return parseValue (*p);
        }

        /***********************************************************************

                Dispatch on the first element of a value. Anything that
                is not an object, array, string or literal is handed to
                the number scanner.

        ***********************************************************************/

        private bool parseValue (T c)
        {
                switch (c)
                       {
                       case '{':
                            return push (Token.BeginObject, State.Object);

                       case '[':
                            return push (Token.BeginArray, State.Array);

                       case '"':
                            return doString();

                       case 'n':
                            if (match ("null", Token.Null))
                                return true;
                            expected ("'null'", str.ptr);
                            break;
                       case 't':
                            if (match ("true", Token.True))
                                return true;
                            expected ("'true'", str.ptr);
                            break;
                       case 'f':
                            if (match ("false", Token.False))
                                return true;
                            expected ("'false'", str.ptr);
                            break;
                       default:
                            break;
                       }

                return parseNumber();
        }

        /***********************************************************************

                Parse a string value; str.ptr is on the opening quote.

        ***********************************************************************/

        private bool doString ()
        {
                return scanQuoted (str.ptr, Token.String, "in string");
        }

        /***********************************************************************

                Shared scanner for names and strings. p addresses the
                opening quote. On success the current token covers the
                text between the quotes and str.ptr is placed after the
                closing quote.

                Throws Exception (mentioning context) when the input
                ends before an unescaped closing quote is found.

        ***********************************************************************/

        private bool scanQuoted (const(T)* p, Token token, immutable(char)[] context)
        {
                auto e = str.end;

                curLoc = p + 1;
                curType = token;

                while (++p < e)
                       if (*p is '"' && !escaped(p))
                           break;

                if (p >= e)
                    unexpectedEOF (context);

                curLen = p - curLoc;
                str.ptr = p + 1;
                return true;
        }

        /***********************************************************************

                Scan a number: optional sign, digits, optional fraction,
                optional exponent with an optional sign of its own.

                Returns false when nothing was consumed. Throws
                Exception when the number runs up to the end of the
                input, since a document cannot end inside a container.

        ***********************************************************************/

        private bool parseNumber ()
        {
                auto p = str.ptr;
                auto e = str.end;

                curLoc = p;
                curType = Token.Number;

                if (p < e && (*p is '-' || *p is '+'))
                    ++p;

                while (p < e && *p >= '0' && *p <= '9')
                       ++p;

                if (p < e && *p is '.')
                    while (++p < e && *p >= '0' && *p <= '9') {}

                if (p < e && (*p is 'e' || *p is 'E'))
                   {
                   ++p;

                   // the exponent may carry its own sign, as in 1e-5
                   if (p < e && (*p is '-' || *p is '+'))
                       ++p;

                   while (p < e && *p >= '0' && *p <= '9')
                          ++p;
                   }

                if (p < e)
                    curLen = p - curLoc;
                else
                   unexpectedEOF ("after number");

                str.ptr = p;
                return curLen > 0;
        }

        /***********************************************************************

                Test whether the input at str.ptr starts with name. On
                success the match becomes the current token, of the
                given kind, and is consumed.

        ***********************************************************************/

        private bool match (const(T)[] name, Token token)
        {
                auto i = name.length;

                // compare only when that many elements are actually left
                if (cast(size_t) (str.end - str.ptr) >= i && str.ptr[0 .. i] == name)
                   {
                   curLoc = str.ptr;
                   curType = token;
                   str.ptr += i;
                   curLen = i;
                   return true;
                   }
                return false;
        }

        /***********************************************************************

                Enter a container: consume the opening bracket, save the
                enclosing container kind and switch to the new one.

        ***********************************************************************/

        private bool push (Token token, State next)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                state.push (curState);
                curState = next;
                return true;
        }

        /***********************************************************************

                Leave a container: consume the closing bracket and
                restore the enclosing container kind.

        ***********************************************************************/

        private bool pop (Token token)
        {
                curLen = 0;
                curType = token;
                curLoc = str.ptr++;
                curState = state.pop();
                return true;
        }

        /***********************************************************************

                Parse the next array element, or the ']' closing the
                array. A single leading ',' is skipped when present.
                The caller guarantees str.ptr < str.end.

        ***********************************************************************/

        private bool parseArrayValue ()
        {
                auto p = str.ptr;
                if (*p is ']')
                    return pop (Token.EndArray);

                if (*p is ',')
                    ++p;

                auto e = str.end;
                p = skipSpace (p, e);
                str.ptr = p;

                // input ended right after the separator
                if (p >= e)
                    unexpectedEOF ("in array");

                return parseValue (*p);
        }

        /***********************************************************************

                Returns 1 when the element at p is preceded by an odd
                number of backslashes (and is therefore escaped), else
                0. Callers only pass positions after an opening quote,
                so the backward scan stops at that quote at the latest.

        ***********************************************************************/

        private int escaped (const(T)* p)
        {
                int i;

                while (*--p is '\\')
                       ++i;
                return i & 1;
        }
}
422 
423 
424 
debug(UnitTest)
{
                // Sample document: nested objects, one array, a number with
                // fraction and exponent, and the literals true/false/null.
                // The pieces are joined with '~' rather than by placing the
                // literals next to each other.
                immutable(char)[] json =
                "{" ~
                "\"glossary\": {" ~
                "\"title\": \"example glossary\"," ~
                "\"GlossDiv\": {" ~
                "   \"title\": \"S\"," ~
                "   \"GlossList\": {" ~
                "       \"GlossEntry\": {" ~
                "           \"ID\": \"SGML\"," ~
                "           \"SortAs\": \"SGML\"," ~
                "           \"GlossTerm\": \"Standard Generalized Markup Language\"," ~
                "           \"Acronym\": \"SGML\"," ~
                "           \"Abbrev\": \"ISO 8879:1986\"," ~
                "           \"GlossDef\": {" ~
                "                \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\"," ~
                "            \"GlossSeeAlso\": [\"GML\", \"XML\"]" ~
                "           }," ~
                "          \"GlossSee\": \"markup\"," ~
                "          \"ANumber\": 12345.6e7," ~
                "          \"True\": true," ~
                "          \"False\": false," ~
                "          \"Null\": null" ~
                "        }" ~
                "    }" ~
                "}" ~
                "}" ~
                "}";

unittest
{
        alias JsonParser!(char) Parser;
        alias Parser.Token      Tok;

        // one expected token: its kind and the text value() must return
        static struct Step
        {
                Tok               type;
                immutable(char)[] text;
        }

        auto p = new Parser (json);
        assert(p);

        // constructing the parser already consumes the opening '{'
        assert(p.type == Tok.BeginObject);

        // every following token, in document order
        Step[] steps =
        [
                Step(Tok.Name,        "glossary"),
                Step(Tok.BeginObject, ""),
                Step(Tok.Name,        "title"),
                Step(Tok.String,      "example glossary"),
                Step(Tok.Name,        "GlossDiv"),
                Step(Tok.BeginObject, ""),
                Step(Tok.Name,        "title"),
                Step(Tok.String,      "S"),
                Step(Tok.Name,        "GlossList"),
                Step(Tok.BeginObject, ""),
                Step(Tok.Name,        "GlossEntry"),
                Step(Tok.BeginObject, ""),
                Step(Tok.Name,        "ID"),
                Step(Tok.String,      "SGML"),
                Step(Tok.Name,        "SortAs"),
                Step(Tok.String,      "SGML"),
                Step(Tok.Name,        "GlossTerm"),
                Step(Tok.String,      "Standard Generalized Markup Language"),
                Step(Tok.Name,        "Acronym"),
                Step(Tok.String,      "SGML"),
                Step(Tok.Name,        "Abbrev"),
                Step(Tok.String,      "ISO 8879:1986"),
                Step(Tok.Name,        "GlossDef"),
                Step(Tok.BeginObject, ""),
                Step(Tok.Name,        "para"),
                Step(Tok.String,      "A meta-markup language, used to create markup languages such as DocBook."),
                Step(Tok.Name,        "GlossSeeAlso"),
                Step(Tok.BeginArray,  ""),
                Step(Tok.String,      "GML"),
                Step(Tok.String,      "XML"),
                Step(Tok.EndArray,    ""),
                Step(Tok.EndObject,   ""),
                Step(Tok.Name,        "GlossSee"),
                Step(Tok.String,      "markup"),
                Step(Tok.Name,        "ANumber"),
                Step(Tok.Number,      "12345.6e7"),
                Step(Tok.Name,        "True"),
                Step(Tok.True,        "true"),
                Step(Tok.Name,        "False"),
                Step(Tok.False,       "false"),
                Step(Tok.Name,        "Null"),
                Step(Tok.Null,        "null"),
                Step(Tok.EndObject,   ""),
                Step(Tok.EndObject,   ""),
                Step(Tok.EndObject,   ""),
                Step(Tok.EndObject,   ""),
                Step(Tok.EndObject,   "")
        ];

        foreach (step; steps)
                {
                assert(p.next);
                assert(p.type == step.type, step.text);
                assert(p.value == step.text, p.value);
                }

        // the document is exhausted and every container has been closed
        assert(!p.next);
        assert(p.state.size == 0);
}

}
594 
595 
debug (JsonParser)
{
        // Stand-alone entry point for this debug build: it only
        // instantiates the char flavour of the parser, without any input.
        void main()
        {
                auto parser = new JsonParser!(char)();
        }
}
603