1 /*******************************************************************************
2 
3         Copyright: Copyright (C) 2008 Kris Bell, all rights reserved
4 
5         License:   BSD style: $(LICENSE)
6 
7         version:   July 2008: Initial release
8 
9         Authors:   Kris
10 
11 *******************************************************************************/
12 
13 module tango.text.json.JsonEscape;
14 
15 private import tango.text.json.JsonParser;
16 
17 private import Util = tango.text.Util;
18 
19 private import Utf = tango.text.convert.Utf;
20 
21 /******************************************************************************
22 
23         Convert 'escaped' chars to normal ones. For example: \\ => \
24 
        The provided output buffer should be at least as long as the 
        input string; when it is too small it is resized, which may
        move the content to a freshly allocated heap array.

        Always returns a slice of dst (or of its resized replacement)
        holding the converted content, even when nothing needed conversion
30         
31 ******************************************************************************/
32 
T[] unescape(T) (const(T)[] src, T[] dst = null)
{
        // number of elements of dst filled so far
        size_t used;

        // sink handed to the delegate-based overload: copies each emitted
        // chunk to the end of dst, growing dst (with 1024 elements of 
        // headroom) whenever the chunk would not fit
        void sink (const(T)[] chunk)
        {
                auto end = used + chunk.length;
                if (end > dst.length)
                    dst.length = dst.length + chunk.length + 1024;
                dst[used .. end] = chunk[];
                used = end;
        }

        unescape (src, &sink);

        // only the filled portion is handed back
        return dst [0 .. used];
}
48 
49 
50 /******************************************************************************
51 
52         Convert reserved chars to escaped ones. For example: \ => \\ 
53 
        A slice of the output buffer holding the converted content is
        always returned, whether or not there were reserved chars
        present. The output buffer will be expanded as necessary
57         
58 ******************************************************************************/
59 
T[] escape(T) (const(T)[] src, T[] dst = null)
{
        // number of elements of dst filled so far
        size_t used;

        // sink handed to the delegate-based overload: copies each emitted
        // chunk to the end of dst, growing dst (with 1024 elements of 
        // headroom) whenever the chunk would not fit
        void sink (const(T)[] chunk)
        {
                auto end = used + chunk.length;
                if (end > dst.length)
                    dst.length = dst.length + chunk.length + 1024;
                dst[used .. end] = chunk;
                used = end;
        }

        escape (src, &sink);

        // only the filled portion is handed back
        return dst [0 .. used];
}
75 
76 
/******************************************************************************

        Convert 'escaped' chars to normal ones. For example: \\ => \

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. The delegate may be
        invoked several times, once per chunk of output.

        A \uXXXX escape is converted to the encoding of T. A high
        surrogate (\uD800 .. \uDBFF) that is immediately followed by an
        escaped low surrogate (\uDC00 .. \uDFFF) is combined into the
        single code point the pair represents.

        A lone trailing '\' is emitted as is. Any other unknown or
        malformed escape results in an Exception being thrown.
              
******************************************************************************/

void unescape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        size_t delta;
        auto s = src.ptr;
        auto len = src.length;
        enum:T {slash = '\\'}

        // decode the four hex digits at p[0..4] into v. Returns false 
        // when any of them is not a hex digit
        static bool hex4 (const(T)* p, out uint v)
        {
                foreach (i; 0 .. 4)
                        {
                        uint c = p[i];
                        if (c >= '0' && c <= '9')
                            c -= '0';
                        else
                           if (c >= 'a' && c <= 'f')
                               c -= 'a' - 10;
                           else
                              if (c >= 'A' && c <= 'F')
                                  c -= 'A' - 10;
                              else
                                 return false;
                        v = (v << 4) + c;
                        }
                return true;
        }

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, slash, len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              emit (s[0 .. delta]);
              len -= delta;
              s += delta;

              // bogus trailing '\'
              if (len < 2)
                 {
                 emit ("\\");
                 len = 0;
                 break;
                 }

              // translate \c
              switch (s[1])
                     {
                      case '\\':
                           emit ("\\");
                           break;

                      case '/':
                           emit ("/");
                           break;

                      case '"':
                           emit (`"`);
                           break;

                      case 'b':
                           emit ("\b");
                           break;

                      case 'f':
                           emit ("\f");
                           break;

                      case 'n':
                           emit ("\n");
                           break;

                      case 'r':
                           emit ("\r");
                           break;

                      case 't':
                           emit ("\t");
                           break;

                      case 'u':
                           {
                           // need "\uXXXX" in full, with valid hex digits
                           uint hi;
                           if (len < 6 || !hex4 (s + 2, hi))
                               goto default;

                           dchar v = cast(dchar) hi;

                           // elements consumed here, beyond the leading
                           // "\u" pair which is skipped after the switch
                           size_t extra = 4;

                           // JSON represents code points above U+FFFF as
                           // an escaped surrogate pair: fold both halves
                           // into one code point. An unpaired surrogate 
                           // is passed along unchanged
                           if (hi >= 0xD800 && hi <= 0xDBFF &&
                               len >= 12 && s[6] == '\\' && s[7] == 'u')
                              {
                              uint lo;
                              if (hex4 (s + 8, lo) && lo >= 0xDC00 && lo <= 0xDFFF)
                                 {
                                 v = cast(dchar) (0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00));
                                 extra = 10;
                                 }
                              }

                           // workspace for the re-encoded code point
                           T[6] t = void;
                           emit (Utf.fromString32 ((&v)[0..1], t));
                           len -= extra;
                           s += extra;
                           }
                           break;

                      default:
                           throw new Exception ("invalid escape");
                     }

              // step over the '\' and the char following it
              s += 2;
              len -= 2;           
              } while ((delta = Util.indexOf (s, slash, len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           emit (src);
}
189 
190 
/******************************************************************************

        Convert reserved chars to escaped ones. For example: \ => \\ 

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate. The delegate may be
        invoked several times, once per chunk of output.

        The chars " / and \ are prefixed with a '\', while \b \f \n \r 
        and \t are emitted in their two-char escaped form. Any other 
        control char below U+0020 is emitted as \u00XX, since JSON does
        not permit raw control chars within a string.
        
******************************************************************************/

void escape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        static immutable digits = "0123456789abcdef";

        // two-char escape: a '\' followed by the escape letter
        T[2] patch = '\\';

        // six-char escape: \u00XX where the last two digits get patched
        T[6] hex = '0';
        hex[0] = '\\';
        hex[1] = 'u';

        auto s = src.ptr;       // scan position
        auto t = s;             // start of the pending, unescaped run
        auto e = s + src.length;

        while (s < e)
              {
              const(T)[] replacement;

              switch (*s)
                     {
                     case '"':
                     case '/':
                     case '\\':
                          patch[1] = *s;
                          replacement = patch[];
                          break;
                     case '\r':
                          patch[1] = 'r';
                          replacement = patch[];
                          break;
                     case '\n':
                          patch[1] = 'n';
                          replacement = patch[];
                          break;
                     case '\t':
                          patch[1] = 't';
                          replacement = patch[];
                          break;
                     case '\b':
                          patch[1] = 'b';
                          replacement = patch[];
                          break;
                     case '\f':
                          patch[1] = 'f';
                          replacement = patch[];
                          break;
                     default:
                          // remaining control chars have no short form
                          if (*s < 0x20)
                             {
                             hex[4] = digits[*s >> 4];
                             hex[5] = digits[*s & 0x0f];
                             replacement = hex[];
                             break;
                             }

                          // ordinary char: leave it in the pending run
                          ++s;
                          continue;
                     }

              // flush the run preceding the reserved char, then the escape
              emit (t [0 .. s - t]);
              emit (replacement);
              t = ++s;
              }

        // did we change anything? Copy tail also
        if (t is src.ptr)
            emit (src);
        else
           emit (t [0 .. e - t]);
}
246 
247 
248 /******************************************************************************
249 
250 ******************************************************************************/
251 
debug (JsonEscape)
{
        import tango.io.Stdout;

        // ad hoc self test, compiled only with -debug=JsonEscape
        void main()
        {
                // buffer-returning escape
                escape ("abc");
                assert (escape ("abc") == "abc");
                assert (escape ("/abc") == `\/abc`, escape ("/abc"));
                assert (escape ("ab\\c") == `ab\\c`, escape ("ab\\c"));
                assert (escape ("abc\"") == `abc\"`);
                assert (escape ("abc/") == `abc\/`);
                assert (escape ("\n\t\r\b\f") == `\n\t\r\b\f`);

                // buffer-returning unescape
                unescape ("abc");

                // delegate variant: the sink must accept const(char)[] in 
                // order to match the delegate type unescape declares
                unescape ("abc\\u0020x", (const(char)[] p){Stdout(p);});
                assert (unescape ("abc") == "abc");

                // a lone trailing '\' is passed through
                assert (unescape ("abc\\") == "abc\\");
                assert (unescape ("abc\\t") == "abc\t");
                assert (unescape ("abc\\tc") == "abc\tc");
                assert (unescape ("\\t") == "\t");
                assert (unescape ("\\tx") == "\tx");
                assert (unescape ("\\r\\rx") == "\r\rx");
                assert (unescape ("abc\\t\\n\\bc") == "abc\t\n\bc");

                assert (unescape ("abc\"\\n\\bc") == "abc\"\n\bc");
                assert (unescape ("abc\\u002bx") == "abc+x");
        }

}
282