/*******************************************************************************

        Copyright:      Copyright (C) 2008 Kris Bell, all rights reserved

        License:        BSD style: $(LICENSE)

        version:        July 2008: Initial release

        Authors:        Kris

*******************************************************************************/

module tango.text.json.JsonEscape;

private import tango.text.json.JsonParser;

private import Util = tango.text.Util;

private import Utf = tango.text.convert.Utf;

/******************************************************************************

        Convert 'escaped' chars to normal ones. For example: \\ => \

        The converted text is always copied into dst, which is grown
        (reallocated from the heap) whenever it is too small. Providing
        a buffer at least as long as the input avoids that allocation.

        Returns the slice of dst holding the converted content

******************************************************************************/

T[] unescape(T) (const(T)[] src, T[] dst = null)
{
        size_t content;

        // accumulate each emitted segment at the end of dst
        void append (const(T)[] s)
        {
                if (content + s.length > dst.length)
                    dst.length = dst.length + s.length + 1024;
                dst[content .. content+s.length] = s[];
                content += s.length;
        }

        unescape (src, &append);
        return dst [0 .. content];
}


/******************************************************************************

        Convert reserved chars to escaped ones. For example: \ => \\

        The escaped text is always copied into dst, which is expanded
        as necessary.

        Returns the slice of dst holding the escaped content

******************************************************************************/

T[] escape(T) (const(T)[] src, T[] dst = null)
{
        size_t content;

        // accumulate each emitted segment at the end of dst
        void append (const(T)[] s)
        {
                if (content + s.length > dst.length)
                    dst.length = dst.length + s.length + 1024;
                dst[content .. content+s.length] = s[];
                content += s.length;
        }

        escape (src, &append);
        return dst [0 .. content];
}


/******************************************************************************

        Convert 'escaped' chars to normal ones. For example: \\ => \

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        A \uXXXX escape holding a high surrogate that is immediately
        followed by a \uXXXX escape holding a low surrogate is decoded
        as a single code point. A lone trailing '\' is emitted as is.

        Throws an Exception ("invalid escape") for an unknown escape
        char, or for a \u that is not followed by four hex digits

******************************************************************************/

void unescape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        // read exactly four hex digits starting at p; returns false when
        // any of them is not a hex digit
        static bool quad (const(T)* p, out uint value)
        {
                for (auto i=0; i < 4; ++i)
                    {
                    uint c = p[i];
                    if (c >= '0' && c <= '9')
                        c -= '0';
                    else
                       if (c >= 'a' && c <= 'f')
                           c -= 'a' - 10;
                       else
                          if (c >= 'A' && c <= 'F')
                              c -= 'A' - 10;
                          else
                             return false;
                    value = (value << 4) | c;
                    }
                return true;
        }

        size_t delta;
        auto s = src.ptr;
        auto len = src.length;
        enum:T {slash = '\\'}

        // take a peek first to see if there's anything
        if ((delta = Util.indexOf (s, slash, len)) < len)
           {
           // copy segments over, a chunk at a time
           do {
              emit (s[0 .. delta]);
              len -= delta;
              s += delta;

              // bogus trailing '\'
              if (len < 2)
                 {
                 emit ("\\");
                 len = 0;
                 break;
                 }

              // translate \c
              switch (s[1])
                     {
                     case '\\':
                          emit ("\\");
                          break;

                     case '/':
                          emit ("/");
                          break;

                     case '"':
                          emit (`"`);
                          break;

                     case 'b':
                          emit ("\b");
                          break;

                     case 'f':
                          emit ("\f");
                          break;

                     case 'n':
                          emit ("\n");
                          break;

                     case 'r':
                          emit ("\r");
                          break;

                     case 't':
                          emit ("\t");
                          break;

                     case 'u':
                          {
                          uint hi;
                          if (len < 6 || !quad (s + 2, hi))
                              goto default;

                          dchar v = cast(dchar) hi;

                          // JSON writes code points above U+FFFF as two
                          // consecutive \u escapes (a surrogate pair):
                          // fold them into the one code point they denote
                          uint lo;
                          if (hi >= 0xD800 && hi <= 0xDBFF && len >= 12
                              && s[6] == '\\' && s[7] == 'u'
                              && quad (s + 8, lo)
                              && lo >= 0xDC00 && lo <= 0xDFFF)
                             {
                             v = cast(dchar) (0x10000 + ((hi - 0xD800) << 10)
                                                      + (lo - 0xDC00));
                             // skip the second \uXXXX as well
                             len -= 6;
                             s += 6;
                             }

                          T[6] t = void;
                          emit (Utf.fromString32 ((&v)[0..1], t));

                          // skip the four hex digits; the leading \u is
                          // skipped after the switch
                          len -= 4;
                          s += 4;
                          break;
                          }

                     default:
                          throw new Exception ("invalid escape");
                     }

              s += 2;
              len -= 2;
              } while ((delta = Util.indexOf (s, slash, len)) < len);

           // copy tail too
           emit (s [0 .. len]);
           }
        else
           emit (src);
}


/******************************************************************************

        Convert reserved chars to escaped ones. For example: \ => \\

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        The chars " / \ and \r \n \t \b \f get their two-char escapes.
        Any other control char below 0x20 is emitted as \u00XX, since
        JSON does not permit raw control chars inside a string

******************************************************************************/

void escape(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
        static immutable hex = "0123456789abcdef";

        T[2] patch = '\\';

        // template for \u00XX; the last two digits are filled in per char
        T[6] code = '0';
        code[0] = '\\';
        code[1] = 'u';

        auto s = src.ptr;
        auto t = s;
        auto e = s + src.length;

        while (s < e)
              {
              switch (*s)
                     {
                     case '"':
                     case '/':
                     case '\\':
                          patch[1] = *s;
                          break;
                     case '\r':
                          patch[1] = 'r';
                          break;
                     case '\n':
                          patch[1] = 'n';
                          break;
                     case '\t':
                          patch[1] = 't';
                          break;
                     case '\b':
                          patch[1] = 'b';
                          break;
                     case '\f':
                          patch[1] = 'f';
                          break;
                     default:
                          if (*s < 0x20)
                             {
                             // control char without a short escape
                             code[4] = hex[*s >> 4];
                             code[5] = hex[*s & 0x0f];
                             emit (t [0 .. s - t]);
                             emit (code[]);
                             t = ++s;
                             }
                          else
                             ++s;
                          continue;
                     }

              // flush the unescaped run before this char, then its escape
              emit (t [0 .. s - t]);
              emit (patch);
              t = ++s;
              }

        // did we change anything? Copy tail also
        if (t is src.ptr)
            emit (src);
        else
           emit (t [0 .. e - t]);
}


/******************************************************************************

******************************************************************************/

debug (JsonEscape)
{
        import tango.io.Stdout;

        void main()
        {
                escape ("abc");
                assert (escape ("abc") == "abc");
                assert (escape ("/abc") == `\/abc`, escape ("/abc"));
                assert (escape ("ab\\c") == `ab\\c`, escape ("ab\\c"));
                assert (escape ("abc\"") == `abc\"`);
                assert (escape ("abc/") == `abc\/`);
                assert (escape ("\n\t\r\b\f") == `\n\t\r\b\f`);
                assert (escape ("a\x01b") == `a\u0001b`);
                assert (escape ("\x1f") == `\u001f`);

                unescape ("abc");
                unescape ("abc\\u0020x", (const(char)[] p){Stdout(p);});
                assert (unescape ("abc") == "abc");
                assert (unescape ("abc\\") == "abc\\");
                assert (unescape ("abc\\t") == "abc\t");
                assert (unescape ("abc\\tc") == "abc\tc");
                assert (unescape ("\\t") == "\t");
                assert (unescape ("\\tx") == "\tx");
                assert (unescape ("\\r\\rx") == "\r\rx");
                assert (unescape ("abc\\t\\n\\bc") == "abc\t\n\bc");

                assert (unescape ("abc\"\\n\\bc") == "abc\"\n\bc");
                assert (unescape ("abc\\u002bx") == "abc+x");
                assert (unescape ("\\ud83d\\ude00") == "\U0001F600");
                assert (unescape (escape ("a\x01b")) == "a\x01b");
        }

}