/*******************************************************************************

        Copyright:      Copyright (C) 2008 Kris Bell. All rights reserved.

        License:        BSD style: $(LICENSE)

        version:        Aug 2008: Initial release

        Authors:        Kris

*******************************************************************************/

module tango.text.xml.DocEntity;

private import Util = tango.text.Util;

/******************************************************************************

        Convert XML entity patterns to normal characters

        <pre>
        &amp;  => &
        &quot; => "
        &apos; => '
        &gt;   => >
        &lt;   => <
        </pre>

        Only the five entities listed above are translated. Any other '&'
        (including one at the very end of the input) is copied through
        unchanged.

        Params:
        src = the text to decode
        dst = optional workspace; it is resized to src.length when it is
              shorter than that

        Returns:
        a slice of dst holding the decoded text. The content of src is
        always copied, even when it contains no entity at all.

******************************************************************************/

T[] fromEntity (T) (const(T)[] src, T[] dst = null)
{
    ptrdiff_t delta;
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything
    if ((delta = Util.indexOf (s, '&', len)) < len)
    {
        // make some room if not enough provided; every translation
        // emits fewer characters than it consumes, so src.length suffices
        if (dst.length < src.length)
            dst.length = src.length;
        auto d = dst.ptr;

        // copy segments over, a chunk at a time
        do
        {
            d [0 .. delta] = s [0 .. delta];
            len -= delta;
            s += delta;
            d += delta;

            // translate entity; token is the number of source
            // characters consumed (0 means "not an entity")
            size_t token = 0;

            // s[0] is the '&'. s is a raw pointer, so s[1] is not bounds
            // checked: only look at it when it lies inside the source
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            *d++ = '&';
                            token = 5;
                        }
                        else if (len > 5 && s[1..6] == "apos;")
                        {
                            *d++ = '\'';
                            token = 6;
                        }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            *d++ = '>';
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            *d++ = '<';
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            *d++ = '"';
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // not a known entity: keep the '&' as a literal character
            if (token == 0)
            {
                *d++ = '&';
                token = 1;
            }

            s += token;
            len -= token;
        } while ((delta = Util.indexOf (s, '&', len)) < len);

        // copy tail too
        d [0 .. len] = s [0 .. len];
        return dst [0 .. (d + len) - dst.ptr];
    }

    // nothing to translate: hand back a plain copy
    if (dst.length < src.length)
        dst.length = src.length;
    dst [0 .. src.length] = src;
    return dst [0 .. src.length];
}


/******************************************************************************

        Convert XML entity patterns to normal characters

        <pre>
        &amp;  => &
        &quot; => "
        &apos; => '
        &gt;   => >
        &lt;   => <
        </pre>

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        Params:
        src  = the text to decode
        emit = called with each successive piece of the decoded text;
               concatenating the pieces in call order yields the result.
               Pieces may be empty.

******************************************************************************/

void fromEntity (T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
    ptrdiff_t delta;
    auto s = src.ptr;
    auto len = src.length;

    // take a peek first to see if there's anything
    if ((delta = Util.indexOf (s, '&', len)) < len)
    {
        // emit segments, a chunk at a time
        do
        {
            emit (s [0 .. delta]);
            len -= delta;
            s += delta;

            // translate entity; token is the number of source
            // characters consumed (0 means "not an entity")
            size_t token = 0;

            // s[0] is the '&'. s is a raw pointer, so s[1] is not bounds
            // checked: only look at it when it lies inside the source
            if (len > 1)
                switch (s[1])
                {
                    case 'a':
                        if (len > 4 && s[1..5] == "amp;")
                        {
                            emit ("&");
                            token = 5;
                        }
                        else if (len > 5 && s[1..6] == "apos;")
                        {
                            emit ("'");
                            token = 6;
                        }
                        break;

                    case 'g':
                        if (len > 3 && s[1..4] == "gt;")
                        {
                            emit (">");
                            token = 4;
                        }
                        break;

                    case 'l':
                        if (len > 3 && s[1..4] == "lt;")
                        {
                            emit ("<");
                            token = 4;
                        }
                        break;

                    case 'q':
                        if (len > 5 && s[1..6] == "quot;")
                        {
                            emit ("\"");
                            token = 6;
                        }
                        break;

                    default:
                        break;
                }

            // not a known entity: keep the '&' as a literal character
            if (token == 0)
            {
                emit ("&");
                token = 1;
            }

            s += token;
            len -= token;
        } while ((delta = Util.indexOf (s, '&', len)) < len);

        // emit tail too
        emit (s [0 .. len]);
    }
    else
        emit (src);
}


/******************************************************************************

        Convert reserved chars to entities. For example: " => &quot;

        The characters " > < & ' are replaced with &quot; &gt; &lt; &amp;
        and &apos; respectively; everything else is copied verbatim.

        A slice of the provided output buffer is returned. The output
        buffer should be sufficiently large to accommodate the converted
        output, or it will be allocated from the heap instead

        Params:
        src = the text to encode
        dst = optional output buffer, grown on demand

        Returns:
        a slice of dst holding the encoded text. The content of src is
        always copied, even when it contains no reserved character.

******************************************************************************/

T[] toEntity(T) (const(T)[] src, T[] dst = null)
{
    const(T)[] entity;
    auto s = src.ptr;               // scan position
    auto t = s;                     // start of the pending verbatim run
    auto e = s + src.length;        // one past the end of the input
    size_t index = 0;               // number of chars written to dst

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
            {
                // scoped so that no case label jumps over a declaration
                auto len = cast(size_t) (s - t);

                // grow with some slack when run + entity does not fit
                if (dst.length < index + len + entity.length)
                    dst.length = (dst.length + len + entity.length) + dst.length / 2;

                // flush the verbatim run preceding the reserved char
                dst [index .. index + len] = t [0 .. len];
                index += len;

                dst [index .. index + entity.length] = entity;
                index += entity.length;
                t = ++s;
                break;
            }

            default:
                ++s;
                break;
        }

    // did we change anything?
    if (index)
    {
        // copy tail too
        auto len = cast(size_t) (e - t);
        if (dst.length < index + len)
            dst.length = index + len;

        dst [index .. index + len] = t [0 .. len];
        return dst [0 .. index + len];
    }

    // nothing to encode: hand back a plain copy
    if (dst.length < src.length)
        dst.length = src.length;
    dst [0 .. src.length] = src;
    return dst [0 .. src.length];
}


/******************************************************************************

        Convert reserved chars to entities. For example: " => &quot;

        This variant does not require an interim workspace, and instead
        emits directly via the provided delegate

        Params:
        src  = the text to encode
        emit = called with each successive piece of the encoded text;
               concatenating the pieces in call order yields the result.
               The final piece may be empty.

******************************************************************************/

void toEntity(T) (const(T)[] src, scope void delegate(const(T)[]) emit)
{
    const(T)[] entity;
    auto s = src.ptr;               // scan position
    auto t = s;                     // start of the pending verbatim run
    auto e = s + src.length;        // one past the end of the input

    while (s < e)
        switch (*s)
        {
            case '"':
                entity = "&quot;";
                goto common;

            case '>':
                entity = "&gt;";
                goto common;

            case '<':
                entity = "&lt;";
                goto common;

            case '&':
                entity = "&amp;";
                goto common;

            case '\'':
                entity = "&apos;";
                goto common;

            common:
                // flush the verbatim run preceding the reserved char
                if (s - t > 0)
                    emit (t [0 .. s - t]);
                emit (entity);
                t = ++s;
                break;

            default:
                ++s;
                break;
        }

    // did we change anything? entity is only ever assigned when a
    // reserved char was seen. Copy tail also
    if (entity.length)
        emit (t [0 .. e - t]);
    else
        emit (src);
}



/*******************************************************************************

        Self-test; build with -debug=DocEntity

*******************************************************************************/

debug (DocEntity)
{
    import tango.io.Console;

    void main()
    {
        auto s = fromEntity ("&amp;");
        assert (s == "&");
        s = fromEntity ("&quot;");
        assert (s == "\"");
        s = fromEntity ("&apos;");
        assert (s == "'");
        s = fromEntity ("&gt;");
        assert (s == ">");
        s = fromEntity ("&lt;");
        assert (s == "<");
        s = fromEntity ("&lt;&amp;&apos;");
        assert (s == "<&'");
        s = fromEntity ("*&lt;&amp;&apos;*");
        assert (s == "*<&'*");

        assert (fromEntity ("abc") == "abc");
        assert (fromEntity ("abc&") == "abc&");
        assert (fromEntity ("abc&lt;") == "abc<");
        assert (fromEntity ("abc&gt;goo") == "abc>goo");
        assert (fromEntity ("&amp;") == "&");
        assert (fromEntity ("&quot;&apos;") == "\"'");
        assert (fromEntity ("&q&s") == "&q&s");

        // a '&' that ends the slice must not be matched against
        // whatever happens to follow it in memory
        assert (fromEntity ("abc&amp;"[0 .. 4]) == "abc&");

        auto d = toEntity (">");
        assert (d == "&gt;");
        d = toEntity ("<");
        assert (d == "&lt;");
        d = toEntity ("&");
        assert (d == "&amp;");
        d = toEntity ("'");
        assert (d == "&apos;");
        d = toEntity ("\"");
        assert (d == "&quot;");
        d = toEntity ("^^>*>*");
        assert (d == "^^&gt;*&gt;*");
    }
}