/*******************************************************************************

        copyright:      Copyright (c) 2006 Tango. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Jan 2006: initial release

        author:         Kris, Nthalk

*******************************************************************************/

module tango.io.stream.Quotes;

private import tango.io.stream.Iterator;

/*******************************************************************************

        Iterate over a set of delimited, optionally-quoted, text fields.

        Each field is exposed to the client as a slice of the original
        content, where the slice is transient. If you need to retain the
        exposed content, then you should .dup it appropriately.

        The content exposed via an iterator is supposed to be entirely
        read-only. All current iterators abide by this rule, but it is
        possible a user could mutate the content through a get() slice.
        To enforce the desired read-only aspect, the code would have to
        introduce redundant copying or the compiler would have to support
        read-only arrays.

        Quoted sections (opened by either a double or a single quote
        character) are returned verbatim, quote characters included;
        delimiters found inside them do not split the field.

        Usage:
        ---
        auto f = new File ("my.csv");
        auto l = new Lines (f);
        auto b = new Array (0);
        auto q = new Quotes!(char)(",", b);

        foreach (line; l)
                {
                b.assign (line);
                foreach (index, field; q)
                         Stdout (index, field);
                Stdout.newline;
                }
        ---

        See Iterator, Lines, Patterns, Delimiters.

*******************************************************************************/

class Quotes(T) : Iterator!(T)
{
        // delimiter content that scan() tests each unquoted element against
        private const(T)[] delim;

        /***********************************************************************

                Construct a quote-aware field iterator.

                Params:
                delim  = the delimiter content; scan() checks every
                         unquoted element against it via has()
                stream = optional input to iterate over, handed on to
                         the Iterator superclass (defaults to null)

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                Locate the end of the next field within the given data.

                This splits on delimiters only. If there is a quote, it
                suspends delimiter splitting until the quote is finished.
                While inside a quote, a quote character preceded by an
                odd number of consecutive backslashes is treated as
                escaped and does not terminate the quote.

                Params:
                data = the raw content to examine, viewed as an array of T

                Returns:
                the result of found() for the first unquoted delimiter,
                or the result of notFound() when no such delimiter is
                present in data

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                auto text = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];

                // number of consecutive backslashes just seen inside a quote
                int slashes = 0;

                // the quote char that opened the current block; 0 when outside
                T   active  = 0;

                foreach (pos, ch; text)
                        {
                        if (active)
                           {
                           // within a quote block: only track escapes and
                           // watch for the matching, unescaped, closing quote
                           if (ch is '\\')
                              {
                              ++slashes;
                              continue;
                              }

                           // an even backslash run leaves the quote unescaped
                           if (ch is active && slashes % 2 is 0)
                               active = 0;

                           // any non-backslash ends the current backslash run
                           slashes = 0;
                           continue;
                           }

                        // outside a quote block: open one, or split on a delimiter
                        if (ch is '"' || ch is '\'')
                            active = ch;
                        else
                           if (has (delim, ch))
                               return found (set (text.ptr, 0, pos));
                        }

                return notFound();
        }
}


/*******************************************************************************

        Checks that a comma-joined list of fields, several of which hold
        quoted commas and escaped quotes, is split back into those fields.
        Mismatches are reported on Stdout.

*******************************************************************************/

debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        unittest
        {
                const(char)[][] expected =
                [
                `0`
                ,``
                ,``
                ,`"3"`
                ,`""`
                ,`5`
                ,`",6"`
                ,`"7,"`
                ,`8`
                ,`"9,\\\","`
                ,`10`
                ,`',11",'`
                ,`"12"`
                ];

                // join the fields with the delimiter, then iterate them back out
                auto b = new Array (expected.join (","));
                auto fields = new Quotes!(char)(",", b);

                foreach (i, f; fields)
                        {
                        if (i >= expected.length)
                            Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                        else
                           if (f != expected[i])
                               Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
                        }
        }
}