/*******************************************************************************

        copyright:      Copyright (c) 2006 Tango. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Jan 2006: initial release

        author:         Kris, Nthalk

*******************************************************************************/

module tango.io.stream.Quotes;

private import tango.io.device.Conduit;

private import tango.io.stream.Iterator;

/*******************************************************************************

        Iterate over a set of delimited, optionally-quoted, text fields.

        Each field is exposed to the client as a slice of the original
        content, where the slice is transient. If you need to retain the
        exposed content, then you should .dup it appropriately.

        Quote characters are not stripped: a quoted field is exposed
        with its surrounding quotes intact.

        The content exposed via an iterator is supposed to be entirely
        read-only. All current iterators abide by this rule, but it is
        possible a user could mutate the content through a get() slice.
        To enforce the desired read-only aspect, the code would have to
        introduce redundant copying or the compiler would have to support
        read-only arrays.

        Usage:
        ---
        auto f = new File ("my.csv");
        auto l = new Lines (f);
        auto b = new Array (0);
        auto q = new Quotes!(char)(",", b);

        foreach (line; l)
        {
                b.assign (line);

                // with two foreach variables the index comes first,
                // followed by the field slice
                foreach (index, field; q)
                         Stdout (index, field);
                Stdout.newline;
        }
        ---

        See Iterator, Lines, Patterns, Delimiters.

*******************************************************************************/

class Quotes(T) : Iterator!(T)
{
        // the delimiter characters; each input element outside of a
        // quoted section is tested against this via has()
        private const(T)[] delim;

        /***********************************************************************

                Construct an iterator that splits on delimiters only.
                If there is a quote, delimiter splitting is suspended
                until the quote is finished.

                Params:
                delim  = the delimiter characters to split upon
                stream = optional input stream, handed to the Iterator
                         base-class constructor (defaults to null)

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                Scan the provided content for the end of the next field.

                The content is walked one element at a time. Either a
                double-quote or a single-quote opens a quoted section,
                which is closed only by the same quote character. While
                inside a quoted section, delimiters are ignored and a
                quote preceded by an odd number of consecutive
                backslashes is considered escaped, so it does not close
                the section. Backslashes have no special meaning outside
                of a quoted section.

                Params:
                data = the raw content to scan, viewed as an array of T

                Returns:
                the result of found() for the field ending just before
                the first unquoted delimiter, or the result of notFound()
                when the content holds no unquoted delimiter (which
                includes the case of a quote left unterminated).

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                T    quote   = 0;       // active quote char; 0 when not quoted
                bool escaped = false;   // odd number of preceding backslashes?

                // view the untyped buffer as whole elements of T
                auto content = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];

                foreach (i, c; content)
                {
                        // within a quote block?
                        if (quote)
                        {
                                if (c is '\\')
                                    // each backslash flips the parity of the run
                                    escaped = !escaped;
                                else
                                {
                                        // matched the initial quote char, unescaped?
                                        if (c is quote && !escaped)
                                            quote = 0;

                                        // any other char terminates a backslash run
                                        escaped = false;
                                }
                        }
                        else
                           // begin a quote block?
                           if (c is '"' || c is '\'')
                               quote = c;
                           else
                              // unquoted delimiter: the field is content [0 .. i]
                              if (has (delim, c))
                                  return found (set (content.ptr, 0, i));
                }

                return notFound();
        }
}


/*******************************************************************************

        Unit test: joins a list of fields with "," and verifies that
        Quotes splits the result back into exactly those fields. The
        fields cover empty fields, quoted fields, delimiters embedded
        within quotes, backslash-escaped quotes, and one quote character
        nested inside the other.

*******************************************************************************/

debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        unittest
        {
                const(char)[][] expected =
                [
                `0`
                ,``
                ,``
                ,`"3"`
                ,`""`
                ,`5`
                ,`",6"`
                ,`"7,"`
                ,`8`
                ,`"9,\\\","`
                ,`10`
                ,`',11",'`
                ,`"12"`
                ];

                auto b = new Array (expected.join (","));

                bool   ok = true;       // cleared upon the first mismatch
                size_t count;           // number of fields produced

                foreach (i, f; new Quotes!(char)(",", b))
                {
                        if (i >= expected.length)
                        {
                                Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                                ok = false;
                        }
                        else
                           if (f != expected[i])
                           {
                                   Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
                                   ok = false;
                           }
                        ++count;
                }

                // fail the test rather than merely printing a diagnostic
                assert (ok, "Quotes: produced an unexpected or mismatched field");

                // a truncated result must not pass silently
                assert (count == expected.length, "Quotes: wrong number of fields produced");
        }
}