1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2006 Tango. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Jan 2006: initial release
8 
9         author:         Kris, Nthalk
10 
11 *******************************************************************************/
12 
13 module tango.io.stream.Quotes;
14 
15 private import tango.io.device.Conduit;
16 
17 private import tango.io.stream.Iterator;
18 
19 /*******************************************************************************
20 
21         Iterate over a set of delimited, optionally-quoted, text fields.
22 
23         Each field is exposed to the client as a slice of the original
24         content, where the slice is transient. If you need to retain the
25         exposed content, then you should .dup it appropriately.
26 
27         The content exposed via an iterator is supposed to be entirely
28         read-only. All current iterators abide by this rule, but it is
29         possible a user could mutate the content through a get() slice.
30         To enforce the desired read-only aspect, the code would have to
31         introduce redundant copying or the compiler would have to support
32         read-only arrays.
33 
34         Usage:
35         ---
36         auto f = new File ("my.csv");
37         auto l = new Lines (f);
38         auto b = new Array (0);
39         auto q = new Quotes!(char)(",", b);
40 
41         foreach (line; l)
42                 {
43                 b.assign (line);
                foreach (index, field; q)
45                          Stdout (index, field);
46                 Stdout.newline;
47                 }
48         ---
49 
50         See Iterator, Lines, Patterns, Delimiters.
51 
52 *******************************************************************************/
53 
class Quotes(T) : Iterator!(T)
{
        private const(T)[] delim;

        /***********************************************************************

                Construct a field iterator.

                Params:
                delim  = the set of delimiter characters; any single one
                         of them ends a field when seen outside a quoted
                         section
                stream = optional input, handed to the Iterator base
                         class as-is (may be null)

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                Look for the end of the next field within data.

                A field ends at the first delimiter character which is
                not enclosed in quotes. A quoted section is opened by
                either '"' or '\'' and is closed by the same character,
                unless that character is preceded by an odd number of
                consecutive backslashes. Backslashes carry no special
                meaning outside of a quoted section.

                When a delimiter is located, the field content ahead of
                it is registered through the inherited set() and the
                result of the inherited found() is returned; otherwise
                the result of the inherited notFound() is returned.

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                auto text    = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];
                T    opener  = 0;       // active quote character, zero while unquoted
                bool escaped = false;   // true after an odd run of backslashes

                for (size_t pos = 0; pos < text.length; ++pos)
                    {
                    const ch = text[pos];

                    // outside of any quote: open one, or split on a delimiter
                    if (! opener)
                       {
                       if (ch is '"' || ch is '\'')
                           opener = ch;
                       else
                          if (has (delim, ch))
                              return found (set (text.ptr, 0, pos));
                       continue;
                       }

                    // inside a quote: only the parity of a backslash run matters
                    if (ch is '\\')
                        escaped = ! escaped;
                    else
                       {
                       // an unescaped match of the opening char closes the quote
                       if (ch is opener && ! escaped)
                           opener = 0;
                       escaped = false;
                       }
                    }

                return notFound();
        }
}
108 
109 
110 /*******************************************************************************
111 
112 *******************************************************************************/
113 
debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        // Joins a list of fields with commas, splits the joined text again
        // with Quotes, and checks that every field comes back unchanged, in
        // order, with none missing and none surplus. A mismatch is printed
        // for diagnosis and then fails the test via assert.
        unittest
        {
                const(char)[][] expected =
                         [
                         `0`
                         ,``
                         ,``
                         ,`"3"`
                         ,`""`
                         ,`5`
                         ,`",6"`
                         ,`"7,"`
                         ,`8`
                         ,`"9,\\\","`
                         ,`10`
                         ,`',11",'`
                         ,`"12"`
                         ];

                auto b = new Array (expected.join (","));

                // number of fields which matched their expected counterpart
                size_t seen;

                foreach (i, f; new Quotes!(char)(",", b))
                        {
                        if (i >= expected.length)
                           {
                           Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                           assert (false, "Quotes: more fields produced than expected");
                           }

                        if (f != expected[i])
                           {
                           Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
                           assert (false, "Quotes: field differs from the expected content");
                           }

                        ++seen;
                        }

                // a truncated iteration would otherwise pass silently
                assert (seen is expected.length, "Quotes: fewer fields produced than expected");
        }
}
148