1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2006 Tango. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Jan 2006: initial release
8 
9         author:         Kris, Nthalk
10 
11 *******************************************************************************/
12 
13 module tango.io.stream.Quotes;
14 
15 private import tango.io.stream.Iterator;
16 
17 /*******************************************************************************
18 
19         Iterate over a set of delimited, optionally-quoted, text fields.
20 
21         Each field is exposed to the client as a slice of the original
22         content, where the slice is transient. If you need to retain the
23         exposed content, then you should .dup it appropriately.
24 
25         The content exposed via an iterator is supposed to be entirely
26         read-only. All current iterators abide by this rule, but it is
27         possible a user could mutate the content through a get() slice.
28         To enforce the desired read-only aspect, the code would have to
29         introduce redundant copying or the compiler would have to support
30         read-only arrays.
31 
32         Usage:
33         ---
34         auto f = new File ("my.csv");
35         auto l = new Lines (f);
36         auto b = new Array (0);
37         auto q = new Quotes!(char)(",", b);
38 
39         foreach (line; l)
40                 {
41                 b.assign (line);
                foreach (index, field; q)
                         Stdout (index, field);
44                 Stdout.newline;
45                 }
46         ---
47 
48         See Iterator, Lines, Patterns, Delimiters.
49 
50 *******************************************************************************/
51 
class Quotes(T) : Iterator!(T)
{
        // characters that end a field when met outside a quote block
        private const(T)[] delim;

        /***********************************************************************

                Construct a quote-aware field iterator.

                Params:
                delim  = the delimiter characters. Every character seen
                         outside a quote block is checked against this
                         array via has(), so a match on any one element
                         ends the current field
                stream = optional input to iterate over; handed to the
                         Iterator base constructor unchanged

        ***********************************************************************/

        this (const(T)[] delim, InputStream stream = null)
        {
                super (stream);
                this.delim = delim;
        }

        /***********************************************************************

                Locate the end of the next field within the provided data.

                A field ends at the first delimiter that is not enclosed
                in a quote block. A quote block opens with either " or '
                and closes at the next occurrence of that same character
                which is not preceded by an odd-length run of backslashes.

                Returns the result of found() for the content preceding
                the delimiter, or notFound() where the data holds no
                unquoted delimiter.

        ***********************************************************************/

        protected override size_t scan (const(void)[] data)
        {
                auto text = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];

                // the quote character currently open, or zero when outside
                // of a quote block (explicit, since T.init is not zero)
                T    opener  = 0;

                // true while the run of backslashes immediately preceding
                // the current character has odd length
                bool escaped = false;

                for (size_t pos = 0; pos < text.length; ++pos)
                    {
                    auto ch = text[pos];

                    // outside a quote block: look for an opening quote
                    // or a field delimiter
                    if (! opener)
                       {
                       if (ch is '"' || ch is '\'')
                           opener = ch;
                       else
                          if (has (delim, ch))
                              return found (set (text.ptr, 0, pos));
                       continue;
                       }

                    // inside a quote block: track backslash parity
                    if (ch is '\\')
                       {
                       escaped = ! escaped;
                       continue;
                       }

                    // an unescaped match of the opening character closes
                    // the block; any other character ends the backslash run
                    if (ch is opener && ! escaped)
                        opener = 0;
                    escaped = false;
                    }

                return notFound();
        }
}
106 
107 
108 /*******************************************************************************
109 
110 *******************************************************************************/
111 
debug (UnitTest)
{
        private import tango.io.Stdout;
        private import tango.text.Util;
        private import tango.io.device.Array;

        /***********************************************************************

                Join a list of fields with commas, split the result again
                with Quotes, and verify that exactly the original fields
                come back in order. The list covers empty fields, quoted
                delimiters, escaped quotes, and a quote character of one
                kind nested within a block of the other kind.

                Mismatches are printed for diagnosis and then fail the
                test via assert, rather than being reported only.

        ***********************************************************************/

        unittest
        {
                const(char)[][] expected =
                         [
                         `0`
                         ,``
                         ,``
                         ,`"3"`
                         ,`""`
                         ,`5`
                         ,`",6"`
                         ,`"7,"`
                         ,`8`
                         ,`"9,\\\","`
                         ,`10`
                         ,`',11",'`
                         ,`"12"`
                         ];

                auto b = new Array (expected.join (","));

                // number of fields the iterator actually produced
                size_t seen;

                foreach (i, f; new Quotes!(char)(",", b))
                        {
                        if (i >= expected.length)
                           {
                           Stdout.formatln ("uhoh: unexpected match: {}, {}", i, f);
                           assert (false, "Quotes produced more fields than expected");
                           }

                        if (f != expected[i])
                           {
                           Stdout.formatln ("uhoh: bad match: {}, {}, {}", i, f, expected[i]);
                           assert (false, "Quotes produced a field differing from the expected one");
                           }

                        ++seen;
                        }

                // an iterator yielding too few fields must not pass silently
                assert (seen is expected.length, "Quotes produced fewer fields than expected");
        }
}
146