1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2004 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Initial release: December 2005
8 
9         author:         Kris
10 
11 *******************************************************************************/
12 
13 module tango.io.stream.Iterator;
14 
15 private import tango.io.stream.Buffered;
16 
17 protected import tango.io.device.Conduit : InputFilter, InputBuffer, InputStream;
18 
19 /*******************************************************************************
20 
21         The base class for a set of stream iterators. These operate
22         upon a buffered input stream, and are designed to deal with
23         partial content. That is, stream iterators go to work the
24         moment any data becomes available in the buffer. Contrast
25         this behaviour with the tango.text.Util iterators, which
26         operate upon the extent of an array.
27 
28         There are two types of iterators supported; exclusive and
29         inclusive. The former are the more common kind, where a token
30         is delimited by elements that are considered foreign. Examples
31         include space, comma, and end-of-line delineation. Inclusive
32         tokens are just the opposite: they look for patterns in the
33         text that should be part of the token itself - everything else
34         is considered foreign. Currently tango.io.stream includes the
35         exclusive variety only.
36 
37         Each pattern is exposed to the client as a slice of the original
38         content, where the slice is transient. If you need to retain the
39         exposed content, then you should .dup it appropriately.
40 
41         The content provided to these iterators is intended to be fully
42         read-only. All current tokenizers abide by this rule, but it is
43         possible a user could mutate the content through a token slice.
44         To enforce the desired read-only aspect, the code would have to
45         introduce redundant copying or the compiler would have to support
46         read-only arrays (now in D2).
47 
48         See Delimiters, Lines, Patterns, Quotes.
49 
50 *******************************************************************************/
51 
class Iterator(T) : InputFilter
{
        // buffered view of the stream being scanned
        private InputBuffer     source;

        // most recent token, visible to subclasses
        protected const(T)[]    slice;

        // most recent delimiter, visible to subclasses
        protected const(T)[]    delim;

        /***********************************************************************

                Subclass hook which performs the actual pattern scan
                over the content handed to it.

                Implementations in this family are expected to answer
                with either found() or notFound().

        ***********************************************************************/

        abstract protected size_t scan (const(void)[] data);

        /***********************************************************************

                Construct an iterator, optionally bound to a stream.
                When a stream is supplied it is installed via set().

        ***********************************************************************/

        this (InputStream stream = null)
        {
                super (stream);
                if (stream !is null)
                    set (stream);
        }

        /***********************************************************************

                Bind this iterator to the given stream. The stream is
                passed through BufferedInput.create() and the result
                becomes the source for both this class and the parent
                filter.

                Returns this instance, for call chaining.

        ***********************************************************************/

        Iterator set (InputStream stream)
        {
                assert (stream);

                auto buffered = BufferedInput.create (stream);
                this.source  = buffered;
                super.source = buffered;
                return this;
        }

        /***********************************************************************

                Expose the most recently located token. The result is
                a slice, not a copy.

        ***********************************************************************/

        final const(T)[] get ()
        {
                return slice;
        }

        /**********************************************************************

                foreach support: visit each token in turn. The delegate
                receives the token as a const(T)[] slice.

                The delegate is always invoked at least once, and the
                loop halts as soon as either the delegate returns a
                non-zero value or consume() reports nothing further.

        **********************************************************************/

        int opApply (scope int delegate(ref const(T)[]) dg)
        {
                int  status;
                bool pending = true;

                // primed with pending = true so the body runs at least once
                while (pending && !status)
                      {
                      pending = consume();
                      status  = dg (slice);
                      }
                return status;
        }

        /**********************************************************************

                foreach support: visit each token in turn, along with a
                running token count which begins at zero.

        **********************************************************************/

        int opApply (scope int delegate(ref int, ref const(T)[]) dg)
        {
                int status,
                    index;

                // the counter is bumped only after the delegate has seen it
                for (bool pending = true; pending && !status; ++index)
                    {
                    pending = consume();
                    status  = dg (index, slice);
                    }
                return status;
        }

        /**********************************************************************

                foreach support: visit each token in turn together with
                its delimiter and a running token count which begins at
                zero. The delimiter is cleared before each token is
                consumed.

        **********************************************************************/

        int opApply (scope int delegate(ref int, ref const(T)[], ref const(T)[]) dg)
        {
                int status,
                    index;

                for (bool pending = true; pending && !status; ++index)
                    {
                    // forget the delimiter of the previous token
                    delim   = null;
                    pending = consume();
                    status  = dg (index, slice, delim);
                    }
                return status;
        }

        /***********************************************************************

                Advance to the next token and return it. Null is returned
                when no further token is available, which indicates an
                end of stream condition. Sweeping a conduit for lines
                with next() looks like this:
                ---
                auto lines = new Lines!(char) (new File("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                A single line may be pulled from a conduit like so:
                ---
                auto line = (new Lines!(char) (new File("myfile"))).next;
                ---

                Whereas foreach() walks every token in one go, next()
                hands them over one at a time. The foreach equivalent
                of the first example is:
                ---
                foreach (line; new Lines!(char) (new File("myfile")))
                         Cout(line).newline;
                ---

        ***********************************************************************/

        @property final const(T)[] next ()
        {
                // a non-empty trailing token is still worth returning
                const more = consume();
                return (more || slice.length) ? slice : null;
        }

        /***********************************************************************

                Point the current token at content [start .. end], and
                return the end index.

        ***********************************************************************/

        protected final size_t set (const(T)* content, size_t start, size_t end)
        {
                this.slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Point the current token at content [start .. end], and
                the current delimiter at the span from end up to and
                including next. Returns the end index.

        ***********************************************************************/

        protected final size_t set (const(T)* content, size_t start, size_t end, size_t next)
        {
                this.delim = content [end .. next + 1];
                return set (content, start, end);
        }

        /***********************************************************************

                For use by a scanner which failed to match its pattern:
                yields Eof. The buffer driving the scan may react by
                loading further content and scanning again.

        ***********************************************************************/

        protected final size_t notFound ()
        {
                return Eof;
        }

        /***********************************************************************

                For use by a scanner which matched its pattern. The
                argument is the index of the final element belonging
                to the match; the result is the equivalent void[]
                (byte) index, one past that element.

        ***********************************************************************/

        protected final size_t found (size_t i)
        {
                const elements = i + 1;
                return elements * T.sizeof;
        }

        /***********************************************************************

                Test whether the given set of elements contains the
                one provided.

        ***********************************************************************/

        protected final bool has (const(T)[] set, T match)
        {
                for (size_t i = 0; i < set.length; ++i)
                     if (set[i] is match)
                         return true;
                return false;
        }

        /***********************************************************************

                Fetch the next token into 'slice'. Returns true while
                the buffer reports a successful scan, meaning there may
                be further tokens. Otherwise whatever content the buffer
                still exposes becomes the final token, and false is
                returned.

        ***********************************************************************/

        private bool consume ()
        {
                if (! source.next (&scan))
                   {
                   // no match: take the remaining content as the trailing token
                   source.reader ((const(void)[] remainder)
                                 {
                                 auto count = remainder.length / T.sizeof;
                                 slice = (cast(const(T)*) remainder.ptr) [0 .. count];
                                 return remainder.length;
                                 });
                   return false;
                   }
                return true;
        }
}
290 
291