1 /******************************************************************************* 2 3 copyright: Copyright (c) 2004 Kris Bell. All rights reserved 4 5 license: BSD style: $(LICENSE) 6 7 version: Initial release: December 2005 8 9 author: Kris 10 11 *******************************************************************************/ 12 13 module tango.io.stream.Iterator; 14 15 private import tango.io.stream.Buffered; 16 17 protected import tango.io.device.Conduit : InputFilter, InputBuffer, InputStream; 18 19 /******************************************************************************* 20 21 The base class for a set of stream iterators. These operate 22 upon a buffered input stream, and are designed to deal with 23 partial content. That is, stream iterators go to work the 24 moment any data becomes available in the buffer. Contrast 25 this behaviour with the tango.text.Util iterators, which 26 operate upon the extent of an array. 27 28 There are two types of iterators supported; exclusive and 29 inclusive. The former are the more common kind, where a token 30 is delimited by elements that are considered foreign. Examples 31 include space, comma, and end-of-line delineation. Inclusive 32 tokens are just the opposite: they look for patterns in the 33 text that should be part of the token itself - everything else 34 is considered foreign. Currently tango.io.stream includes the 35 exclusive variety only. 36 37 Each pattern is exposed to the client as a slice of the original 38 content, where the slice is transient. If you need to retain the 39 exposed content, then you should .dup it appropriately. 40 41 The content provided to these iterators is intended to be fully 42 read-only. All current tokenizers abide by this rule, but it is 43 possible a user could mutate the content through a token slice. 44 To enforce the desired read-only aspect, the code would have to 45 introduce redundant copying or the compiler would have to support 46 read-only arrays (now in D2). 
See Delimiters, Lines, Patterns, Quotes.

*******************************************************************************/

class Iterator(T) : InputFilter
{
        private InputBuffer     source;         // buffered wrapper around the input stream
        protected const(T)[]    slice,          // the current token, a transient view of buffer content
                                delim;          // the delimiter trailing the current token (3-arg opApply only)

        /***********************************************************************

                Pattern scanner, supplied by each concrete subclass. Returns
                a buffer index past the match, or Eof via notFound().

        ***********************************************************************/

        abstract protected size_t scan (const(void)[] data);

        /***********************************************************************

                Construct this iterator, optionally attaching an input
                stream immediately.

        ***********************************************************************/

        this (InputStream stream = null)
        {
                super (stream);
                if (stream)
                    set (stream);
        }

        /***********************************************************************

                Attach the given stream as the scanning source, wrapping
                it with buffering where necessary. Returns this iterator
                for call chaining.

        ***********************************************************************/

        Iterator set (InputStream stream)
        {
                assert (stream);
                source = BufferedInput.create (stream);
                super.source = source;
                return this;
        }

        /***********************************************************************

                Expose the current token. The returned slice is transient;
                .dup it if retention is required.

        ***********************************************************************/

        final const(T)[] get ()
        {
                return slice;
        }

        /**********************************************************************

                foreach support over the token set. Tokens ought to be
                read-only, but that cannot be enforced here.

        **********************************************************************/

        int opApply (scope int delegate(ref const(T)[]) dg)
        {
                int status;

                for (bool pending = true; pending;)
                    {
                    pending = consume();
                    status = dg (slice);
                    if (status)
                        break;
                    }
                return status;
        }

        /**********************************************************************

                foreach support exposing each token alongside a zero-based
                token index.

        **********************************************************************/

        int opApply (scope int delegate(ref int, ref const(T)[]) dg)
        {
                int status,
                    index;

                for (bool pending = true; pending;)
                    {
                    pending = consume();
                    status = dg (index, slice);
                    ++index;
                    if (status)
                        break;
                    }
                return status;
        }

        /**********************************************************************

                foreach support exposing each token, its trailing
                delimiter, and a zero-based token index.

        **********************************************************************/

        int opApply (scope int delegate(ref int, ref const(T)[], ref const(T)[]) dg)
        {
                int status,
                    index;

                for (bool pending = true; pending;)
                    {
                    delim = null;
                    pending = consume();
                    status = dg (index, slice, delim);
                    ++index;
                    if (status)
                        break;
                    }
                return status;
        }

        /***********************************************************************

                Locate the next token, returning it when found and null
                at end of stream. To sweep a conduit for lines using
                method next():
                ---
                auto lines = new Lines!(char) (new File("myfile"));
                while (lines.next)
                       Cout (lines.get).newline;
                ---

                Alternatively, extract a single line from a conduit:
                ---
                auto line = (new Lines!(char) (new File("myfile"))).next;
                ---

                Unlike foreach(), which drains all tokens in one pass,
                next() yields them one at a time:
                ---
                foreach (line; new Lines!(char) (new File("myfile")))
                         Cout(line).newline;
                ---

        ***********************************************************************/

        @property final const(T)[] next ()
        {
                return (consume() || slice.length) ? slice : null;
        }

        /***********************************************************************

                Record the current token as the content between the given
                start and end points.

        ***********************************************************************/

        protected final size_t set (const(T)* content, size_t start, size_t end)
        {
                slice = content [start .. end];
                return end;
        }

        /***********************************************************************

                Record the current token as the content between start and
                end, and the delimiter as the segment between end & next
                (inclusive.)

        ***********************************************************************/

        protected final size_t set (const(T)* content, size_t start, size_t end, size_t next)
        {
                slice = content [start .. end];
                delim = content [end .. next+1];
                return end;
        }

        /***********************************************************************

                Signal that a scanner failed to locate its pattern. The
                buffer may respond by loading further content and
                rescanning.

        ***********************************************************************/

        protected final size_t notFound ()
        {
                return Eof;
        }

        /***********************************************************************

                Signal a successful match. The argument is the index of
                the final element of the pattern, translated here into a
                void[] index.

        ***********************************************************************/

        protected final size_t found (size_t i)
        {
                return T.sizeof * (i + 1);
        }

        /***********************************************************************

                Test whether a set of characters contains the given
                element.

        ***********************************************************************/

        protected final bool has (const(T)[] set, T match)
        {
                foreach (T element; set)
                         if (element is match)
                             return true;
                return false;
        }

        /***********************************************************************

                Pull the next token into 'slice', returning true while
                further tokens potentially remain. On a failed scan the
                trailing buffer content becomes the final token.

        ***********************************************************************/

        private bool consume ()
        {
                if (source.next (&scan))
                    return true;

                // end of stream: surface whatever remains as the last token
                source.reader ((const(void)[] data)
                              {
                              slice = (cast(const(T)*) data.ptr) [0 .. data.length / T.sizeof];
                              return cast(size_t) data.length;
                              });
                return false;
        }
}