/*******************************************************************************

        copyright:      Copyright (c) 2007 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Initial release: Nov 2007

        author:         Kris

        UTF conversion streams, supporting cross-translation of char, wchar
        and dchar variants. For supporting endian variations, configure the
        appropriate EndianStream upstream of this one (closer to the source.)

*******************************************************************************/

module tango.io.stream.Utf;

private import tango.io.device.Conduit;

private import tango.io.stream.Buffered;

private import Utf = tango.text.convert.Utf;

/*******************************************************************************

        Streaming UTF input converter. S is the element type found in the
        upstream source, and T is the element type handed to the reader.
        Each of them is expected to be one of char, wchar or dchar.

*******************************************************************************/

class UtfInput(T, S) : InputFilter, InputFilter.Mutator
{
        // emit a compile-time message when S is not a character type
        static if (!(is (S == char) || is (S == wchar) || is (S == dchar)))
                   pragma (msg, "Source type must be char, wchar, or dchar");

        // emit a compile-time message when T is not a character type
        static if (!(is (T == char) || is (T == wchar) || is (T == dchar)))
                   pragma (msg, "Target type must be char, wchar, or dchar");

        // buffered view of the upstream source; read() pulls from here
        private InputBuffer buffer;

        /***********************************************************************

                Wrap the given stream in a buffer (via BufferedInput.create)
                and attach this converter to it.

        ***********************************************************************/

        this (InputStream stream)
        {
                super (buffer = BufferedInput.create (stream));
        }

        /***********************************************************************

                Typed front-end for read(): fill dst with converted content
                and return the number of T elements placed there (the byte
                count from read() divided by T.sizeof).

                Returns Eof, unscaled, upon end-of-flow.

        ***********************************************************************/

        final size_t consume (T[] dst)
        {
                auto count = read (dst);
                return (count == Eof) ? count : count / T.sizeof;
        }

        /***********************************************************************

                Read the next chunk of buffered source content, convert it
                from S to T, and place the result in dst.

                When S and T are the same type no conversion is needed, and
                the call is forwarded to the superclass.

                Returns the number of bytes placed in dst, or Eof when
                buffer.next() reports that nothing was processed.

        ***********************************************************************/

        final override size_t read (void[] dst)
        {
                static if (is (S == T))
                {
                        return super.read (dst);
                }
                else
                {
                        size_t eaten,           // S elements taken from the buffer
                               emitted;         // T elements written into dst

                        // dst must be able to hold at least one T element
                        if (dst.length < T.sizeof)
                            conduit.error ("UtfStream.read :: target array is too small");

                        // callback handed to buffer.next(): receives the
                        // buffered content, and answers with the number of
                        // bytes it has used up
                        size_t transcode (const(void)[] src)
                        {
                                // not even one whole source element available
                                if (src.length < S.sizeof)
                                    return Eof;

                                auto target = BufferedInput.convert!(T)(dst);
                                auto source = BufferedInput.convert!(S)(src);

                                // choose the converter matching the target type
                                static if (is (T == char))
                                           emitted = Utf.toString (source, target, &eaten).length;
                                else static if (is (T == wchar))
                                           emitted = Utf.toString16 (source, target, &eaten).length;
                                else static if (is (T == dchar))
                                           emitted = Utf.toString32 (source, target, &eaten).length;

                                // element count -> byte count
                                return eaten * S.sizeof;
                        }

                        if (! buffer.next (&transcode))
                              return Eof;

                        // element count -> byte count
                        return emitted * T.sizeof;
                }
        }
}


/*******************************************************************************

        Streaming UTF converter. Type T is the target or destination type,
        while S is the source type. Both types are either char/wchar/dchar.

        Note that the arguments are reversed from those of UtfInput.
*******************************************************************************/

class UtfOutput (S, T) : OutputFilter, OutputFilter.Mutator
{
        // emit a compile-time message when S is not a character type
        static if (!is (S == char) && !is (S == wchar) && !is (S == dchar))
                   pragma (msg, "Source type must be char, wchar, or dchar");

        // emit a compile-time message when T is not a character type
        static if (!is (T == char) && !is (T == wchar) && !is (T == dchar))
                   pragma (msg, "Target type must be char, wchar, or dchar");


        // buffered view of the downstream sink; write() converts into it
        private OutputBuffer buffer;

        /***********************************************************************

                Create a buffered utf output converter.

        ***********************************************************************/

        this (OutputStream stream)
        {
                super (buffer = BufferedOutput.create (stream));
        }

        /***********************************************************************

                Consume input of type S, and return the number of array
                elements consumed (the byte count from write() divided by
                S.sizeof).

                Despite its name, the dst argument is the source content
                to be converted and written.

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (const(S)[] dst)
        {
                auto x = write (dst);
                if (x != Eof)
                    x /= S.sizeof;
                return x;
        }

        /***********************************************************************

                Write to the output stream from a source array. The provided
                src content is converted as necessary. Note that an attached
                output buffer must be at least four bytes wide to accommodate
                a conversion.

                When S and T are the same type no conversion is needed, and
                the call is forwarded to the superclass.

                Returns the number of bytes consumed from src, which may be
                less than the quantity provided. Returns Eof when the buffer
                still reports Eof after it has been flushed once.

        ***********************************************************************/

        final override size_t write (const(void)[] src)
        {
                static if (is (S == T))
                           return super.write (src);
                else
                   {
                   // size_t (not uint): &consumed is handed to the Utf
                   // converters exactly as UtfInput.read does, and a uint*
                   // is not compatible with size_t* on 64-bit targets
                   size_t consumed,     // S elements taken from src
                          produced;     // T elements placed in the buffer

                   size_t writer (void[] dst)
                   {
                           // buffer must be at least 4 bytes wide
                           // to contain a generic conversion
                           if (dst.length < 4)
                               return Eof;

                           auto input = BufferedOutput.convert!(S)(src);
                           auto output = BufferedOutput.convert!(T)(dst);

                           // choose the converter matching the target type
                           static if (is (T == char))
                                      produced = Utf.toString(input, output, &consumed).length;

                           static if (is (T == wchar))
                                      produced = Utf.toString16(input, output, &consumed).length;

                           static if (is (T == dchar))
                                      produced = Utf.toString32(input, output, &consumed).length;

                           // tell the buffer how many bytes were added
                           return produced * T.sizeof;
                   }

                   // write directly into buffered content and
                   // flush when the output is full
                   if (buffer.writer(&writer) is Eof)
                      {
                      buffer.flush;
                      if (buffer.writer(&writer) is Eof)
                          return Eof;
                      }

                   // element count -> byte count
                   return consumed * S.sizeof;
                   }
        }
}


/*******************************************************************************

        Round-trip check, compiled only with -debug=Utf: char content is
        widened to dchar by UtfInput, then narrowed back to char by
        UtfOutput, and the buffered result is compared with the original.

*******************************************************************************/

debug (Utf)
{
        import tango.io.Stdout;
        import tango.io.device.Array;

        void main()
        {
                auto inp = new UtfInput!(dchar, char)(new Array("hello world".dup));
                auto oot = new UtfOutput!(dchar, char)(new Array(20));
                oot.copy(inp);
                assert (oot.buffer.slice == "hello world");
        }
}