1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2007 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Initial release: Nov 2007
8 
9         author:         Kris
10 
11         UTF conversion streams, supporting cross-translation of char, wchar
12         and dchar variants. For supporting endian variations, configure the
13         appropriate EndianStream upstream of this one (closer to the source.)
14 
15 *******************************************************************************/
16 
17 module tango.io.stream.Utf;
18 
19 private import tango.io.device.Conduit;
20 
21 private import tango.io.stream.Buffered;
22 
23 private import Utf = tango.text.convert.Utf;
24 
25 /*******************************************************************************
26 
27         Streaming UTF converter. Type T is the target or destination type,
28         while S is the source type. Both types are either char/wchar/dchar.
29 
30 *******************************************************************************/
31 
class UtfInput(T, S) : InputFilter, InputFilter.Mutator
{
        // Compile-time diagnostics: a message is printed for element types
        // other than char/wchar/dchar. Note these are pragma(msg) notices,
        // not static asserts.
        static if (!(is (S == char) || is (S == wchar) || is (S == dchar)))
                    pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!(is (T == char) || is (T == wchar) || is (T == dchar)))
                    pragma (msg, "Target type must be char, wchar, or dchar");

        // buffered view of the upstream input; conversions read from here
        private InputBuffer buffer;

        /***********************************************************************

                Construct a utf input converter on top of the given stream.
                The stream is wrapped via BufferedInput.create, and the
                resulting buffer becomes the source for this filter.

        ***********************************************************************/

        this (InputStream stream)
        {
                super (buffer = BufferedInput.create (stream));
        }

        /***********************************************************************

                Fill dst with converted content of type T, and return the
                number of array elements placed there (the byte count from
                read() divided by T.sizeof).

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (T[] dst)
        {
                auto bytes = read (dst);
                return (bytes == Eof) ? bytes : bytes / T.sizeof;
        }

        /***********************************************************************

                Read converted content into dst, which is treated as an
                array of T. When S and T are the same type the request is
                simply forwarded to the superclass.

                Reports an error via the conduit when dst cannot hold even
                one element of type T.

                Returns the number of bytes written into dst, or Eof when
                the buffer has no further content to offer.

        ***********************************************************************/

        final override size_t read (void[] dst)
        {
                static if (is (S == T))
                   {
                   return super.read (dst);
                   }
                else
                   {
                   // element counts reported by the conversion routine
                   size_t eaten,
                          emitted;

                   // there must be room for at least one target element
                   if (dst.length < T.sizeof)
                       conduit.error ("UtfStream.read :: target array is too small");

                   // invoked by the buffer with its readable content; the
                   // return value is the number of source bytes used up
                   size_t transcode (const(void)[] src)
                   {
                        // less than one whole source element: signal Eof
                        if (src.length < S.sizeof)
                            return Eof;

                        auto target = BufferedInput.convert!(T)(dst);
                        auto source = BufferedInput.convert!(S)(src);

                        static if (is (T == char))
                                   emitted = Utf.toString (source, target, &eaten).length;
                        else static if (is (T == wchar))
                                   emitted = Utf.toString16 (source, target, &eaten).length;
                        else static if (is (T == dchar))
                                   emitted = Utf.toString32 (source, target, &eaten).length;

                        return eaten * S.sizeof;
                   }

                   // convert the next chunk of buffered input
                   if (! buffer.next (&transcode))
                         return Eof;

                   return emitted * T.sizeof;
                   }
        }
}
116 
117 
118 /*******************************************************************************
119 
120         Streaming UTF converter. Type T is the target or destination type,
121         while S is the source type. Both types are either char/wchar/dchar.
122 
123         Note that the arguments are reversed from those of UtfInput.
124 
125 *******************************************************************************/
126 
class UtfOutput (S, T) : OutputFilter, OutputFilter.Mutator
{
        // Compile-time diagnostics: a message is printed for element types
        // other than char/wchar/dchar (pragma(msg) notices, not asserts).
        static if (!is (S == char) && !is (S == wchar) && !is (S == dchar))
                    pragma (msg, "Source type must be char, wchar, or dchar");

        static if (!is (T == char) && !is (T == wchar) && !is (T == dchar))
                    pragma (msg, "Target type must be char, wchar, or dchar");


        // buffered view of the downstream output; conversions land here
        private OutputBuffer buffer;

        /***********************************************************************

                Create a buffered utf output converter. The given stream is
                wrapped via BufferedOutput.create, and the resulting buffer
                becomes the sink for this filter.

        ***********************************************************************/

        this (OutputStream stream)
        {
                super (buffer = BufferedOutput.create (stream));
        }

        /***********************************************************************

                Write an array of the source type S (the parameter is named
                dst, but it holds the content to be written), and return
                the number of array elements consumed: the byte count from
                write() divided by S.sizeof.

                Returns Eof upon end-of-flow.

        ***********************************************************************/

        final size_t consume (const(S)[] dst)
        {
                auto x = write (dst);
                if (x != Eof)
                    x /= S.sizeof;
                return x;
        }

        /***********************************************************************

                Write to the output stream from a source array. The provided
                src content is converted as necessary. Note that an attached
                output buffer must be at least four bytes wide to accommodate
                a conversion.

                When S and T are the same type the request is forwarded to
                the superclass without conversion.

                Returns the number of bytes consumed from src, which may be
                less than the quantity provided, or Eof when the buffer
                cannot offer four bytes of space even after a flush.

        ***********************************************************************/

        final override size_t write (const(void)[] src)
        {
                static if (is (S == T))
                           return super.write (src);
                else
                   {
                   // size_t rather than uint: array .length is a size_t,
                   // which does not narrow implicitly to uint on 64-bit
                   // targets, and this matches the counters UtfInput.read
                   // hands to the same Utf conversion routines
                   size_t consumed,
                          produced;

                   // invoked by the buffer with its writable space; the
                   // return value is the number of bytes placed in dst
                   size_t writer (void[] dst)
                   {
                        // buffer must be at least 4 bytes wide
                        // to contain a generic conversion
                        if (dst.length < 4)
                            return Eof;

                        auto input = BufferedOutput.convert!(S)(src);
                        auto output = BufferedOutput.convert!(T)(dst);

                        static if (is (T == char))
                                   produced = Utf.toString(input, output, &consumed).length;

                        static if (is (T == wchar))
                                   produced = Utf.toString16(input, output, &consumed).length;

                        static if (is (T == dchar))
                                   produced = Utf.toString32(input, output, &consumed).length;

                        return produced * T.sizeof;
                   }

                   // write directly into buffered content and
                   // flush when the output is full
                   if (buffer.writer(&writer) is Eof)
                      {
                      buffer.flush;
                      // one retry after flushing; give up if still no room
                      if (buffer.writer(&writer) is Eof)
                          return Eof;
                      }
                   return consumed * S.sizeof;
                   }
        }
}
221 
222 
223 /*******************************************************************************
224 
225 *******************************************************************************/
226 
debug (Utf)
{
        import tango.io.Stdout;
        import tango.io.device.Array;

        /***********************************************************************

                Round-trip check: char content is read as dchar through
                UtfInput, then written back as char through UtfOutput, and
                the output buffer is compared against the original text.

        ***********************************************************************/

        void main()
        {
                auto sink   = new UtfOutput!(dchar, char)(new Array(20));
                auto source = new UtfInput!(dchar, char)(new Array("hello world".dup));

                sink.copy (source);
                assert (sink.buffer.slice == "hello world");
        }
}