1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2008 Jeff Davey. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         author:         Jeff Davey
8 
9         standards:      rfc3548, rfc2045
10 
11         Since:          0.99.7
12 
13 *******************************************************************************/
14 
15 /*******************************************************************************
16 
17     This module is used to decode and encode base64 char[] arrays. 
18 
19     Example:
20     ---
21     char[] blah = "Hello there, my name is Jeff.";
22     scope encodebuf = new char[allocateEncodeSize(cast(ubyte[])blah)];
23     char[] encoded = encode(cast(ubyte[])blah, encodebuf);
24 
25     scope decodebuf = new ubyte[encoded.length];
26     if (cast(char[])decode(encoded, decodebuf) == "Hello there, my name is Jeff.")
27         Stdout("yay").newline;
28     ---
29 
30 *******************************************************************************/
31 
32 module tango.util.encode.Base64;
33 
/*******************************************************************************

    Computes the size of the buffer needed to hold the base64 encoding of
    the array passed.

    Params:
    data = the array that is going to be encoded; only its length is used

    Returns:
    the number of chars the encoded form of data occupies, padding included

*******************************************************************************/

size_t allocateEncodeSize(const(ubyte[]) data)
{
    // the result depends on the length alone, so defer to the length overload
    immutable size_t inputLength = data.length;
    return allocateEncodeSize(inputLength);
}
49 
/*******************************************************************************

    Computes the size of the buffer needed to hold the base64 encoding of
    the given number of bytes.

    Params:
    length = Number of bytes to be encoded

    Returns:
    the number of chars the encoded form occupies, padding included

*******************************************************************************/

size_t allocateEncodeSize(size_t length)
{
    // every complete group of three input bytes becomes four output chars
    size_t groups = length / 3;

    // a trailing group of one or two bytes is padded out to four chars as well
    if (length % 3 != 0)
        ++groups;

    return groups * 4;
}
65 
66 
/*******************************************************************************

    Encodes as many complete three byte groups of data as possible into
    buff. The trailing data.length % 3 bytes, if any, are neither encoded
    nor padded; the caller has to deal with them, for example by handing
    them to encode() or by prepending them to the next chunk.

    Params:
    data = what is to be encoded
    buff = buffer large enough to hold the encoded data, that is at least
           (data.length / 3) * 4 chars
    bytesEncoded = always overwritten with the number of chars written
                   to buff

    Returns:
    the number of bytes of data that were consumed, which is also the
    offset in data of the first byte that has not been encoded yet

*******************************************************************************/

size_t encodeChunk(const(ubyte[]) data, char[] buff, ref size_t bytesEncoded)
in
{
    // the loop below writes through a raw pointer, so an undersized buffer
    // has to be rejected up front
    assert(buff.length >= (data.length / 3) * 4);
}
body
{
    immutable size_t tripletCount = data.length / 3;
    char *rtnPtr = buff.ptr;
    const(ubyte) *dataPtr = data.ptr;

    // set unconditionally so that a stale caller value never survives an
    // empty input
    bytesEncoded = tripletCount * 4;

    for (size_t i = 0; i < tripletCount; i++)
    {
        // split 3 x 8 bits into 4 x 6 bits and look up each sextet
        *rtnPtr++ = _encodeTable[((dataPtr[0] & 0xFC) >> 2)];
        *rtnPtr++ = _encodeTable[(((dataPtr[0] & 0x03) << 4) | ((dataPtr[1] & 0xF0) >> 4))];
        *rtnPtr++ = _encodeTable[(((dataPtr[1] & 0x0F) << 2) | ((dataPtr[2] & 0xC0) >> 6))];
        *rtnPtr++ = _encodeTable[(dataPtr[2] & 0x3F)];
        dataPtr += 3;
    }

    return tripletCount * 3;
}
105 
/*******************************************************************************

    Encodes data into buff as an ASCII base64 string, padding the final
    group with '=' when data.length is not a multiple of three.

    Params:
    data = what is to be encoded
    buff = buffer large enough to hold encoded data, see allocateEncodeSize

    Returns:
    the slice of buff that holds the encoded string, or null when data
    is empty

    Example:
    ---
    char[512] encodebuf;
    char[] myEncodedString = encode(cast(ubyte[])"Hello, how are you today?", encodebuf);
    Stdout(myEncodedString).newline; // SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==
    ---

*******************************************************************************/

char[] encode(const(ubyte[]) data, char[] buff)
in
{
    assert(data);
    assert(buff.length >= allocateEncodeSize(data));
}
body
{
    // nothing to encode, nothing to hand back
    if (data.length == 0)
        return null;

    // every complete group of three bytes is handled by encodeChunk
    size_t written = 0;
    immutable size_t consumed = encodeChunk(data, buff, written);

    const(ubyte) *tail = data.ptr + consumed;
    char *dest = buff.ptr + written;
    immutable size_t leftover = data.length - consumed;

    if (leftover == 2)
    {
        // 16 bits left: three symbols and a single pad
        dest[0] = _encodeTable[(tail[0] & 0xFC) >> 2];
        dest[1] = _encodeTable[((tail[0] & 0x03) << 4) | ((tail[1] & 0xF0) >> 4)];
        dest[2] = _encodeTable[(tail[1] & 0x0F) << 2];
        dest[3] = '=';
        written += 4;
    }
    else if (leftover == 1)
    {
        // 8 bits left: two symbols and two pads
        dest[0] = _encodeTable[(tail[0] & 0xFC) >> 2];
        dest[1] = _encodeTable[(tail[0] & 0x03) << 4];
        dest[2] = '=';
        dest[3] = '=';
        written += 4;
    }

    return buff[0 .. written];
}
164 
/*******************************************************************************

    Encodes data and returns it as a newly allocated ASCII base64 string.

    Params:
    data = what is to be encoded

    Returns:
    the encoded string, held in a buffer allocated by this function

    Example:
    ---
    char[] myEncodedString = encode(cast(ubyte[])"Hello, how are you today?");
    Stdout(myEncodedString).newline; // SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==
    ---

*******************************************************************************/

char[] encode(const(ubyte[]) data)
in
{
    assert(data);
}
body
{
    // allocate exactly what the encoded form needs, then reuse the
    // buffer based overload
    immutable size_t needed = allocateEncodeSize(data);
    return encode(data, new char[needed]);
}
192 
/*******************************************************************************

    Decodes an ASCII base64 string and returns it as ubyte[] data. The
    output buffer is allocated here, with as many bytes as data has chars.

    This decoder will ignore non-base64 characters. So:
    SGVsbG8sIGhvd
    yBhcmUgeW91IH
    RvZGF5Pw==

    Is valid.

    Params:
    data = what is to be decoded

    Returns:
    the decoded bytes, a slice of the buffer allocated by this function

    Example:
    ---
    char[] myDecodedString = cast(char[])decode("SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==");
    Stdout(myDecodedString).newline; // Hello, how are you today?
    ---

*******************************************************************************/

ubyte[] decode(const(char[]) data)
in
{
    assert(data);
}
body
{
    // four encoded chars yield at most three bytes, so a buffer as long
    // as the input always has room for the result
    ubyte[] scratch = new ubyte[data.length];
    return decode(data, scratch);
}
226 
/*******************************************************************************

    Decodes an ASCII base64 string into buff and returns the decoded part
    of buff as ubyte[] data.

    This decoder will ignore non-base64 characters. So:
    SGVsbG8sIGhvd
    yBhcmUgeW91IH
    RvZGF5Pw==

    Is valid.

    Input whose trailing '=' padding is incomplete or missing altogether is
    decoded as far as its bits allow: two leftover symbols yield one byte,
    three yield two bytes, a single leftover symbol is dropped.

    A '=' is only treated as padding when it is among the last four base64
    symbols of data; anywhere else it is not validated.

    Params:
    data = what is to be decoded
    buff = a big enough array to hold the decoded data; data.length bytes
           are always sufficient. Writes are bounds checked, so an
           undersized buff is reported as a range error in builds with
           array bounds checking enabled instead of overwriting memory.

    Returns:
    the slice of buff that holds the decoded bytes, or null when data
    is empty

    Throws:
    Exception when more than two of the last four base64 symbols of data
    are pad characters

    Example:
    ---
    ubyte[512] decodebuf;
    char[] myDecodedString = cast(char[])decode("SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==", decodebuf);
    Stdout(myDecodedString).newline; // Hello, how are you today?
    ---

*******************************************************************************/

ubyte[] decode(const(char[]) data, ubyte[] buff)
in
{
    assert(data);
}
body
{
    ubyte[] rtn;

    if (data.length > 0)
    {
        // Scan backwards over the last four base64 symbols (skipping
        // everything else) to find out whether the input ends in padding.
        // tailLength counts every char scanned, ignored ones included, so
        // it must be a size_t: a ubyte wraps once more than 255 ignorable
        // chars trail the data.
        size_t tailLength = 0;
        ubyte symbolCount = 0;
        ubyte padCount = 0;
        foreach_reverse (char piece; data)
        {
            tailLength++;
            ubyte current = _decodeTable[piece];
            // the table maps both 'A' and every ignorable char to 0
            if (current || piece == 'A')
            {
                symbolCount++;
                if (current == BASE64_PAD)
                    padCount++;
            }
            if (symbolCount == 4)
                break;
        }

        if (padCount > 2)
            throw new Exception("Improperly terminated base64 string. Base64 pad character (=) found where there shouldn't be one.");
        if (padCount == 0)
            tailLength = 0;

        // first index at which a '=' counts as terminating padding
        immutable size_t tailStart = data.length - tailLength;

        ubyte[4] quad;
        size_t quadFill = 0;
        size_t decodedLength = 0;

        foreach (index, char piece; data)
        {
            ubyte next = _decodeTable[piece];
            if (next == 0 && piece != 'A')
                continue; // not part of the base64 alphabet

            // padding inside the final quad ends the payload
            if (next == BASE64_PAD && index >= tailStart)
                break;

            quad[quadFill++] = next;
            if (quadFill == 4)
            {
                // 4 x 6 bits back into 3 x 8 bits
                buff[decodedLength++] = cast(ubyte) ((quad[0] << 2) | (quad[1] >> 4));
                buff[decodedLength++] = cast(ubyte) ((quad[1] << 4) | (quad[2] >> 2));
                buff[decodedLength++] = cast(ubyte) ((quad[2] << 6) | quad[3]);
                quadFill = 0;
            }
        }

        // Decode whatever is left of an incomplete final quad, whether it
        // was cut short by padding or by the end of the input.
        if (quadFill >= 2)
        {
            buff[decodedLength++] = cast(ubyte) ((quad[0] << 2) | (quad[1] >> 4));
            if (quadFill == 3)
                buff[decodedLength++] = cast(ubyte) ((quad[1] << 4) | (quad[2] >> 2));
        }

        rtn = buff[0 .. decodedLength];
    }

    return rtn;
}
339 
version (Test)
{
    import tango.scrapple.util.Test;
    import tango.io.device.File;
    import tango.time.StopWatch;
    import tango.io.Stdout;

    unittest    
    {
        // Encodes the complete triplets with encodeChunk and the remainder
        // with encode, then checks the joined result.
        Test.Status encodeChunktest(ref char[][] messages)
        {
            immutable(char)[] str = "Hello, how are you today?";
            auto bytes = cast(const(ubyte)[]) str;
            char[] encoded = new char[allocateEncodeSize(bytes)];
            // encodeChunk takes a ref size_t and returns a size_t
            size_t bytesEncoded = 0;
            size_t consumed = encodeChunk(bytes, encoded, bytesEncoded);
            char[] result = encoded[0..bytesEncoded] ~ encode(bytes[consumed..$], encoded[bytesEncoded..$]);
            if (result == "SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==")
                return Test.Status.Success;
            return Test.Status.Failure;
        }

        // Checks both the buffer based and the allocating encode overload.
        Test.Status encodeTest(ref char[][] messages)
        {
            char[] encoded = new char[allocateEncodeSize(cast(ubyte[])"Hello, how are you today?")];
            char[] result = encode(cast(ubyte[])"Hello, how are you today?", encoded);
            if (result == "SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==")
            {
                char[] result2 = encode(cast(ubyte[])"Hello, how are you today?");
                // verify the output of the allocating overload, not the
                // first result again
                if (result2 == "SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==")
                    return Test.Status.Success;
            }

            return Test.Status.Failure;
        }

        // Checks both the buffer based and the allocating decode overload.
        Test.Status decodeTest(ref char[][] messages)
        {
            ubyte[1024] decoded;
            ubyte[] result = decode("SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==", decoded);
            if (result == cast(ubyte[])"Hello, how are you today?")
            {
                result = decode("SGVsbG8sIGhvdyBhcmUgeW91IHRvZGF5Pw==");
                if (result == cast(ubyte[])"Hello, how are you today?")
                    return Test.Status.Success;
            }
            return Test.Status.Failure;
        }
        
        // Decode benchmark; reads blah.b64 and is not registered below.
        Test.Status speedTest(ref char[][] messages)
        {
            Stdout("Reading...").newline;
            char[] data = cast(char[])File.get ("blah.b64");
            ubyte[] result = new ubyte[data.length];
            auto t1 = new StopWatch();
            Stdout("Decoding..").newline;
            t1.start();
            uint runs = 100000000;
            for (uint i = 0; i < runs; i++)
                decode(data, result);
            double blah = t1.stop();
            Stdout.formatln("Decoded {} MB in {} seconds at {} MB/s", cast(double)(cast(double)(data.length * runs) / 1024 / 1024), blah, (cast(double)(data.length * runs)) / 1024 / 1024 / blah );
            return Test.Status.Success;
        }

        // Encode benchmark on a short string; not registered below.
        Test.Status speedTest2(ref char[][] messages)
        {
            Stdout("Reading...").newline;
//            ubyte[] data = cast(ubyte[])FileData("blah.txt").read;
            ubyte[] data = cast(ubyte[])"I am a small string, Wee...";
            char[] result = new char[allocateEncodeSize(data)];
            auto t1 = new StopWatch();
            uint runs = 100000000;
            Stdout("Encoding..").newline;
            t1.start();
            for (uint i = 0; i < runs; i++)
                encode(data, result);
            double blah = t1.stop();
            Stdout.formatln("Encoded {} MB in {} seconds at {} MB/s", cast(double)(cast(double)(data.length * runs) / 1024 / 1024), blah, (cast(double)(data.length * runs)) / 1024 / 1024 / blah );
            return Test.Status.Success;
        }

        auto t = new Test("tango.util.encode.Base64");
        t["Encode"] = &encodeTest;
        t["Encode Stream"] = &encodeChunktest;
        t["Decode"] = &decodeTest;
//        t["Speed"] = &speedTest;
//        t["Speed2"] = &speedTest2;
        t.run();
    }
}
429 
430 
431 
private:

/*
    Immutable lookup tables used by the encoder and the decoder.
*/

// value _decodeTable holds for the '=' pad character
immutable ubyte BASE64_PAD = 64;

// maps a 6 bit value to its base64 character; slot 64 (BASE64_PAD) holds '='
immutable char[] _encodeTable = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

// Inverse of _encodeTable, indexed by char code: 256 entries, 16 per row.
// Chars outside the base64 alphabet map to 0, exactly like 'A' does, which
// is why users of the table test for 'A' explicitly.
immutable ubyte[] _decodeTable = [
    // 0x00 - 0x0F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x10 - 0x1F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x20 - 0x2F: '+' at 0x2B, '/' at 0x2F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,62, 0, 0, 0,63,
    // 0x30 - 0x3F: '0' .. '9', '=' at 0x3D
    52,53,54,55,56,57,58,59,60,61, 0, 0, 0,BASE64_PAD, 0, 0,
    // 0x40 - 0x4F: 'A' .. 'O' from 0x41
     0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
    // 0x50 - 0x5F: 'P' .. 'Z'
    15,16,17,18,19,20,21,22,23,24,25, 0, 0, 0, 0, 0,
    // 0x60 - 0x6F: 'a' .. 'o' from 0x61
     0,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
    // 0x70 - 0x7F: 'p' .. 'z'
    41,42,43,44,45,46,47,48,49,50,51, 0, 0, 0, 0, 0,
    // 0x80 - 0xFF: nothing in the base64 alphabet
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
];
458