1 /*******************************************************************************
2 
3         copyright:      Copyright (c) 2007 Kris Bell. All rights reserved
4 
5         license:        BSD style: $(LICENSE)
6 
7         version:        Initial release: April 2007
8 
9         author:         Kris
10 
11 *******************************************************************************/
12 
13 module tango.sys.win32.CodePage;
14 
15 private import tango.sys.Common;
16 
17 private import tango.core.Exception;
18 
19 /*******************************************************************************
20 
21         Convert text to and from Windows 'code pages'. This is non-portable,
22         and will be unlikely to operate even across all Windows platforms.
23 
24 *******************************************************************************/
25 
struct CodePage
{
        /**********************************************************************

                Test a text array to see if it contains non-ascii elements.

                Params:
                src = the text to inspect

                Returns: true if every element has its high bit clear
                         (which includes an empty array), false otherwise

        **********************************************************************/

        static bool isAscii (const(char)[] src)
        {
                foreach (c; src)
                         if (c & 0x80)
                             return false;
                return true;
        }


        /**********************************************************************

                Convert utf8 text to a codepage representation

                page  0     - the ansi code page
                      1     - the oem code page
                      2     - the mac code page
                      3     - ansi code page for the calling thread
                      65000 - UTF-7 translation
                      65001 - UTF-8 translation

                      or a region-specific codepage

                returns: a slice of the provided output buffer
                         representing converted text. A zero is
                         written into dst immediately after the
                         returned slice

                throws:  IllegalArgumentException when the conversion
                         fails, or when dst cannot hold the result

                Note that the input must be utf8 encoded. Note also
                that the dst output should be sufficiently large to
                accommodate the output plus one terminating zero; a
                size of 2*src.length would be enough to host almost
                any conversion

        **********************************************************************/

        static char[] into (const(char)[] src, char[] dst, uint page=0)
        {
                return convert (src, dst, CP_UTF8, page);
        }


        /**********************************************************************

                Convert codepage text to a utf8 representation

                page  0     - the ansi code page
                      1     - the oem code page
                      2     - the mac code page
                      3     - ansi code page for the calling thread
                      65000 - UTF-7 translation
                      65001 - UTF-8 translation

                      or a region-specific codepage

                returns: a slice of the provided output buffer
                         representing converted text. A zero is
                         written into dst immediately after the
                         returned slice

                throws:  IllegalArgumentException when the conversion
                         fails, or when dst cannot hold the result

                Note that the output will be utf8 encoded. Note also
                that the dst output should be sufficiently large to
                accommodate the output plus one terminating zero; a
                size of 2*src.length would be enough to host almost
                any conversion

        **********************************************************************/

        static char[] from (const(char)[] src, char[] dst, uint page=0)
        {
                return convert (src, dst, page, CP_UTF8);
        }


        /**********************************************************************

                Internal conversion routine; we avoid heap activity for
                strings of short and medium length. A zero is appended
                to the dst array in order to simplify C API conversions

                The text is first widened from code page 'from' into a
                temporary utf16 buffer, and then narrowed into dst using
                code page 'into'. Since the input is fully consumed
                before dst is written, src may be a slice of dst

                Params:
                src  = the text to convert
                dst  = output buffer; must be non-empty, and one element
                       is always reserved for the trailing zero
                from = code page of src
                into = code page of the result

                Returns: dst [0 .. number of bytes produced]

                Throws:  IllegalArgumentException when src is too large
                         for the Win32 API, when dst has no room for any
                         output, or when either Win32 conversion fails

        **********************************************************************/

        private static char[] convert (const(char)[] src, char[] dst, uint from, uint into)
        {
                size_t len = 0;

                // sanity check: dst must at least host the terminator
                assert (dst.length);

                // got some input?
                if (src.length > 0)
                   {
                   // the Win32 routines accept signed 32bit lengths only
                   if (src.length > int.max)
                       throw new IllegalArgumentException ("CodePage.convert :: input is too large");

                   // space available for output, excluding the terminator
                   size_t room = dst.length - 1;

                   // a size of zero would ask WideCharToMultiByte for the
                   // required length instead of converting, and that value
                   // would then be used to index beyond the end of dst
                   if (room is 0)
                       throw new IllegalArgumentException ("CodePage.convert :: output buffer is too small");

                   if (room > int.max)
                       room = int.max;

                   // src.length utf16 units are treated as sufficient for
                   // the widened text, so the stack buffer covers any input
                   // of up to 2000 bytes
                   wchar[2000] tmp = void;
                   wchar[] wide = tmp[];
                   if (src.length > tmp.length)
                       wide = new wchar[src.length];

                   len = MultiByteToWideChar (from, 0, cast(PCHAR)src.ptr, cast(int) src.length,
                                              wide.ptr, cast(int) wide.length);
                   if (len)
                       len = WideCharToMultiByte (into, 0, wide.ptr, cast(int) len,
                                                  cast(PCHAR)dst.ptr, cast(int) room, null, null);
                   if (len == 0)
                       throw new IllegalArgumentException ("CodePage.convert :: "~SysError.lastMsg.idup);
                   }

                // append a null terminator
                dst[len] = 0;
                return dst [0 .. len];
        }
}
137 
138 
debug(Test)
{
        /**********************************************************************

                Round-trip a short ascii string through the default
                code page and back to utf8

        **********************************************************************/

        void main ()
        {
                // string literals are immutable, so hold them as const text
                const(char)[] s = "foo";

                // each buffer needs room for the converted text plus the
                // zero that convert() appends
                char[16] encoded = void;
                char[16] decoded = void;

                //if (! CodePage.isAscii (s))
                      s = CodePage.into (s, encoded);
                      s = CodePage.from (s, decoded);

                assert (s == "foo");
        }
}