/*******************************************************************************

        copyright:      Copyright (c) 2007 Kris Bell. All rights reserved

        license:        BSD style: $(LICENSE)

        version:        Initial release: April 2007

        author:         Kris

*******************************************************************************/

module tango.sys.win32.CodePage;

private import tango.sys.Common;

private import tango.core.Exception;

/*******************************************************************************

        Convert text to and from Windows 'code pages'. This is non-portable,
        and will be unlikely to operate even across all Windows platforms.

*******************************************************************************/

struct CodePage
{
    /**********************************************************************

            Test a text array to see if it contains non-ascii elements.
            Returns true if ascii, false otherwise. An empty array is
            reported as ascii.

    **********************************************************************/

    static bool isAscii (const(char)[] src)
    {
        foreach (c; src)
            // any element with the high bit set is outside 7-bit ascii
            if (c & 0x80)
                return false;
        return true;
    }


    /**********************************************************************

            Convert utf8 text to a codepage representation

            page  0     - the ansi code page
                  1     - the oem code page
                  2     - the mac code page
                  3     - ansi code page for the calling thread
                  65000 - UTF-7 translation
                  65001 - UTF-8 translation

                  or a region-specific codepage

            returns: a slice of the provided output buffer
                     representing converted text

            throws: IllegalArgumentException when the conversion fails,
                    which includes the case where dst is too small

            Note that the input must be utf8 encoded. Note also
            that the dst output should be sufficiently large to
            accommodate the output plus one trailing zero, which
            is written just past the returned slice; a size of
            2*src.length would be enough to host almost any
            conversion

    **********************************************************************/

    static char[] into (const(char)[] src, char[] dst, uint page=0)
    {
        return convert (src, dst, CP_UTF8, page);
    }


    /**********************************************************************

            Convert codepage text to a utf8 representation

            page  0     - the ansi code page
                  1     - the oem code page
                  2     - the mac code page
                  3     - ansi code page for the calling thread
                  65000 - UTF-7 translation
                  65001 - UTF-8 translation

                  or a region-specific codepage

            returns: a slice of the provided output buffer
                     representing converted text

            throws: IllegalArgumentException when the conversion fails,
                    which includes the case where dst is too small

            Note that the input must be encoded in the given code
            page, and that the output will be utf8 encoded. Note
            also that the dst output should be sufficiently large
            to accommodate the output plus one trailing zero, which
            is written just past the returned slice; a size of
            2*src.length would be enough to host almost any
            conversion

    **********************************************************************/

    static char[] from (const(char)[] src, char[] dst, uint page=0)
    {
        return convert (src, dst, page, CP_UTF8);
    }


    /**********************************************************************

            Internal conversion routine; we avoid heap activity for
            strings of short and medium length. A zero is appended
            to the dst array in order to simplify C API conversions

            The text is first widened from the 'from' code page into
            a temporary wchar buffer, and then narrowed from there
            into the 'into' code page. At most dst.length-1 elements
            of dst are used for text, the remaining one is reserved
            for the terminator.

            throws: IllegalArgumentException when either buffer is
                    too large for the Win32 API, or when one of the
                    two conversion steps fails

    **********************************************************************/

    private static char[] convert (const(char)[] src, char[] dst, uint from, uint into)
    {
        // the Win32 conversion routines take and return int lengths
        int len = 0;

        // sanity check: there must be room for at least the terminator
        assert (dst.length, "CodePage.convert :: dst must not be empty");

        // got some input?
        if (src.length > 0)
        {
            // refuse lengths that would wrap when narrowed to int
            if (src.length > int.max || dst.length - 1 > int.max)
                throw new IllegalArgumentException ("CodePage.convert :: buffer too large");

            // stack buffer for the common case, heap only for long input
            wchar[2000] tmp = void;
            wchar[] wide = (src.length <= tmp.length) ? tmp[] : new wchar[src.length];

            len = MultiByteToWideChar (from, 0, cast(PCHAR) src.ptr, cast(int) src.length,
                                       wide.ptr, cast(int) wide.length);
            if (len)
                // keep one element of dst free for the terminator
                len = WideCharToMultiByte (into, 0, wide.ptr, len,
                                           cast(PCHAR) dst.ptr, cast(int) (dst.length - 1),
                                           null, null);
            if (len == 0)
                throw new IllegalArgumentException ("CodePage.convert :: "~SysError.lastMsg.idup);
        }

        // append a null terminator
        dst[len] = 0;
        return dst [0 .. len];
    }
}


debug(Test)
{
    void main ()
    {
        // each buffer must hold the converted text plus the terminator
        char[8] x = void;
        char[8] y = void;

        char[] s = "foo".dup;

        //if (! CodePage.isAscii (s))
        s = CodePage.into (s, x);
        s = CodePage.from (s, y);
        assert (s == "foo");
    }
}