// helper program is in ~me/encodings.d to make more tables from wikipedia

/**
	This is meant to help get data from the wild into utf8 strings
	so you can work with them easily inside D.

	The main function is convertToUtf8(), which takes a byte array
	of your raw data (a byte array because it isn't really a D string
	yet until it is utf8), and a runtime string telling its current
	encoding.

	The current encoding argument is meant to come from the data's
	metadata, and is flexible on exact format - it is case insensitive
	and takes several variations on the names.

	This way, you should be able to send it the encoding string directly
	from an XML document, an HTTP header, or whatever you have, and it
	ought to just work.

	Example:
		auto data = cast(immutable(ubyte)[])
			std.file.read("my-windows-file.txt");
		string utf8String = convertToUtf8(data, "windows-1252");
		// utf8String can now be used


	The encodings currently implemented for decoding are:
		UTF-8 (a no-op; it simply casts the array to string)
		UTF-16 (little and big endian),
		UTF-32 (little and big endian),
		Windows-1252,
		Windows-1251,
		KOI8-R,
		ISO 8859 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, and 16.

	It treats ISO 8859-1, Latin-1, and Windows-1252 the same way, since
	those labels are pretty much de-facto the same thing in wild documents.


	This module currently makes no attempt to look at control characters.
*/
module arsd.characterencodings;

import std.string;
import std.array;
import std.conv;

/// Like convertToUtf8, but if the conversion throws an Exception (unknown
/// encoding name, malformed UTF-16/32 input, ...), it just strips all
/// bytes > 127 and calls it done instead of throwing.
string convertToUtf8Lossy(immutable(ubyte)[] data, string dataCharacterEncoding) {
	try {
		return convertToUtf8(data, dataCharacterEncoding);
	} catch(Exception e) {
		// best effort fallback: keep only the 7 bit ASCII bytes
		auto ascii = appender!string();
		foreach(b; data)
			if(b < 128)
				ascii.put(cast(char) b);
		return ascii.data;
	}
}

/**
	Takes data from a given character encoding and returns it as UTF-8.

	Params:
		data = the raw bytes to decode
		dataCharacterEncoding = name of the encoding of data. Case, spaces,
			dashes and underscores are ignored, so "UTF-16 LE", "utf_16le"
			and "utf16le" all mean the same thing.

	Returns:
		the decoded text. For "utf8", "ascii" and "usascii" the input is
		cast to string without being copied or validated.

	Throws:
		Exception if the encoding name is not recognized or if UTF-16/UTF-32
		data is not a whole number of code units; the UTF-16/UTF-32 paths
		convert through std.conv.to, which may also throw on malformed
		input.
*/
string convertToUtf8(immutable(ubyte)[] data, string dataCharacterEncoding) {
	// just to normalize the passed string...
	auto encoding = dataCharacterEncoding.toLower()
		.replace(" ", "")
		.replace("-", "")
		.replace("_", "");
	// should be good enough.

	switch(encoding) {
		default:
			throw new Exception("I don't know how to convert " ~ dataCharacterEncoding ~ " to UTF-8");

		// The label carries no byte order: honor a big endian BOM if there
		// is one, otherwise assume little endian like this function always
		// did. The BOM itself is decoded like any other character.
		case "utf16":
			return decodeUtf16(data, data.length >= 2 && data[0] == 0xfe && data[1] == 0xff);
		case "utf16le":
			return decodeUtf16(data, false);
		case "utf16be":
			return decodeUtf16(data, true);

		case "utf32":
			return decodeUtf32(data, data.length >= 4
				&& data[0] == 0x00 && data[1] == 0x00
				&& data[2] == 0xfe && data[3] == 0xff);
		case "utf32le":
			return decodeUtf32(data, false);
		case "utf32be":
			return decodeUtf32(data, true);

		case "ascii":
		case "usascii": // utf-8 is a superset of ascii
		case "utf8":
			// since the input is immutable, the cast is ok.
			return cast(string) data;

		// and now the various 8 bit encodings we support.
		case "cp1252":
		case "windows1252":
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "cp1251":
		case "windows1251":
			return decodeImpl(data, Windows_1251, Windows_1251_Lower);
		case "koi8r":
			return decodeImpl(data, KOI8_R, KOI8_R_Lower);
		case "latin1":
		case "iso88591":
			// Why am I putting Windows_1252 here? A lot of
			// stuff in the wild is mislabeled, so this will
			// do some good in the Just Works department.
			// Regardless, I don't handle the
			// control char set in that zone anyway right now.
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "iso88592":
			return decodeImpl(data, ISO_8859_2);
		case "iso88593":
			return decodeImpl(data, ISO_8859_3);
		case "iso88594":
			return decodeImpl(data, ISO_8859_4);
		case "iso88595":
			return decodeImpl(data, ISO_8859_5);
		case "iso88596":
			return decodeImpl(data, ISO_8859_6);
		case "iso88597":
			return decodeImpl(data, ISO_8859_7);
		case "iso88598":
			return decodeImpl(data, ISO_8859_8);
		case "iso88599":
			return decodeImpl(data, ISO_8859_9);
		case "iso885910":
			return decodeImpl(data, ISO_8859_10);
		case "iso885911":
			return decodeImpl(data, ISO_8859_11);
		case "iso885913":
			return decodeImpl(data, ISO_8859_13);
		case "iso885914":
			return decodeImpl(data, ISO_8859_14);
		case "iso885915":
			return decodeImpl(data, ISO_8859_15);
		case "iso885916":
			return decodeImpl(data, ISO_8859_16);
	}

	assert(0);
}

// Assembles 16 bit code units from data in the requested byte order
// (so the result does not depend on the endianness of the host) and
// transcodes them to UTF-8. Throws if the data ends in half a code unit.
private string decodeUtf16(immutable(ubyte)[] data, bool bigEndian) {
	if(data.length % 2 != 0)
		throw new Exception("UTF-16 data must be a whole number of 2 byte code units");

	auto units = new wchar[](data.length / 2);
	foreach(i, ref unit; units) {
		immutable uint first = data[2 * i], second = data[2 * i + 1];
		unit = cast(wchar) (bigEndian ? (first << 8) | second : (second << 8) | first);
	}

	// units is not referenced anywhere else, so treating it as immutable is fine
	return to!string(cast(wstring) units);
}

// Same as decodeUtf16, but for 32 bit code units.
private string decodeUtf32(immutable(ubyte)[] data, bool bigEndian) {
	if(data.length % 4 != 0)
		throw new Exception("UTF-32 data must be a whole number of 4 byte code units");

	auto units = new dchar[](data.length / 4);
	foreach(i, ref unit; units) {
		immutable uint a = data[4 * i], b = data[4 * i + 1],
			c = data[4 * i + 2], d = data[4 * i + 3];
		unit = cast(dchar) (bigEndian
			? (a << 24) | (b << 16) | (c << 8) | d
			: (d << 24) | (c << 16) | (b << 8) | a);
	}

	return to!string(cast(dstring) units);
}

/// Tries to determine the current encoding based on the content.
/// Only really helps with the UTF variants.
/// Returns "UTF-8", "UTF-16 LE", "UTF-16 BE", "UTF-32 LE" or "UTF-32 BE"
/// (all of which convertToUtf8 accepts), or null if it can't be
/// reasonably sure.
string tryToDetermineEncoding(in ubyte[] rawdata) {
	import std.utf;

	try {
		validate!string(cast(string) rawdata);
		// the odds of non stuff validating as utf-8 are pretty low
		return "UTF-8";
	} catch(UTFException t) {
		// it's definitely not UTF-8!
		// we'll look at the first few bytes below. If there's a
		// BOM, it's probably UTF-16 or UTF-32
	}

	static bool hasPrefix(in ubyte[] bytes, in ubyte[] prefix) {
		return bytes.length >= prefix.length && bytes[0 .. prefix.length] == prefix;
	}

	static immutable ubyte[] bomUtf32Be = [0x00, 0x00, 0xfe, 0xff];
	static immutable ubyte[] bomUtf32Le = [0xff, 0xfe, 0x00, 0x00];
	static immutable ubyte[] bomUtf16Le = [0xff, 0xfe];
	static immutable ubyte[] bomUtf16Be = [0xfe, 0xff];

	// not checking for utf8 bom; if it was that, we wouldn't be here.

	// The UTF-32 marks have to be tested before the UTF-16 ones: the
	// UTF-32 LE mark starts with the UTF-16 LE mark, so testing UTF-16
	// first would make UTF-32 LE impossible to detect. FF FE 00 00 could
	// also be UTF-16 LE text starting with a NUL character, so it is
	// only called UTF-32 when the length fits whole 4 byte units.
	if(hasPrefix(rawdata, bomUtf32Be))
		return "UTF-32 BE";
	if(hasPrefix(rawdata, bomUtf32Le) && rawdata.length % 4 == 0)
		return "UTF-32 LE";
	if(hasPrefix(rawdata, bomUtf16Le))
		return "UTF-16 LE";
	if(hasPrefix(rawdata, bomUtf16Be))
		return "UTF-16 BE";

	// we don't know with enough confidence. The app will have to find another way.
	return null;
}

// this function actually does the work, using the translation tables
// below.
//
// data is decoded one byte at a time:
//   * bytes 0 .. 127 go through chars0to127, or are kept as ascii if
//     that table is null
//   * bytes 128 .. 159 go through chars128to159, or become a space if
//     that table is null
//   * bytes 160 .. 255 go through chars160to255, which is required
string decodeImpl(in ubyte[] data, in dchar[] chars160to255, in dchar[] chars128to159 = null, in dchar[] chars0to127 = null)
	in {
		assert(chars160to255.length == 256 - 160);
		assert(chars128to159 is null || chars128to159.length == 160 - 128);
		assert(chars0to127 is null || chars0to127.length == 128 - 0);
	}
	out(ret) {
		import std.utf;
		validate(ret);
	}
body {
	auto utf8 = appender!string();
	// every input byte yields at least one output byte
	utf8.reserve(data.length);

	foreach(octet; data) {
		if(octet < 128) {
			if(chars0to127 !is null)
				utf8.put(chars0to127[octet]);
			else
				utf8.put(cast(char) octet); // ascii is the same
		} else if(octet < 160) {
			if(chars128to159 !is null)
				utf8.put(chars128to159[octet - 128]);
			else
				utf8.put(' ');
		} else {
			utf8.put(chars160to255[octet - 160]);
		}
	}

	return utf8.data;
}


// Here come the translation tables.

// this table gives characters for decimal 128 through 159.
// the < 128 characters are the same as ascii, and > 159 the same as
// iso 8859 1, seen below.
// (positions Windows-1252 leaves undefined - 0x81, 0x8D, 0x8F, 0x90 and
// 0x9D - decode to a plain space)
immutable dchar[] Windows_1252 = [
	'€', ' ', '‚', 'ƒ', '„', '…', '†', '‡',
	'ˆ', '‰', 'Š', '‹', 'Œ', ' ', 'Ž', ' ',
	' ', '‘', '’', '“', '”', '•', '–', '—',
	'˜', '™', 'š', '›', 'œ', ' ', 'ž', 'Ÿ'];

// the following tables give the characters from decimal 160 up to 255
// in the given encodings.
//
// Each one has exactly 96 entries, eight per row, so row N starts at
// byte 160 + 8 * N. Positions an encoding leaves undefined hold a
// plain space. The invisible characters (no-break space at 160, soft
// hyphen at 173, the direction marks in ISO 8859-8) are written as
// \u escapes so editors can't silently mangle them.

immutable dchar[] ISO_8859_1 = [
	'\u00A0', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

immutable dchar[] ISO_8859_2 = [
	'\u00A0', 'Ą', '˘', 'Ł', '¤', 'Ľ', 'Ś', '§',
	'¨', 'Š', 'Ş', 'Ť', 'Ź', '\u00AD', 'Ž', 'Ż',
	'°', 'ą', '˛', 'ł', '´', 'ľ', 'ś', 'ˇ',
	'¸', 'š', 'ş', 'ť', 'ź', '˝', 'ž', 'ż',
	'Ŕ', 'Á', 'Â', 'Ă', 'Ä', 'Ĺ', 'Ć', 'Ç',
	'Č', 'É', 'Ę', 'Ë', 'Ě', 'Í', 'Î', 'Ď',
	'Đ', 'Ń', 'Ň', 'Ó', 'Ô', 'Ő', 'Ö', '×',
	'Ř', 'Ů', 'Ú', 'Ű', 'Ü', 'Ý', 'Ţ', 'ß',
	'ŕ', 'á', 'â', 'ă', 'ä', 'ĺ', 'ć', 'ç',
	'č', 'é', 'ę', 'ë', 'ě', 'í', 'î', 'ď',
	'đ', 'ń', 'ň', 'ó', 'ô', 'ő', 'ö', '÷',
	'ř', 'ů', 'ú', 'ű', 'ü', 'ý', 'ţ', '˙'];

immutable dchar[] ISO_8859_3 = [
	'\u00A0', 'Ħ', '˘', '£', '¤', ' ', 'Ĥ', '§',
	'¨', 'İ', 'Ş', 'Ğ', 'Ĵ', '\u00AD', ' ', 'Ż',
	'°', 'ħ', '²', '³', '´', 'µ', 'ĥ', '·',
	'¸', 'ı', 'ş', 'ğ', 'ĵ', '½', ' ', 'ż',
	'À', 'Á', 'Â', ' ', 'Ä', 'Ċ', 'Ĉ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	' ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Ġ', 'Ö', '×',
	'Ĝ', 'Ù', 'Ú', 'Û', 'Ü', 'Ŭ', 'Ŝ', 'ß',
	'à', 'á', 'â', ' ', 'ä', 'ċ', 'ĉ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	' ', 'ñ', 'ò', 'ó', 'ô', 'ġ', 'ö', '÷',
	'ĝ', 'ù', 'ú', 'û', 'ü', 'ŭ', 'ŝ', '˙'];

immutable dchar[] ISO_8859_4 = [
	'\u00A0', 'Ą', 'ĸ', 'Ŗ', '¤', 'Ĩ', 'Ļ', '§',
	'¨', 'Š', 'Ē', 'Ģ', 'Ŧ', '\u00AD', 'Ž', '¯',
	'°', 'ą', '˛', 'ŗ', '´', 'ĩ', 'ļ', 'ˇ',
	'¸', 'š', 'ē', 'ģ', 'ŧ', 'Ŋ', 'ž', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ī',
	'Đ', 'Ņ', 'Ō', 'Ķ', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ũ', 'Ū', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ī',
	'đ', 'ņ', 'ō', 'ķ', 'ô', 'õ', 'ö', '÷',
	'ø', 'ų', 'ú', 'û', 'ü', 'ũ', 'ū', '˙'];

immutable dchar[] ISO_8859_5 = [
	'\u00A0', 'Ё', 'Ђ', 'Ѓ', 'Є', 'Ѕ', 'І', 'Ї',
	'Ј', 'Љ', 'Њ', 'Ћ', 'Ќ', '\u00AD', 'Ў', 'Џ',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я',
	'№', 'ё', 'ђ', 'ѓ', 'є', 'ѕ', 'і', 'ї',
	'ј', 'љ', 'њ', 'ћ', 'ќ', '§', 'ў', 'џ'];

// the combining marks (fathatan .. sukun, U+064B .. U+0652) are written
// as escapes because they are unreadable as bare character literals.
immutable dchar[] ISO_8859_6 = [
	'\u00A0', ' ', ' ', ' ', '¤', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', '،', '\u00AD', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', '؛', ' ', ' ', ' ', '؟',
	' ', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا',
	'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د',
	'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط',
	'ظ', 'ع', 'غ', ' ', ' ', ' ', ' ', ' ',
	'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه',
	'و', 'ى', 'ي', '\u064B', '\u064C', '\u064D', '\u064E', '\u064F',
	'\u0650', '\u0651', '\u0652', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_7 = [
	'\u00A0', '‘', '’', '£', '€', '₯', '¦', '§',
	'¨', '©', 'ͺ', '«', '¬', '\u00AD', ' ', '―',
	'°', '±', '²', '³', '΄', '΅', 'Ά', '·',
	'Έ', 'Ή', 'Ί', '»', 'Ό', '½', 'Ύ', 'Ώ',
	'ΐ', 'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η',
	'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο',
	'Π', 'Ρ', ' ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ',
	'Ψ', 'Ω', 'Ϊ', 'Ϋ', 'ά', 'έ', 'ή', 'ί',
	'ΰ', 'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η',
	'θ', 'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο',
	'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ',
	'ψ', 'ω', 'ϊ', 'ϋ', 'ό', 'ύ', 'ώ', ' '];

// bytes 253 and 254 are the left-to-right and right-to-left marks
// (U+200E, U+200F); this table used to have spaces in those two slots.
immutable dchar[] ISO_8859_8 = [
	'\u00A0', ' ', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', '×', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', '÷', '»', '¼', '½', '¾', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', '‗',
	'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח',
	'ט', 'י', 'ך', 'כ', 'ל', 'ם', 'מ', 'ן',
	'נ', 'ס', 'ע', 'ף', 'פ', 'ץ', 'צ', 'ק',
	'ר', 'ש', 'ת', ' ', ' ', '\u200E', '\u200F', ' '];

immutable dchar[] ISO_8859_9 = [
	'\u00A0', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ğ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'İ', 'Ş', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ğ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ı', 'ş', 'ÿ'];

immutable dchar[] ISO_8859_10 = [
	'\u00A0', 'Ą', 'Ē', 'Ģ', 'Ī', 'Ĩ', 'Ķ', '§',
	'Ļ', 'Đ', 'Š', 'Ŧ', 'Ž', '\u00AD', 'Ū', 'Ŋ',
	'°', 'ą', 'ē', 'ģ', 'ī', 'ĩ', 'ķ', '·',
	'ļ', 'đ', 'š', 'ŧ', 'ž', '―', 'ū', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ï',
	'Ð', 'Ņ', 'Ō', 'Ó', 'Ô', 'Õ', 'Ö', 'Ũ',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ï',
	'ð', 'ņ', 'ō', 'ó', 'ô', 'õ', 'ö', 'ũ',
	'ø', 'ų', 'ú', 'û', 'ü', 'ý', 'þ', 'ĸ'];

// byte N (161 .. 218 and 223 .. 251) is simply U+0E00 + (N - 160).
// the combining vowel and tone marks are written as escapes because
// they are unreadable as bare character literals.
immutable dchar[] ISO_8859_11 = [
	'\u00A0', 'ก', 'ข', 'ฃ', 'ค', 'ฅ', 'ฆ', 'ง',
	'จ', 'ฉ', 'ช', 'ซ', 'ฌ', 'ญ', 'ฎ', 'ฏ',
	'ฐ', 'ฑ', 'ฒ', 'ณ', 'ด', 'ต', 'ถ', 'ท',
	'ธ', 'น', 'บ', 'ป', 'ผ', 'ฝ', 'พ', 'ฟ',
	'ภ', 'ม', 'ย', 'ร', 'ฤ', 'ล', 'ฦ', 'ว',
	'ศ', 'ษ', 'ส', 'ห', 'ฬ', 'อ', 'ฮ', 'ฯ',
	'ะ', '\u0E31', 'า', 'ำ', '\u0E34', '\u0E35', '\u0E36', '\u0E37',
	'\u0E38', '\u0E39', '\u0E3A', ' ', ' ', ' ', ' ', '฿',
	'เ', 'แ', 'โ', 'ใ', 'ไ', 'ๅ', 'ๆ', '\u0E47',
	'\u0E48', '\u0E49', '\u0E4A', '\u0E4B', '\u0E4C', '\u0E4D', '\u0E4E', '๏',
	'๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗',
	'๘', '๙', '๚', '๛', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_13 = [
	'\u00A0', '”', '¢', '£', '¤', '„', '¦', '§',
	'Ø', '©', 'Ŗ', '«', '¬', '\u00AD', '®', 'Æ',
	'°', '±', '²', '³', '“', 'µ', '¶', '·',
	'ø', '¹', 'ŗ', '»', '¼', '½', '¾', 'æ',
	'Ą', 'Į', 'Ā', 'Ć', 'Ä', 'Å', 'Ę', 'Ē',
	'Č', 'É', 'Ź', 'Ė', 'Ģ', 'Ķ', 'Ī', 'Ļ',
	'Š', 'Ń', 'Ņ', 'Ó', 'Ō', 'Õ', 'Ö', '×', // 213 is O with tilde, not O with double acute
	'Ų', 'Ł', 'Ś', 'Ū', 'Ü', 'Ż', 'Ž', 'ß',
	'ą', 'į', 'ā', 'ć', 'ä', 'å', 'ę', 'ē',
	'č', 'é', 'ź', 'ė', 'ģ', 'ķ', 'ī', 'ļ',
	'š', 'ń', 'ņ', 'ó', 'ō', 'õ', 'ö', '÷', // 245 likewise
	'ų', 'ł', 'ś', 'ū', 'ü', 'ż', 'ž', '’'];

immutable dchar[] ISO_8859_14 = [
	'\u00A0', 'Ḃ', 'ḃ', '£', 'Ċ', 'ċ', 'Ḋ', '§',
	'Ẁ', '©', 'Ẃ', 'ḋ', 'Ỳ', '\u00AD', '®', 'Ÿ',
	'Ḟ', 'ḟ', 'Ġ', 'ġ', 'Ṁ', 'ṁ', '¶', 'Ṗ',
	'ẁ', 'ṗ', 'ẃ', 'Ṡ', 'ỳ', 'Ẅ', 'ẅ', 'ṡ',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ŵ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ṫ', // 213 is O with tilde, not O with double acute
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Ŷ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ŵ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ṫ', // 245 likewise
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ŷ', 'ÿ'];

immutable dchar[] ISO_8859_15 = [
	'\u00A0', '¡', '¢', '£', '€', '¥', 'Š', '§',
	'š', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', 'Ž', 'µ', '¶', '·',
	'ž', '¹', 'º', '»', 'Œ', 'œ', 'Ÿ', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×', // 213 is O with tilde, not O with double acute
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷', // 245 likewise
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

immutable dchar[] ISO_8859_16 = [
	'\u00A0', 'Ą', 'ą', 'Ł', '€', '„', 'Š', '§',
	'š', '©', 'Ș', '«', 'Ź', '\u00AD', 'ź', 'Ż',
	'°', '±', 'Č', 'ł', 'Ž', '”', '¶', '·',
	'ž', 'č', 'ș', '»', 'Œ', 'œ', 'Ÿ', 'ż',
	'À', 'Á', 'Â', 'Ă', 'Ä', 'Ć', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'\u0110', 'Ń', 'Ò', 'Ó', 'Ô', 'Ő', 'Ö', 'Ś', // 208 is D with stroke (U+0110), the capital of 240, not eth
	'Ű', 'Ù', 'Ú', 'Û', 'Ü', 'Ę', 'Ț', 'ß',
	'à', 'á', 'â', 'ă', 'ä', 'ć', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'đ', 'ń', 'ò', 'ó', 'ô', 'ő', 'ö', 'ś',
	'ű', 'ù', 'ú', 'û', 'ü', 'ę', 'ț', 'ÿ'];

// KOI8-R, bytes 128 .. 159
immutable dchar[] KOI8_R_Lower = [
	'─', '│', '┌', '┐', '└', '┘', '├', '┤',
	'┬', '┴', '┼', '▀', '▄', '█', '▌', '▐',
	'░', '▒', '▓', '⌠', '■', '∙', '√', '≈',
	'≤', '≥', '\u00a0', '⌡', '°', '²', '·', '÷'];

// KOI8-R, bytes 160 .. 255. The small letters live at 192 .. 223 and
// the capital letters, in the same order, at 224 .. 255; small io is
// at 163 and capital io at 179.
immutable dchar[] KOI8_R = [
	'═', '║', '╒', 'ё', '╓', '╔', '╕', '╖',
	'╗', '╘', '╙', '╚', '╛', '╜', '╝', '╞',
	'╟', '╠', '╡', 'Ё', '╢', '╣', '╤', '╥',
	'╦', '╧', '╨', '╩', '╪', '╫', '╬', '©',
	'ю', 'а', 'б', 'ц', 'д', 'е', 'ф', 'г',
	'х', 'и', 'й', 'к', 'л', 'м', 'н', 'о',
	'п', 'я', 'р', 'с', 'т', 'у', 'ж', 'в',
	'ь', 'ы', 'з', 'ш', 'э', 'щ', 'ч', 'ъ',
	'Ю', 'А', 'Б', 'Ц', 'Д', 'Е', 'Ф', 'Г',
	'Х', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О',
	'П', 'Я', 'Р', 'С', 'Т', 'У', 'Ж', 'В',
	'Ь', 'Ы', 'З', 'Ш', 'Э', 'Щ', 'Ч', 'Ъ'];

// Windows-1251, bytes 128 .. 159 (152 is undefined and decodes to a space)
immutable dchar[] Windows_1251_Lower = [
	'Ђ', 'Ѓ', '‚', 'ѓ', '„', '…', '†', '‡',
	'€', '‰', 'Љ', '‹', 'Њ', 'Ќ', 'Ћ', 'Џ',
	'ђ', '‘', '’', '“', '”', '•', '–', '—',
	' ', '™', 'љ', '›', 'њ', 'ќ', 'ћ', 'џ'];

// Windows-1251, bytes 160 .. 255
immutable dchar[] Windows_1251 = [
	'\u00A0', 'Ў', 'ў', 'Ј', '¤', 'Ґ', '¦', '§',
	'Ё', '©', 'Є', '«', '¬', '\u00AD', '®', 'Ї',
	'°', '±', 'І', 'і', 'ґ', 'µ', '¶', '·',
	'ё', '№', 'є', '»', 'ј', 'Ѕ', 'ѕ', 'ї',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'];