5 #include <lib/base/eerror.h>
6 #include <lib/base/encoding.h>
7 #include <lib/base/estring.h>
// Builds the "short name" of a service name.
//
// The short name is the concatenation of every span enclosed between the
// byte pairs C2 86 and C2 87 (the UTF-8 encodings of U+0086 / U+0087, the
// DVB emphasis on/off control codes).
//
// str: the service name, UTF-8.
// Returns the concatenated marked spans, or a copy of str when no marked
// span contributes any text.
std::string buildShortName( const std::string &str )
{
	static const char markOpen[] = "\xc2\x86";
	static const char markClose[] = "\xc2\x87";
	const std::string::size_type markLen = 2; // both markers are two bytes long

	std::string shortName;
	std::string::size_type searchFrom = 0;
	for (;;)
	{
		const std::string::size_type start = str.find(markOpen, searchFrom);
		if (start == std::string::npos)
			break;

		const std::string::size_type stop = str.find(markClose, start);
		if (stop != std::string::npos)
			shortName += str.substr(start + markLen, stop - (start + markLen));

		// Resume right behind the opening marker (not behind the closing one).
		searchFrom = start + markLen;
	}
	return shortName.empty() ? str : shortName;
}
// Formats an integer as text.
//
// val: the value to format.
// sys: the radix; 10 gives signed decimal, 16 gives upper-case hexadecimal
//      (two's-complement bit pattern for negative values).
// Returns the formatted number, or an empty string for any other radix.
std::string getNum(int val, int sys)
{
	// 12 bytes hold the longest decimal 32-bit value ("-2147483648") plus the
	// terminator. The buffer starts out empty so that an unsupported radix
	// yields "" instead of reading uninitialised memory.
	char buf[12] = "";

	if (sys == 10)
		snprintf(buf, sizeof(buf), "%i", val);
	else if (sys == 16)
		snprintf(buf, sizeof(buf), "%X", static_cast<unsigned int>(val)); // %X expects unsigned

	return std::string(buf);
}
41 // ISO 8859-x to Unicode (UCS-2) code point tables, taken from www.unicode.org/Public/MAPPINGS/ISO8859/
// ISO 8859-2, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
43 static unsigned long c88592[96]={
44 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
45 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
46 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
47 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
48 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
49 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9};
// ISO 8859-3, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
// 0x0000 entries presumably mark byte values the standard leaves unassigned
// -- verify against the unicode.org mapping file.
51 static unsigned long c88593[96]={
52 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0x0000, 0x017B,
53 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C,
54 0x00C0, 0x00C1, 0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
55 0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
56 0x00E0, 0x00E1, 0x00E2, 0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
57 0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9};
// ISO 8859-4, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
59 static unsigned long c88594[96]={
60 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
61 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
62 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
63 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
64 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
65 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9};
// ISO 8859-5, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
67 static unsigned long c88595[96]={
68 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
69 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
70 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
71 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
72 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
73 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F};
// ISO 8859-6, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
// 0x0000 entries presumably mark byte values the standard leaves unassigned
// -- verify against the unicode.org mapping file.
75 static unsigned long c88596[96]={
76 0x00A0, 0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x060C, 0x00AD, 0x0000, 0x0000,
77 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F,
78 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
79 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
80 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
81 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000};
// ISO 8859-7, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
// 0x0000 entries presumably mark byte values the standard leaves unassigned
// -- verify against the unicode.org mapping file.
83 static unsigned long c88597[96]={
84 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
85 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
86 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
87 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
88 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
89 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000};
// ISO 8859-8, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
// 0x0000 entries presumably mark byte values the standard leaves unassigned
// -- verify against the unicode.org mapping file.
91 static unsigned long c88598[96]={
92 0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
93 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
94 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
95 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
96 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
97 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000};
// ISO 8859-9, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
99 static unsigned long c88599[96]={
100 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
101 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
102 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
103 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
104 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
105 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF};
// ISO 8859-10, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
107 static unsigned long c885910[96]={
108 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
109 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
110 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
111 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
112 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
113 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138};
// ISO 8859-11, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
// 0x0000 entries presumably mark byte values the standard leaves unassigned
// -- verify against the unicode.org mapping file.
115 static unsigned long c885911[96]={
116 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
117 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
118 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
119 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0E3F,
120 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
121 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0x0000, 0x0000, 0x0000, 0x0000};
// ISO 8859-13, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
123 static unsigned long c885913[96]={
124 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
125 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
126 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
127 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
128 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
129 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019};
// ISO 8859-14, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
131 static unsigned long c885914[96]={
132 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
133 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
134 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
135 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
136 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
137 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
// ISO 8859-15, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
139 static unsigned long c885915[96]={
140 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
141 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
142 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
143 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
144 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
145 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF};
// ISO 8859-16, high half: entry [b - 0xA0] is the Unicode code point for
// byte b (0xA0..0xFF), which is how recode() indexes it.
147 static unsigned long c885916[96]={
148 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
149 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
150 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
151 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
152 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
153 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF};
// Freesat Huffman decoder instance; convertDVBUTF8() calls its decode() on
// strings it attempts to treat as Freesat Huffman encoded.
155 static freesatHuffmanDecoder huffmanDecoder;
// ISO 6937 single-byte table, high half: entry [b - 0xA0] is the Unicode
// code point for byte b (0xA0..0xFF); recode() uses it for code page 0.
// The 0xE0xx values in the third row (bytes 0xC1..0xCF) lie in the Unicode
// private-use range -- presumably placeholders for the diacritic prefix
// bytes that doVideoTexSuppl() combines with a base letter; confirm.
// 0x0000 entries presumably mark bytes without a mapping.
157 static unsigned long iso6937[96]={
158 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0000, 0x00A7, 0x00A4, 0x2018, 0x201C, 0x00AB, 0x2190, 0x2191, 0x2192, 0x2193,
159 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00D7, 0x00B5, 0x00B6, 0x00B7, 0x00F7, 0x2019, 0x201D, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
160 0x0000, 0xE002, 0xE003, 0xE004, 0xE005, 0xE006, 0xE007, 0xE008, 0xE009, 0xE00C, 0xE00A, 0xE00B, 0x0000, 0xE00D, 0xE00E, 0xE00F,
161 0x2015, 0x00B9, 0x00AE, 0x00A9, 0x2122, 0x266A, 0x00AC, 0x00A6, 0x0000, 0x0000, 0x0000, 0x0000, 0x215B, 0x215C, 0x215D, 0x215E,
162 0x2126, 0x00C6, 0x0110, 0x00AA, 0x0126, 0x0000, 0x0132, 0x013F, 0x0141, 0x00D8, 0x0152, 0x00BA, 0x00DE, 0x0166, 0x014A, 0x0149,
163 0x0138, 0x00E6, 0x0111, 0x00F0, 0x0127, 0x0131, 0x0133, 0x0140, 0x0142, 0x00F8, 0x0153, 0x00DF, 0x00FE, 0x0167, 0x014B, 0x00AD};
165 // Two-char mapping (aka ISO 6937), used by many Polish services and UPC Direct/HBO services.
166 // Table taken from http://mitglied.lycos.de/buran/charsets/videotex-suppl.html
// Maps a two-byte sequence (c1, c2) to the Unicode code point of a single
// precomposed character. Judging by the visible labels, c1 selects a
// diacritic (0xC3, 0xC7, 0xC8, ...) and c2 is the ASCII base letter
// (0x20 = space yields the spacing form of the diacritic).
// convertDVBUTF8() uses the result as a truth value, i.e. it treats 0 as
// "no combined character for this pair".
// NOTE(review): several `case` labels and the switch scaffolding are not
// part of this excerpt; the group headings added below are derived from
// the returned code points only.
167 static inline unsigned int doVideoTexSuppl(int c1, int c2)
// Grave accent group: results are a/e/i/o/u with grave (U+00E0.., U+00C0..).
174 case 0x61: return 224; case 0x41: return 192;
175 case 0x65: return 232; case 0x45: return 200;
176 case 0x69: return 236; case 0x49: return 204;
177 case 0x6f: return 242; case 0x4f: return 210;
178 case 0x75: return 249; case 0x55: return 217;
// Acute accent group: space gives U+00B4 (acute accent), letters give
// a/e/i/o/u/y/c/l/n/r/s/z with acute.
184 case 0x20: return 180;
185 case 0x61: return 225; case 0x41: return 193;
186 case 0x65: return 233; case 0x45: return 201;
187 case 0x69: return 237; case 0x49: return 205;
188 case 0x6f: return 243; case 0x4f: return 211;
189 case 0x75: return 250; case 0x55: return 218;
190 case 0x79: return 253; case 0x59: return 221;
191 case 0x63: return 263; case 0x43: return 262;
192 case 0x6c: return 314; case 0x4c: return 313;
193 case 0x6e: return 324; case 0x4e: return 323;
194 case 0x72: return 341; case 0x52: return 340;
195 case 0x73: return 347; case 0x53: return 346;
196 case 0x7a: return 378; case 0x5a: return 377;
199 case 0xC3: // circumflex (the values below are U+00E2 a-circumflex, U+00C2 A-circumflex, ...)
202 case 0x61: return 226; case 0x41: return 194;
203 case 0x65: return 234; case 0x45: return 202;
204 case 0x69: return 238; case 0x49: return 206;
205 case 0x6f: return 244; case 0x4f: return 212;
206 case 0x75: return 251; case 0x55: return 219;
207 case 0x79: return 375; case 0x59: return 374;
208 case 0x63: return 265; case 0x43: return 264;
209 case 0x67: return 285; case 0x47: return 284;
210 case 0x68: return 293; case 0x48: return 292;
211 case 0x6a: return 309; case 0x4a: return 308;
212 case 0x73: return 349; case 0x53: return 348;
213 case 0x77: return 373; case 0x57: return 372;
// Tilde group: a/n/i/o/u with tilde.
219 case 0x61: return 227; case 0x41: return 195;
220 case 0x6e: return 241; case 0x4e: return 209;
221 case 0x69: return 297; case 0x49: return 296;
222 case 0x6f: return 245; case 0x4f: return 213;
223 case 0x75: return 361; case 0x55: return 360;
// Macron group: space gives U+00AF (macron); note the upper-case letter is
// listed first on each line here.
229 case 0x20: return 175;
230 case 0x41: return 256; case 0x61: return 257;
231 case 0x45: return 274; case 0x65: return 275;
232 case 0x49: return 298; case 0x69: return 299;
233 case 0x4f: return 332; case 0x6f: return 333;
// Breve group: space gives U+02D8 (breve).
238 case 0x20: return 728;
239 case 0x61: return 259; case 0x41: return 258;
240 case 0x67: return 287; case 0x47: return 286;
241 case 0x75: return 365; case 0x55: return 364;
244 case 0xC7: // dot above
247 case 0x20: return 729;
248 case 0x63: return 267; case 0x43: return 266;
249 case 0x65: return 279; case 0x45: return 278;
250 case 0x67: return 289; case 0x47: return 288;
// This line pairs two unrelated letters: 'Z' -> U+017B and 'I' -> U+0130.
251 case 0x5a: return 379; case 0x49: return 304;
252 case 0x7a: return 380;
255 case 0xC8: // diaeresis
258 case 0x20: return 168;
259 case 0x61: return 228; case 0x41: return 196;
260 case 0x65: return 235; case 0x45: return 203;
261 case 0x69: return 239; case 0x49: return 207;
262 case 0x6f: return 246; case 0x4f: return 214;
263 case 0x75: return 252; case 0x55: return 220;
264 case 0x79: return 255; case 0x59: return 376;
267 case 0xCA: // ring above
270 case 0x20: return 730;
271 case 0x61: return 229; case 0x41: return 197;
272 case 0x75: return 367; case 0x55: return 366;
275 case 0xCB: // cedilla
278 case 0x63: return 231; case 0x43: return 199;
279 case 0x67: return 291; case 0x47: return 290;
280 case 0x6b: return 311; case 0x4b: return 310;
281 case 0x6c: return 316; case 0x4c: return 315;
282 case 0x6e: return 326; case 0x4e: return 325;
283 case 0x72: return 343; case 0x52: return 342;
284 case 0x73: return 351; case 0x53: return 350;
285 case 0x74: return 355; case 0x54: return 354;
288 case 0xCD: // double acute accent
291 case 0x20: return 733;
292 case 0x6f: return 337; case 0x4f: return 336;
293 case 0x75: return 369; case 0x55: return 368;
// Ogonek group: space gives U+02DB (ogonek).
299 case 0x20: return 731;
300 case 0x61: return 261; case 0x41: return 260;
301 case 0x65: return 281; case 0x45: return 280;
302 case 0x69: return 303; case 0x49: return 302;
303 case 0x75: return 371; case 0x55: return 370;
// Caron group: space gives U+02C7 (caron).
309 case 0x20: return 711;
310 case 0x63: return 269; case 0x43: return 268;
311 case 0x64: return 271; case 0x44: return 270;
312 case 0x65: return 283; case 0x45: return 282;
313 case 0x6c: return 318; case 0x4c: return 317;
314 case 0x6e: return 328; case 0x4e: return 327;
315 case 0x72: return 345; case 0x52: return 344;
316 case 0x73: return 353; case 0x53: return 352;
317 case 0x74: return 357; case 0x54: return 356;
318 case 0x7a: return 382; case 0x5a: return 381;
// Translates one byte of a single-byte encoded string to a Unicode code point.
// d:  the input byte.
// cp: code page selector: 0 = ISO 6937, 1..16 = ISO 8859-<cp> (12 is not handled).
// NOTE(review): every table lookup indexes with d-0xA0, which is only valid
// for d >= 0xA0; the guard for smaller d and the fallback for unknown cp are
// not part of this excerpt -- confirm they exist.
325 static inline unsigned int recode(unsigned char d, int cp)
331 case 0: return iso6937[d-0xA0]; // ISO6937
332 case 1: return d; // 8859-1 -> unicode mapping (identity: Latin-1 equals the first 256 code points)
333 case 2: return c88592[d-0xA0]; // 8859-2 -> unicode mapping
334 case 3: return c88593[d-0xA0]; // 8859-3 -> unicode mapping
335 case 4: return c88594[d-0xA0]; // 8859-4 -> unicode mapping
336 case 5: return c88595[d-0xA0]; // 8859-5 -> unicode mapping
337 case 6: return c88596[d-0xA0]; // 8859-6 -> unicode mapping
338 case 7: return c88597[d-0xA0]; // 8859-7 -> unicode mapping
339 case 8: return c88598[d-0xA0]; // 8859-8 -> unicode mapping
340 case 9: return c88599[d-0xA0]; // 8859-9 -> unicode mapping
341 case 10: return c885910[d-0xA0]; // 8859-10 -> unicode mapping
342 case 11: return c885911[d-0xA0]; // 8859-11 -> unicode mapping
343 // case 12: return c885912[d-0xA0]; // 8859-12 -> unicode mapping // reserved for indian use..
344 case 13: return c885913[d-0xA0]; // 8859-13 -> unicode mapping
345 case 14: return c885914[d-0xA0]; // 8859-14 -> unicode mapping
346 case 15: return c885915[d-0xA0]; // 8859-15 -> unicode mapping
347 case 16: return c885916[d-0xA0]; // 8859-16 -> unicode mapping
352 std::string UnicodeToUTF8(long c)
355 char utf[2] = {static_cast<char>(c), 0};
356 return std::string(utf, 1);
358 else if ( c < 0x800) {
359 char utf[3] = { static_cast<char>(0xc0 | (c >> 6)), static_cast<char>(0x80 | (c & 0x3f)), 0};
360 return std::string(utf, 2);
362 else if ( c < 0x10000) {
363 char utf[4] = { static_cast<char>(0xe0 | (c >> 12)), static_cast<char>(0x80 | ((c >> 6) & 0x3f)),
364 static_cast<char>(0x80 | (c & 0x3f)), 0};
365 return std::string(utf, 3);
367 else if ( c < 0x200000) {
368 char utf[5] = { static_cast<char>(0xf0 | (c >> 18)), static_cast<char>(0x80 | ((c >> 12) & 0x3f)),
369 static_cast<char>(0x80 | ((c >> 6) & 0x3f)), static_cast<char>(0x80 | (c & 0x3f)), 0};
370 return std::string(utf, 4);
372 eDebug("[UnicodeToUTF8] invalid unicode character: code=0x%08lx", c); // not a valid unicode
// Converts a DVB text field to a UTF-8 std::string.
// data/len: the raw bytes of the field.
// table:    encoding table to use; it can be replaced by the transponder
//           default mapping, by the first byte(s) of data, and (when -1) by
//           defaultEncodingTable.
// tsidonid: transponder key passed to the encodingHandler lookups.
// NOTE(review): large parts of the body (switch scaffolding, loop headers,
// several branches and the final return) are not part of this excerpt; the
// notes below describe only what the visible lines do.
376 std::string convertDVBUTF8(const unsigned char *data, int len, int table, int tsidonid)
382 std::string output = "";
385 encodingHandler.getTransponderDefaultMapping(tsidonid, table);
387 // first byte in strings may override general encoding table.
390 case ISO8859_5 ... ISO8859_15:
391 // For Thai providers, encoding char is present but faulty.
395 // eDebug("[convertDVBUTF8] (1..11)text encoded in ISO-8859-%d", table);
// NOTE(review): reads the byte after the selector without a visible length
// check -- confirm short inputs cannot reach this line.
399 int n = data[++i] << 8;
401 // eDebug("[convertDVBUTF8] (0x10)text encoded in ISO-8859-%d",n);
405 case 0x0C: // ETSI EN 300 468 Table A.4: Reserved for future use
406 eDebug("[convertDVBUTF8] ISO 8859-12 encoding unsupported");
414 case UNICODE_ENCODING: // Basic Multilingual Plane of ISO/IEC 10646-1 enc (UTF-16... Unicode)
415 table = UNICODE_ENCODING;
419 case KSX1001_ENCODING:
421 eDebug("[convertDVBUTF8] KSC 5601 encoding unsupported.");
423 case GB18030_ENCODING:
425 eDebug("[convertDVBUTF8] GB-2312-1980 encoding unsupported.");
429 eDebug("[convertDVBUTF8] Big5 subset of ISO/IEC 10646-1 encoding unsupported.");
431 case UTF8_ENCODING: // UTF-8 encoding of ISO/IEC 10646-1
433 table = UTF8_ENCODING;
435 case UTF16BE_ENCODING:
437 table = UTF16BE_ENCODING;
439 case UTF16LE_ENCODING:
441 table = UTF16LE_ENCODING;
445 // Attempt to decode Freesat Huffman encoded string
446 std::string decoded_string = huffmanDecoder.decode(data, len);
// A non-empty decoder result is returned as is.
447 if (!decoded_string.empty())
448 return decoded_string;
451 eDebug("[convertDVBUTF8] failed to decode bbc freesat huffman");
456 eDebug("[convertDVBUTF8] reserved %d", data[0]);
// Two-char (diacritic + base letter) mapping is used for table 0 or when
// the encodingHandler requests it for this transponder.
461 bool useTwoCharMapping = !table || (tsidonid && encodingHandler.getTransponderUseTwoCharMapping(tsidonid));
463 if (useTwoCharMapping && table == 5) { // I hope this doesn't break other transponders which really use ISO8859-5 and two-char byte mapping...
464 // eDebug("[convertDVBUTF8] Cyfra / Cyfrowy Polsat HACK... override given ISO8859-5 with ISO6937");
467 else if ( table == -1 )
468 table = defaultEncodingTable;
// The bytes after the already consumed prefix (i bytes) are copied unchanged.
473 output = std::string((char*)data + i, len - i);
476 std::string res = "";
479 unsigned long code = 0;
// A non-zero result means data[i], data[i+1] form a diacritic + letter pair.
480 if (useTwoCharMapping && i+1 < len && (code = doVideoTexSuppl(data[i], data[i+1])))
482 else if (table == UTF16BE_ENCODING || table == UNICODE_ENCODING) {
// Big-endian 16-bit code unit.
485 unsigned long w1 = ((unsigned long)(data[i])<<8) | ((unsigned long)(data[i+1]));
// Not a surrogate: w1 is a complete code unit.
486 if (w1 < 0xD800UL || w1 > 0xDFFFUL) {
// A low surrogate (0xDC00..0xDFFF) in leading position.
490 else if (w1 > 0xDBFFUL)
// NOTE(review): data[i+2] and data[i+3] only need i+3 < len; with i+4 < len a
// surrogate pair that ends exactly at the end of the buffer is not decoded.
492 else if (i+4 < len) {
493 unsigned long w2 = ((unsigned long)(data[i+2]) << 8) | ((unsigned long)(data[i+3]));
// A high surrogate must be followed by a low surrogate; otherwise the whole
// conversion yields an empty string.
494 if (w2 < 0xDC00UL || w2 > 0xDFFFUL)
495 return std::string("");
496 code = 0x10000UL + (((w1 & 0x03FFUL) << 10 ) | (w2 & 0x03FFUL));
502 else if (table == UTF16LE_ENCODING) {
// Little-endian 16-bit code unit.
505 unsigned long w1 = ((unsigned long)(data[i+1]) << 8) | ((unsigned long)(data[i]));
506 if (w1 < 0xD800UL || w1 > 0xDFFFUL) {
510 else if (w1 > 0xDBFFUL)
// NOTE(review): same bound as in the big-endian branch (i+3 < len would suffice).
512 else if (i+4 < len) {
513 unsigned long w2 = ((unsigned long)(data[i+3]) << 8) | ((unsigned long)(data[i+2]));
514 if (w2 < 0xDC00UL || w2 > 0xDFFFUL)
// NOTE(review): here the low surrogate w2 supplies the upper ten bits and the
// high surrogate w1 the lower ten -- the reverse of the big-endian branch,
// which computes (w1 << 10) | w2. This looks like swapped operands; confirm.
516 code = 0x10000UL + (((w2 & 0x03FFUL) << 10 ) | (w1 & 0x03FFUL));
// Single-byte encodings: translate one byte through the selected table.
523 code = recode(data[i++], table);
527 res += UnicodeToUTF8(code);
// Converts a UTF-8 string to a single-byte DVB string for the given table.
// string: the UTF-8 input.
// table:  code page selector (2..16 pick one of the ISO 8859 tables below;
//         an unknown value ends in eFatal).
// Returns the bytes collected in buf.
// NOTE(review): parts of the body are not part of this excerpt; only
// two-byte UTF-8 sequences are decoded by the visible lines.
535 std::string convertUTF8DVB(const std::string &string, int table)
537 unsigned long *coding_table=0;
539 int len=string.length(), t=0;
// NOTE(review): array sized by a runtime value (a compiler extension in C++).
// The output never needs more than one byte per input byte.
541 unsigned char buf[len];
543 for (int i = 0; i < len; i++)
545 unsigned char c1 = string[i];
552 unsigned char c2 = string[i];
// Two-byte UTF-8 decode: low bits of the lead byte and six bits of the trail byte.
553 c = ((c1&0x3F)<<6) + (c2&0x3F);
// NOTE(review): c1 is the first byte of the pair just decoded; if this point
// is only reached for UTF-8 lead bytes, `c1 < 0xA0` can never be true and
// `c < 0xA0` may have been intended -- confirm.
554 if (table == 0 || table == 1 || c1 < 0xA0)
562 case 2: coding_table = c88592; break;
563 case 3: coding_table = c88593; break;
564 case 4: coding_table = c88594; break;
565 case 5: coding_table = c88595; break;
566 case 6: coding_table = c88596; break;
567 case 7: coding_table = c88597; break;
568 case 8: coding_table = c88598; break;
569 case 9: coding_table = c88599; break;
570 case 10: coding_table = c885910; break;
571 case 11: coding_table = c885911; break;
572 // case 12: coding_table = c885912; break; // reserved.. for indian use
573 case 13: coding_table = c885913; break;
574 case 14: coding_table = c885914; break;
575 case 15: coding_table = c885915; break;
576 case 16: coding_table = c885916; break;
578 eFatal("[convertUTF8DVB] unknown coding table %d", table);
// Reverse lookup: linear search of the 96-entry table for the code point.
582 for (unsigned int j = 0; j < 96; j++)
584 if (coding_table[j] == c)
592 buf[t++] = (unsigned char)c;
594 return std::string((char*)buf, t);
597 std::string convertLatin1UTF8(const std::string &string)
599 unsigned int i = 0, len = string.size();
601 std::string res = "";
605 unsigned long code = (unsigned char)string[i++];
606 res += UnicodeToUTF8(code);
// Checks whether a string is structurally plausible UTF-8.
//
// Only the framing is verified: every byte with the high bit set must be a
// 2-, 3- or 4-byte lead byte followed by the right number of continuation
// bytes (10xxxxxx). The value of the decoded code point is NOT inspected, so
// surrogates, U+FFFE/U+FFFF, overlong forms and the discouraged control and
// non-character ranges listed in the XML "Char" production are all accepted.
//
// string: the bytes to examine.
// Returns 1 if the string can be UTF-8 (this includes pure ASCII and the
// empty string), 0 otherwise.
int isUTF8(const std::string &string)
{
	const unsigned int len = string.size();

	unsigned int pos = 0;
	while (pos < len)
	{
		const unsigned char lead = static_cast<unsigned char>(string[pos]);
		if (lead < 0x80) // normal ASCII
		{
			++pos;
			continue;
		}

		unsigned int trailing;
		if ((lead & 0xE0) == 0xC0) // 2-byte
			trailing = 1;
		else if ((lead & 0xF0) == 0xE0) // 3-byte
			trailing = 2;
		else if ((lead & 0xF8) == 0xF0) // 4-byte
			trailing = 3;
		else
			return 0; // no UTF leader

		if (pos + trailing >= len) // not enough bytes
			return 0;

		for (unsigned int k = 1; k <= trailing; ++k)
		{
			if ((static_cast<unsigned char>(string[pos + k]) & 0xC0) != 0x80)
				return 0;
		}
		pos += trailing + 1;
	}
	return 1; // can be UTF8 (or pure ASCII, at least no non-UTF-8 8bit characters)
}
// Shortens a UTF-8 string to at most newsize bytes without splitting a
// multi-byte character.
//
// s:       the string to shorten in place; assumed to be valid UTF-8.
// newsize: the maximum number of bytes to keep.
// Returns the resulting length of s in bytes.
//
// A string that already fits is left untouched. Otherwise the cut is placed
// at newsize and moved backwards while it would land on a continuation byte
// (10xxxxxx), so an incomplete character is dropped as a whole and nothing
// more than that is removed.
unsigned int truncateUTF8(std::string &s, unsigned int newsize)
{
	std::string::size_type len = s.size();

	if (len > newsize) {
		len = newsize;
		// s[len] is the first byte that gets cut off. If it is a continuation
		// byte the character it belongs to started earlier: back up to its
		// lead byte. The len > 0 test keeps malformed input from underflowing.
		while (len > 0 && (s[len] & 0xC0) == 0x80)
			--len;
		s.resize(len);
	}
	return static_cast<unsigned int>(len);
}
// Removes the control characters U+0080..U+009F from a UTF-8 string.
//
// In UTF-8 these code points are exactly the two-byte sequences C2 80 .. C2 9F,
// so only that byte pattern is matched. Testing the lead byte for 0xC2
// (instead of loosely decoding any byte above 0x80) keeps continuation bytes
// of longer characters, such as the "82 8C" inside E3 82 8C, from being
// mistaken for a control character.
//
// s: the UTF-8 input.
// Returns a copy of s without those characters.
std::string removeDVBChars(const std::string &s)
{
	std::string res;
	const std::string::size_type len = s.length();
	res.reserve(len);

	for (std::string::size_type i = 0; i < len; ++i)
	{
		const unsigned char c1 = s[i];

		if (c1 == 0xC2 && i + 1 < len)
		{
			const unsigned char c2 = s[i + 1];
			if (c2 >= 0x80 && c2 <= 0x9F)
			{
				++i; /* skip 2nd utf8 char */
				continue;
			}
		}

		res += s[i];
	}

	return res;
}
// Converts a string to upper case in place, byte by byte, using toupper().
//
// s: the string to modify.
//
// Every byte is passed to toupper() as unsigned char: handing it a negative
// plain char (any byte >= 0x80 where char is signed) is undefined behaviour.
// Which bytes change depends on the current C locale.
void makeUpper(std::string &s)
{
	for (std::string::iterator it = s.begin(); it != s.end(); ++it)
		*it = static_cast<char>(toupper(static_cast<unsigned char>(*it)));
}
698 std::string replace_all(const std::string &in, const std::string &entity, const std::string &symbol, int table)
700 std::string out = in;
701 std::string::size_type loc = 0;
703 table = defaultEncodingTable;
706 while (loc < out.length()) {
707 if ( (entity.length() + loc) <= out.length() && !out.compare(loc, entity.length(), entity)) {
708 out.replace(loc, entity.length(), symbol);
709 loc += symbol.length();
712 if (out.at(loc) < 0x80)
714 else if ((out.at(loc) & 0xE0) == 0xC0)
716 else if ((out.at(loc) & 0xF0) == 0xE0)
718 else if ((out.at(loc) & 0xF8) == 0xF0)
723 case UTF16BE_ENCODING:
724 case UTF16LE_ENCODING:
725 while (loc<out.length()) {
726 if ((entity.length() + loc) <= out.length() && !out.compare(loc, entity.length(), entity)) {
727 out.replace(loc, entity.length(), symbol);
728 loc += symbol.length();
736 while ((loc = out.find(entity, loc)) != std::string::npos)
738 out.replace(loc, entity.length(), symbol);
739 loc += symbol.length();