Support turbo2.
[vuplus_dvbapp] / lib / base / estring.cpp
1 #include <algorithm>
2 #include <cctype>
3 #include <climits>
4 #include <string>
5 #include <lib/base/eerror.h>
6 #include <lib/base/encoding.h>
7 #include <lib/base/estring.h>
8 #include "freesatv2.h"
9
// Builds the "short" variant of a name: the concatenation of every text run
// enclosed between the UTF-8 encoded marker U+0086 ("\xc2\x86") and the next
// UTF-8 encoded marker U+0087 ("\xc2\x87").
//
// @param str  UTF-8 string that may contain marker pairs.
// @return     The concatenated marked runs, or str itself when no run
//             contributed any text.
std::string buildShortName( const std::string &str )
{
        static const char openMark[] = "\xc2\x86";
        static const char closeMark[] = "\xc2\x87";
        const std::string::size_type markLen = 2; // both markers are two bytes long

        std::string shortName;
        // The next search resumes right after the opening marker (not after the
        // closing one), so an opening marker inside a run is picked up again.
        for (std::string::size_type start = str.find(openMark, 0);
             start != std::string::npos;
             start = str.find(openMark, start + markLen))
        {
                const std::string::size_type stop = str.find(closeMark, start);
                if (stop == std::string::npos)
                        continue; // unterminated run contributes nothing
                const std::string::size_type textBegin = start + markLen;
                shortName.append(str, textBegin, stop - textBegin);
        }

        if (shortName.empty())
                return str;
        return shortName;
}
24
// Formats an integer as text.
//
// @param val  Value to format.
// @param sys  Number base: 10 for signed decimal, 16 for upper-case
//             hexadecimal (the two's-complement bit pattern for negatives).
// @return     The formatted number, or an empty string when sys is neither
//             10 nor 16.
std::string getNum(int val, int sys)
{
        // 11 characters ("-2147483648") plus the terminating NUL. Zero-initialised
        // so an unsupported base yields "" instead of reading indeterminate bytes.
        char buf[12] = "";

        if (sys == 10)
                snprintf(buf, sizeof(buf), "%i", val);
        else if (sys == 16)
                snprintf(buf, sizeof(buf), "%X", static_cast<unsigned int>(val)); // %X expects unsigned

        return std::string(buf);
}
40
41                 // ISO 8859-x to Unicode (UCS-2) mapping tables, taken from www.unicode.org/Public/MAPPINGS/ISO8859/
42
// ISO 8859-2 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 2.
43 static unsigned long c88592[96]={
44 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7, 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
45 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7, 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
46 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
47 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7, 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
48 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
49 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7, 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9};
50
// ISO 8859-3 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 3.
// NOTE(review): 0x0000 entries presumably mark bytes with no assigned character - confirm.
51 static unsigned long c88593[96]={
52 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0x0000, 0x0124, 0x00A7, 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0x0000, 0x017B,
53 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7, 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0x0000, 0x017C,
54 0x00C0, 0x00C1, 0x00C2, 0x0000, 0x00C4, 0x010A, 0x0108, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
55 0x0000, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7, 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
56 0x00E0, 0x00E1, 0x00E2, 0x0000, 0x00E4, 0x010B, 0x0109, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
57 0x0000, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7, 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9};
58
// ISO 8859-4 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 4.
59 static unsigned long c88594[96]={
60 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7, 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
61 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7, 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
62 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
63 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
64 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
65 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9};
66
// ISO 8859-5 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 5.
67 static unsigned long c88595[96]={
68 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
69 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
70 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
71 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
72 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
73 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F};
74
// ISO 8859-6 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 6.
// NOTE(review): 0x0000 entries presumably mark bytes with no assigned character - confirm.
75 static unsigned long c88596[96]={
76 0x00A0, 0x0000, 0x0000, 0x0000, 0x00A4, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x060C, 0x00AD, 0x0000, 0x0000,
77 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x061B, 0x0000, 0x0000, 0x0000, 0x061F,
78 0x0000, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
79 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, 0x0638, 0x0639, 0x063A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
80 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
81 0x0650, 0x0651, 0x0652, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000};
82
// ISO 8859-7 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 7.
// NOTE(review): 0x0000 entries presumably mark bytes with no assigned character - confirm.
83 static unsigned long c88597[96]={
84 0x00A0, 0x2018, 0x2019, 0x00A3, 0x20AC, 0x20AF, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x037A, 0x00AB, 0x00AC, 0x00AD, 0x0000, 0x2015,
85 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7, 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
86 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
87 0x03A0, 0x03A1, 0x0000, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
88 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
89 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0x0000};
90
// ISO 8859-8 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 8.
// NOTE(review): 0x0000 entries presumably mark bytes with no assigned character - confirm.
91 static unsigned long c88598[96]={
92 0x00A0, 0x0000, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
93 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x0000,
94 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
95 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2017,
96 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
97 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, 0x0000, 0x0000, 0x200E, 0x200F, 0x0000};
98
// ISO 8859-9 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 9.
99 static unsigned long c88599[96]={
100 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7, 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
101 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7, 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
102 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
103 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
104 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
105 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF};
106
// ISO 8859-10 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 10.
107 static unsigned long c885910[96]={
108 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7, 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
109 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7, 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
110 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E, 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
111 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168, 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
112 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F, 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
113 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169, 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138};
114
// ISO 8859-11 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 11.
// NOTE(review): 0x0000 entries presumably mark bytes with no assigned character - confirm.
115 static unsigned long c885911[96]={
116 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07, 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
117 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17, 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
118 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27, 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
119 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37, 0x0E38, 0x0E39, 0x0E3A, 0x0000, 0x0000, 0x0000, 0x0000, 0x0E3F,
120 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47, 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
121 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57, 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0x0000, 0x0000, 0x0000, 0x0000};
122
// ISO 8859-13 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 13.
123 static unsigned long c885913[96]={
124 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7, 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
125 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7, 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
126 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112, 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
127 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7, 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
128 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113, 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
129 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7, 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019};
130
// ISO 8859-14 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 14.
131 static unsigned long c885914[96]={
132 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7, 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
133 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56, 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
134 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
135 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
136 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
137 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF};
138
// ISO 8859-15 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 15.
139 static unsigned long c885915[96]={
140 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
141 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7, 0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
142 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
143 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7, 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
144 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
145 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7, 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF};
146
// ISO 8859-16 -> Unicode code points for bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 16.
147 static unsigned long c885916[96]={
148 0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7, 0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
149 0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7, 0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
150 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
151 0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A, 0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
152 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
153 0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B, 0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF};
154
// File-scope decoder instance; convertDVBUTF8() calls its decode() for strings
// whose first byte is 0x1F (Freesat Huffman-encoded text).
155 static freesatHuffmanDecoder huffmanDecoder;
156
// ISO 6937 -> Unicode code points for single bytes 0xA0..0xFF.
// recode() indexes this table with (byte - 0xA0) when the code page is 0.
// NOTE(review): 0x0000 entries presumably mark bytes with no mapping, and the
// 0xE0xx values in the 0xC0 row look like private-use placeholders for the
// diacritic prefix bytes that doVideoTexSuppl() combines - confirm.
157 static unsigned long iso6937[96]={
158 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0000, 0x00A7, 0x00A4, 0x2018, 0x201C, 0x00AB, 0x2190, 0x2191, 0x2192, 0x2193,
159 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00D7, 0x00B5, 0x00B6, 0x00B7, 0x00F7, 0x2019, 0x201D, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
160 0x0000, 0xE002, 0xE003, 0xE004, 0xE005, 0xE006, 0xE007, 0xE008, 0xE009, 0xE00C, 0xE00A, 0xE00B, 0x0000, 0xE00D, 0xE00E, 0xE00F,
161 0x2015, 0x00B9, 0x00AE, 0x00A9, 0x2122, 0x266A, 0x00AC, 0x00A6, 0x0000, 0x0000, 0x0000, 0x0000, 0x215B, 0x215C, 0x215D, 0x215E,
162 0x2126, 0x00C6, 0x0110, 0x00AA, 0x0126, 0x0000, 0x0132, 0x013F, 0x0141, 0x00D8, 0x0152, 0x00BA, 0x00DE, 0x0166, 0x014A, 0x0149,
163 0x0138, 0x00E6, 0x0111, 0x00F0, 0x0127, 0x0131, 0x0133, 0x0140, 0x0142, 0x00F8, 0x0153, 0x00DF, 0x00FE, 0x0167, 0x014B, 0x00AD};
164
// Two-character mapping (aka ISO 6937), used by many Polish services and
// UPC Direct/HBO services.
// Table taken from http://mitglied.lycos.de/buran/charsets/videotex-suppl.html
//
// Combines a diacritic prefix byte with the base character that follows it.
//
// @param c1  Diacritic prefix byte (0xC1..0xCF).
// @param c2  Base character: an ASCII letter, or 0x20 for the free-standing
//            diacritic where one is defined.
// @return    Unicode code point of the combined character, or 0 when the pair
//            is not in the table (the caller then handles c1 on its own).
static inline unsigned int doVideoTexSuppl(int c1, int c2)
{
        switch (c1)
        {
                case 0xC1: // grave
                        switch (c2)
                        {
                                case 0x61: return 224;                          case 0x41: return 192;
                                case 0x65: return 232;                          case 0x45: return 200;
                                case 0x69: return 236;                          case 0x49: return 204;
                                case 0x6f: return 242;                          case 0x4f: return 210;
                                case 0x75: return 249;                          case 0x55: return 217;
                                default: return 0;
                        }
                case 0xC2: // acute
                        switch (c2)
                        {
                                case 0x20: return 180;
                                case 0x61: return 225;                          case 0x41: return 193;
                                case 0x65: return 233;                          case 0x45: return 201;
                                case 0x69: return 237;                          case 0x49: return 205;
                                case 0x6f: return 243;                          case 0x4f: return 211;
                                case 0x75: return 250;                          case 0x55: return 218;
                                case 0x79: return 253;                          case 0x59: return 221;
                                case 0x63: return 263;                          case 0x43: return 262;
                                case 0x6c: return 314;                          case 0x4c: return 313;
                                case 0x6e: return 324;                          case 0x4e: return 323;
                                case 0x72: return 341;                          case 0x52: return 340;
                                case 0x73: return 347;                          case 0x53: return 346;
                                case 0x7a: return 378;                          case 0x5a: return 377;
                                default: return 0;
                        }
                case 0xC3: // circumflex
                        switch (c2)
                        {
                                case 0x61: return 226;                          case 0x41: return 194;
                                case 0x65: return 234;                          case 0x45: return 202;
                                case 0x69: return 238;                          case 0x49: return 206;
                                case 0x6f: return 244;                          case 0x4f: return 212;
                                case 0x75: return 251;                          case 0x55: return 219;
                                case 0x79: return 375;                          case 0x59: return 374;
                                case 0x63: return 265;                          case 0x43: return 264;
                                case 0x67: return 285;                          case 0x47: return 284;
                                case 0x68: return 293;                          case 0x48: return 292;
                                case 0x6a: return 309;                          case 0x4a: return 308;
                                case 0x73: return 349;                          case 0x53: return 348;
                                case 0x77: return 373;                          case 0x57: return 372;
                                default: return 0;
                        }
                case 0xC4: // tilde
                        switch (c2)
                        {
                                case 0x61: return 227;                          case 0x41: return 195;
                                case 0x6e: return 241;                          case 0x4e: return 209;
                                case 0x69: return 297;                          case 0x49: return 296;
                                case 0x6f: return 245;                          case 0x4f: return 213;
                                case 0x75: return 361;                          case 0x55: return 360;
                                default: return 0;
                        }
                case 0xC5: // macron
                        switch (c2)
                        {
                                case 0x20: return 175;
                                case 0x41: return 256;                          case 0x61: return 257;
                                case 0x45: return 274;                          case 0x65: return 275;
                                case 0x49: return 298;                          case 0x69: return 299;
                                case 0x4f: return 332;                          case 0x6f: return 333;
                                case 0x55: return 362;                          case 0x75: return 363;
                                // Without this default an unmatched c2 fell through into the
                                // breve table below and produced the wrong character.
                                default: return 0;
                        }
                case 0xC6: // breve
                        switch (c2)
                        {
                                case 0x20: return 728;
                                case 0x61: return 259;                          case 0x41: return 258;
                                case 0x67: return 287;                          case 0x47: return 286;
                                case 0x75: return 365;                          case 0x55: return 364;
                                default: return 0;
                        }
                case 0xC7: // dot above
                        switch (c2)
                        {
                                case 0x20: return 729;
                                case 0x63: return 267;                          case 0x43: return 266;
                                case 0x65: return 279;                          case 0x45: return 278;
                                case 0x67: return 289;                          case 0x47: return 288;
                                case 0x5a: return 379;                          case 0x49: return 304;
                                case 0x7a: return 380;
                                default: return 0;
                        }
                case 0xC8: // diaeresis
                        switch (c2)
                        {
                                case 0x20: return 168;
                                case 0x61: return 228;                          case 0x41: return 196;
                                case 0x65: return 235;                          case 0x45: return 203;
                                case 0x69: return 239;                          case 0x49: return 207;
                                case 0x6f: return 246;                          case 0x4f: return 214;
                                case 0x75: return 252;                          case 0x55: return 220;
                                case 0x79: return 255;                          case 0x59: return 376;
                                default: return 0;
                        }
                case 0xCA: // ring above
                        switch (c2)
                        {
                                case 0x20: return 730;
                                case 0x61: return 229;                          case 0x41: return 197;
                                case 0x75: return 367;                          case 0x55: return 366;
                                default: return 0;
                        }
                case 0xCB: // cedilla
                        switch (c2)
                        {
                                case 0x63: return 231;                          case 0x43: return 199;
                                case 0x67: return 291;                          case 0x47: return 290;
                                case 0x6b: return 311;                          case 0x4b: return 310;
                                case 0x6c: return 316;                          case 0x4c: return 315;
                                case 0x6e: return 326;                          case 0x4e: return 325;
                                case 0x72: return 343;                          case 0x52: return 342;
                                case 0x73: return 351;                          case 0x53: return 350;
                                case 0x74: return 355;                          case 0x54: return 354;
                                default: return 0;
                        }
                case 0xCD: // double acute accent
                        switch (c2)
                        {
                                case 0x20: return 733;
                                case 0x6f: return 337;                          case 0x4f: return 336;
                                case 0x75: return 369;                          case 0x55: return 368;
                                default: return 0;
                        }
                case 0xCE: // ogonek
                        switch (c2)
                        {
                                case 0x20: return 731;
                                case 0x61: return 261;                          case 0x41: return 260;
                                case 0x65: return 281;                          case 0x45: return 280;
                                case 0x69: return 303;                          case 0x49: return 302;
                                case 0x75: return 371;                          case 0x55: return 370;
                                default: return 0;
                        }
                case 0xCF: // caron
                        switch (c2)
                        {
                                case 0x20: return 711;
                                case 0x63: return 269;                          case 0x43: return 268;
                                case 0x64: return 271;                          case 0x44: return 270;
                                case 0x65: return 283;                          case 0x45: return 282;
                                case 0x6c: return 318;                          case 0x4c: return 317;
                                case 0x6e: return 328;                          case 0x4e: return 327;
                                case 0x72: return 345;                          case 0x52: return 344;
                                case 0x73: return 353;                          case 0x53: return 352;
                                case 0x74: return 357;                          case 0x54: return 356;
                                case 0x7a: return 382;                          case 0x5a: return 381;
                                default: return 0;
                        }
        }
        return 0; // c1 is not a known diacritic prefix
}
324
325 static inline unsigned int recode(unsigned char d, int cp)
326 {
327         if (d < 0xA0)
328                 return d;
329         switch (cp)
330         {
331         case 0:  return iso6937[d-0xA0]; // ISO6937
332         case 1:  return d;               // 8859-1 -> unicode mapping
333         case 2:  return c88592[d-0xA0];  // 8859-2 -> unicode mapping
334         case 3:  return c88593[d-0xA0];  // 8859-3 -> unicode mapping
335         case 4:  return c88594[d-0xA0];  // 8859-2 -> unicode mapping
336         case 5:  return c88595[d-0xA0];  // 8859-5 -> unicode mapping
337         case 6:  return c88596[d-0xA0];  // 8859-6 -> unicode mapping
338         case 7:  return c88597[d-0xA0];  // 8859-7 -> unicode mapping
339         case 8:  return c88598[d-0xA0];  // 8859-8 -> unicode mapping
340         case 9:  return c88599[d-0xA0];  // 8859-9 -> unicode mapping
341         case 10: return c885910[d-0xA0]; // 8859-10 -> unicode mapping
342         case 11: return c885911[d-0xA0]; // 8859-11 -> unicode mapping
343 //      case 12: return c885912[d-0xA0]; // 8859-12 -> unicode mapping  // reserved for indian use..
344         case 13: return c885913[d-0xA0]; // 8859-13 -> unicode mapping
345         case 14: return c885914[d-0xA0]; // 8859-14 -> unicode mapping
346         case 15: return c885915[d-0xA0]; // 8859-15 -> unicode mapping
347         case 16: return c885916[d-0xA0]; // 8859-16 -> unicode mapping
348         default: return d;
349         }
350 }
351
352 std::string UnicodeToUTF8(long c)
353 {
354         if ( c < 0x80 ) {
355                 char utf[2] = {static_cast<char>(c), 0};
356                 return std::string(utf, 1);
357         }
358         else if ( c < 0x800) {
359                 char utf[3] = { static_cast<char>(0xc0 | (c >> 6)), static_cast<char>(0x80 | (c & 0x3f)), 0};
360                 return std::string(utf, 2);
361         }
362         else if ( c < 0x10000) {
363                 char utf[4] = { static_cast<char>(0xe0 | (c >> 12)), static_cast<char>(0x80 | ((c >> 6) & 0x3f)),
364                                 static_cast<char>(0x80 | (c & 0x3f)), 0};
365                 return std::string(utf, 3);
366         }
367         else if ( c < 0x200000) {
368                 char utf[5] = { static_cast<char>(0xf0 | (c >> 18)), static_cast<char>(0x80 | ((c >> 12) & 0x3f)),
369                                 static_cast<char>(0x80 | ((c >> 6) & 0x3f)), static_cast<char>(0x80 | (c & 0x3f)), 0};
370                 return std::string(utf, 4);
371         }
372         eDebug("[UnicodeToUTF8] invalid unicode character: code=0x%08lx", c); // not a valid unicode
373         return "";
374 }
375
376 std::string convertDVBUTF8(const unsigned char *data, int len, int table, int tsidonid)
377 {
378         if (!len)
379                 return "";
380
381         int i = 0;
382         std::string output = "";
383
384         if (tsidonid)
385                 encodingHandler.getTransponderDefaultMapping(tsidonid, table);
386
387         // first byte in strings may override general encoding table.
388         switch(data[0])
389         {
390                 case ISO8859_5 ... ISO8859_15:
391                         // For Thai providers, encoding char is present but faulty.
392                         if (table != 11)
393                                 table = data[i] + 4;
394                         ++i;
395 //                      eDebug("[convertDVBUTF8] (1..11)text encoded in ISO-8859-%d", table);
396                         break;
397                 case ISO8859_xx:
398                 {
399                         int n = data[++i] << 8;
400                         n |= (data[++i]);
401 //                      eDebug("[convertDVBUTF8] (0x10)text encoded in ISO-8859-%d",n);
402                         ++i;
403                         switch(n)
404                         {
405                                 case 0x0C: // ETSI EN 300 468 Table A.4: Reserved for future use
406                                         eDebug("[convertDVBUTF8] ISO 8859-12 encoding unsupported");
407                                         break;
408                                 default:
409                                         table = n;
410                                         break;
411                         }
412                         break;
413                 }
414                 case UNICODE_ENCODING: //  Basic Multilingual Plane of ISO/IEC 10646-1 enc  (UTF-16... Unicode)
415                         table = UNICODE_ENCODING;
416                         tsidonid = 0;
417                         ++i;
418                         break;
419                 case KSX1001_ENCODING:
420                         ++i;
421                         eDebug("[convertDVBUTF8] KSC 5601 encoding unsupported.");
422                         break;
423                 case GB18030_ENCODING:
424                         ++i;
425                         eDebug("[convertDVBUTF8] GB-2312-1980 encoding unsupported.");
426                         break;
427                 case BIG5_ENCODING:
428                         ++i;
429                         eDebug("[convertDVBUTF8] Big5 subset of ISO/IEC 10646-1 encoding unsupported.");
430                         break;
431                 case UTF8_ENCODING: // UTF-8 encoding of ISO/IEC 10646-1
432                         ++i;
433                         table = UTF8_ENCODING;
434                         break;
435                 case UTF16BE_ENCODING:
436                         ++i;
437                         table = UTF16BE_ENCODING;
438                         break;
439                 case UTF16LE_ENCODING:
440                         ++i;
441                         table = UTF16LE_ENCODING;
442                         break;
443                 case 0x1F:
444                         {
445                                 // Attempt to decode Freesat Huffman encoded string
446                                 std::string decoded_string = huffmanDecoder.decode(data, len);
447                                 if (!decoded_string.empty())
448                                         return decoded_string;
449                         }
450                         ++i;
451                         eDebug("[convertDVBUTF8] failed to decode bbc freesat huffman");
452                         break;
453                 case 0x0:
454                 case 0xC ... 0xF:
455                 case 0x18 ... 0x1E:
456                         eDebug("[convertDVBUTF8] reserved %d", data[0]);
457                         ++i;
458                         break;
459         }
460
461         bool useTwoCharMapping = !table || (tsidonid && encodingHandler.getTransponderUseTwoCharMapping(tsidonid));
462
463         if (useTwoCharMapping && table == 5) { // i hope this dont break other transponders which realy use ISO8859-5 and two char byte mapping...
464 //              eDebug("[convertDVBUTF8] Cyfra / Cyfrowy Polsat HACK... override given ISO8859-5 with ISO6937");
465                 table = 0;
466         }
467         else if ( table == -1 )
468                 table = defaultEncodingTable;
469
470         switch(table)
471         {
472                 case UTF8_ENCODING:
473                         output = std::string((char*)data + i, len - i);
474                         break;
475                 default:
476                         std::string res = "";
477                         while (i < len)
478                         {
479                                 unsigned long code = 0;
480                                 if (useTwoCharMapping && i+1 < len && (code = doVideoTexSuppl(data[i], data[i+1])))
481                                         i += 2;
482                                 else if (table == UTF16BE_ENCODING || table == UNICODE_ENCODING) {
483                                         if (i+2 > len)
484                                                 break;
485                                         unsigned long w1 = ((unsigned long)(data[i])<<8) | ((unsigned long)(data[i+1]));
486                                         if (w1 < 0xD800UL || w1 > 0xDFFFUL) {
487                                                 code = w1;
488                                                 i += 2;
489                                         }
490                                         else if (w1 > 0xDBFFUL)
491                                                 break;
492                                         else if (i+4 < len) {
493                                                 unsigned long w2 = ((unsigned long)(data[i+2]) << 8) | ((unsigned long)(data[i+3]));
494                                                 if (w2 < 0xDC00UL || w2 > 0xDFFFUL)
495                                                         return std::string("");
496                                                 code = 0x10000UL + (((w1 & 0x03FFUL) << 10 ) | (w2 & 0x03FFUL));
497                                                 i += 4;
498                                         }
499                                         else
500                                                 break;
501                                 }
502                                 else if (table == UTF16LE_ENCODING) {
503                                         if ((i+2) > len)
504                                                 break;
505                                         unsigned long w1 = ((unsigned long)(data[i+1]) << 8) | ((unsigned long)(data[i]));
506                                         if (w1 < 0xD800UL || w1 > 0xDFFFUL) {
507                                                 code = w1;
508                                                 i += 2;
509                                         }
510                                         else if (w1 > 0xDBFFUL)
511                                                 break;
512                                         else if (i+4 < len) {
513                                                 unsigned long w2 = ((unsigned long)(data[i+3]) << 8) | ((unsigned long)(data[i+2]));
514                                                 if (w2 < 0xDC00UL || w2 > 0xDFFFUL)
515                                                         break;
516                                                 code = 0x10000UL + (((w2 & 0x03FFUL) << 10 ) | (w1 & 0x03FFUL));
517                                                 i += 4;
518                                         }
519                                         else
520                                                 break;
521                                 }
522                                 if (!code)
523                                         code = recode(data[i++], table);
524
525                                 if (!code)
526                                         continue;
527                                 res += UnicodeToUTF8(code);
528                         }
529                         output = res;
530                         break;
531         }
532         return output;
533 }
534
535 std::string convertUTF8DVB(const std::string &string, int table)
536 {
537         unsigned long *coding_table=0;
538
539         int len=string.length(), t=0;
540
541         unsigned char buf[len];
542
543         for (int i = 0; i < len; i++)
544         {
545                 unsigned char c1 = string[i];
546                 unsigned int c;
547                 if (c1 < 0x80)
548                         c = c1;
549                 else
550                 {
551                         ++i;
552                         unsigned char c2 = string[i];
553                         c = ((c1&0x3F)<<6) + (c2&0x3F);
554                         if (table == 0 || table == 1 || c1 < 0xA0)
555                                 ;
556                         else
557                         {
558                                 if (!coding_table)
559                                 {
560                                         switch (table)
561                                         {
562                                                 case 2: coding_table = c88592; break;
563                                                 case 3: coding_table = c88593; break;
564                                                 case 4: coding_table = c88594; break;
565                                                 case 5: coding_table = c88595; break;
566                                                 case 6: coding_table = c88596; break;
567                                                 case 7: coding_table = c88597; break;
568                                                 case 8: coding_table = c88598; break;
569                                                 case 9: coding_table = c88599; break;
570                                                 case 10: coding_table = c885910; break;
571                                                 case 11: coding_table = c885911; break;
572 //                                              case 12: coding_table = c885912; break; // reserved.. for indian use
573                                                 case 13: coding_table = c885913; break;
574                                                 case 14: coding_table = c885914; break;
575                                                 case 15: coding_table = c885915; break;
576                                                 case 16: coding_table = c885916; break;
577                                                 default:
578                                                         eFatal("[convertUTF8DVB] unknown coding table %d", table);
579                                                         break;
580                                         }
581                                 }
582                                 for (unsigned int j = 0; j < 96; j++)
583                                 {
584                                         if (coding_table[j] == c)
585                                         {
586                                                 c = 0xA0 + j;
587                                                 break;
588                                         }
589                                 }
590                         }
591                 }
592                 buf[t++] = (unsigned char)c;
593         }
594         return std::string((char*)buf, t);
595 }
596
597 std::string convertLatin1UTF8(const std::string &string)
598 {
599         unsigned int i = 0, len = string.size();
600
601         std::string res = "";
602
603         while (i < len)
604         {
605                 unsigned long code = (unsigned char)string[i++];
606                 res += UnicodeToUTF8(code);
607         }
608         return res;
609 }
610
/*
 * Structural UTF-8 check.
 *
 * Returns 1 when every byte with the high bit set is either a 2-, 3- or
 * 4-byte lead byte followed by the matching number of 10xxxxxx continuation
 * bytes, and 0 otherwise. Pure ASCII (and the empty string) yields 1.
 *
 * Only the byte structure is examined; the decoded values are not. For
 * reference, the Unicode characters usually considered valid are
 *   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
 * (no surrogates, no FFFE/FFFF), with control characters and the
 * permanently undefined code points ([#x7F-#x84], [#x86-#x9F],
 * [#xFDD0-#xFDEF], [#xnFFFE-#xnFFFF] for each plane n) being discouraged.
 */
int isUTF8(const std::string &string)
{
	const unsigned int total = string.size();
	unsigned int pos = 0;

	while (pos < total)
	{
		const unsigned char lead = static_cast<unsigned char>(string[pos]);

		if (lead < 0x80) // plain ASCII byte
		{
			++pos;
			continue;
		}

		// Number of continuation bytes announced by the lead byte.
		unsigned int trailing;
		if ((lead & 0xE0) == 0xC0)
			trailing = 1;
		else if ((lead & 0xF0) == 0xE0)
			trailing = 2;
		else if ((lead & 0xF8) == 0xF0)
			trailing = 3;
		else
			return 0; // not a valid lead byte

		if (pos + trailing >= total) // sequence would run past the end
			return 0;

		for (unsigned int k = 1; k <= trailing; ++k)
		{
			const unsigned char follow = static_cast<unsigned char>(string[pos + k]);
			if ((follow & 0xC0) != 0x80)
				return 0;
		}
		pos += trailing + 1;
	}
	return 1; // may be UTF-8 (or pure ASCII): no malformed 8-bit sequences found
}
649
/*
 * Shorten s in place to at most newsize bytes without cutting through a
 * multi-byte UTF-8 character, and return the resulting length in bytes.
 *
 * The string is assumed to be UTF-8. When the cut would fall inside a
 * character, that whole character is dropped, so the result may be shorter
 * than newsize. Strings that already fit are left untouched.
 */
unsigned int truncateUTF8(std::string &s, unsigned int newsize)
{
	unsigned int len = s.size();

	while (len > newsize) {
		// Drop one byte; s[len] is now the first byte that would be cut off.
		--len;
		// If that byte is a continuation byte (10xxxxxx) the cut is inside a
		// character: keep backing up until the cut sits on a lead/ASCII byte.
		// The len > 0 test also stops cleanly on malformed input.
		while (len > 0 && (static_cast<unsigned char>(s[len]) & 0xC0) == 0x80)
			--len;
	}
	s.resize(len);
	return len;
}
664
/*
 * Return a copy of s with every character U+0080..U+009F removed, i.e. each
 * two-byte UTF-8 sequence 0xC2 0x80..0x9F. All other bytes are copied
 * unchanged.
 *
 * The lead byte is matched exactly. Masking it instead would also hit the
 * continuation byte 0x82 inside longer sequences and corrupt valid
 * characters such as U+3080..U+309F (0xE3 0x82 0x80..0x9F).
 */
std::string removeDVBChars(const std::string &s)
{
	std::string res;
	const std::string::size_type len = s.length();
	res.reserve(len);

	for (std::string::size_type i = 0; i < len; ++i)
	{
		const unsigned char c1 = s[i];

		if (c1 == 0xC2 && i + 1 < len)
		{
			const unsigned char c2 = s[i + 1];
			if (c2 >= 0x80 && c2 <= 0x9F)
			{
				++i; /* skip the second byte of the sequence as well */
				continue;
			}
		}
		res += s[i];
	}

	return res;
}
692
/*
 * Upper-case s in place, byte by byte, using the C library's toupper().
 *
 * Each byte is converted to unsigned char first: toupper() is only defined
 * for values representable as unsigned char (or EOF), so passing a negative
 * plain char for bytes >= 0x80 would be undefined behaviour.
 */
void makeUpper(std::string &s)
{
	for (std::string::iterator it = s.begin(); it != s.end(); ++it)
		*it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
}
697
698 std::string replace_all(const std::string &in, const std::string &entity, const std::string &symbol, int table)
699 {
700         std::string out = in;
701         std::string::size_type loc = 0;
702         if( table == -1 )
703                 table = defaultEncodingTable;
704         switch(table){
705         case UTF8_ENCODING:
706                 while (loc < out.length()) {
707                         if ( (entity.length() + loc) <= out.length() && !out.compare(loc, entity.length(), entity)) {
708                                 out.replace(loc, entity.length(), symbol);
709                                 loc += symbol.length();
710                                 continue;
711                         }
712                         if (out.at(loc) < 0x80)
713                                 ++loc;
714                         else if ((out.at(loc) & 0xE0) == 0xC0)
715                                 loc += 2;
716                         else if ((out.at(loc) & 0xF0) == 0xE0)
717                                 loc += 3;
718                         else if ((out.at(loc) & 0xF8) == 0xF0)
719                                 loc += 4;
720                 }
721                 break;
722
723         case UTF16BE_ENCODING:
724         case UTF16LE_ENCODING:
725                 while (loc<out.length()) {
726                         if ((entity.length() + loc) <= out.length() && !out.compare(loc, entity.length(), entity)) {
727                                 out.replace(loc, entity.length(), symbol);
728                                 loc += symbol.length();
729                                 continue;
730                         }
731                         loc += 2;
732                 }
733                 break;
734
735         default:
736                 while ((loc = out.find(entity, loc)) != std::string::npos)
737                 {
738                         out.replace(loc, entity.length(), symbol);
739                         loc += symbol.length();
740                 }
741                 break;
742         }
743         return out;
744 }