Merge pull request #4539 from Matricom/amcodec
[vuplus_xbmc] / xbmc / utils / test / TestCharsetConverter.cpp
1 /*
2  *      Copyright (C) 2005-2013 Team XBMC
3  *      http://xbmc.org
4  *
5  *  This Program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2, or (at your option)
8  *  any later version.
9  *
10  *  This Program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with XBMC; see the file COPYING.  If not, see
17  *  <http://www.gnu.org/licenses/>.
18  *
19  */
20
21 #include "settings/Settings.h"
22 #include "utils/CharsetConverter.h"
23 #include "utils/StdString.h"
24 #include "utils/Utf8Utils.h"
25 #include "system.h"
26
27 #include "gtest/gtest.h"
28
/* UTF-16 code units spelling "test_utf16LEtow" in fullwidth forms
 * (U+FF00 block), zero-terminated. Stored as host-order uint16_t values. */
static const uint16_t refutf16LE1[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, // test_
  0xff55, 0xff54, 0xff46, 0xff11, 0xff16, // utf16
  0xff2c, 0xff25,                         // LE
  0xff54, 0xff4f, 0xff57,                 // tow
  0x0
};
33
/* UTF-16 code units spelling "test_utf8To:_charset_UTF-16LE,_CStdString16"
 * in fullwidth forms, zero-terminated. */
static const uint16_t refutf16LE2[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f,                 // test_
  0xff55, 0xff54, 0xff46, 0xff18, 0xff34, 0xff4f, 0xff1a,
  0xff3f,                                                 // utf8To:_
  0xff43, 0xff48, 0xff41, 0xff52, 0xff53, 0xff45, 0xff54,
  0xff3f,                                                 // charset_
  0xff35, 0xff34, 0xff26, 0xff0d, 0xff11, 0xff16, 0xff2c,
  0xff25, 0xff0c, 0xff3f,                                 // UTF-16LE,_
  0xff23, 0xff33, 0xff54, 0xff44, 0xff33, 0xff54, 0xff52,
  0xff49, 0xff4e, 0xff47, 0xff11, 0xff16,                 // CStdString16
  0x0
};
45
/* Raw little-endian UTF-16 bytes (low byte first, high byte 0xff) spelling
 * "test_stringCharsetToUtf8" in fullwidth forms. No byte is zero, so the
 * buffer can be handled as an ordinary NUL-terminated char string. */
static const char refutf16LE3[] =
  "T\377E\377S\377T\377?\377"            // test_
  "S\377T\377R\377I\377N\377G\377"       // string
  "#\377H\377A\377R\377S\377E\377T\377"  // Charset
  "\064\377O\377"                        // To
  "\065\377T\377F\377\030\377";          // Utf8
50
/* UTF-16 code units spelling "test_utf16LEtoUTF8" in fullwidth forms,
 * zero-terminated. */
static const uint16_t refutf16LE4[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, // test_
  0xff55, 0xff54, 0xff46, 0xff11, 0xff16, // utf16
  0xff2c, 0xff25,                         // LE
  0xff54, 0xff4f,                         // to
  0xff35, 0xff34, 0xff26, 0xff18,         // UTF8
  0x0
};
56
/* UTF-32 code points spelling "test_utf8To:_charset_UTF-32LE,_CStdString32_"
 * in fullwidth forms, zero-terminated. Outside TARGET_DARWIN builds the text
 * is followed by U+1F42D and U+1F42E (two code points beyond the BMP). */
static const uint32_t refutf32LE1[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f,                 // test_
  0xff55, 0xff54, 0xff46, 0xff18, 0xff34, 0xff4f, 0xff1a,
  0xff3f,                                                 // utf8To:_
  0xff43, 0xff48, 0xff41, 0xff52, 0xff53, 0xff45, 0xff54,
  0xff3f,                                                 // charset_
  0xff35, 0xff34, 0xff26, 0xff0d, 0xff13, 0xff12, 0xff2c,
  0xff25, 0xff0c, 0xff3f,                                 // UTF-32LE,_
  0xff23, 0xff33, 0xff54, 0xff44, 0xff33, 0xff54, 0xff52,
  0xff49, 0xff4e, 0xff47, 0xff13, 0xff12, 0xff3f,         // CStdString32_
#ifndef TARGET_DARWIN
  0x1f42d, 0x1f42e,
#endif
  0x0
};
73
/* The fullwidth text "test_utf16BEtoUTF8" with the two bytes of every
 * code unit swapped (0xff54 is written as 0x54ff), zero-terminated.
 * NOTE(review): this yields big-endian bytes in memory only on a
 * little-endian host - confirm that is the only supported configuration. */
static const uint16_t refutf16BE[] = {
  0x54ff, 0x45ff, 0x53ff, 0x54ff, 0x3fff, // test_
  0x55ff, 0x54ff, 0x46ff, 0x11ff, 0x16ff, // utf16
  0x22ff, 0x25ff,                         // BE
  0x54ff, 0x4fff,                         // to
  0x35ff, 0x34ff, 0x26ff, 0x18ff,         // UTF8
  0x0
};
79
/* UCS-2 code units spelling "test_ucs2toUTF8" in fullwidth forms,
 * zero-terminated. */
static const uint16_t refucs2[] = {
  0xff54, 0xff45, 0xff53, 0xff54, 0xff3f, // test_
  0xff55, 0xff43, 0xff53, 0xff12,         // ucs2
  0xff54, 0xff4f,                         // to
  0xff35, 0xff34, 0xff26, 0xff18,         // UTF8
  0x0
};
84
85 class TestCharsetConverter : public testing::Test
86 {
87 protected:
88   TestCharsetConverter()
89   {
90     /* Add default settings for locale.
91      * Settings here are taken from CGUISettings::Initialize()
92      */
93     /* TODO
94     CSettingsCategory *loc = CSettings::Get().AddCategory(7, "locale", 14090);
95     CSettings::Get().AddString(loc, "locale.language",248,"english",
96                             SPIN_CONTROL_TEXT);
97     CSettings::Get().AddString(loc, "locale.country", 20026, "USA",
98                             SPIN_CONTROL_TEXT);
99     CSettings::Get().AddString(loc, "locale.charset", 14091, "DEFAULT",
100                             SPIN_CONTROL_TEXT); // charset is set by the
101                                                 // language file
102
103     // Add default settings for subtitles
104     CSettingsCategory *sub = CSettings::Get().AddCategory(5, "subtitles", 287);
105     CSettings::Get().AddString(sub, "subtitles.charset", 735, "DEFAULT",
106                             SPIN_CONTROL_TEXT);
107     */
108
109     g_charsetConverter.reset();
110     g_charsetConverter.clear();
111   }
112
113   ~TestCharsetConverter()
114   {
115     CSettings::Get().Unload();
116   }
117
118   CStdStringA refstra1, refstra2, varstra1;
119   CStdStringW refstrw1, varstrw1;
120   CStdString16 refstr16_1, varstr16_1;
121   CStdString32 refstr32_1, varstr32_1;
122   CStdString refstr1;
123 };
124
125 TEST_F(TestCharsetConverter, utf8ToW)
126 {
127   refstra1 = "test utf8ToW";
128   refstrw1 = L"test utf8ToW";
129   varstrw1.clear();
130   g_charsetConverter.utf8ToW(refstra1, varstrw1, true, false, NULL);
131   EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
132 }
133
134 TEST_F(TestCharsetConverter, utf16LEtoW)
135 {
136   refstrw1 = L"test_utf16LEtow";
137   /* TODO: Should be able to use '=' operator instead of assign() */
138   refstr16_1.assign(refutf16LE1);
139   varstrw1.clear();
140   g_charsetConverter.utf16LEtoW(refstr16_1, varstrw1);
141   EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
142 }
143
144 TEST_F(TestCharsetConverter, subtitleCharsetToUtf8)
145 {
146   refstra1 = "test subtitleCharsetToW";
147   varstra1.clear();
148   g_charsetConverter.subtitleCharsetToUtf8(refstra1, varstra1);
149
150   /* Assign refstra1 to refstrw1 so that we can compare */
151   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
152 }
153
154 TEST_F(TestCharsetConverter, utf8ToStringCharset_1)
155 {
156   refstra1 = "test utf8ToStringCharset";
157   varstra1.clear();
158   g_charsetConverter.utf8ToStringCharset(refstra1, varstra1);
159   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
160 }
161
162 TEST_F(TestCharsetConverter, utf8ToStringCharset_2)
163 {
164   refstra1 = "test utf8ToStringCharset";
165   varstra1 = "test utf8ToStringCharset";
166   g_charsetConverter.utf8ToStringCharset(varstra1);
167   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
168 }
169
170 TEST_F(TestCharsetConverter, utf8ToSystem)
171 {
172   refstra1 = "test utf8ToSystem";
173   varstra1 = "test utf8ToSystem";
174   g_charsetConverter.utf8ToSystem(varstra1);
175   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
176 }
177
178 TEST_F(TestCharsetConverter, utf8To_ASCII)
179 {
180   refstra1 = "test utf8To: charset ASCII, CStdStringA";
181   varstra1.clear();
182   g_charsetConverter.utf8To("ASCII", refstra1, varstra1);
183   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
184 }
185
186 TEST_F(TestCharsetConverter, utf8To_UTF16LE)
187 {
188   refstra1 = "test_utf8To:_charset_UTF-16LE,_"
189              "CStdString16";
190   refstr16_1.assign(refutf16LE2);
191   varstr16_1.clear();
192   g_charsetConverter.utf8To("UTF-16LE", refstra1, varstr16_1);
193   EXPECT_TRUE(!memcmp(refstr16_1.c_str(), varstr16_1.c_str(),
194                       refstr16_1.length() * sizeof(uint16_t)));
195 }
196
197 TEST_F(TestCharsetConverter, utf8To_UTF32LE)
198 {
199   refstra1 = "test_utf8To:_charset_UTF-32LE,_"
200 #ifdef TARGET_DARWIN
201 /* OSX has it's own 'special' utf-8 charset which we use (see UTF8_SOURCE in CharsetConverter.cpp)
202    which is basically NFD (decomposed) utf-8.  The trouble is, it fails on the COW FACE and MOUSE FACE
203    characters for some reason (possibly anything over 0x100000, or maybe there's a decomposed form of these
204    that I couldn't find???)  If UTF8_SOURCE is switched to UTF-8 then this test would pass as-is, but then
205    some filenames stored in utf8-mac wouldn't display correctly in the UI. */
206              "CStdString32_";
207 #else
208              "CStdString32_🐭🐮";
209 #endif
210   refstr32_1.assign(refutf32LE1);
211   varstr32_1.clear();
212   g_charsetConverter.utf8To("UTF-32LE", refstra1, varstr32_1);
213   EXPECT_TRUE(!memcmp(refstr32_1.c_str(), varstr32_1.c_str(),
214                       sizeof(refutf32LE1)));
215 }
216
217 TEST_F(TestCharsetConverter, stringCharsetToUtf8)
218 {
219   refstra1 = "test_stringCharsetToUtf8";
220   varstra1.clear();
221   g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
222   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
223 }
224
225 TEST_F(TestCharsetConverter, isValidUtf8_1)
226 {
227   varstra1.clear();
228   g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
229   EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str()));
230 }
231
232 TEST_F(TestCharsetConverter, isValidUtf8_2)
233 {
234   refstr1 = refutf16LE3;
235   EXPECT_FALSE(CUtf8Utils::isValidUtf8(refstr1));
236 }
237
238 TEST_F(TestCharsetConverter, isValidUtf8_3)
239 {
240   varstra1.clear();
241   g_charsetConverter.ToUtf8("UTF-16LE", refutf16LE3, varstra1);
242   EXPECT_TRUE(CUtf8Utils::isValidUtf8(varstra1.c_str()));
243 }
244
245 TEST_F(TestCharsetConverter, isValidUtf8_4)
246 {
247   EXPECT_FALSE(CUtf8Utils::isValidUtf8(refutf16LE3));
248 }
249
250 /* TODO: Resolve correct input/output for this function */
251 // TEST_F(TestCharsetConverter, ucs2CharsetToStringCharset)
252 // {
253 //   void ucs2CharsetToStringCharset(const CStdStringW& strSource,
254 //                                   CStdStringA& strDest, bool swap = false);
255 // }
256
257 TEST_F(TestCharsetConverter, wToUTF8)
258 {
259   refstrw1 = L"test_wToUTF8";
260   refstra1 = "test_wToUTF8";
261   varstra1.clear();
262   g_charsetConverter.wToUTF8(refstrw1, varstra1);
263   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
264 }
265
266 TEST_F(TestCharsetConverter, utf16BEtoUTF8)
267 {
268   refstr16_1.assign(refutf16BE);
269   refstra1 = "test_utf16BEtoUTF8";
270   varstra1.clear();
271   g_charsetConverter.utf16BEtoUTF8(refstr16_1, varstra1);
272   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
273 }
274
275 TEST_F(TestCharsetConverter, utf16LEtoUTF8)
276 {
277   refstr16_1.assign(refutf16LE4);
278   refstra1 = "test_utf16LEtoUTF8";
279   varstra1.clear();
280   g_charsetConverter.utf16LEtoUTF8(refstr16_1, varstra1);
281   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
282 }
283
284 TEST_F(TestCharsetConverter, ucs2ToUTF8)
285 {
286   refstr16_1.assign(refucs2);
287   refstra1 = "test_ucs2toUTF8";
288   varstra1.clear();
289   g_charsetConverter.ucs2ToUTF8(refstr16_1, varstra1);
290   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
291 }
292
293 TEST_F(TestCharsetConverter, utf8logicalToVisualBiDi)
294 {
295   refstra1 = "test_utf8logicalToVisualBiDi";
296   refstra2 = "test_utf8logicalToVisualBiDi";
297   varstra1.clear();
298   g_charsetConverter.utf8logicalToVisualBiDi(refstra1, varstra1);
299   EXPECT_STREQ(refstra2.c_str(), varstra1.c_str());
300 }
301
302 /* TODO: Resolve correct input/output for this function */
303 // TEST_F(TestCharsetConverter, utf32ToStringCharset)
304 // {
305 //   void utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest);
306 // }
307
308 TEST_F(TestCharsetConverter, getCharsetLabels)
309 {
310   std::vector<CStdString> reflabels;
311   reflabels.push_back("Western Europe (ISO)");
312   reflabels.push_back("Central Europe (ISO)");
313   reflabels.push_back("South Europe (ISO)");
314   reflabels.push_back("Baltic (ISO)");
315   reflabels.push_back("Cyrillic (ISO)");
316   reflabels.push_back("Arabic (ISO)");
317   reflabels.push_back("Greek (ISO)");
318   reflabels.push_back("Hebrew (ISO)");
319   reflabels.push_back("Turkish (ISO)");
320   reflabels.push_back("Central Europe (Windows)");
321   reflabels.push_back("Cyrillic (Windows)");
322   reflabels.push_back("Western Europe (Windows)");
323   reflabels.push_back("Greek (Windows)");
324   reflabels.push_back("Turkish (Windows)");
325   reflabels.push_back("Hebrew (Windows)");
326   reflabels.push_back("Arabic (Windows)");
327   reflabels.push_back("Baltic (Windows)");
328   reflabels.push_back("Vietnamesse (Windows)");
329   reflabels.push_back("Thai (Windows)");
330   reflabels.push_back("Chinese Traditional (Big5)");
331   reflabels.push_back("Chinese Simplified (GBK)");
332   reflabels.push_back("Japanese (Shift-JIS)");
333   reflabels.push_back("Korean");
334   reflabels.push_back("Hong Kong (Big5-HKSCS)");
335
336   std::vector<std::string> varlabels = g_charsetConverter.getCharsetLabels();
337   ASSERT_EQ(reflabels.size(), varlabels.size());
338
339   std::vector<std::string>::iterator it;
340   for (it = varlabels.begin(); it < varlabels.end(); it++)
341   {
342     EXPECT_STREQ((reflabels.at(it - varlabels.begin())).c_str(), (*it).c_str());
343   }
344 }
345
346 TEST_F(TestCharsetConverter, getCharsetLabelByName)
347 {
348   CStdString varstr =
349     g_charsetConverter.getCharsetLabelByName("ISO-8859-1");
350   EXPECT_STREQ("Western Europe (ISO)", varstr.c_str());
351   varstr.clear();
352   varstr = g_charsetConverter.getCharsetLabelByName("Bogus");
353   EXPECT_STREQ("", varstr.c_str());
354 }
355
356 TEST_F(TestCharsetConverter, getCharsetNameByLabel)
357 {
358   CStdString varstr =
359     g_charsetConverter.getCharsetNameByLabel("Western Europe (ISO)");
360   EXPECT_STREQ("ISO-8859-1", varstr.c_str());
361   varstr.clear();
362   varstr = g_charsetConverter.getCharsetNameByLabel("Bogus");
363   EXPECT_STREQ("", varstr.c_str());
364 }
365
366 TEST_F(TestCharsetConverter, unknownToUTF8_1)
367 {
368   refstra1 = "test_unknownToUTF8";
369   varstra1 = "test_unknownToUTF8";
370   g_charsetConverter.unknownToUTF8(varstra1);
371   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
372 }
373
374 TEST_F(TestCharsetConverter, unknownToUTF8_2)
375 {
376   refstra1 = "test_unknownToUTF8";
377   varstra1.clear();
378   g_charsetConverter.unknownToUTF8(refstra1, varstra1);
379   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
380 }
381
382 TEST_F(TestCharsetConverter, toW)
383 {
384   refstra1 = "test_toW:_charset_UTF-16LE";
385   refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF"
386              L"\x94BD\xBDEF\xEF8F\xB7BC\xBCEF\xEF9A\xBFBC\xBDEF"
387              L"\xEF83\x88BD\xBDEF\xEF81\x92BD\xBDEF\xEF93\x85BD"
388              L"\xBDEF\xEF94\xBFBC\xBCEF\xEFB5\xB4BC\xBCEF\xEFA6"
389              L"\x8DBC\xBCEF\xEF91\x96BC\xBCEF\xEFAC\xA5BC";
390   varstrw1.clear();
391   g_charsetConverter.toW(refstra1, varstrw1, "UTF-16LE");
392   EXPECT_STREQ(refstrw1.c_str(), varstrw1.c_str());
393 }
394
395 TEST_F(TestCharsetConverter, fromW)
396 {
397   refstrw1 = L"\xBDEF\xEF94\x85BD\xBDEF\xEF93\x94BD\xBCEF\xEFBF"
398              L"\x86BD\xBDEF\xEF92\x8FBD\xBDEF\xEF8D\xB7BC\xBCEF"
399              L"\xEF9A\xBFBC\xBDEF\xEF83\x88BD\xBDEF\xEF81\x92BD"
400              L"\xBDEF\xEF93\x85BD\xBDEF\xEF94\xBFBC\xBCEF\xEFB5"
401              L"\xB4BC\xBCEF\xEFA6\x8DBC\xBCEF\xEF91\x96BC\xBCEF"
402              L"\xEFAC\xA5BC";
403   refstra1 = "test_fromW:_charset_UTF-16LE";
404   varstra1.clear();
405   g_charsetConverter.fromW(refstrw1, varstra1, "UTF-16LE");
406   EXPECT_STREQ(refstra1.c_str(), varstra1.c_str());
407 }