2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #include "CharsetConverter.h"
23 #include <fribidi/fribidi.h>
25 #include "threads/SingleLock.h"
// iconv charset names selected per platform: WCHAR_CHARSET is the charset used
// for wide strings and UTF8_SOURCE the UTF-8 source charset handed to the
// converters further down in this file.
// NOTE(review): the UTF-32BE/UTF-32LE pairs are presumably chosen by an
// endianness check whose lines are not visible here -- confirm.
31 #if defined(TARGET_DARWIN)
33 #define WCHAR_CHARSET "UTF-32BE"
35 #define WCHAR_CHARSET "UTF-32LE"
37 #define UTF8_SOURCE "UTF-8-MAC"
// NOTE(review): the pragma comment(lib, ...) lines suggest this is the
// Windows/MSVC branch; its opening #elif is not visible here -- confirm.
39 #define WCHAR_CHARSET "UTF-16LE"
40 #define UTF8_SOURCE "UTF-8"
41 #pragma comment(lib, "libfribidi.lib")
42 #pragma comment(lib, "libiconv.lib")
43 #elif defined(TARGET_ANDROID)
44 #define UTF8_SOURCE "UTF-8"
46 #define WCHAR_CHARSET "UTF-32BE"
48 #define WCHAR_CHARSET "UTF-32LE"
// Remaining platforms: let iconv resolve the native wchar_t encoding by name.
51 #define WCHAR_CHARSET "WCHAR_T"
52 #define UTF8_SOURCE "UTF-8"
// Cached iconv conversion descriptors, one per conversion direction.
// (iconv_t)-1 marks a descriptor that is not open; convert_checked() opens a
// descriptor on first use and reset() closes the ones it lists.
56 static iconv_t m_iconvStringCharsetToFontCharset = (iconv_t)-1;
57 static iconv_t m_iconvSubtitleCharsetToW = (iconv_t)-1;
58 static iconv_t m_iconvUtf8ToStringCharset = (iconv_t)-1;
59 static iconv_t m_iconvStringCharsetToUtf8 = (iconv_t)-1;
60 static iconv_t m_iconvUcs2CharsetToStringCharset = (iconv_t)-1;
61 static iconv_t m_iconvUtf32ToStringCharset = (iconv_t)-1;
62 static iconv_t m_iconvWtoUtf8 = (iconv_t)-1;
63 static iconv_t m_iconvUtf16LEtoW = (iconv_t)-1;
64 static iconv_t m_iconvUtf16BEtoUtf8 = (iconv_t)-1;
65 static iconv_t m_iconvUtf16LEtoUtf8 = (iconv_t)-1;
66 static iconv_t m_iconvUtf8toW = (iconv_t)-1;
67 static iconv_t m_iconvUcs2CharsetToUtf8 = (iconv_t)-1;
// fribidi exposes its charset constants under two spellings
// (FRIBIDI_CHAR_SET_* and FRIBIDI_CHARSET_*); FRIBIDI_UTF8 and FRIBIDI_NOTFOUND
// give the rest of this file one spelling for both.
// m_stringFribidiCharset holds the fribidi charset that reset() derives from
// the GUI charset, or the "not found" value when none matches.
69 #if defined(FRIBIDI_CHAR_SET_NOT_FOUND)
70 static FriBidiCharSet m_stringFribidiCharset = FRIBIDI_CHAR_SET_NOT_FOUND;
71 #define FRIBIDI_UTF8 FRIBIDI_CHAR_SET_UTF8
72 #define FRIBIDI_NOTFOUND FRIBIDI_CHAR_SET_NOT_FOUND
73 #else /* compatibility to older version */
74 static FriBidiCharSet m_stringFribidiCharset = FRIBIDI_CHARSET_NOT_FOUND;
75 #define FRIBIDI_UTF8 FRIBIDI_CHARSET_UTF8
76 #define FRIBIDI_NOTFOUND FRIBIDI_CHARSET_NOT_FOUND
// Lock taken around the cached iconv descriptors and around libfribidi calls.
79 static CCriticalSection m_critSection;
// Table pairing a fribidi charset id with a charset name. Several names may map
// to the same id (CP1255 / Windows-1255, CP1256 / Windows-1256). The entry with
// a NULL name terminates the table; the lookups in this file iterate while
// c->charset is non-NULL.
81 static struct SFribidMapping
86 #if defined(FRIBIDI_CHAR_SET_NOT_FOUND)
87 { FRIBIDI_CHAR_SET_ISO8859_6, "ISO-8859-6" }
88 , { FRIBIDI_CHAR_SET_ISO8859_8, "ISO-8859-8" }
89 , { FRIBIDI_CHAR_SET_CP1255 , "CP1255" }
90 , { FRIBIDI_CHAR_SET_CP1255 , "Windows-1255" }
91 , { FRIBIDI_CHAR_SET_CP1256 , "CP1256" }
92 , { FRIBIDI_CHAR_SET_CP1256 , "Windows-1256" }
93 , { FRIBIDI_CHAR_SET_NOT_FOUND, NULL }
94 #else /* compatibility to older version */
// Same table spelled with the alternative FRIBIDI_CHARSET_* constants.
95 { FRIBIDI_CHARSET_ISO8859_6, "ISO-8859-6" }
96 , { FRIBIDI_CHARSET_ISO8859_8, "ISO-8859-8" }
97 , { FRIBIDI_CHARSET_CP1255 , "CP1255" }
98 , { FRIBIDI_CHARSET_CP1255 , "Windows-1255" }
99 , { FRIBIDI_CHARSET_CP1256 , "CP1256" }
100 , { FRIBIDI_CHARSET_CP1256 , "Windows-1256" }
101 , { FRIBIDI_CHARSET_NOT_FOUND, NULL }
// Table pairing an iconv charset name with a caption; it backs
// getCharsetLabels(), getCharsetLabelByName() and getCharsetNameByLabel().
// NOTE(review): the terminating entry is not visible here; the lookups stop at
// the first entry whose charset is NULL -- confirm the table ends with one.
105 static struct SCharsetMapping
110 { "ISO-8859-1", "Western Europe (ISO)" }
111 , { "ISO-8859-2", "Central Europe (ISO)" }
112 , { "ISO-8859-3", "South Europe (ISO)" }
113 , { "ISO-8859-4", "Baltic (ISO)" }
114 , { "ISO-8859-5", "Cyrillic (ISO)" }
115 , { "ISO-8859-6", "Arabic (ISO)" }
116 , { "ISO-8859-7", "Greek (ISO)" }
117 , { "ISO-8859-8", "Hebrew (ISO)" }
118 , { "ISO-8859-9", "Turkish (ISO)" }
119 , { "CP1250" , "Central Europe (Windows)" }
120 , { "CP1251" , "Cyrillic (Windows)" }
121 , { "CP1252" , "Western Europe (Windows)" }
122 , { "CP1253" , "Greek (Windows)" }
123 , { "CP1254" , "Turkish (Windows)" }
124 , { "CP1255" , "Hebrew (Windows)" }
125 , { "CP1256" , "Arabic (Windows)" }
126 , { "CP1257" , "Baltic (Windows)" }
// NOTE(review): the caption below is misspelled ("Vietnamesse"). It is also the
// key matched by getCharsetNameByLabel(), so check for stored labels before
// correcting it.
127 , { "CP1258" , "Vietnamesse (Windows)" }
128 , { "CP874" , "Thai (Windows)" }
129 , { "BIG5" , "Chinese Traditional (Big5)" }
130 , { "GBK" , "Chinese Simplified (GBK)" }
131 , { "SHIFT_JIS" , "Japanese (Shift-JIS)" }
132 , { "CP949" , "Korean" }
133 , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" }
// Multiplier passed to convert()/convert_checked() by the UTF-8 conversions in
// this file; convert_checked() sizes its initial output buffer as
// (source length + 1) * multiplier bytes.
138 #define UTF8_DEST_MULTIPLIER 6
// Marks a descriptor as "not open" so convert_checked() opens it on first use.
140 #define ICONV_PREPARE(iconv) iconv=(iconv_t)-1
// Closes a descriptor only if it is open and marks it as "not open" again.
141 #define ICONV_SAFE_CLOSE(iconv) if (iconv!=(iconv_t)-1) { iconv_close(iconv); iconv=(iconv_t)-1; }
// Wrapper around iconv() that takes the input buffer as const char**.
// iconv_param_adapter converts implicitly to either char** or const char**, so
// the call compiles whichever of the two types the platform's iconv() declares
// for its input-buffer parameter.
143 size_t iconv_const (void* cd, const char** inbuf, size_t *inbytesleft,
144 char* * outbuf, size_t *outbytesleft)
146 struct iconv_param_adapter {
147 iconv_param_adapter(const char**p) : p(p) {}
148 iconv_param_adapter(char**p) : p((const char**)p) {}
149 operator char**() const
153 operator const char**() const
155 return(const char**)p;
// cd is passed as void* and cast back to iconv_t for the real call.
160 return iconv((iconv_t)cd, iconv_param_adapter(inbuf), inbytesleft, outbuf, outbytesleft);
// Converts strSource from strFromCharset to strToCharset with iconv and stores
// the result in strDest.
//   type       - cached conversion descriptor; opened here with iconv_open()
//                while it is still (iconv_t)-1
//   multiplier - bytes reserved per source element (plus one for the
//                terminator) in the initial output buffer
// Returns bool; the callers in this file treat false as failure.
// NOTE(review): the return and cleanup statements are not visible here --
// confirm that outBuf is released on every exit path.
163 template<class INPUT,class OUTPUT>
164 static bool convert_checked(iconv_t& type, int multiplier, const CStdString& strFromCharset, const CStdString& strToCharset, const INPUT& strSource, OUTPUT& strDest)
166 if (type == (iconv_t)-1)
168 type = iconv_open(strToCharset.c_str(), strFromCharset.c_str());
169 if (type == (iconv_t)-1) //iconv_open failed
171 CLog::Log(LOGERROR, "%s iconv_open() failed from %s to %s, errno=%d(%s)",
172 __FUNCTION__, strFromCharset.c_str(), strToCharset.c_str(), errno, strerror(errno));
177 if (strSource.IsEmpty())
179 strDest.clear(); //empty strings are easy
183 //input buffer for iconv() is the buffer from strSource
// the +1 makes iconv() convert the terminating element as well
184 size_t inBufSize = (strSource.length() + 1) * sizeof(strSource[0]);
185 const char* inBuf = (const char*)strSource.c_str();
187 //allocate output buffer for iconv()
188 size_t outBufSize = (strSource.length() + 1) * multiplier;
// NOTE(review): no NULL check on the malloc() result is visible before outBuf
// is handed to iconv() -- confirm.
189 char* outBuf = (char*)malloc(outBufSize);
191 size_t inBytesAvail = inBufSize; //how many bytes iconv() can read
192 size_t outBytesAvail = outBufSize; //how many bytes iconv() can write
193 const char* inBufStart = inBuf; //where in our input buffer iconv() should start reading
194 char* outBufStart = outBuf; //where in our output buffer iconv() should start writing
198 //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
199 size_t returnV = iconv_const(type, &inBufStart, &inBytesAvail, &outBufStart, &outBytesAvail);
// EINVAL is deliberately excluded from the error handling below
201 if ((returnV == (size_t)-1) && (errno != EINVAL))
203 if (errno == E2BIG) //output buffer is not big enough
205 //save where iconv() ended converting, realloc might make outBufStart invalid
206 size_t bytesConverted = outBufSize - outBytesAvail;
208 //make buffer twice as big
210 char* newBuf = (char*)realloc(outBuf, outBufSize);
213 CLog::Log(LOGERROR, "%s realloc failed with buffer=%p size=%zu errno=%d(%s)",
214 __FUNCTION__, outBuf, outBufSize, errno, strerror(errno));
220 //update the buffer pointer and counter
221 outBufStart = outBuf + bytesConverted;
222 outBytesAvail = outBufSize - bytesConverted;
224 //continue in the loop and convert the rest
226 else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input
// NOTE(review): presumably the offending input is skipped here; the statements
// doing so are not visible -- confirm.
232 //continue in the loop and convert the rest
234 else //iconv() had some other error
236 CLog::Log(LOGERROR, "%s iconv() failed from %s to %s, errno=%d(%s)",
237 __FUNCTION__, strFromCharset.c_str(), strToCharset.c_str(), errno, strerror(errno));
244 //complete the conversion, otherwise the current data will prefix the data on the next call
245 returnV = iconv_const(type, NULL, NULL, &outBufStart, &outBytesAvail);
246 if (returnV == (size_t)-1)
247 CLog::Log(LOGERROR, "%s failed cleanup errno=%d(%s)", __FUNCTION__, errno, strerror(errno));
// everything between outBuf and outBufStart is converted output
254 size_t bytesWritten = outBufSize - outBytesAvail;
255 char* dest = (char*)strDest.GetBuffer(bytesWritten);
257 //copy the output from iconv() into the CStdString
258 memcpy(dest, outBuf, bytesWritten);
260 strDest.ReleaseBuffer();
// Same conversion as convert_checked() for callers that do not need the result
// flag: the parameters are forwarded unchanged.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here -- confirm what is left in strDest in that case.
267 template<class INPUT,class OUTPUT>
268 static void convert(iconv_t& type, int multiplier, const CStdString& strFromCharset, const CStdString& strToCharset, const INPUT& strSource, OUTPUT& strDest)
270 if(!convert_checked(type, multiplier, strFromCharset, strToCharset, strSource, strDest))
// Reorders strSource from logical to visual order with libfribidi, one
// "\n"-separated line at a time; the per-line results are accumulated in
// resultString and finally assigned to strDest.
//   fribidiCharset - charset fribidi uses to decode and re-encode each line
//   base           - base direction passed to fribidi_log2vis()
//   bWasFlipped    - optional out flag; reset to false here and examined in the
//                    embedding-level check below
276 static void logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest, FriBidiCharSet fribidiCharset, FriBidiCharType base = FRIBIDI_TYPE_LTR, bool* bWasFlipped =NULL)
278 // libfribidi is not threadsafe, so make sure we make it so
279 CSingleLock lock(m_critSection);
281 vector<CStdString> lines;
282 CUtil::Tokenize(strSource, lines, "\n");
283 CStdString resultString;
286 *bWasFlipped = false;
288 for (unsigned int i = 0; i < lines.size(); i++)
290 int sourceLen = lines[i].length();
292 // Convert from the selected charset to Unicode
// NOTE(review): none of the three malloc() results below has a visible NULL
// check, and the matching free() calls are not visible here -- confirm.
293 FriBidiChar* logical = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar));
294 int len = fribidi_charset_to_unicode(fribidiCharset, (char*) lines[i].c_str(), sourceLen, logical);
296 FriBidiChar* visual = (FriBidiChar*) malloc((len + 1) * sizeof(FriBidiChar));
297 FriBidiLevel* levels = (FriBidiLevel*) malloc((len + 1) * sizeof(FriBidiLevel));
// NOTE(review): levels is allocated above but NULL is passed for all three
// optional outputs of fribidi_log2vis(), so levels[] looks uninitialized when
// it is read in the flip check below -- confirm and pass levels if so.
299 if (fribidi_log2vis(logical, len, &base, visual, NULL, NULL, NULL))
301 // Removes bidirectional marks
302 len = fribidi_remove_bidi_marks(visual, len, NULL, NULL, NULL);
304 // Apparently a string can get longer during this transformation
305 // so make sure we allocate the maximum possible character utf8
306 // can generate at least, should cover all bases
307 char *result = strDest.GetBuffer(len*4);
309 // Convert back from Unicode to the charset
310 int len2 = fribidi_unicode_to_charset(fribidiCharset, visual, len, result);
311 ASSERT(len2 <= len*4);
312 strDest.ReleaseBuffer();
314 resultString += strDest;
316 // Check whether the string was flipped if one of the embedding levels is greater than 0
// only evaluated when the caller asked for the flag and it is not set yet
317 if (bWasFlipped && !*bWasFlipped)
// this inner i shadows the line index of the enclosing loop
319 for (int i = 0; i < len; i++)
321 if ((int) levels[i] > 0)
335 strDest = resultString;
// Constructor of CCharsetConverter; takes no arguments.
338 CCharsetConverter::CCharsetConverter()
// NOTE(review): the purpose of clear() cannot be determined from its signature
// alone -- document it once the body has been checked.
342 void CCharsetConverter::clear()
// Collects the caption of every g_charsets entry, in table order, into a vector.
346 vector<CStdString> CCharsetConverter::getCharsetLabels()
348 vector<CStdString> lab;
// the walk stops at the first entry whose charset pointer is NULL
349 for(SCharsetMapping * c = g_charsets; c->charset; c++)
350 lab.push_back(c->caption);
// Searches g_charsets for the entry whose charset name matches charsetName
// (compared with CStdString::Equals).
// NOTE(review): the return statements are not visible here; presumably the
// matching caption is returned -- confirm, including the no-match result.
355 CStdString CCharsetConverter::getCharsetLabelByName(const CStdString& charsetName)
357 for(SCharsetMapping * c = g_charsets; c->charset; c++)
359 if (charsetName.Equals(c->charset))
// Searches g_charsets for the entry whose caption matches charsetLabel
// (compared with CStdString::Equals).
// NOTE(review): the return statements are not visible here; presumably the
// matching charset name is returned -- confirm, including the no-match result.
366 CStdString CCharsetConverter::getCharsetNameByLabel(const CStdString& charsetLabel)
368 for(SCharsetMapping *c = g_charsets; c->charset; c++)
370 if (charsetLabel.Equals(c->caption))
// Checks charset against the charset names listed in g_fribidi.
// NOTE(review): the return statements are not visible here; presumably true is
// returned on a match and false otherwise -- confirm.
377 bool CCharsetConverter::isBidiCharset(const CStdString& charset)
379 for(SFribidMapping *c = g_fribidi; c->charset; c++)
381 if (charset.Equals(c->charset))
// Closes the cached iconv descriptors listed below, so that convert_checked()
// reopens them on next use, and re-derives m_stringFribidiCharset from the
// current GUI charset. Runs under m_critSection.
387 void CCharsetConverter::reset(void)
389 CSingleLock lock(m_critSection);
// NOTE(review): m_iconvUtf16LEtoW is declared with the other cached
// descriptors but is not closed in the visible list -- confirm whether that is
// intentional.
391 ICONV_SAFE_CLOSE(m_iconvStringCharsetToFontCharset);
392 ICONV_SAFE_CLOSE(m_iconvUtf8ToStringCharset);
393 ICONV_SAFE_CLOSE(m_iconvStringCharsetToUtf8);
394 ICONV_SAFE_CLOSE(m_iconvUcs2CharsetToStringCharset);
395 ICONV_SAFE_CLOSE(m_iconvSubtitleCharsetToW);
396 ICONV_SAFE_CLOSE(m_iconvWtoUtf8);
397 ICONV_SAFE_CLOSE(m_iconvUtf16BEtoUtf8);
398 ICONV_SAFE_CLOSE(m_iconvUtf16LEtoUtf8);
399 ICONV_SAFE_CLOSE(m_iconvUtf32ToStringCharset);
400 ICONV_SAFE_CLOSE(m_iconvUtf8toW);
401 ICONV_SAFE_CLOSE(m_iconvUcs2CharsetToUtf8);
// start from "not found"; it stays that way when no table entry matches
404 m_stringFribidiCharset = FRIBIDI_NOTFOUND;
406 CStdString strCharset=g_langInfo.GetGuiCharSet();
407 for(SFribidMapping *c = g_fribidi; c->charset; c++)
409 if (strCharset.Equals(c->charset))
410 m_stringFribidiCharset = c->name;
// Converts a UTF-8 string to a wide string (UTF8_SOURCE -> WCHAR_CHARSET).
414 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
415 // of the string is already made or the string is not displayed in the GUI
//   forceLTRReadingOrder - selects FRIBIDI_TYPE_LTR instead of FRIBIDI_TYPE_PDF
//                          as the base direction for the flip
//   bWasFlipped          - optional out flag forwarded to logicalToVisualBiDi()
416 void CCharsetConverter::utf8ToW(const CStdStringA& utf8String, CStdStringW &wString, bool bVisualBiDiFlip/*=true*/, bool forceLTRReadingOrder /*=false*/, bool* bWasFlipped/*=NULL*/)
418 // Try to flip hebrew/arabic characters, if any
421 CStdStringA strFlipped;
// despite its name, this local holds the base direction, not a charset
422 FriBidiCharType charset = forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF;
423 logicalToVisualBiDi(utf8String, strFlipped, FRIBIDI_UTF8, charset, bWasFlipped);
// the lock is taken only after the flip, which locks m_critSection itself
424 CSingleLock lock(m_critSection);
425 convert(m_iconvUtf8toW,sizeof(wchar_t),UTF8_SOURCE,WCHAR_CHARSET,strFlipped,wString);
// second path: the input is converted without flipping
429 CSingleLock lock(m_critSection);
430 convert(m_iconvUtf8toW,sizeof(wchar_t),UTF8_SOURCE,WCHAR_CHARSET,utf8String,wString);
// Converts strSource from the subtitle charset reported by g_langInfo to a
// wide string, using the cached m_iconvSubtitleCharsetToW descriptor.
434 void CCharsetConverter::subtitleCharsetToW(const CStdStringA& strSource, CStdStringW& strDest)
436 // No need to flip hebrew/arabic as mplayer does the flipping
437 CSingleLock lock(m_critSection);
438 convert(m_iconvSubtitleCharsetToW,sizeof(wchar_t),g_langInfo.GetSubtitleCharSet(),WCHAR_CHARSET,strSource,strDest);
// Converts a wide string to the charset named by enc, using a descriptor that
// is opened for this call only (no cached descriptor, no lock).
// NOTE(review): iconv_close() is called unconditionally; if iconv_open() failed
// inside convert() the descriptor is still (iconv_t)-1 -- ICONV_SAFE_CLOSE
// would avoid closing an invalid descriptor. Confirm.
441 void CCharsetConverter::fromW(const CStdStringW& strSource,
442 CStdStringA& strDest, const CStdString& enc)
445 ICONV_PREPARE(iconvString);
446 convert(iconvString,4,WCHAR_CHARSET,enc,strSource,strDest);
447 iconv_close(iconvString);
// Converts strSource from the charset named by enc to a wide string, using a
// descriptor that is opened for this call only (no cached descriptor, no lock).
// NOTE(review): iconv_close() is called unconditionally; if iconv_open() failed
// inside convert() the descriptor is still (iconv_t)-1 -- ICONV_SAFE_CLOSE
// would avoid closing an invalid descriptor. Confirm.
450 void CCharsetConverter::toW(const CStdStringA& strSource,
451 CStdStringW& strDest, const CStdString& enc)
454 ICONV_PREPARE(iconvString);
455 convert(iconvString,sizeof(wchar_t),enc,WCHAR_CHARSET,strSource,strDest);
456 iconv_close(iconvString);
// Converts a UTF-8 string to the GUI charset reported by g_langInfo, using the
// cached m_iconvUtf8ToStringCharset descriptor under m_critSection.
459 void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest)
461 CSingleLock lock(m_critSection);
462 convert(m_iconvUtf8ToStringCharset,1,UTF8_SOURCE,g_langInfo.GetGuiCharSet(),strSource,strDest);
// In-place variant: converts strSourceDest through the two-argument overload
// into a temporary and assigns the result back.
465 void CCharsetConverter::utf8ToStringCharset(CStdStringA& strSourceDest)
468 utf8ToStringCharset(strSourceDest, strDest);
469 strSourceDest=strDest;
// Converts strSource from the charset named by strSourceCharset to UTF-8,
// using a descriptor that is opened for this call only.
// NOTE(review): iconv_close() is called unconditionally; if iconv_open() failed
// inside convert() the descriptor is still (iconv_t)-1 -- ICONV_SAFE_CLOSE
// would avoid closing an invalid descriptor. Confirm.
472 void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest)
475 ICONV_PREPARE(iconvString);
476 convert(iconvString,UTF8_DEST_MULTIPLIER,strSourceCharset,"UTF-8",strSource,strDest);
477 iconv_close(iconvString);
// Converts a UTF-8 string to the charset named by strDestCharset; "UTF-8" as
// destination is special-cased.
// NOTE(review): the statements of the special case are not visible here;
// presumably strSource is copied to strDest and the function returns -- confirm.
// NOTE(review): iconv_close() is called unconditionally; if iconv_open() failed
// inside convert() the descriptor is still (iconv_t)-1 -- confirm.
480 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdStringA& strDest)
482 if (strDestCharset == "UTF-8")
483 { // simple case - no conversion necessary
488 ICONV_PREPARE(iconvString);
489 convert(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest);
490 iconv_close(iconvString);
// Converts a UTF-8 string to the charset named by strDestCharset and stores the
// result in a CStdString16, using a descriptor opened for this call only.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here, and iconv_close() may receive (iconv_t)-1 after a failed
// open -- confirm both.
493 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdString16& strDest)
496 ICONV_PREPARE(iconvString);
497 if(!convert_checked(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest))
499 iconv_close(iconvString);
// Converts a UTF-8 string to the charset named by strDestCharset and stores the
// result in a CStdString32, using a descriptor opened for this call only.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here, and iconv_close() may receive (iconv_t)-1 after a failed
// open -- confirm both.
502 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdString32& strDest)
505 ICONV_PREPARE(iconvString);
506 if(!convert_checked(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest))
508 iconv_close(iconvString);
// In-place variant: copies sourceAndDest and runs the two-argument overload
// with the copy as source and sourceAndDest as destination.
511 void CCharsetConverter::unknownToUTF8(CStdStringA &sourceAndDest)
513 CStdString source = sourceAndDest;
514 unknownToUTF8(source, sourceAndDest);
// Produces UTF-8 in dest from a string of unknown encoding.
517 void CCharsetConverter::unknownToUTF8(const CStdStringA &source, CStdStringA &dest)
519 // checks whether it's utf8 already, and if not converts from the GUI charset (this overload has no source-charset parameter)
// NOTE(review): the statements taken when the input already is valid UTF-8 are
// not visible here; presumably source is copied to dest -- confirm.
520 if (isValidUtf8(source))
524 CSingleLock lock(m_critSection);
525 convert(m_iconvStringCharsetToUtf8, UTF8_DEST_MULTIPLIER, g_langInfo.GetGuiCharSet(), "UTF-8", source, dest);
// Converts a wide string (WCHAR_CHARSET) to UTF-8, using the cached
// m_iconvWtoUtf8 descriptor under m_critSection.
529 void CCharsetConverter::wToUTF8(const CStdStringW& strSource, CStdStringA &strDest)
531 CSingleLock lock(m_critSection);
532 convert(m_iconvWtoUtf8,UTF8_DEST_MULTIPLIER,WCHAR_CHARSET,"UTF-8",strSource,strDest);
// Converts a UTF-16BE string to UTF-8, using the cached m_iconvUtf16BEtoUtf8
// descriptor under m_critSection.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here -- confirm what is left in strDest in that case.
535 void CCharsetConverter::utf16BEtoUTF8(const CStdString16& strSource, CStdStringA &strDest)
537 CSingleLock lock(m_critSection);
538 if(!convert_checked(m_iconvUtf16BEtoUtf8,UTF8_DEST_MULTIPLIER,"UTF-16BE","UTF-8",strSource,strDest))
// Converts a UTF-16LE string to UTF-8, using the cached m_iconvUtf16LEtoUtf8
// descriptor under m_critSection.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here -- confirm what is left in strDest in that case.
542 void CCharsetConverter::utf16LEtoUTF8(const CStdString16& strSource,
543 CStdStringA &strDest)
545 CSingleLock lock(m_critSection);
546 if(!convert_checked(m_iconvUtf16LEtoUtf8,UTF8_DEST_MULTIPLIER,"UTF-16LE","UTF-8",strSource,strDest))
// Converts a UCS-2LE string to UTF-8, using the cached m_iconvUcs2CharsetToUtf8
// descriptor under m_critSection.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here -- confirm what is left in strDest in that case.
550 void CCharsetConverter::ucs2ToUTF8(const CStdString16& strSource, CStdStringA& strDest)
552 CSingleLock lock(m_critSection);
553 if(!convert_checked(m_iconvUcs2CharsetToUtf8,UTF8_DEST_MULTIPLIER,"UCS-2LE","UTF-8",strSource,strDest))
// Converts a UTF-16LE string to a wide string (WCHAR_CHARSET), using the cached
// m_iconvUtf16LEtoW descriptor under m_critSection.
// NOTE(review): the statement executed when convert_checked() fails is not
// visible here -- confirm what is left in strDest in that case.
557 void CCharsetConverter::utf16LEtoW(const CStdString16& strSource, CStdStringW &strDest)
559 CSingleLock lock(m_critSection);
560 if(!convert_checked(m_iconvUtf16LEtoW,sizeof(wchar_t),"UTF-16LE",WCHAR_CHARSET,strSource,strDest))
// Converts strSource, interpreted as "UTF-16LE", to the GUI charset. The input
// is copied first so that the copy can be modified.
// NOTE(review): the body of the byte loop is not visible here; given the swap
// parameter it presumably swaps each byte pair of the copy -- confirm.
// NOTE(review): the source is a CStdStringW but is converted as UTF-16LE; that
// only matches where wchar_t is a 16-bit little-endian unit -- confirm.
564 void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap)
566 CStdStringW strCopy = strSource;
569 char* s = (char*) strCopy.c_str();
// runs until two consecutive zero bytes are found
571 while (*s || *(s + 1))
581 CSingleLock lock(m_critSection);
582 convert(m_iconvUcs2CharsetToStringCharset,4,"UTF-16LE",
583 g_langInfo.GetGuiCharSet(),strCopy,strDest);
// Converts a "UTF-32LE" buffer to the GUI charset with a hand-written iconv
// call sequence instead of convert(). The cached m_iconvUtf32ToStringCharset
// descriptor is opened on first use.
// NOTE(review): the input length is computed as 4 bytes per element, but the
// elements are unsigned long, which is wider than 4 bytes on some platforms --
// confirm.
586 void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest)
588 CSingleLock lock(m_critSection);
590 if (m_iconvUtf32ToStringCharset == (iconv_t) - 1)
592 CStdString strCharset=g_langInfo.GetGuiCharSet();
593 m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE");
// if iconv_open() failed, the conversion below is not attempted
596 if (m_iconvUtf32ToStringCharset != (iconv_t) - 1)
// NOTE(review): the statement that advances ptr is not visible here;
// presumably it walks to the terminating zero element -- confirm.
598 const unsigned long* ptr=strSource;
600 const char* src = (const char*) strSource;
601 size_t inBytes = (ptr-strSource+1)*4;
// the output buffer gets as many bytes as the input is assumed to have
603 char *dst = strDest.GetBuffer(inBytes);
604 size_t outBytes = inBytes;
606 if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
608 CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
609 strDest.ReleaseBuffer();
// fallback: the raw input bytes are assigned as if they were a C string
610 strDest = (const char *)strSource;
// NOTE(review): this call uses iconv() directly while the call above goes
// through iconv_const() -- consider using the wrapper for consistency.
614 if (iconv(m_iconvUtf32ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1)
616 CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
617 strDest.ReleaseBuffer();
618 strDest = (const char *)strSource;
622 strDest.ReleaseBuffer();
// In-place conversion of a UTF-8 string through utf8To() with an empty
// destination charset name.
// NOTE(review): presumably iconv resolves the empty name to the system/locale
// charset -- confirm.
626 void CCharsetConverter::utf8ToSystem(CStdStringA& strSourceDest)
629 g_charsetConverter.utf8To("", strSourceDest, strDest);
630 strSourceDest = strDest;
// Checks whether the len bytes starting at buf follow the UTF-8 byte structure.
// trailing counts the continuation bytes still expected after a lead byte; the
// result is true only if none are outstanding when the buffer ends.
// byte2mask is set for lead bytes whose payload bits are all zero: it selects
// the bits of the following byte that are then tested.
// NOTE(review): several short statements (reading c, the early returns and the
// trailing bookkeeping) are not visible here.
633 // Taken from RFC2640
634 bool CCharsetConverter::isValidUtf8(const char *buf, unsigned int len)
636 const unsigned char *endbuf = (unsigned char*)buf + len;
637 unsigned char byte2mask=0x00, c;
638 int trailing=0; // trailing (continuation) bytes to follow
640 while ((unsigned char*)buf != endbuf)
644 if ((c & 0xc0) == 0x80) // does trailing byte follow UTF-8 format ?
646 if (byte2mask) // need to check 2nd byte for proper range
648 if (c & byte2mask) // are appropriate bits set ?
658 if ((c & 0x80) == 0x00) continue; // valid 1-byte UTF-8
659 else if ((c & 0xe0) == 0xc0) // valid 2-byte UTF-8
660 if (c & 0x1e) //is UTF-8 byte in proper range ?
664 else if ((c & 0xf0) == 0xe0) // valid 3-byte UTF-8
666 if (!(c & 0x0f)) // is UTF-8 byte in proper range ?
667 byte2mask = 0x20; // if not set mask
668 trailing = 2; // to check next byte
670 else if ((c & 0xf8) == 0xf0) // valid 4-byte UTF-8
672 if (!(c & 0x07)) // is UTF-8 byte in proper range ?
673 byte2mask = 0x30; // if not set mask
674 trailing = 3; // to check next byte
// NOTE(review): 5- and 6-byte lead bytes are accepted below; RFC 3629 limits
// UTF-8 to 4 bytes -- confirm whether accepting the longer forms is intended.
676 else if ((c & 0xfc) == 0xf8) // valid 5-byte UTF-8
678 if (!(c & 0x03)) // is UTF-8 byte in proper range ?
679 byte2mask = 0x38; // if not set mask
680 trailing = 4; // to check next byte
682 else if ((c & 0xfe) == 0xfc) // valid 6-byte UTF-8
684 if (!(c & 0x01)) // is UTF-8 byte in proper range ?
685 byte2mask = 0x3c; // if not set mask
686 trailing = 5; // to check next byte
// a sequence cut off at the end of the buffer leaves trailing != 0
691 return trailing == 0;
// String overload: validates the str.size() bytes of str with the
// buffer/length overload.
694 bool CCharsetConverter::isValidUtf8(const CStdString& str)
696 return isValidUtf8(str.c_str(), str.size());
// Runs logicalToVisualBiDi() on a UTF-8 string with FRIBIDI_TYPE_RTL as base
// direction; the bWasFlipped output is not requested.
699 void CCharsetConverter::utf8logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest)
701 logicalToVisualBiDi(strSource, strDest, FRIBIDI_UTF8, FRIBIDI_TYPE_RTL);