2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #include "CharsetConverter.h"
23 #include "utils/StringUtils.h"
24 #include <fribidi/fribidi.h>
26 #include "guilib/LocalizeStrings.h"
27 #include "settings/lib/Setting.h"
28 #include "settings/Settings.h"
29 #include "threads/SingleLock.h"
30 #include "utils/Utf8Utils.h"
36 #if !defined(TARGET_WINDOWS) && defined(HAVE_CONFIG_H)
// Byte-order suffix that is appended to the "UTF-16"/"UTF-32" charset names defined below.
40 #ifdef WORDS_BIGENDIAN
41 #define ENDIAN_SUFFIX "BE"
43 #define ENDIAN_SUFFIX "LE"
// Per-platform charset names passed to iconv_open():
//   UTF16_CHARSET / UTF32_CHARSET - UTF-16 / UTF-32 with the byte-order suffix from above
//   UTF8_SOURCE                   - name used when UTF-8 is the *source* of a conversion
//   WCHAR_CHARSET                 - name used for wchar_t based strings (std::wstring)
// WCHAR_IS_* records which encoding wchar_t has; WCHAR_IS_UCS_4 lets utf32ToW() copy code units directly.
46 #if defined(TARGET_DARWIN)
47 #define WCHAR_IS_UCS_4 1
48 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
49 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
50 #define UTF8_SOURCE "UTF-8-MAC"
51 #define WCHAR_CHARSET UTF32_CHARSET
52 #elif defined(TARGET_WINDOWS)
53 #define WCHAR_IS_UTF16 1
54 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
55 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
56 #define UTF8_SOURCE "UTF-8"
57 #define WCHAR_CHARSET UTF16_CHARSET
58 #pragma comment(lib, "libfribidi.lib")
59 #pragma comment(lib, "libiconv.lib")
60 #elif defined(TARGET_ANDROID)
61 #define WCHAR_IS_UCS_4 1
62 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
63 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
64 #define UTF8_SOURCE "UTF-8"
65 #define WCHAR_CHARSET UTF32_CHARSET
67 #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
68 #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
69 #define UTF8_SOURCE "UTF-8"
70 #define WCHAR_CHARSET "WCHAR_T"
71 #if __STDC_ISO_10646__
73 #if SIZEOF_WCHAR_T == 4
74 #define WCHAR_IS_UCS_4 1
75 #elif SIZEOF_WCHAR_T == 2
76 #define WCHAR_IS_UCS_2 1
// Value iconv_open() returns on failure; this file also uses it to mean "no descriptor opened yet".
82 #define NO_ICONV ((iconv_t)-1)
// Charsets whose concrete name is looked up at run time by CConverterType::ResolveSpecialCharset().
// NotSpecialCharset is zero, so a plain truth test such as `if (m_sourceSpecialCharset)` is false for it.
86 NotSpecialCharset = 0,
88 UserCharset /* locale.charset */,
89 SubtitleCharset /* subtitles.charset */,
90 KaraokeCharset /* karaoke.charset */
/**
 * One iconv conversion (source charset -> target charset) whose descriptor is opened on demand.
 * The object derives from CCriticalSection and therefore serves as its own lock: callers lock it
 * with a CSingleLock and hand that lock to GetConverter().
 */
94 class CConverterType : public CCriticalSection
// Each side of the conversion is given either by charset name or as a SpecialCharset that is
// resolved when the converter is opened. targetSingleCharMaxLen is later passed to the conversion
// routine as the multiplier for its initial output buffer size.
97 CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
98 CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
99 CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
100 CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
101 CConverterType(const CConverterType& other);
// Returns the iconv descriptor, opening it first if necessary; converterLock must hold this object.
104 iconv_t GetConverter(CSingleLock& converterLock);
// Replaces the charset pair with two explicitly named charsets.
107 void ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
// Plain accessors; they return copies and take no lock themselves.
108 std::string GetSourceCharset(void) const { return m_sourceCharset; }
109 std::string GetTargetCharset(void) const { return m_targetCharset; }
110 unsigned int GetTargetSingleCharMaxLen(void) const { return m_targetSingleCharMaxLen; }
// Maps a SpecialCharset to the charset name currently configured for it.
113 static std::string ResolveSpecialCharset(enum SpecialCharset charset);
// For a special charset the matching name string is filled in by GetConverter() and cleared by Reset().
115 enum SpecialCharset m_sourceSpecialCharset;
116 std::string m_sourceCharset;
117 enum SpecialCharset m_targetSpecialCharset;
118 std::string m_targetCharset;
120 unsigned int m_targetSingleCharMaxLen;
// Converter between two charsets that are both given by name; neither side is a special charset.
123 CConverterType::CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
124 m_sourceSpecialCharset(NotSpecialCharset),
125 m_sourceCharset(sourceCharset),
126 m_targetSpecialCharset(NotSpecialCharset),
127 m_targetCharset(targetCharset),
129 m_targetSingleCharMaxLen(targetSingleCharMaxLen)
// Converter from a special source charset (its name is resolved later, in GetConverter()) to a named target charset.
133 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
134 m_sourceSpecialCharset(sourceSpecialCharset),
136 m_targetSpecialCharset(NotSpecialCharset),
137 m_targetCharset(targetCharset),
139 m_targetSingleCharMaxLen(targetSingleCharMaxLen)
// Converter from a named source charset to a special target charset (its name is resolved later, in GetConverter()).
143 CConverterType::CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
144 m_sourceSpecialCharset(NotSpecialCharset),
145 m_sourceCharset(sourceCharset),
146 m_targetSpecialCharset(targetSpecialCharset),
149 m_targetSingleCharMaxLen(targetSingleCharMaxLen)
// Converter whose source and target are both special charsets; both names are resolved later, in GetConverter().
153 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
154 m_sourceSpecialCharset(sourceSpecialCharset),
156 m_targetSpecialCharset(targetSpecialCharset),
159 m_targetSingleCharMaxLen(targetSingleCharMaxLen)
// Copy constructor: takes over the charset configuration of 'other'. The CCriticalSection base is
// default-constructed rather than copied, so the copy has a lock of its own.
163 CConverterType::CConverterType(const CConverterType& other) : CCriticalSection(),
164 m_sourceSpecialCharset(other.m_sourceSpecialCharset),
165 m_sourceCharset(other.m_sourceCharset),
166 m_targetSpecialCharset(other.m_targetSpecialCharset),
167 m_targetCharset(other.m_targetCharset),
169 m_targetSingleCharMaxLen(other.m_targetSingleCharMaxLen)
174 CConverterType::~CConverterType()
176 CSingleLock lock(*this);
177 if (m_iconv != NO_ICONV)
178 iconv_close(m_iconv);
179 lock.Leave(); // ensure unlocking before final destruction
/**
 * Give access to the iconv descriptor of this conversion, opening it on first use.
 *
 * @param converterLock  lock held by the caller; it is checked to be a lock on this very object
 *
 * When the descriptor is not open yet, special charsets are first resolved to their concrete
 * names, then iconv_open() is called. A failing iconv_open() is logged with errno.
 */
183 iconv_t CConverterType::GetConverter(CSingleLock& converterLock)
185 // ensure that this unique instance is locked externally
186 if (&converterLock.get_underlying() != this)
189 if (m_iconv == NO_ICONV)
// NotSpecialCharset is 0, so these two tests are true only for special charsets
191 if (m_sourceSpecialCharset)
192 m_sourceCharset = ResolveSpecialCharset(m_sourceSpecialCharset);
193 if (m_targetSpecialCharset)
194 m_targetCharset = ResolveSpecialCharset(m_targetSpecialCharset);
// note the argument order of iconv_open(): target first, source second
196 m_iconv = iconv_open(m_targetCharset.c_str(), m_sourceCharset.c_str());
198 if (m_iconv == NO_ICONV)
199 CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)",
200 __FUNCTION__, m_sourceCharset.c_str(), m_targetCharset.c_str(), errno, strerror(errno));
/**
 * Close the iconv descriptor if one is open and forget the names resolved for special charsets.
 * GetConverter() resolves those names again when it next opens a descriptor.
 * Runs under this object's own lock.
 */
207 void CConverterType::Reset(void)
209 CSingleLock lock(*this);
210 if (m_iconv != NO_ICONV)
212 iconv_close(m_iconv);
// explicitly named charsets are kept; only names derived from a special charset are dropped
216 if (m_sourceSpecialCharset)
217 m_sourceCharset.clear();
218 if (m_targetSpecialCharset)
219 m_targetCharset.clear();
/**
 * Switch this converter to a new pair of explicitly named charsets.
 * An open descriptor is closed when the source or target name differs from the current one.
 * Afterwards neither side is a special charset any more. Runs under this object's own lock.
 */
223 void CConverterType::ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/)
225 CSingleLock lock(*this);
// the comparison looks at the charset names only, not at targetSingleCharMaxLen
226 if (sourceCharset != m_sourceCharset || targetCharset != m_targetCharset)
228 if (m_iconv != NO_ICONV)
230 iconv_close(m_iconv);
234 m_sourceSpecialCharset = NotSpecialCharset;
235 m_sourceCharset = sourceCharset;
236 m_targetSpecialCharset = NotSpecialCharset;
237 m_targetCharset = targetCharset;
238 m_targetSingleCharMaxLen = targetSingleCharMaxLen;
/**
 * Translate a SpecialCharset into the charset name currently configured for it.
 * GUI and subtitle charsets come from g_langInfo. The karaoke charset comes from the
 * "karaoke.charset" setting and falls back to the GUI charset when that setting is
 * missing or set to "DEFAULT".
 */
242 std::string CConverterType::ResolveSpecialCharset(enum SpecialCharset charset)
249 return g_langInfo.GetGuiCharSet();
250 case SubtitleCharset:
251 return g_langInfo.GetSubtitleCharSet();
254 CSetting* karaokeSetting = CSettings::Get().GetSetting("karaoke.charset");
255 if (karaokeSetting == NULL || ((CSettingString*)karaokeSetting)->GetValue() == "DEFAULT")
256 return g_langInfo.GetGuiCharSet();
// NOTE(review): the cast assumes "karaoke.charset" is a CSettingString - confirm against the settings definition
258 return ((CSettingString*)karaokeSetting)->GetValue();
260 case NotSpecialCharset:
262 return "UTF-8"; /* dummy value */
// Identifiers of the predefined conversions; the values are used as indexes into
// CCharsetConverter::CInnerConverter::m_stdConversion, so the order of both lists must match.
267 enum StdConversionType /* Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion */
// Not a conversion: gives the size of m_stdConversion and the upper bound for range checks.
285 NumberOfStdConversionTypes /* Dummy sentinel entry */
289 /* We don't want to pollute header file with many additional includes and definitions, so put
290 here all stuff that requires usage of types defined in this file or in additional headers */
291 class CCharsetConverter::CInnerConverter
// Reorders a logical-order UTF-32 string into visual order using FriBidi.
294 static bool logicalToVisualBiDi(const std::u32string& stringSrc, std::u32string& stringDst, FriBidiCharType base = FRIBIDI_TYPE_LTR, const bool failOnBadString = false);
// Converts with one of the predefined converters from m_stdConversion.
296 template<class INPUT,class OUTPUT>
297 static bool stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
// Converts between two charsets given by name, opening an iconv descriptor for the call.
298 template<class INPUT,class OUTPUT>
299 static bool customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
// Common iconv() driver used by both functions above.
301 template<class INPUT,class OUTPUT>
302 static bool convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
// Predefined converters, indexed by StdConversionType.
304 static CConverterType m_stdConversion[NumberOfStdConversionTypes];
// Serializes the calls into libfribidi made by logicalToVisualBiDi().
305 static CCriticalSection m_critSectionFriBiDi;
308 /* single symbol sizes in chars */
// m_Utf8CharMaxSize doubles as the output buffer multiplier of every conversion that targets UTF-8.
309 const int CCharsetConverter::m_Utf8CharMinSize = 1;
310 const int CCharsetConverter::m_Utf8CharMaxSize = 4;
// Table of the predefined converters. Entries are addressed by StdConversionType values, so each
// position must correspond to the enumerator named in its leading comment. Entries that convert
// to UTF-8 pass m_Utf8CharMaxSize as third argument; all others use the default of 1.
312 CConverterType CCharsetConverter::CInnerConverter::m_stdConversion[NumberOfStdConversionTypes] = /* keep it in sync with enum StdConversionType */
314 /* Utf8ToUtf32 */ CConverterType(UTF8_SOURCE, UTF32_CHARSET),
315 /* Utf32ToUtf8 */ CConverterType(UTF32_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
316 /* Utf32ToW */ CConverterType(UTF32_CHARSET, WCHAR_CHARSET),
317 /* WToUtf32 */ CConverterType(WCHAR_CHARSET, UTF32_CHARSET),
318 /* SubtitleCharsetToW */ CConverterType(SubtitleCharset, WCHAR_CHARSET),
319 /* Utf8ToUserCharset */ CConverterType(UTF8_SOURCE, UserCharset),
320 /* UserCharsetToUtf8 */ CConverterType(UserCharset, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
321 /* Utf32ToUserCharset */ CConverterType(UTF32_CHARSET, UserCharset),
322 /* WtoUtf8 */ CConverterType(WCHAR_CHARSET, "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
323 /* Utf16LEtoW */ CConverterType("UTF-16LE", WCHAR_CHARSET),
324 /* Utf16BEtoUtf8 */ CConverterType("UTF-16BE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
325 /* Utf16LEtoUtf8 */ CConverterType("UTF-16LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
326 /* Utf8toW */ CConverterType(UTF8_SOURCE, WCHAR_CHARSET),
327 /* Utf8ToSystem */ CConverterType(UTF8_SOURCE, SystemCharset),
328 /* Ucs2CharsetToUtf8 */ CConverterType("UCS-2LE", "UTF-8", CCharsetConverter::m_Utf8CharMaxSize)
// Lock taken by logicalToVisualBiDi() around its use of libfribidi.
331 CCriticalSection CCharsetConverter::CInnerConverter::m_critSectionFriBiDi;
/**
 * Convert strSource with one of the predefined converters of m_stdConversion.
 *
 * @param convertType        selects the table entry; it is range-checked before the table is indexed
 * @param strSource          input string; empty input is checked for up front
 * @param strDest            receives the converted string
 * @param failOnInvalidChar  forwarded unchanged to convert()
 *
 * The selected converter is locked for the whole conversion, so concurrent calls that use the same
 * table entry run one after another.
 */
335 template<class INPUT,class OUTPUT>
336 bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
339 if (strSource.empty())
342 if (convertType < 0 || convertType >= NumberOfStdConversionTypes)
345 CConverterType& convType = m_stdConversion[convertType];
346 CSingleLock converterLock(convType);
// GetConverter() verifies that converterLock really is a lock on convType
348 return convert(convType.GetConverter(converterLock), convType.GetTargetSingleCharMaxLen(), strSource, strDest, failOnInvalidChar);
/**
 * Convert strSource between two charsets given by name, using an iconv descriptor that is
 * opened for this call. A failing iconv_open() is logged with errno.
 *
 * @param sourceCharset      iconv name of the input charset
 * @param targetCharset      iconv name of the output charset
 * @param strSource          input string; empty input is checked for up front
 * @param strDest            receives the converted string
 * @param failOnInvalidChar  forwarded unchanged to convert()
 */
351 template<class INPUT,class OUTPUT>
352 bool CCharsetConverter::CInnerConverter::customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
355 if (strSource.empty())
358 iconv_t conv = iconv_open(targetCharset.c_str(), sourceCharset.c_str());
359 if (conv == NO_ICONV)
361 CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)",
362 __FUNCTION__, sourceCharset.c_str(), targetCharset.c_str(), errno, strerror(errno));
// targets whose name starts with "UTF-8" may need several bytes per character; others start with a factor of 1
365 const int dstMultp = (targetCharset.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize : 1;
366 const bool result = convert(conv, dstMultp, strSource, strDest, failOnInvalidChar);
373 /* iconv may declare inbuf to be char** rather than const char** depending on platform and version,
374 so provide a wrapper that handles both */
// Holds a const char** and converts implicitly to whichever of the two pointer types the
// iconv() prototype in use expects.
375 struct charPtrPtrAdapter
377 const char** pointer;
378 charPtrPtrAdapter(const char** p) :
// const_cast only adapts to the prototype; the wrapped pointer itself is not changed
381 { return const_cast<char**>(pointer); }
382 operator const char**()
/**
 * Run iconv() over strSource and store the converted text in strDest.
 *
 * @param type               iconv descriptor to use; NO_ICONV is checked for up front
 * @param multiplier         factor applied to the initial output buffer size (callers pass the
 *                           maximum length of one character in the target charset)
 * @param strSource          input string; its terminating zero is passed through iconv() as well
 * @param strDest            receives the converted string
 * @param failOnInvalidChar  decides how invalid input (EILSEQ) is handled; when false, an incomplete
 *                           sequence at the end of input (EINVAL) is not treated as an error and the
 *                           part converted so far is used
 *
 * The output buffer is allocated with malloc() and grown with realloc() whenever iconv() reports
 * E2BIG. After the conversion the descriptor is returned to its initial state so that it can be reused.
 */
386 template<class INPUT,class OUTPUT>
387 bool CCharsetConverter::CInnerConverter::convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
389 if (type == NO_ICONV)
392 //input buffer for iconv() is the buffer from strSource
// "+ 1" includes the terminating zero; sizes are in bytes, hence the sizeof factor
393 size_t inBufSize = (strSource.length() + 1) * sizeof(typename INPUT::value_type);
394 const char* inBuf = (const char*)strSource.c_str();
396 //allocate output buffer for iconv()
397 size_t outBufSize = (strSource.length() + 1) * sizeof(typename OUTPUT::value_type) * multiplier;
398 char* outBuf = (char*)malloc(outBufSize);
401 CLog::Log(LOGSEVERE, "%s: malloc failed", __FUNCTION__);
405 size_t inBytesAvail = inBufSize; //how many bytes iconv() can read
406 size_t outBytesAvail = outBufSize; //how many bytes iconv() can write
407 const char* inBufStart = inBuf; //where in our input buffer iconv() should start reading
408 char* outBufStart = outBuf; //where in out output buffer iconv() should start writing
413 //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
414 returnV = iconv(type, charPtrPtrAdapter(&inBufStart), &inBytesAvail, &outBufStart, &outBytesAvail);
416 if (returnV == (size_t)-1)
418 if (errno == E2BIG) //output buffer is not big enough
420 //save where iconv() ended converting, realloc might make outBufStart invalid
421 size_t bytesConverted = outBufSize - outBytesAvail;
423 //make buffer twice as big
425 char* newBuf = (char*)realloc(outBuf, outBufSize);
428 CLog::Log(LOGSEVERE, "%s realloc failed with errno=%d(%s)",
429 __FUNCTION__, errno, strerror(errno));
434 //update the buffer pointer and counter
435 outBufStart = outBuf + bytesConverted;
436 outBytesAvail = outBufSize - bytesConverted;
438 //continue in the loop and convert the rest
441 else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input
443 if (failOnInvalidChar)
449 //continue in the loop and convert the rest
452 else if (errno == EINVAL) /* Invalid sequence at the end of input buffer */
454 if (!failOnInvalidChar)
455 returnV = 0; /* reset error status to use converted part */
459 else //iconv() had some other error
461 CLog::Log(LOGERROR, "%s: iconv() failed, errno=%d (%s)",
462 __FUNCTION__, errno, strerror(errno));
468 //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call
// a NULL input pointer asks iconv() to emit any pending shift sequence and reset its state
469 if (iconv(type, NULL, NULL, &outBufStart, &outBytesAvail) == (size_t)-1)
470 CLog::Log(LOGERROR, "%s failed cleanup errno=%d(%s)", __FUNCTION__, errno, strerror(errno));
472 if (returnV == (size_t)-1)
// number of output characters actually written, including a converted terminating zero if present
479 const typename OUTPUT::size_type sizeInChars = (typename OUTPUT::size_type) (outBufSize - outBytesAvail) / sizeof(typename OUTPUT::value_type);
480 typename OUTPUT::const_pointer strPtr = (typename OUTPUT::const_pointer) outBuf;
481 /* Make sure that all buffer is assigned and string is stopped at end of buffer */
// drop the trailing zero that came from converting the source terminator, unless the source itself ended in a zero
// NOTE(review): both index expressions underflow when sizeInChars is 0 or strSource is empty. The visible
// callers reject empty input, but confirm that a zero-length output (e.g. after EINVAL) cannot reach this line.
482 if (strPtr[sizeInChars-1] == 0 && strSource[strSource.length()-1] != 0)
483 strDest.assign(strPtr, sizeInChars-1);
485 strDest.assign(strPtr, sizeInChars);
/**
 * Reorder a UTF-32 string from logical into visual order with FriBidi.
 * The input is processed one line at a time (a line ends with '\n', which stays part of the line).
 * Bidirectional marks are removed from each reordered line before it is appended to stringDst.
 *
 * @param stringSrc        text in logical order
 * @param stringDst        receives the text in visual order
 * @param base             base direction handed to fribidi_log2vis() for every line
 * @param failOnBadString  value that bidiFailed takes when FriBidi cannot process a line
 *
 * The final return statement reports whether any output was produced.
 */
492 bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi(const std::u32string& stringSrc, std::u32string& stringDst, FriBidiCharType base /*= FRIBIDI_TYPE_LTR*/, const bool failOnBadString /*= false*/)
496 const size_t srcLen = stringSrc.length();
500 stringDst.reserve(srcLen);
501 size_t lineStart = 0;
503 // libfribidi is not threadsafe, so make sure we make it so
504 CSingleLock lock(m_critSectionFriBiDi);
507 size_t lineEnd = stringSrc.find('\n', lineStart);
508 if (lineEnd >= srcLen) // equal to 'lineEnd == std::string::npos'
511 lineEnd++; // include '\n'
513 const size_t lineLen = lineEnd - lineStart;
// scratch buffer for the visual-order copy of the current line
515 FriBidiChar* visual = (FriBidiChar*) malloc((lineLen + 1) * sizeof(FriBidiChar));
519 CLog::Log(LOGSEVERE, "%s: can't allocate memory", __FUNCTION__);
523 bool bidiFailed = false;
524 FriBidiCharType baseCopy = base; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value
// NOTE(review): the cast assumes FriBidiChar and char32_t have the same size and representation - confirm
525 if (fribidi_log2vis((const FriBidiChar*)(stringSrc.c_str() + lineStart), lineLen, &baseCopy, visual, NULL, NULL, NULL))
527 // Removes bidirectional marks
528 const int newLen = fribidi_remove_bidi_marks(visual, lineLen, NULL, NULL, NULL);
530 stringDst.append((const char32_t*)visual, (size_t)newLen);
532 bidiFailed = failOnBadString;
535 bidiFailed = failOnBadString;
543 } while (lineStart < srcLen);
545 return !stringDst.empty();
/* Charsets offered to the user.
 *   charset - name of the charset; matched case-insensitively by getCharsetLabelByName()
 *   caption - human readable label; matched case-insensitively by getCharsetNameByLabel()
 * The table ends with an entry whose charset is NULL; every loop over g_charsets stops there. */
static struct SCharsetMapping
{
  const char* charset;
  const char* caption;
} g_charsets[] = {
   { "ISO-8859-1", "Western Europe (ISO)" }
 , { "ISO-8859-2", "Central Europe (ISO)" }
 , { "ISO-8859-3", "South Europe (ISO)" }
 , { "ISO-8859-4", "Baltic (ISO)" }
 , { "ISO-8859-5", "Cyrillic (ISO)" }
 , { "ISO-8859-6", "Arabic (ISO)" }
 , { "ISO-8859-7", "Greek (ISO)" }
 , { "ISO-8859-8", "Hebrew (ISO)" }
 , { "ISO-8859-9", "Turkish (ISO)" }
 , { "CP1250", "Central Europe (Windows)" }
 , { "CP1251", "Cyrillic (Windows)" }
 , { "CP1252", "Western Europe (Windows)" }
 , { "CP1253", "Greek (Windows)" }
 , { "CP1254", "Turkish (Windows)" }
 , { "CP1255", "Hebrew (Windows)" }
 , { "CP1256", "Arabic (Windows)" }
 , { "CP1257", "Baltic (Windows)" }
 , { "CP1258", "Vietnamese (Windows)" } // spelling of the label corrected (was "Vietnamesse")
 , { "CP874", "Thai (Windows)" }
 , { "BIG5", "Chinese Traditional (Big5)" }
 , { "GBK", "Chinese Simplified (GBK)" }
 , { "SHIFT_JIS", "Japanese (Shift-JIS)" }
 , { "CP949", "Korean" }
 , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" }
 , { NULL, NULL }
};
// Default constructor. NOTE(review): the body appears to be empty - the converter state used by
// this class lives in the static members of CInnerConverter.
582 CCharsetConverter::CCharsetConverter()
/**
 * Settings callback: reacts to a change of one of the charset settings ("locale.charset",
 * "subtitles.charset", "karaoke.charset") by resetting the converters that depend on it.
 * Settings with any other id are ignored.
 */
586 void CCharsetConverter::OnSettingChanged(const CSetting* setting)
591 const std::string& settingId = setting->GetId();
592 if (settingId == "locale.charset")
594 else if (settingId == "subtitles.charset")
595 resetSubtitleCharset();
596 else if (settingId == "karaoke.charset")
597 resetKaraokeCharset();
// NOTE(review): the body appears to be empty, i.e. this looks like a no-op - confirm and document why it is kept.
600 void CCharsetConverter::clear()
604 std::vector<std::string> CCharsetConverter::getCharsetLabels()
606 std::vector<std::string> lab;
607 for(SCharsetMapping* c = g_charsets; c->charset; c++)
608 lab.push_back(c->caption);
// Searches g_charsets for the entry whose charset name equals charsetName, ignoring case.
// Presumably the caption of that entry is the result - confirm against the return statements.
613 std::string CCharsetConverter::getCharsetLabelByName(const std::string& charsetName)
615 for(SCharsetMapping* c = g_charsets; c->charset; c++)
617 if (StringUtils::EqualsNoCase(charsetName,c->charset))
// Searches g_charsets for the entry whose caption equals charsetLabel, ignoring case.
// Presumably the charset name of that entry is the result - confirm against the return statements.
624 std::string CCharsetConverter::getCharsetNameByLabel(const std::string& charsetLabel)
626 for(SCharsetMapping* c = g_charsets; c->charset; c++)
628 if (StringUtils::EqualsNoCase(charsetLabel, c->caption))
635 void CCharsetConverter::reset(void)
637 for (int i = 0; i < NumberOfStdConversionTypes; i++)
638 CInnerConverter::m_stdConversion[i].Reset();
641 void CCharsetConverter::resetSystemCharset(void)
643 CInnerConverter::m_stdConversion[Utf8ToSystem].Reset();
646 void CCharsetConverter::resetUserCharset(void)
648 CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
649 CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
650 CInnerConverter::m_stdConversion[Utf32ToUserCharset].Reset();
651 resetSubtitleCharset();
652 resetKaraokeCharset();
655 void CCharsetConverter::resetSubtitleCharset(void)
657 CInnerConverter::m_stdConversion[SubtitleCharsetToW].Reset();
// Counterpart of resetSubtitleCharset() for the "karaoke.charset" setting; called from
// OnSettingChanged() and resetUserCharset(). NOTE(review): the body appears to be empty, which
// matches m_stdConversion having no entry that uses KaraokeCharset - confirm.
660 void CCharsetConverter::resetKaraokeCharset(void)
664 void CCharsetConverter::reinitCharsetsFromSettings(void)
666 resetUserCharset(); // this will also reinit Subtitle and Karaoke charsets
669 bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
671 return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
674 std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/)
676 std::u32string converted;
677 utf8ToUtf32(utf8StringSrc, converted, failOnBadChar);
/**
 * Convert UTF-8 to UTF-32, optionally reordering the text into visual order for display.
 * Two paths are visible: one converts into a temporary and passes it through logicalToVisualBiDi(),
 * the other converts straight into utf32StringDst. Presumably bVisualBiDiFlip selects between
 * them - confirm.
 *
 * @param forceLTRReadingOrder  selects FRIBIDI_TYPE_LTR instead of FRIBIDI_TYPE_PDF as base direction
 * @param failOnBadChar         forwarded to both the charset conversion and the BiDi step
 */
681 bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
685 std::u32string converted;
686 if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, converted, failOnBadChar))
689 return CInnerConverter::logicalToVisualBiDi(converted, utf32StringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
691 return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
694 bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/)
696 return CInnerConverter::stdConvert(Utf32ToUtf8, utf32StringSrc, utf8StringDst, failOnBadChar);
699 std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/)
701 std::string converted;
702 utf32ToUtf8(utf32StringSrc, converted, failOnBadChar);
/**
 * Convert a UTF-32 string to a wide string.
 * When wchar_t is UCS-4 (WCHAR_IS_UCS_4) the code units are copied as they are, without iconv,
 * and failOnBadChar is not used. Otherwise the predefined Utf32ToW converter does the work.
 */
706 bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/)
708 #ifdef WCHAR_IS_UCS_4
709 wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length());
711 #else // !WCHAR_IS_UCS_4
712 return CInnerConverter::stdConvert(Utf32ToW, utf32StringSrc, wStringDst, failOnBadChar);
713 #endif // !WCHAR_IS_UCS_4
716 bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder /*= false*/, bool failOnBadString /*= false*/)
718 return CInnerConverter::logicalToVisualBiDi(logicalStringSrc, visualStringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadString);
721 bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
723 #ifdef WCHAR_IS_UCS_4
724 /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked.
725 * With this "conversion" we ensure that output will be valid UTF-32 string. */
727 return CInnerConverter::stdConvert(WToUtf32, wStringSrc, utf32StringDst, failOnBadChar);
730 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
731 // of the string is already made or the string is not displayed in the GUI
// Two paths are visible below: UTF-8 -> UTF-32 -> visual reordering -> wide string, and a direct
// UTF-8 -> wide string conversion with the predefined Utf8toW converter.
// forceLTRReadingOrder selects FRIBIDI_TYPE_LTR instead of FRIBIDI_TYPE_PDF as base direction;
// failOnBadChar is forwarded to every conversion step.
732 bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/,
733 bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
735 // Try to flip hebrew/arabic characters, if any
739 std::u32string utf32str;
740 if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32str, failOnBadChar))
743 std::u32string utf32flipped;
744 const bool bidiResult = CInnerConverter::logicalToVisualBiDi(utf32str, utf32flipped, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
// the final conversion is attempted even if the BiDi step failed; both results are combined
746 return CInnerConverter::stdConvert(Utf32ToW, utf32flipped, wStringDst, failOnBadChar) && bidiResult;
749 return CInnerConverter::stdConvert(Utf8toW, utf8StringSrc, wStringDst, failOnBadChar);
752 bool CCharsetConverter::subtitleCharsetToW(const std::string& stringSrc, std::wstring& wStringDst)
754 return CInnerConverter::stdConvert(SubtitleCharsetToW, stringSrc, wStringDst, false);
757 bool CCharsetConverter::fromW(const std::wstring& wStringSrc,
758 std::string& stringDst, const std::string& enc)
760 return CInnerConverter::customConvert(WCHAR_CHARSET, enc, wStringSrc, stringDst);
763 bool CCharsetConverter::toW(const std::string& stringSrc,
764 std::wstring& wStringDst, const std::string& enc)
766 return CInnerConverter::customConvert(enc, WCHAR_CHARSET, stringSrc, wStringDst);
769 bool CCharsetConverter::utf8ToStringCharset(const std::string& utf8StringSrc, std::string& stringDst)
771 return CInnerConverter::stdConvert(Utf8ToUserCharset, utf8StringSrc, stringDst);
774 bool CCharsetConverter::utf8ToStringCharset(std::string& stringSrcDst)
776 std::string strSrc(stringSrcDst);
777 return utf8ToStringCharset(strSrc, stringSrcDst);
/**
 * Convert stringSrc from the charset named by strSourceCharset to UTF-8.
 * When the source charset is exactly "UTF-8" the input is copied unchanged, without validation;
 * every other charset goes through a converter opened for this call.
 *
 * @param failOnBadChar  forwarded to the converter; not used on the plain-copy path
 */
780 bool CCharsetConverter::ToUtf8(const std::string& strSourceCharset, const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
// the comparison is case-sensitive, so e.g. "utf-8" takes the conversion path
782 if (strSourceCharset == "UTF-8")
783 { // simple case - no conversion necessary
784 utf8StringDst = stringSrc;
788 return CInnerConverter::customConvert(strSourceCharset, "UTF-8", stringSrc, utf8StringDst, failOnBadChar);
/**
 * Convert a UTF-8 string to the charset named by strDestCharset.
 * When the destination charset is exactly "UTF-8" the input is copied unchanged; every other
 * charset goes through a converter opened for this call, with UTF8_SOURCE as source charset.
 */
791 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::string& stringDst)
// the comparison is case-sensitive, so e.g. "utf-8" takes the conversion path
793 if (strDestCharset == "UTF-8")
794 { // simple case - no conversion necessary
795 stringDst = utf8StringSrc;
799 return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, stringDst);
802 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u16string& utf16StringDst)
804 return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf16StringDst);
807 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u32string& utf32StringDst)
809 return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf32StringDst);
812 bool CCharsetConverter::unknownToUTF8(std::string& stringSrcDst)
814 std::string source(stringSrcDst);
815 return unknownToUTF8(source, stringSrcDst);
/**
 * Produce UTF-8 from a string of unknown encoding.
 * Input that already is valid UTF-8 is copied unchanged; anything else is treated as text in the
 * user charset and converted with the predefined UserCharsetToUtf8 converter.
 *
 * @param failOnBadChar  forwarded to the converter; not used when the input already is UTF-8
 */
818 bool CCharsetConverter::unknownToUTF8(const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
820 // checks whether it's utf8 already, and if not converts it from the user charset
821 if (CUtf8Utils::isValidUtf8(stringSrc))
823 utf8StringDst = stringSrc;
826 return CInnerConverter::stdConvert(UserCharsetToUtf8, stringSrc, utf8StringDst, failOnBadChar);
829 bool CCharsetConverter::wToUTF8(const std::wstring& wStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
831 return CInnerConverter::stdConvert(WtoUtf8, wStringSrc, utf8StringDst, failOnBadChar);
834 bool CCharsetConverter::utf16BEtoUTF8(const std::u16string& utf16StringSrc, std::string& utf8StringDst)
836 return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst);
839 bool CCharsetConverter::utf16LEtoUTF8(const std::u16string& utf16StringSrc,
840 std::string& utf8StringDst)
842 return CInnerConverter::stdConvert(Utf16LEtoUtf8, utf16StringSrc, utf8StringDst);
845 bool CCharsetConverter::ucs2ToUTF8(const std::u16string& ucs2StringSrc, std::string& utf8StringDst)
847 return CInnerConverter::stdConvert(Ucs2CharsetToUtf8, ucs2StringSrc,utf8StringDst);
850 bool CCharsetConverter::utf16LEtoW(const std::u16string& utf16String, std::wstring& wString)
852 return CInnerConverter::stdConvert(Utf16LEtoW, utf16String, wString);
855 bool CCharsetConverter::utf32ToStringCharset(const std::u32string& utf32StringSrc, std::string& stringDst)
857 return CInnerConverter::stdConvert(Utf32ToUserCharset, utf32StringSrc, stringDst);
860 bool CCharsetConverter::utf8ToSystem(std::string& stringSrcDst, bool failOnBadChar /*= false*/)
862 std::string strSrc(stringSrcDst);
863 return CInnerConverter::stdConvert(Utf8ToSystem, strSrc, stringSrcDst, failOnBadChar);
/**
 * Reorder a UTF-8 string from logical to visual order.
 * The text is converted to UTF-32 and reordered with forced LTR reading order by
 * utf8ToUtf32Visual(), then converted back to UTF-8. utf8StringDst is cleared first.
 *
 * @param failOnBadString  forwarded to both conversion steps
 */
866 bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string& utf8StringSrc, std::string& utf8StringDst, bool failOnBadString /*= false*/)
868 utf8StringDst.clear();
869 std::u32string utf32flipped;
// arguments: bVisualBiDiFlip = true, forceLTRReadingOrder = true
870 if (!utf8ToUtf32Visual(utf8StringSrc, utf32flipped, true, true, failOnBadString))
873 return CInnerConverter::stdConvert(Utf32ToUtf8, utf32flipped, utf8StringDst, failOnBadString);
876 void CCharsetConverter::SettingOptionsCharsetsFiller(const CSetting* setting, std::vector< std::pair<std::string, std::string> >& list, std::string& current)
878 std::vector<std::string> vecCharsets = g_charsetConverter.getCharsetLabels();
879 sort(vecCharsets.begin(), vecCharsets.end(), sortstringbyname());
881 list.push_back(make_pair(g_localizeStrings.Get(13278), "DEFAULT")); // "Default"
882 for (int i = 0; i < (int) vecCharsets.size(); ++i)
883 list.push_back(make_pair(vecCharsets[i], g_charsetConverter.getCharsetNameByLabel(vecCharsets[i])));