code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/CharsetConverter.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20
  21 #include "CharsetConverter.h"
  22 #include "Util.h"
  23 #include "utils/StringUtils.h"
  24 #include <fribidi/fribidi.h>
  25 #include "LangInfo.h"
  26 #include "guilib/LocalizeStrings.h"
  27 #include "settings/lib/Setting.h"
  28 #include "settings/Settings.h"
  29 #include "threads/SingleLock.h"
  30 #include "utils/Utf8Utils.h"
  31 #include "log.h"
  32
  33 #include <errno.h>
  34 #include <iconv.h>
  35
  36 #if !defined(TARGET_WINDOWS) && defined(HAVE_CONFIG_H)
  37   #include "config.h"
  38 #endif
  39
  40 #ifdef WORDS_BIGENDIAN
  41   #define ENDIAN_SUFFIX "BE"
  42 #else
  43   #define ENDIAN_SUFFIX "LE"
  44 #endif
  45
  46 #if defined(TARGET_DARWIN)
  47   #define WCHAR_IS_UCS_4 1
  48   #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
  49   #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
  50   #define UTF8_SOURCE "UTF-8-MAC"
  51   #define WCHAR_CHARSET UTF32_CHARSET
  52 #elif defined(TARGET_WINDOWS)
  53   #define WCHAR_IS_UTF16 1
  54   #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
  55   #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
  56   #define UTF8_SOURCE "UTF-8"
  57   #define WCHAR_CHARSET UTF16_CHARSET
  58   #pragma comment(lib, "libfribidi.lib")
  59   #pragma comment(lib, "libiconv.lib")
  60 #elif defined(TARGET_ANDROID)
  61   #define WCHAR_IS_UCS_4 1
  62   #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
  63   #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
  64   #define UTF8_SOURCE "UTF-8"
  65   #define WCHAR_CHARSET UTF32_CHARSET
  66 #else
  67   #define UTF16_CHARSET "UTF-16" ENDIAN_SUFFIX
  68   #define UTF32_CHARSET "UTF-32" ENDIAN_SUFFIX
  69   #define UTF8_SOURCE "UTF-8"
  70   #define WCHAR_CHARSET "WCHAR_T"
  71   #if __STDC_ISO_10646__
  72     #ifdef SIZEOF_WCHAR_T
  73       #if SIZEOF_WCHAR_T == 4
  74         #define WCHAR_IS_UCS_4 1
  75       #elif SIZEOF_WCHAR_T == 2
  76         #define WCHAR_IS_UCS_2 1
  77       #endif
  78     #endif
  79   #endif
  80 #endif
  81
  82 #define NO_ICONV ((iconv_t)-1)
  83
  84 enum SpecialCharset
  85 {
  86   NotSpecialCharset = 0,
  87   SystemCharset,
  88   UserCharset /* locale.charset */,
  89   SubtitleCharset /* subtitles.charset */,
  90   KaraokeCharset /* karaoke.charset */
  91 };
  92
  93
  94 class CConverterType : public CCriticalSection
  95 {
  96 public:
  97   CConverterType(const std::string&  sourceCharset,        const std::string&  targetCharset,        unsigned int targetSingleCharMaxLen = 1);
  98   CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string&  targetCharset,        unsigned int targetSingleCharMaxLen = 1);
  99   CConverterType(const std::string&  sourceCharset,        enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
 100   CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen = 1);
 101   CConverterType(const CConverterType& other);
 102   ~CConverterType();
 103
 104   iconv_t GetConverter(CSingleLock& converterLock);
 105
 106   void Reset(void);
 107   void ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen = 1);
 108   std::string GetSourceCharset(void) const  { return m_sourceCharset; }
 109   std::string GetTargetCharset(void) const  { return m_targetCharset; }
 110   unsigned int GetTargetSingleCharMaxLen(void) const  { return m_targetSingleCharMaxLen; }
 111
 112 private:
 113   static std::string ResolveSpecialCharset(enum SpecialCharset charset);
 114
 115   enum SpecialCharset m_sourceSpecialCharset;
 116   std::string         m_sourceCharset;
 117   enum SpecialCharset m_targetSpecialCharset;
 118   std::string         m_targetCharset;
 119   iconv_t             m_iconv;
 120   unsigned int        m_targetSingleCharMaxLen;
 121 };
 122
 123 CConverterType::CConverterType(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
 124   m_sourceSpecialCharset(NotSpecialCharset),
 125   m_sourceCharset(sourceCharset),
 126   m_targetSpecialCharset(NotSpecialCharset),
 127   m_targetCharset(targetCharset),
 128   m_iconv(NO_ICONV),
 129   m_targetSingleCharMaxLen(targetSingleCharMaxLen)
 130 {
 131 }
 132
 133 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
 134   m_sourceSpecialCharset(sourceSpecialCharset),
 135   m_sourceCharset(),
 136   m_targetSpecialCharset(NotSpecialCharset),
 137   m_targetCharset(targetCharset),
 138   m_iconv(NO_ICONV),
 139   m_targetSingleCharMaxLen(targetSingleCharMaxLen)
 140 {
 141 }
 142
 143 CConverterType::CConverterType(const std::string& sourceCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
 144   m_sourceSpecialCharset(NotSpecialCharset),
 145   m_sourceCharset(sourceCharset),
 146   m_targetSpecialCharset(targetSpecialCharset),
 147   m_targetCharset(),
 148   m_iconv(NO_ICONV),
 149   m_targetSingleCharMaxLen(targetSingleCharMaxLen)
 150 {
 151 }
 152
 153 CConverterType::CConverterType(enum SpecialCharset sourceSpecialCharset, enum SpecialCharset targetSpecialCharset, unsigned int targetSingleCharMaxLen /*= 1*/) : CCriticalSection(),
 154   m_sourceSpecialCharset(sourceSpecialCharset),
 155   m_sourceCharset(),
 156   m_targetSpecialCharset(targetSpecialCharset),
 157   m_targetCharset(),
 158   m_iconv(NO_ICONV),
 159   m_targetSingleCharMaxLen(targetSingleCharMaxLen)
 160 {
 161 }
 162
 163 CConverterType::CConverterType(const CConverterType& other) : CCriticalSection(),
 164   m_sourceSpecialCharset(other.m_sourceSpecialCharset),
 165   m_sourceCharset(other.m_sourceCharset),
 166   m_targetSpecialCharset(other.m_targetSpecialCharset),
 167   m_targetCharset(other.m_targetCharset),
 168   m_iconv(NO_ICONV),
 169   m_targetSingleCharMaxLen(other.m_targetSingleCharMaxLen)
 170 {
 171 }
 172
 173
 174 CConverterType::~CConverterType()
 175 {
 176   CSingleLock lock(*this);
 177   if (m_iconv != NO_ICONV)
 178     iconv_close(m_iconv);
 179   lock.Leave(); // ensure unlocking before final destruction
 180 }
 181
 182
 183 iconv_t CConverterType::GetConverter(CSingleLock& converterLock)
 184 {
 185   // ensure that this unique instance is locked externally
 186   if (&converterLock.get_underlying() != this)
 187     return NO_ICONV;
 188
 189   if (m_iconv == NO_ICONV)
 190   {
 191     if (m_sourceSpecialCharset)
 192       m_sourceCharset = ResolveSpecialCharset(m_sourceSpecialCharset);
 193     if (m_targetSpecialCharset)
 194       m_targetCharset = ResolveSpecialCharset(m_targetSpecialCharset);
 195
 196     m_iconv = iconv_open(m_targetCharset.c_str(), m_sourceCharset.c_str());
 197
 198     if (m_iconv == NO_ICONV)
 199       CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)",
 200                 __FUNCTION__, m_sourceCharset.c_str(), m_targetCharset.c_str(), errno, strerror(errno));
 201   }
 202
 203   return m_iconv;
 204 }
 205
 206
 207 void CConverterType::Reset(void)
 208 {
 209   CSingleLock lock(*this);
 210   if (m_iconv != NO_ICONV)
 211   {
 212     iconv_close(m_iconv);
 213     m_iconv = NO_ICONV;
 214   }
 215
 216   if (m_sourceSpecialCharset)
 217     m_sourceCharset.clear();
 218   if (m_targetSpecialCharset)
 219     m_targetCharset.clear();
 220
 221 }
 222
 223 void CConverterType::ReinitTo(const std::string& sourceCharset, const std::string& targetCharset, unsigned int targetSingleCharMaxLen /*= 1*/)
 224 {
 225   CSingleLock lock(*this);
 226   if (sourceCharset != m_sourceCharset || targetCharset != m_targetCharset)
 227   {
 228     if (m_iconv != NO_ICONV)
 229     {
 230       iconv_close(m_iconv);
 231       m_iconv = NO_ICONV;
 232     }
 233
 234     m_sourceSpecialCharset = NotSpecialCharset;
 235     m_sourceCharset = sourceCharset;
 236     m_targetSpecialCharset = NotSpecialCharset;
 237     m_targetCharset = targetCharset;
 238     m_targetSingleCharMaxLen = targetSingleCharMaxLen;
 239   }
 240 }
 241
 242 std::string CConverterType::ResolveSpecialCharset(enum SpecialCharset charset)
 243 {
 244   switch (charset)
 245   {
 246   case SystemCharset:
 247     return "";
 248   case UserCharset:
 249     return g_langInfo.GetGuiCharSet();
 250   case SubtitleCharset:
 251     return g_langInfo.GetSubtitleCharSet();
 252   case KaraokeCharset:
 253     {
 254       CSetting* karaokeSetting = CSettings::Get().GetSetting("karaoke.charset");
 255       if (karaokeSetting == NULL || ((CSettingString*)karaokeSetting)->GetValue() == "DEFAULT")
 256         return g_langInfo.GetGuiCharSet();
 257
 258       return ((CSettingString*)karaokeSetting)->GetValue();
 259     }
 260   case NotSpecialCharset:
 261   default:
 262     return "UTF-8"; /* dummy value */
 263   }
 264 }
 265
 266
 267 enum StdConversionType /* Keep it in sync with CCharsetConverter::CInnerConverter::m_stdConversion */
 268 {
 269   NoConversion = -1,
 270   Utf8ToUtf32 = 0,
 271   Utf32ToUtf8,
 272   Utf32ToW,
 273   WToUtf32,
 274   SubtitleCharsetToW,
 275   Utf8ToUserCharset,
 276   UserCharsetToUtf8,
 277   Utf32ToUserCharset,
 278   WtoUtf8,
 279   Utf16LEtoW,
 280   Utf16BEtoUtf8,
 281   Utf16LEtoUtf8,
 282   Utf8toW,
 283   Utf8ToSystem,
 284   Ucs2CharsetToUtf8,
 285   NumberOfStdConversionTypes /* Dummy sentinel entry */
 286 };
 287
 288
 289 /* We don't want to pollute header file with many additional includes and definitions, so put
 290    here all staff that require usage of types defined in this file or in additional headers */
 291 class CCharsetConverter::CInnerConverter
 292 {
 293 public:
 294   static bool logicalToVisualBiDi(const std::u32string& stringSrc, std::u32string& stringDst, FriBidiCharType base = FRIBIDI_TYPE_LTR, const bool failOnBadString = false);
 295
 296   template<class INPUT,class OUTPUT>
 297   static bool stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
 298   template<class INPUT,class OUTPUT>
 299   static bool customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
 300
 301   template<class INPUT,class OUTPUT>
 302   static bool convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar = false);
 303
 304   static CConverterType m_stdConversion[NumberOfStdConversionTypes];
 305   static CCriticalSection m_critSectionFriBiDi;
 306 };
 307
 308 /* single symbol sizes in chars */
 309 const int CCharsetConverter::m_Utf8CharMinSize = 1;
 310 const int CCharsetConverter::m_Utf8CharMaxSize = 4;
 311
 312 CConverterType CCharsetConverter::CInnerConverter::m_stdConversion[NumberOfStdConversionTypes] = /* keep it in sync with enum StdConversionType */
 313 {
 314   /* Utf8ToUtf32 */         CConverterType(UTF8_SOURCE,     UTF32_CHARSET),
 315   /* Utf32ToUtf8 */         CConverterType(UTF32_CHARSET,   "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
 316   /* Utf32ToW */            CConverterType(UTF32_CHARSET,   WCHAR_CHARSET),
 317   /* WToUtf32 */            CConverterType(WCHAR_CHARSET,   UTF32_CHARSET),
 318   /* SubtitleCharsetToW */  CConverterType(SubtitleCharset, WCHAR_CHARSET),
 319   /* Utf8ToUserCharset */   CConverterType(UTF8_SOURCE,     UserCharset),
 320   /* UserCharsetToUtf8 */   CConverterType(UserCharset,     "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
 321   /* Utf32ToUserCharset */  CConverterType(UTF32_CHARSET,   UserCharset),
 322   /* WtoUtf8 */             CConverterType(WCHAR_CHARSET,   "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
 323   /* Utf16LEtoW */          CConverterType("UTF-16LE",      WCHAR_CHARSET),
 324   /* Utf16BEtoUtf8 */       CConverterType("UTF-16BE",      "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
 325   /* Utf16LEtoUtf8 */       CConverterType("UTF-16LE",      "UTF-8", CCharsetConverter::m_Utf8CharMaxSize),
 326   /* Utf8toW */             CConverterType(UTF8_SOURCE,     WCHAR_CHARSET),
 327   /* Utf8ToSystem */        CConverterType(UTF8_SOURCE,     SystemCharset),
 328   /* Ucs2CharsetToUtf8 */   CConverterType("UCS-2LE",       "UTF-8", CCharsetConverter::m_Utf8CharMaxSize)
 329 };
 330
 331 CCriticalSection CCharsetConverter::CInnerConverter::m_critSectionFriBiDi;
 332
 333
 334
 335 template<class INPUT,class OUTPUT>
 336 bool CCharsetConverter::CInnerConverter::stdConvert(StdConversionType convertType, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
 337 {
 338   strDest.clear();
 339   if (strSource.empty())
 340     return true;
 341
 342   if (convertType < 0 || convertType >= NumberOfStdConversionTypes)
 343     return false;
 344
 345   CConverterType& convType = m_stdConversion[convertType];
 346   CSingleLock converterLock(convType);
 347
 348   return convert(convType.GetConverter(converterLock), convType.GetTargetSingleCharMaxLen(), strSource, strDest, failOnInvalidChar);
 349 }
 350
 351 template<class INPUT,class OUTPUT>
 352 bool CCharsetConverter::CInnerConverter::customConvert(const std::string& sourceCharset, const std::string& targetCharset, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
 353 {
 354   strDest.clear();
 355   if (strSource.empty())
 356     return true;
 357
 358   iconv_t conv = iconv_open(targetCharset.c_str(), sourceCharset.c_str());
 359   if (conv == NO_ICONV)
 360   {
 361     CLog::Log(LOGERROR, "%s: iconv_open() for \"%s\" -> \"%s\" failed, errno = %d (%s)",
 362               __FUNCTION__, sourceCharset.c_str(), targetCharset.c_str(), errno, strerror(errno));
 363     return false;
 364   }
 365   const int dstMultp = (targetCharset.compare(0, 5, "UTF-8") == 0) ? CCharsetConverter::m_Utf8CharMaxSize : 1;
 366   const bool result = convert(conv, dstMultp, strSource, strDest, failOnInvalidChar);
 367   iconv_close(conv);
 368
 369   return result;
 370 }
 371
 372
 373 /* iconv may declare inbuf to be char** rather than const char** depending on platform and version,
 374     so provide a wrapper that handles both */
 375 struct charPtrPtrAdapter
 376 {
 377   const char** pointer;
 378   charPtrPtrAdapter(const char** p) :
 379     pointer(p) { }
 380   operator char**()
 381   { return const_cast<char**>(pointer); }
 382   operator const char**()
 383   { return pointer; }
 384 };
 385
 386 template<class INPUT,class OUTPUT>
 387 bool CCharsetConverter::CInnerConverter::convert(iconv_t type, int multiplier, const INPUT& strSource, OUTPUT& strDest, bool failOnInvalidChar /*= false*/)
 388 {
 389   if (type == NO_ICONV)
 390     return false;
 391
 392   //input buffer for iconv() is the buffer from strSource
 393   size_t      inBufSize  = (strSource.length() + 1) * sizeof(typename INPUT::value_type);
 394   const char* inBuf      = (const char*)strSource.c_str();
 395
 396   //allocate output buffer for iconv()
 397   size_t      outBufSize = (strSource.length() + 1) * sizeof(typename OUTPUT::value_type) * multiplier;
 398   char*       outBuf     = (char*)malloc(outBufSize);
 399   if (outBuf == NULL)
 400   {
 401       CLog::Log(LOGSEVERE, "%s: malloc failed", __FUNCTION__);
 402       return false;
 403   }
 404
 405   size_t      inBytesAvail  = inBufSize;  //how many bytes iconv() can read
 406   size_t      outBytesAvail = outBufSize; //how many bytes iconv() can write
 407   const char* inBufStart    = inBuf;      //where in our input buffer iconv() should start reading
 408   char*       outBufStart   = outBuf;     //where in out output buffer iconv() should start writing
 409
 410   size_t returnV;
 411   while(1)
 412   {
 413     //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
 414     returnV = iconv(type, charPtrPtrAdapter(&inBufStart), &inBytesAvail, &outBufStart, &outBytesAvail);
 415
 416     if (returnV == (size_t)-1)
 417     {
 418       if (errno == E2BIG) //output buffer is not big enough
 419       {
 420         //save where iconv() ended converting, realloc might make outBufStart invalid
 421         size_t bytesConverted = outBufSize - outBytesAvail;
 422
 423         //make buffer twice as big
 424         outBufSize   *= 2;
 425         char* newBuf  = (char*)realloc(outBuf, outBufSize);
 426         if (!newBuf)
 427         {
 428           CLog::Log(LOGSEVERE, "%s realloc failed with errno=%d(%s)",
 429                     __FUNCTION__, errno, strerror(errno));
 430           break;
 431         }
 432         outBuf = newBuf;
 433
 434         //update the buffer pointer and counter
 435         outBufStart   = outBuf + bytesConverted;
 436         outBytesAvail = outBufSize - bytesConverted;
 437
 438         //continue in the loop and convert the rest
 439         continue;
 440       }
 441       else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input
 442       {
 443         if (failOnInvalidChar)
 444           break;
 445
 446         //skip invalid byte
 447         inBufStart++;
 448         inBytesAvail--;
 449         //continue in the loop and convert the rest
 450         continue;
 451       }
 452       else if (errno == EINVAL) /* Invalid sequence at the end of input buffer */
 453       {
 454         if (!failOnInvalidChar)
 455           returnV = 0; /* reset error status to use converted part */
 456
 457         break;
 458       }
 459       else //iconv() had some other error
 460       {
 461         CLog::Log(LOGERROR, "%s: iconv() failed, errno=%d (%s)",
 462                   __FUNCTION__, errno, strerror(errno));
 463       }
 464     }
 465     break;
 466   }
 467
 468   //complete the conversion (reset buffers), otherwise the current data will prefix the data on the next call
 469   if (iconv(type, NULL, NULL, &outBufStart, &outBytesAvail) == (size_t)-1)
 470     CLog::Log(LOGERROR, "%s failed cleanup errno=%d(%s)", __FUNCTION__, errno, strerror(errno));
 471
 472   if (returnV == (size_t)-1)
 473   {
 474     free(outBuf);
 475     return false;
 476   }
 477   //we're done
 478
 479   const typename OUTPUT::size_type sizeInChars = (typename OUTPUT::size_type) (outBufSize - outBytesAvail) / sizeof(typename OUTPUT::value_type);
 480   typename OUTPUT::const_pointer strPtr = (typename OUTPUT::const_pointer) outBuf;
 481   /* Make sure that all buffer is assigned and string is stopped at end of buffer */
 482   if (strPtr[sizeInChars-1] == 0 && strSource[strSource.length()-1] != 0)
 483     strDest.assign(strPtr, sizeInChars-1);
 484   else
 485     strDest.assign(strPtr, sizeInChars);
 486
 487   free(outBuf);
 488
 489   return true;
 490 }
 491
 492 bool CCharsetConverter::CInnerConverter::logicalToVisualBiDi(const std::u32string& stringSrc, std::u32string& stringDst, FriBidiCharType base /*= FRIBIDI_TYPE_LTR*/, const bool failOnBadString /*= false*/)
 493 {
 494   stringDst.clear();
 495
 496   const size_t srcLen = stringSrc.length();
 497   if (srcLen == 0)
 498     return true;
 499
 500   stringDst.reserve(srcLen);
 501   size_t lineStart = 0;
 502
 503   // libfribidi is not threadsafe, so make sure we make it so
 504   CSingleLock lock(m_critSectionFriBiDi);
 505   do
 506   {
 507     size_t lineEnd = stringSrc.find('\n', lineStart);
 508     if (lineEnd >= srcLen) // equal to 'lineEnd == std::string::npos'
 509       lineEnd = srcLen;
 510     else
 511       lineEnd++; // include '\n'
 512
 513     const size_t lineLen = lineEnd - lineStart;
 514
 515     FriBidiChar* visual = (FriBidiChar*) malloc((lineLen + 1) * sizeof(FriBidiChar));
 516     if (visual == NULL)
 517     {
 518       free(visual);
 519       CLog::Log(LOGSEVERE, "%s: can't allocate memory", __FUNCTION__);
 520       return false;
 521     }
 522
 523     bool bidiFailed = false;
 524     FriBidiCharType baseCopy = base; // preserve same value for all lines, required because fribidi_log2vis will modify parameter value
 525     if (fribidi_log2vis((const FriBidiChar*)(stringSrc.c_str() + lineStart), lineLen, &baseCopy, visual, NULL, NULL, NULL))
 526     {
 527       // Removes bidirectional marks
 528       const int newLen = fribidi_remove_bidi_marks(visual, lineLen, NULL, NULL, NULL);
 529       if (newLen > 0)
 530         stringDst.append((const char32_t*)visual, (size_t)newLen);
 531       else if (newLen < 0)
 532         bidiFailed = failOnBadString;
 533     }
 534     else
 535       bidiFailed = failOnBadString;
 536
 537     free(visual);
 538
 539     if (bidiFailed)
 540       return false;
 541
 542     lineStart = lineEnd;
 543   } while (lineStart < srcLen);
 544
 545   return !stringDst.empty();
 546 }
 547
 548
 549 static struct SCharsetMapping
 550 {
 551   const char* charset;
 552   const char* caption;
 553 } g_charsets[] = {
 554   { "ISO-8859-1", "Western Europe (ISO)" }
 555   , { "ISO-8859-2", "Central Europe (ISO)" }
 556   , { "ISO-8859-3", "South Europe (ISO)" }
 557   , { "ISO-8859-4", "Baltic (ISO)" }
 558   , { "ISO-8859-5", "Cyrillic (ISO)" }
 559   , { "ISO-8859-6", "Arabic (ISO)" }
 560   , { "ISO-8859-7", "Greek (ISO)" }
 561   , { "ISO-8859-8", "Hebrew (ISO)" }
 562   , { "ISO-8859-9", "Turkish (ISO)" }
 563   , { "CP1250", "Central Europe (Windows)" }
 564   , { "CP1251", "Cyrillic (Windows)" }
 565   , { "CP1252", "Western Europe (Windows)" }
 566   , { "CP1253", "Greek (Windows)" }
 567   , { "CP1254", "Turkish (Windows)" }
 568   , { "CP1255", "Hebrew (Windows)" }
 569   , { "CP1256", "Arabic (Windows)" }
 570   , { "CP1257", "Baltic (Windows)" }
 571   , { "CP1258", "Vietnamesse (Windows)" }
 572   , { "CP874", "Thai (Windows)" }
 573   , { "BIG5", "Chinese Traditional (Big5)" }
 574   , { "GBK", "Chinese Simplified (GBK)" }
 575   , { "SHIFT_JIS", "Japanese (Shift-JIS)" }
 576   , { "CP949", "Korean" }
 577   , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)" }
 578   , { NULL, NULL }
 579 };
 580
 581
 582 CCharsetConverter::CCharsetConverter()
 583 {
 584 }
 585
 586 void CCharsetConverter::OnSettingChanged(const CSetting* setting)
 587 {
 588   if (setting == NULL)
 589     return;
 590
 591   const std::string& settingId = setting->GetId();
 592   if (settingId == "locale.charset")
 593     resetUserCharset();
 594   else if (settingId == "subtitles.charset")
 595     resetSubtitleCharset();
 596   else if (settingId == "karaoke.charset")
 597     resetKaraokeCharset();
 598 }
 599
 600 void CCharsetConverter::clear()
 601 {
 602 }
 603
 604 std::vector<std::string> CCharsetConverter::getCharsetLabels()
 605 {
 606   std::vector<std::string> lab;
 607   for(SCharsetMapping* c = g_charsets; c->charset; c++)
 608     lab.push_back(c->caption);
 609
 610   return lab;
 611 }
 612
 613 std::string CCharsetConverter::getCharsetLabelByName(const std::string& charsetName)
 614 {
 615   for(SCharsetMapping* c = g_charsets; c->charset; c++)
 616   {
 617     if (StringUtils::EqualsNoCase(charsetName,c->charset))
 618       return c->caption;
 619   }
 620
 621   return "";
 622 }
 623
 624 std::string CCharsetConverter::getCharsetNameByLabel(const std::string& charsetLabel)
 625 {
 626   for(SCharsetMapping* c = g_charsets; c->charset; c++)
 627   {
 628     if (StringUtils::EqualsNoCase(charsetLabel, c->caption))
 629       return c->charset;
 630   }
 631
 632   return "";
 633 }
 634
 635 void CCharsetConverter::reset(void)
 636 {
 637   for (int i = 0; i < NumberOfStdConversionTypes; i++)
 638     CInnerConverter::m_stdConversion[i].Reset();
 639 }
 640
 641 void CCharsetConverter::resetSystemCharset(void)
 642 {
 643   CInnerConverter::m_stdConversion[Utf8ToSystem].Reset();
 644 }
 645
 646 void CCharsetConverter::resetUserCharset(void)
 647 {
 648   CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
 649   CInnerConverter::m_stdConversion[UserCharsetToUtf8].Reset();
 650   CInnerConverter::m_stdConversion[Utf32ToUserCharset].Reset();
 651   resetSubtitleCharset();
 652   resetKaraokeCharset();
 653 }
 654
 655 void CCharsetConverter::resetSubtitleCharset(void)
 656 {
 657   CInnerConverter::m_stdConversion[SubtitleCharsetToW].Reset();
 658 }
 659
 660 void CCharsetConverter::resetKaraokeCharset(void)
 661 {
 662 }
 663
 664 void CCharsetConverter::reinitCharsetsFromSettings(void)
 665 {
 666   resetUserCharset(); // this will also reinit Subtitle and Karaoke charsets
 667 }
 668
 669 bool CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
 670 {
 671   return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
 672 }
 673
 674 std::u32string CCharsetConverter::utf8ToUtf32(const std::string& utf8StringSrc, bool failOnBadChar /*= true*/)
 675 {
 676   std::u32string converted;
 677   utf8ToUtf32(utf8StringSrc, converted, failOnBadChar);
 678   return converted;
 679 }
 680
 681 bool CCharsetConverter::utf8ToUtf32Visual(const std::string& utf8StringSrc, std::u32string& utf32StringDst, bool bVisualBiDiFlip /*= false*/, bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
 682 {
 683   if (bVisualBiDiFlip)
 684   {
 685     std::u32string converted;
 686     if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, converted, failOnBadChar))
 687       return false;
 688
 689     return CInnerConverter::logicalToVisualBiDi(converted, utf32StringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
 690   }
 691   return CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32StringDst, failOnBadChar);
 692 }
 693
 694 bool CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, std::string& utf8StringDst, bool failOnBadChar /*= true*/)
 695 {
 696   return CInnerConverter::stdConvert(Utf32ToUtf8, utf32StringSrc, utf8StringDst, failOnBadChar);
 697 }
 698
 699 std::string CCharsetConverter::utf32ToUtf8(const std::u32string& utf32StringSrc, bool failOnBadChar /*= false*/)
 700 {
 701   std::string converted;
 702   utf32ToUtf8(utf32StringSrc, converted, failOnBadChar);
 703   return converted;
 704 }
 705
 706 bool CCharsetConverter::utf32ToW(const std::u32string& utf32StringSrc, std::wstring& wStringDst, bool failOnBadChar /*= true*/)
 707 {
 708 #ifdef WCHAR_IS_UCS_4
 709   wStringDst.assign((const wchar_t*)utf32StringSrc.c_str(), utf32StringSrc.length());
 710   return true;
 711 #else // !WCHAR_IS_UCS_4
 712   return CInnerConverter::stdConvert(Utf32ToW, utf32StringSrc, wStringDst, failOnBadChar);
 713 #endif // !WCHAR_IS_UCS_4
 714 }
 715
 716 bool CCharsetConverter::utf32logicalToVisualBiDi(const std::u32string& logicalStringSrc, std::u32string& visualStringDst, bool forceLTRReadingOrder /*= false*/, bool failOnBadString /*= false*/)
 717 {
 718   return CInnerConverter::logicalToVisualBiDi(logicalStringSrc, visualStringDst, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadString);
 719 }
 720
 721 bool CCharsetConverter::wToUtf32(const std::wstring& wStringSrc, std::u32string& utf32StringDst, bool failOnBadChar /*= true*/)
 722 {
 723 #ifdef WCHAR_IS_UCS_4
 724   /* UCS-4 is almost equal to UTF-32, but UTF-32 has strict limits on possible values, while UCS-4 is usually unchecked.
 725    * With this "conversion" we ensure that output will be valid UTF-32 string. */
 726 #endif
 727   return CInnerConverter::stdConvert(WToUtf32, wStringSrc, utf32StringDst, failOnBadChar);
 728 }
 729
 730 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
 731 // of the string is already made or the string is not displayed in the GUI
 732 bool CCharsetConverter::utf8ToW(const std::string& utf8StringSrc, std::wstring& wStringDst, bool bVisualBiDiFlip /*= true*/,
 733                                 bool forceLTRReadingOrder /*= false*/, bool failOnBadChar /*= false*/)
 734 {
 735   // Try to flip hebrew/arabic characters, if any
 736   if (bVisualBiDiFlip)
 737   {
 738     wStringDst.clear();
 739     std::u32string utf32str;
 740     if (!CInnerConverter::stdConvert(Utf8ToUtf32, utf8StringSrc, utf32str, failOnBadChar))
 741       return false;
 742
 743     std::u32string utf32flipped;
 744     const bool bidiResult = CInnerConverter::logicalToVisualBiDi(utf32str, utf32flipped, forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF, failOnBadChar);
 745
 746     return CInnerConverter::stdConvert(Utf32ToW, utf32flipped, wStringDst, failOnBadChar) && bidiResult;
 747   }
 748
 749   return CInnerConverter::stdConvert(Utf8toW, utf8StringSrc, wStringDst, failOnBadChar);
 750 }
 751
 752 bool CCharsetConverter::subtitleCharsetToW(const std::string& stringSrc, std::wstring& wStringDst)
 753 {
 754   return CInnerConverter::stdConvert(SubtitleCharsetToW, stringSrc, wStringDst, false);
 755 }
 756
 757 bool CCharsetConverter::fromW(const std::wstring& wStringSrc,
 758                               std::string& stringDst, const std::string& enc)
 759 {
 760   return CInnerConverter::customConvert(WCHAR_CHARSET, enc, wStringSrc, stringDst);
 761 }
 762
 763 bool CCharsetConverter::toW(const std::string& stringSrc,
 764                             std::wstring& wStringDst, const std::string& enc)
 765 {
 766   return CInnerConverter::customConvert(enc, WCHAR_CHARSET, stringSrc, wStringDst);
 767 }
 768
 769 bool CCharsetConverter::utf8ToStringCharset(const std::string& utf8StringSrc, std::string& stringDst)
 770 {
 771   return CInnerConverter::stdConvert(Utf8ToUserCharset, utf8StringSrc, stringDst);
 772 }
 773
 774 bool CCharsetConverter::utf8ToStringCharset(std::string& stringSrcDst)
 775 {
 776   std::string strSrc(stringSrcDst);
 777   return utf8ToStringCharset(strSrc, stringSrcDst);
 778 }
 779
 780 bool CCharsetConverter::ToUtf8(const std::string& strSourceCharset, const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
 781 {
 782   if (strSourceCharset == "UTF-8")
 783   { // simple case - no conversion necessary
 784     utf8StringDst = stringSrc;
 785     return true;
 786   }
 787
 788   return CInnerConverter::customConvert(strSourceCharset, "UTF-8", stringSrc, utf8StringDst, failOnBadChar);
 789 }
 790
 791 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::string& stringDst)
 792 {
 793   if (strDestCharset == "UTF-8")
 794   { // simple case - no conversion necessary
 795     stringDst = utf8StringSrc;
 796     return true;
 797   }
 798
 799   return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, stringDst);
 800 }
 801
 802 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u16string& utf16StringDst)
 803 {
 804   return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf16StringDst);
 805 }
 806
 807 bool CCharsetConverter::utf8To(const std::string& strDestCharset, const std::string& utf8StringSrc, std::u32string& utf32StringDst)
 808 {
 809   return CInnerConverter::customConvert(UTF8_SOURCE, strDestCharset, utf8StringSrc, utf32StringDst);
 810 }
 811
 812 bool CCharsetConverter::unknownToUTF8(std::string& stringSrcDst)
 813 {
 814   std::string source(stringSrcDst);
 815   return unknownToUTF8(source, stringSrcDst);
 816 }
 817
 818 bool CCharsetConverter::unknownToUTF8(const std::string& stringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
 819 {
 820   // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset
 821   if (CUtf8Utils::isValidUtf8(stringSrc))
 822   {
 823     utf8StringDst = stringSrc;
 824     return true;
 825   }
 826   return CInnerConverter::stdConvert(UserCharsetToUtf8, stringSrc, utf8StringDst, failOnBadChar);
 827 }
 828
 829 bool CCharsetConverter::wToUTF8(const std::wstring& wStringSrc, std::string& utf8StringDst, bool failOnBadChar /*= false*/)
 830 {
 831   return CInnerConverter::stdConvert(WtoUtf8, wStringSrc, utf8StringDst, failOnBadChar);
 832 }
 833
 834 bool CCharsetConverter::utf16BEtoUTF8(const std::u16string& utf16StringSrc, std::string& utf8StringDst)
 835 {
 836   return CInnerConverter::stdConvert(Utf16BEtoUtf8, utf16StringSrc, utf8StringDst);
 837 }
 838
 839 bool CCharsetConverter::utf16LEtoUTF8(const std::u16string& utf16StringSrc,
 840                                       std::string& utf8StringDst)
 841 {
 842   return CInnerConverter::stdConvert(Utf16LEtoUtf8, utf16StringSrc, utf8StringDst);
 843 }
 844
 845 bool CCharsetConverter::ucs2ToUTF8(const std::u16string& ucs2StringSrc, std::string& utf8StringDst)
 846 {
 847   return CInnerConverter::stdConvert(Ucs2CharsetToUtf8, ucs2StringSrc,utf8StringDst);
 848 }
 849
 850 bool CCharsetConverter::utf16LEtoW(const std::u16string& utf16String, std::wstring& wString)
 851 {
 852   return CInnerConverter::stdConvert(Utf16LEtoW, utf16String, wString);
 853 }
 854
 855 bool CCharsetConverter::utf32ToStringCharset(const std::u32string& utf32StringSrc, std::string& stringDst)
 856 {
 857   return CInnerConverter::stdConvert(Utf32ToUserCharset, utf32StringSrc, stringDst);
 858 }
 859
 860 bool CCharsetConverter::utf8ToSystem(std::string& stringSrcDst, bool failOnBadChar /*= false*/)
 861 {
 862   std::string strSrc(stringSrcDst);
 863   return CInnerConverter::stdConvert(Utf8ToSystem, strSrc, stringSrcDst, failOnBadChar);
 864 }
 865
 866 bool CCharsetConverter::utf8logicalToVisualBiDi(const std::string& utf8StringSrc, std::string& utf8StringDst, bool failOnBadString /*= false*/)
 867 {
 868   utf8StringDst.clear();
 869   std::u32string utf32flipped;
 870   if (!utf8ToUtf32Visual(utf8StringSrc, utf32flipped, true, true, failOnBadString))
 871     return false;
 872
 873   return CInnerConverter::stdConvert(Utf32ToUtf8, utf32flipped, utf8StringDst, failOnBadString);
 874 }
 875
 876 void CCharsetConverter::SettingOptionsCharsetsFiller(const CSetting* setting, std::vector< std::pair<std::string, std::string> >& list, std::string& current)
 877 {
 878   std::vector<std::string> vecCharsets = g_charsetConverter.getCharsetLabels();
 879   sort(vecCharsets.begin(), vecCharsets.end(), sortstringbyname());
 880
 881   list.push_back(make_pair(g_localizeStrings.Get(13278), "DEFAULT")); // "Default"
 882   for (int i = 0; i < (int) vecCharsets.size(); ++i)
 883     list.push_back(make_pair(vecCharsets[i], g_charsetConverter.getCharsetNameByLabel(vecCharsets[i])));
 884 }