code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/CharsetConverter.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://www.xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20
  21 #include "CharsetConverter.h"
  22 #include "Util.h"
  23 #include <fribidi/fribidi.h>
  24 #include "LangInfo.h"
  25 #include "threads/SingleLock.h"
  26 #include "log.h"
  27
  28 #include <errno.h>
  29 #include <iconv.h>
  30
// Pick the iconv charset names used throughout this file:
//   WCHAR_CHARSET - the name handed to iconv for wchar_t strings
//   UTF8_SOURCE   - the name handed to iconv when UTF-8 is the input encoding
// The values differ per platform (Darwin, Win32, Android, everything else).
#if defined(TARGET_DARWIN)
#ifdef __POWERPC__
  #define WCHAR_CHARSET "UTF-32BE"
#else
  #define WCHAR_CHARSET "UTF-32LE"
#endif
  #define UTF8_SOURCE "UTF-8-MAC"
#elif defined(WIN32)
  #define WCHAR_CHARSET "UTF-16LE"
  #define UTF8_SOURCE "UTF-8"
  // On Win32 the fribidi and iconv import libraries are requested from here
  #pragma comment(lib, "libfribidi.lib")
  #pragma comment(lib, "libiconv.lib")
#elif defined(TARGET_ANDROID)
  #define UTF8_SOURCE "UTF-8"
#ifdef __BIG_ENDIAN__
  #define WCHAR_CHARSET "UTF-32BE"
#else
  #define WCHAR_CHARSET "UTF-32LE"
#endif
#else
  // Default: use iconv's "WCHAR_T" charset name
  #define WCHAR_CHARSET "WCHAR_T"
  #define UTF8_SOURCE "UTF-8"
#endif
  54
  55
// Cached iconv descriptors shared by all conversions in this file.
// (iconv_t)-1 means "not open"; the descriptors are opened on first use by
// the conversion functions below and closed again in CCharsetConverter::reset().
static iconv_t m_iconvStringCharsetToFontCharset = (iconv_t)-1;
static iconv_t m_iconvSubtitleCharsetToW         = (iconv_t)-1;
static iconv_t m_iconvUtf8ToStringCharset        = (iconv_t)-1;
static iconv_t m_iconvStringCharsetToUtf8        = (iconv_t)-1;
static iconv_t m_iconvUcs2CharsetToStringCharset = (iconv_t)-1;
static iconv_t m_iconvUtf32ToStringCharset       = (iconv_t)-1;
static iconv_t m_iconvWtoUtf8                    = (iconv_t)-1;
static iconv_t m_iconvUtf16LEtoW                 = (iconv_t)-1;
static iconv_t m_iconvUtf16BEtoUtf8              = (iconv_t)-1;
static iconv_t m_iconvUtf16LEtoUtf8              = (iconv_t)-1;
static iconv_t m_iconvUtf8toW                    = (iconv_t)-1;
static iconv_t m_iconvUcs2CharsetToUtf8          = (iconv_t)-1;

// FriBidi renamed its charset constants between versions; map both spellings
// onto FRIBIDI_UTF8 / FRIBIDI_NOTFOUND so the rest of the file can use one name.
// m_stringFribidiCharset is set in CCharsetConverter::reset() from the GUI charset.
#if defined(FRIBIDI_CHAR_SET_NOT_FOUND)
static FriBidiCharSet m_stringFribidiCharset     = FRIBIDI_CHAR_SET_NOT_FOUND;
#define FRIBIDI_UTF8 FRIBIDI_CHAR_SET_UTF8
#define FRIBIDI_NOTFOUND FRIBIDI_CHAR_SET_NOT_FOUND
#else /* compatibility to older version */
static FriBidiCharSet m_stringFribidiCharset     = FRIBIDI_CHARSET_NOT_FOUND;
#define FRIBIDI_UTF8 FRIBIDI_CHARSET_UTF8
#define FRIBIDI_NOTFOUND FRIBIDI_CHARSET_NOT_FOUND
#endif

// Taken around every use of the cached descriptors above and around the
// libfribidi calls in logicalToVisualBiDi().
static CCriticalSection            m_critSection;
  80
  81 static struct SFribidMapping
  82 {
  83   FriBidiCharSet name;
  84   const char*    charset;
  85 } g_fribidi[] = {
  86 #if defined(FRIBIDI_CHAR_SET_NOT_FOUND)
  87   { FRIBIDI_CHAR_SET_ISO8859_6, "ISO-8859-6"   }
  88 , { FRIBIDI_CHAR_SET_ISO8859_8, "ISO-8859-8"   }
  89 , { FRIBIDI_CHAR_SET_CP1255   , "CP1255"       }
  90 , { FRIBIDI_CHAR_SET_CP1255   , "Windows-1255" }
  91 , { FRIBIDI_CHAR_SET_CP1256   , "CP1256"       }
  92 , { FRIBIDI_CHAR_SET_CP1256   , "Windows-1256" }
  93 , { FRIBIDI_CHAR_SET_NOT_FOUND, NULL           }
  94 #else /* compatibility to older version */
  95   { FRIBIDI_CHARSET_ISO8859_6, "ISO-8859-6"   }
  96 , { FRIBIDI_CHARSET_ISO8859_8, "ISO-8859-8"   }
  97 , { FRIBIDI_CHARSET_CP1255   , "CP1255"       }
  98 , { FRIBIDI_CHARSET_CP1255   , "Windows-1255" }
  99 , { FRIBIDI_CHARSET_CP1256   , "CP1256"       }
 100 , { FRIBIDI_CHARSET_CP1256   , "Windows-1256" }
 101 , { FRIBIDI_CHARSET_NOT_FOUND, NULL           }
 102 #endif
 103 };
 104
 105 static struct SCharsetMapping
 106 {
 107   const char* charset;
 108   const char* caption;
 109 } g_charsets[] = {
 110    { "ISO-8859-1", "Western Europe (ISO)" }
 111  , { "ISO-8859-2", "Central Europe (ISO)" }
 112  , { "ISO-8859-3", "South Europe (ISO)"   }
 113  , { "ISO-8859-4", "Baltic (ISO)"         }
 114  , { "ISO-8859-5", "Cyrillic (ISO)"       }
 115  , { "ISO-8859-6", "Arabic (ISO)"         }
 116  , { "ISO-8859-7", "Greek (ISO)"          }
 117  , { "ISO-8859-8", "Hebrew (ISO)"         }
 118  , { "ISO-8859-9", "Turkish (ISO)"        }
 119  , { "CP1250"    , "Central Europe (Windows)" }
 120  , { "CP1251"    , "Cyrillic (Windows)"       }
 121  , { "CP1252"    , "Western Europe (Windows)" }
 122  , { "CP1253"    , "Greek (Windows)"          }
 123  , { "CP1254"    , "Turkish (Windows)"        }
 124  , { "CP1255"    , "Hebrew (Windows)"         }
 125  , { "CP1256"    , "Arabic (Windows)"         }
 126  , { "CP1257"    , "Baltic (Windows)"         }
 127  , { "CP1258"    , "Vietnamesse (Windows)"    }
 128  , { "CP874"     , "Thai (Windows)"           }
 129  , { "BIG5"      , "Chinese Traditional (Big5)" }
 130  , { "GBK"       , "Chinese Simplified (GBK)" }
 131  , { "SHIFT_JIS" , "Japanese (Shift-JIS)"     }
 132  , { "CP949"     , "Korean"                   }
 133  , { "BIG5-HKSCS", "Hong Kong (Big5-HKSCS)"   }
 134  , { NULL        , NULL                       }
 135 };
 136
 137
 138 #define UTF8_DEST_MULTIPLIER 6
 139
 140 #define ICONV_PREPARE(iconv) iconv=(iconv_t)-1
 141 #define ICONV_SAFE_CLOSE(iconv) if (iconv!=(iconv_t)-1) { iconv_close(iconv); iconv=(iconv_t)-1; }
 142
 143 size_t iconv_const (void* cd, const char** inbuf, size_t *inbytesleft,
 144                     char* * outbuf, size_t *outbytesleft)
 145 {
 146     struct iconv_param_adapter {
 147         iconv_param_adapter(const char**p) : p(p) {}
 148         iconv_param_adapter(char**p) : p((const char**)p) {}
 149         operator char**() const
 150         {
 151             return(char**)p;
 152         }
 153         operator const char**() const
 154         {
 155             return(const char**)p;
 156         }
 157         const char** p;
 158     };
 159
 160     return iconv((iconv_t)cd, iconv_param_adapter(inbuf), inbytesleft, outbuf, outbytesleft);
 161 }
 162
 163 template<class INPUT,class OUTPUT>
 164 static bool convert_checked(iconv_t& type, int multiplier, const CStdString& strFromCharset, const CStdString& strToCharset, const INPUT& strSource, OUTPUT& strDest)
 165 {
 166   if (type == (iconv_t)-1)
 167   {
 168     type = iconv_open(strToCharset.c_str(), strFromCharset.c_str());
 169     if (type == (iconv_t)-1) //iconv_open failed
 170     {
 171       CLog::Log(LOGERROR, "%s iconv_open() failed from %s to %s, errno=%d(%s)",
 172                 __FUNCTION__, strFromCharset.c_str(), strToCharset.c_str(), errno, strerror(errno));
 173       return false;
 174     }
 175   }
 176
 177   if (strSource.IsEmpty())
 178   {
 179     strDest.clear(); //empty strings are easy
 180     return true;
 181   }
 182
 183   //input buffer for iconv() is the buffer from strSource
 184   size_t      inBufSize  = (strSource.length() + 1) * sizeof(strSource[0]);
 185   const char* inBuf      = (const char*)strSource.c_str();
 186
 187   //allocate output buffer for iconv()
 188   size_t      outBufSize = (strSource.length() + 1) * multiplier;
 189   char*       outBuf     = (char*)malloc(outBufSize);
 190
 191   size_t      inBytesAvail  = inBufSize;  //how many bytes iconv() can read
 192   size_t      outBytesAvail = outBufSize; //how many bytes iconv() can write
 193   const char* inBufStart    = inBuf;      //where in our input buffer iconv() should start reading
 194   char*       outBufStart   = outBuf;     //where in out output buffer iconv() should start writing
 195
 196   while(1)
 197   {
 198     //iconv() will update inBufStart, inBytesAvail, outBufStart and outBytesAvail
 199     size_t returnV = iconv_const(type, &inBufStart, &inBytesAvail, &outBufStart, &outBytesAvail);
 200
 201     if ((returnV == (size_t)-1) && (errno != EINVAL))
 202     {
 203       if (errno == E2BIG) //output buffer is not big enough
 204       {
 205         //save where iconv() ended converting, realloc might make outBufStart invalid
 206         size_t bytesConverted = outBufSize - outBytesAvail;
 207
 208         //make buffer twice as big
 209         outBufSize   *= 2;
 210         char* newBuf  = (char*)realloc(outBuf, outBufSize);
 211         if (!newBuf)
 212         {
 213           CLog::Log(LOGERROR, "%s realloc failed with buffer=%p size=%zu errno=%d(%s)",
 214                     __FUNCTION__, outBuf, outBufSize, errno, strerror(errno));
 215           free(outBuf);
 216           return false;
 217         }
 218         outBuf = newBuf;
 219
 220         //update the buffer pointer and counter
 221         outBufStart   = outBuf + bytesConverted;
 222         outBytesAvail = outBufSize - bytesConverted;
 223
 224         //continue in the loop and convert the rest
 225       }
 226       else if (errno == EILSEQ) //An invalid multibyte sequence has been encountered in the input
 227       {
 228         //skip invalid byte
 229         inBufStart++;
 230         inBytesAvail--;
 231
 232         //continue in the loop and convert the rest
 233       }
 234       else //iconv() had some other error
 235       {
 236         CLog::Log(LOGERROR, "%s iconv() failed from %s to %s, errno=%d(%s)",
 237                   __FUNCTION__, strFromCharset.c_str(), strToCharset.c_str(), errno, strerror(errno));
 238         free(outBuf);
 239         return false;
 240       }
 241     }
 242     else
 243     {
 244       //complete the conversion, otherwise the current data will prefix the data on the next call
 245       returnV = iconv_const(type, NULL, NULL, &outBufStart, &outBytesAvail);
 246       if (returnV == (size_t)-1)
 247         CLog::Log(LOGERROR, "%s failed cleanup errno=%d(%s)", __FUNCTION__, errno, strerror(errno));
 248
 249       //we're done
 250       break;
 251     }
 252   }
 253
 254   size_t bytesWritten = outBufSize - outBytesAvail;
 255   char*  dest         = (char*)strDest.GetBuffer(bytesWritten);
 256
 257   //copy the output from iconv() into the CStdString
 258   memcpy(dest, outBuf, bytesWritten);
 259
 260   strDest.ReleaseBuffer();
 261
 262   free(outBuf);
 263
 264   return true;
 265 }
 266
 267 template<class INPUT,class OUTPUT>
 268 static void convert(iconv_t& type, int multiplier, const CStdString& strFromCharset, const CStdString& strToCharset, const INPUT& strSource,  OUTPUT& strDest)
 269 {
 270   if(!convert_checked(type, multiplier, strFromCharset, strToCharset, strSource, strDest))
 271     strDest = strSource;
 272 }
 273
 274 using namespace std;
 275
 276 static void logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest, FriBidiCharSet fribidiCharset, FriBidiCharType base = FRIBIDI_TYPE_LTR, bool* bWasFlipped =NULL)
 277 {
 278   // libfribidi is not threadsafe, so make sure we make it so
 279   CSingleLock lock(m_critSection);
 280
 281   vector<CStdString> lines;
 282   CUtil::Tokenize(strSource, lines, "\n");
 283   CStdString resultString;
 284
 285   if (bWasFlipped)
 286     *bWasFlipped = false;
 287
 288   for (unsigned int i = 0; i < lines.size(); i++)
 289   {
 290     int sourceLen = lines[i].length();
 291
 292     // Convert from the selected charset to Unicode
 293     FriBidiChar* logical = (FriBidiChar*) malloc((sourceLen + 1) * sizeof(FriBidiChar));
 294     int len = fribidi_charset_to_unicode(fribidiCharset, (char*) lines[i].c_str(), sourceLen, logical);
 295
 296     FriBidiChar* visual = (FriBidiChar*) malloc((len + 1) * sizeof(FriBidiChar));
 297     FriBidiLevel* levels = (FriBidiLevel*) malloc((len + 1) * sizeof(FriBidiLevel));
 298
 299     if (fribidi_log2vis(logical, len, &base, visual, NULL, NULL, NULL))
 300     {
 301       // Removes bidirectional marks
 302       len = fribidi_remove_bidi_marks(visual, len, NULL, NULL, NULL);
 303
 304       // Apperently a string can get longer during this transformation
 305       // so make sure we allocate the maximum possible character utf8
 306       // can generate atleast, should cover all bases
 307       char *result = strDest.GetBuffer(len*4);
 308
 309       // Convert back from Unicode to the charset
 310       int len2 = fribidi_unicode_to_charset(fribidiCharset, visual, len, result);
 311       ASSERT(len2 <= len*4);
 312       strDest.ReleaseBuffer();
 313
 314       resultString += strDest;
 315
 316       // Check whether the string was flipped if one of the embedding levels is greater than 0
 317       if (bWasFlipped && !*bWasFlipped)
 318       {
 319         for (int i = 0; i < len; i++)
 320         {
 321           if ((int) levels[i] > 0)
 322           {
 323             *bWasFlipped = true;
 324             break;
 325           }
 326         }
 327       }
 328     }
 329
 330     free(logical);
 331     free(visual);
 332     free(levels);
 333   }
 334
 335   strDest = resultString;
 336 }
 337
// Nothing to set up here: the converter state in this file lives in
// file-scope statics, which are initialised at their definitions.
CCharsetConverter::CCharsetConverter()
{
}
 341
// Intentionally empty. Use reset() to close the cached iconv descriptors.
void CCharsetConverter::clear()
{
}
 345
 346 vector<CStdString> CCharsetConverter::getCharsetLabels()
 347 {
 348   vector<CStdString> lab;
 349   for(SCharsetMapping * c = g_charsets; c->charset; c++)
 350     lab.push_back(c->caption);
 351
 352   return lab;
 353 }
 354
 355 CStdString CCharsetConverter::getCharsetLabelByName(const CStdString& charsetName)
 356 {
 357   for(SCharsetMapping * c = g_charsets; c->charset; c++)
 358   {
 359     if (charsetName.Equals(c->charset))
 360       return c->caption;
 361   }
 362
 363   return "";
 364 }
 365
 366 CStdString CCharsetConverter::getCharsetNameByLabel(const CStdString& charsetLabel)
 367 {
 368   for(SCharsetMapping *c = g_charsets; c->charset; c++)
 369   {
 370     if (charsetLabel.Equals(c->caption))
 371       return c->charset;
 372   }
 373
 374   return "";
 375 }
 376
 377 bool CCharsetConverter::isBidiCharset(const CStdString& charset)
 378 {
 379   for(SFribidMapping *c = g_fribidi; c->charset; c++)
 380   {
 381     if (charset.Equals(c->charset))
 382       return true;
 383   }
 384   return false;
 385 }
 386
 387 void CCharsetConverter::reset(void)
 388 {
 389   CSingleLock lock(m_critSection);
 390
 391   ICONV_SAFE_CLOSE(m_iconvStringCharsetToFontCharset);
 392   ICONV_SAFE_CLOSE(m_iconvUtf8ToStringCharset);
 393   ICONV_SAFE_CLOSE(m_iconvStringCharsetToUtf8);
 394   ICONV_SAFE_CLOSE(m_iconvUcs2CharsetToStringCharset);
 395   ICONV_SAFE_CLOSE(m_iconvSubtitleCharsetToW);
 396   ICONV_SAFE_CLOSE(m_iconvWtoUtf8);
 397   ICONV_SAFE_CLOSE(m_iconvUtf16BEtoUtf8);
 398   ICONV_SAFE_CLOSE(m_iconvUtf16LEtoUtf8);
 399   ICONV_SAFE_CLOSE(m_iconvUtf32ToStringCharset);
 400   ICONV_SAFE_CLOSE(m_iconvUtf8toW);
 401   ICONV_SAFE_CLOSE(m_iconvUcs2CharsetToUtf8);
 402
 403
 404   m_stringFribidiCharset = FRIBIDI_NOTFOUND;
 405
 406   CStdString strCharset=g_langInfo.GetGuiCharSet();
 407   for(SFribidMapping *c = g_fribidi; c->charset; c++)
 408   {
 409     if (strCharset.Equals(c->charset))
 410       m_stringFribidiCharset = c->name;
 411   }
 412 }
 413
 414 // The bVisualBiDiFlip forces a flip of characters for hebrew/arabic languages, only set to false if the flipping
 415 // of the string is already made or the string is not displayed in the GUI
 416 void CCharsetConverter::utf8ToW(const CStdStringA& utf8String, CStdStringW &wString, bool bVisualBiDiFlip/*=true*/, bool forceLTRReadingOrder /*=false*/, bool* bWasFlipped/*=NULL*/)
 417 {
 418   // Try to flip hebrew/arabic characters, if any
 419   if (bVisualBiDiFlip)
 420   {
 421     CStdStringA strFlipped;
 422     FriBidiCharType charset = forceLTRReadingOrder ? FRIBIDI_TYPE_LTR : FRIBIDI_TYPE_PDF;
 423     logicalToVisualBiDi(utf8String, strFlipped, FRIBIDI_UTF8, charset, bWasFlipped);
 424     CSingleLock lock(m_critSection);
 425     convert(m_iconvUtf8toW,sizeof(wchar_t),UTF8_SOURCE,WCHAR_CHARSET,strFlipped,wString);
 426   }
 427   else
 428   {
 429     CSingleLock lock(m_critSection);
 430     convert(m_iconvUtf8toW,sizeof(wchar_t),UTF8_SOURCE,WCHAR_CHARSET,utf8String,wString);
 431   }
 432 }
 433
 434 void CCharsetConverter::subtitleCharsetToW(const CStdStringA& strSource, CStdStringW& strDest)
 435 {
 436   // No need to flip hebrew/arabic as mplayer does the flipping
 437   CSingleLock lock(m_critSection);
 438   convert(m_iconvSubtitleCharsetToW,sizeof(wchar_t),g_langInfo.GetSubtitleCharSet(),WCHAR_CHARSET,strSource,strDest);
 439 }
 440
 441 void CCharsetConverter::fromW(const CStdStringW& strSource,
 442                               CStdStringA& strDest, const CStdString& enc)
 443 {
 444   iconv_t iconvString;
 445   ICONV_PREPARE(iconvString);
 446   convert(iconvString,4,WCHAR_CHARSET,enc,strSource,strDest);
 447   iconv_close(iconvString);
 448 }
 449
 450 void CCharsetConverter::toW(const CStdStringA& strSource,
 451                             CStdStringW& strDest, const CStdString& enc)
 452 {
 453   iconv_t iconvString;
 454   ICONV_PREPARE(iconvString);
 455   convert(iconvString,sizeof(wchar_t),enc,WCHAR_CHARSET,strSource,strDest);
 456   iconv_close(iconvString);
 457 }
 458
 459 void CCharsetConverter::utf8ToStringCharset(const CStdStringA& strSource, CStdStringA& strDest)
 460 {
 461   CSingleLock lock(m_critSection);
 462   convert(m_iconvUtf8ToStringCharset,1,UTF8_SOURCE,g_langInfo.GetGuiCharSet(),strSource,strDest);
 463 }
 464
 465 void CCharsetConverter::utf8ToStringCharset(CStdStringA& strSourceDest)
 466 {
 467   CStdString strDest;
 468   utf8ToStringCharset(strSourceDest, strDest);
 469   strSourceDest=strDest;
 470 }
 471
 472 void CCharsetConverter::stringCharsetToUtf8(const CStdStringA& strSourceCharset, const CStdStringA& strSource, CStdStringA& strDest)
 473 {
 474   iconv_t iconvString;
 475   ICONV_PREPARE(iconvString);
 476   convert(iconvString,UTF8_DEST_MULTIPLIER,strSourceCharset,"UTF-8",strSource,strDest);
 477   iconv_close(iconvString);
 478 }
 479
 480 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdStringA& strDest)
 481 {
 482   if (strDestCharset == "UTF-8")
 483   { // simple case - no conversion necessary
 484     strDest = strSource;
 485     return;
 486   }
 487   iconv_t iconvString;
 488   ICONV_PREPARE(iconvString);
 489   convert(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest);
 490   iconv_close(iconvString);
 491 }
 492
 493 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdString16& strDest)
 494 {
 495   iconv_t iconvString;
 496   ICONV_PREPARE(iconvString);
 497   if(!convert_checked(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest))
 498     strDest.clear();
 499   iconv_close(iconvString);
 500 }
 501
 502 void CCharsetConverter::utf8To(const CStdStringA& strDestCharset, const CStdStringA& strSource, CStdString32& strDest)
 503 {
 504   iconv_t iconvString;
 505   ICONV_PREPARE(iconvString);
 506   if(!convert_checked(iconvString,UTF8_DEST_MULTIPLIER,UTF8_SOURCE,strDestCharset,strSource,strDest))
 507     strDest.clear();
 508   iconv_close(iconvString);
 509 }
 510
 511 void CCharsetConverter::unknownToUTF8(CStdStringA &sourceAndDest)
 512 {
 513   CStdString source = sourceAndDest;
 514   unknownToUTF8(source, sourceAndDest);
 515 }
 516
 517 void CCharsetConverter::unknownToUTF8(const CStdStringA &source, CStdStringA &dest)
 518 {
 519   // checks whether it's utf8 already, and if not converts using the sourceCharset if given, else the string charset
 520   if (isValidUtf8(source))
 521     dest = source;
 522   else
 523   {
 524     CSingleLock lock(m_critSection);
 525     convert(m_iconvStringCharsetToUtf8, UTF8_DEST_MULTIPLIER, g_langInfo.GetGuiCharSet(), "UTF-8", source, dest);
 526   }
 527 }
 528
 529 void CCharsetConverter::wToUTF8(const CStdStringW& strSource, CStdStringA &strDest)
 530 {
 531   CSingleLock lock(m_critSection);
 532   convert(m_iconvWtoUtf8,UTF8_DEST_MULTIPLIER,WCHAR_CHARSET,"UTF-8",strSource,strDest);
 533 }
 534
 535 void CCharsetConverter::utf16BEtoUTF8(const CStdString16& strSource, CStdStringA &strDest)
 536 {
 537   CSingleLock lock(m_critSection);
 538   if(!convert_checked(m_iconvUtf16BEtoUtf8,UTF8_DEST_MULTIPLIER,"UTF-16BE","UTF-8",strSource,strDest))
 539     strDest.clear();
 540 }
 541
 542 void CCharsetConverter::utf16LEtoUTF8(const CStdString16& strSource,
 543                                       CStdStringA &strDest)
 544 {
 545   CSingleLock lock(m_critSection);
 546   if(!convert_checked(m_iconvUtf16LEtoUtf8,UTF8_DEST_MULTIPLIER,"UTF-16LE","UTF-8",strSource,strDest))
 547     strDest.clear();
 548 }
 549
 550 void CCharsetConverter::ucs2ToUTF8(const CStdString16& strSource, CStdStringA& strDest)
 551 {
 552   CSingleLock lock(m_critSection);
 553   if(!convert_checked(m_iconvUcs2CharsetToUtf8,UTF8_DEST_MULTIPLIER,"UCS-2LE","UTF-8",strSource,strDest))
 554     strDest.clear();
 555 }
 556
 557 void CCharsetConverter::utf16LEtoW(const CStdString16& strSource, CStdStringW &strDest)
 558 {
 559   CSingleLock lock(m_critSection);
 560   if(!convert_checked(m_iconvUtf16LEtoW,sizeof(wchar_t),"UTF-16LE",WCHAR_CHARSET,strSource,strDest))
 561     strDest.clear();
 562 }
 563
 564 void CCharsetConverter::ucs2CharsetToStringCharset(const CStdStringW& strSource, CStdStringA& strDest, bool swap)
 565 {
 566   CStdStringW strCopy = strSource;
 567   if (swap)
 568   {
 569     char* s = (char*) strCopy.c_str();
 570
 571     while (*s || *(s + 1))
 572     {
 573       char c = *s;
 574       *s = *(s + 1);
 575       *(s + 1) = c;
 576
 577       s++;
 578       s++;
 579     }
 580   }
 581   CSingleLock lock(m_critSection);
 582   convert(m_iconvUcs2CharsetToStringCharset,4,"UTF-16LE",
 583           g_langInfo.GetGuiCharSet(),strCopy,strDest);
 584 }
 585
 586 void CCharsetConverter::utf32ToStringCharset(const unsigned long* strSource, CStdStringA& strDest)
 587 {
 588   CSingleLock lock(m_critSection);
 589
 590   if (m_iconvUtf32ToStringCharset == (iconv_t) - 1)
 591   {
 592     CStdString strCharset=g_langInfo.GetGuiCharSet();
 593     m_iconvUtf32ToStringCharset = iconv_open(strCharset.c_str(), "UTF-32LE");
 594   }
 595
 596   if (m_iconvUtf32ToStringCharset != (iconv_t) - 1)
 597   {
 598     const unsigned long* ptr=strSource;
 599     while (*ptr) ptr++;
 600     const char* src = (const char*) strSource;
 601     size_t inBytes = (ptr-strSource+1)*4;
 602
 603     char *dst = strDest.GetBuffer(inBytes);
 604     size_t outBytes = inBytes;
 605
 606     if (iconv_const(m_iconvUtf32ToStringCharset, &src, &inBytes, &dst, &outBytes) == (size_t)-1)
 607     {
 608       CLog::Log(LOGERROR, "%s failed", __FUNCTION__);
 609       strDest.ReleaseBuffer();
 610       strDest = (const char *)strSource;
 611       return;
 612     }
 613
 614     if (iconv(m_iconvUtf32ToStringCharset, NULL, NULL, &dst, &outBytes) == (size_t)-1)
 615     {
 616       CLog::Log(LOGERROR, "%s failed cleanup", __FUNCTION__);
 617       strDest.ReleaseBuffer();
 618       strDest = (const char *)strSource;
 619       return;
 620     }
 621
 622     strDest.ReleaseBuffer();
 623   }
 624 }
 625
 626 void CCharsetConverter::utf8ToSystem(CStdStringA& strSourceDest)
 627 {
 628   CStdString strDest;
 629   g_charsetConverter.utf8To("", strSourceDest, strDest);
 630   strSourceDest = strDest;
 631 }
 632
 633 // Taken from RFC2640
 634 bool CCharsetConverter::isValidUtf8(const char *buf, unsigned int len)
 635 {
 636   const unsigned char *endbuf = (unsigned char*)buf + len;
 637   unsigned char byte2mask=0x00, c;
 638   int trailing=0; // trailing (continuation) bytes to follow
 639
 640   while ((unsigned char*)buf != endbuf)
 641   {
 642     c = *buf++;
 643     if (trailing)
 644       if ((c & 0xc0) == 0x80) // does trailing byte follow UTF-8 format ?
 645       {
 646         if (byte2mask) // need to check 2nd byte for proper range
 647         {
 648           if (c & byte2mask) // are appropriate bits set ?
 649             byte2mask = 0x00;
 650           else
 651             return false;
 652         }
 653         trailing--;
 654       }
 655       else
 656         return 0;
 657     else
 658       if ((c & 0x80) == 0x00) continue; // valid 1-byte UTF-8
 659       else if ((c & 0xe0) == 0xc0)      // valid 2-byte UTF-8
 660         if (c & 0x1e)                   //is UTF-8 byte in proper range ?
 661           trailing = 1;
 662         else
 663           return false;
 664       else if ((c & 0xf0) == 0xe0)      // valid 3-byte UTF-8
 665        {
 666         if (!(c & 0x0f))                // is UTF-8 byte in proper range ?
 667           byte2mask = 0x20;             // if not set mask
 668         trailing = 2;                   // to check next byte
 669       }
 670       else if ((c & 0xf8) == 0xf0)      // valid 4-byte UTF-8
 671       {
 672         if (!(c & 0x07))                // is UTF-8 byte in proper range ?
 673           byte2mask = 0x30;             // if not set mask
 674         trailing = 3;                   // to check next byte
 675       }
 676       else if ((c & 0xfc) == 0xf8)      // valid 5-byte UTF-8
 677       {
 678         if (!(c & 0x03))                // is UTF-8 byte in proper range ?
 679           byte2mask = 0x38;             // if not set mask
 680         trailing = 4;                   // to check next byte
 681       }
 682       else if ((c & 0xfe) == 0xfc)      // valid 6-byte UTF-8
 683       {
 684         if (!(c & 0x01))                // is UTF-8 byte in proper range ?
 685           byte2mask = 0x3c;             // if not set mask
 686         trailing = 5;                   // to check next byte
 687       }
 688       else
 689         return false;
 690   }
 691   return trailing == 0;
 692 }
 693
 694 bool CCharsetConverter::isValidUtf8(const CStdString& str)
 695 {
 696   return isValidUtf8(str.c_str(), str.size());
 697 }
 698
 699 void CCharsetConverter::utf8logicalToVisualBiDi(const CStdStringA& strSource, CStdStringA& strDest)
 700 {
 701   logicalToVisualBiDi(strSource, strDest, FRIBIDI_UTF8, FRIBIDI_TYPE_RTL);
 702 }