code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
// Initial buffer size (in characters) that FormatV allocates before its first
// formatting attempt; the buffer is grown when the output does not fit.
#define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try

using namespace std;

// Pattern compiled by ValidateUUID: 8-4-4-4-12 hexadecimal digits separated by
// dashes, optionally preceded by '{' and/or followed by '}'.
const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";

/* empty string for use in returns by ref */
const CStdString StringUtils::EmptyString = "";
const std::string StringUtils::Empty = "";
// Holds the value most recently generated by CreateUUID().
CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToLower(string &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::tolower);
 167 }
 168
 169 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 170 {
 171   return EqualsNoCase(str1.c_str(), str2.c_str());
 172 }
 173
 174 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 175 {
 176   return EqualsNoCase(str1.c_str(), s2);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 180 {
 181   char c2; // we need only one char outside the loop
 182   do
 183   {
 184     const char c1 = *s1++; // const local variable should help compiler to optimize
 185     c2 = *s2++;
 186     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 187       return false;
 188   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 189   return true;
 190 }
 191
 192 string StringUtils::Left(const string &str, size_t count)
 193 {
 194   count = max((size_t)0, min(count, str.size()));
 195   return str.substr(0, count);
 196 }
 197
 198 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 199 {
 200   if (first + count > str.size())
 201     count = str.size() - first;
 202
 203   if (first > str.size())
 204     return string();
 205
 206   ASSERT(first + count <= str.size());
 207
 208   return str.substr(first, count);
 209 }
 210
 211 string StringUtils::Right(const string &str, size_t count)
 212 {
 213   count = max((size_t)0, min(count, str.size()));
 214   return str.substr(str.size() - count);
 215 }
 216
 217 std::string& StringUtils::Trim(std::string &str)
 218 {
 219   TrimLeft(str);
 220   return TrimRight(str);
 221 }
 222
 223 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 224 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 225 static int isspace_c(char c)
 226 {
 227   return ::isspace((unsigned char)c);
 228 }
 229
 230 std::string& StringUtils::TrimLeft(std::string &str)
 231 {
 232   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 233   return str;
 234 }
 235
 236 std::string& StringUtils::TrimLeft(std::string &str, const std::string& chars)
 237 {
 238   size_t nidx = str.find_first_not_of(chars);
 239   str.substr(nidx == str.npos ? 0 : nidx).swap(str);
 240   return str;
 241 }
 242
 243 std::string& StringUtils::TrimRight(std::string &str)
 244 {
 245   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 246   return str;
 247 }
 248
 249 std::string& StringUtils::TrimRight(std::string &str, const std::string& chars)
 250 {
 251   size_t nidx = str.find_last_not_of(chars);
 252   str.erase(str.npos == nidx ? 0 : ++nidx);
 253   return str;
 254 }
 255
 256 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 257 {
 258   std::string::iterator it = str.begin();
 259   bool onSpace = false;
 260   while(it != str.end())
 261   {
 262     if (*it == '\t')
 263       *it = ' ';
 264
 265     if (*it == ' ')
 266     {
 267       if (onSpace)
 268       {
 269         it = str.erase(it);
 270         continue;
 271       }
 272       else
 273         onSpace = true;
 274     }
 275     else
 276       onSpace = false;
 277
 278     ++it;
 279   }
 280   return str;
 281 }
 282
 283 int StringUtils::Replace(string &str, char oldChar, char newChar)
 284 {
 285   int replacedChars = 0;
 286   for (string::iterator it = str.begin(); it != str.end(); it++)
 287   {
 288     if (*it == oldChar)
 289     {
 290       *it = newChar;
 291       replacedChars++;
 292     }
 293   }
 294
 295   return replacedChars;
 296 }
 297
 298 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 299 {
 300   if (oldStr.empty())
 301     return 0;
 302
 303   int replacedChars = 0;
 304   size_t index = 0;
 305
 306   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 307   {
 308     str.replace(index, oldStr.size(), newStr);
 309     index += newStr.size();
 310     replacedChars++;
 311   }
 312
 313   return replacedChars;
 314 }
 315
 316 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 317 {
 318   return str1.compare(0, str2.size(), str2) == 0;
 319 }
 320
 321 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 322 {
 323   return StartsWith(str1.c_str(), s2);
 324 }
 325
 326 bool StringUtils::StartsWith(const char *s1, const char *s2)
 327 {
 328   while (*s2 != '\0')
 329   {
 330     if (*s1 != *s2)
 331       return false;
 332     s1++;
 333     s2++;
 334   }
 335   return true;
 336 }
 337
 338 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 339 {
 340   return StartsWithNoCase(str1.c_str(), str2.c_str());
 341 }
 342
 343 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 344 {
 345   return StartsWithNoCase(str1.c_str(), s2);
 346 }
 347
 348 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 349 {
 350   while (*s2 != '\0')
 351   {
 352     if (::tolower(*s1) != ::tolower(*s2))
 353       return false;
 354     s1++;
 355     s2++;
 356   }
 357   return true;
 358 }
 359
 360 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 361 {
 362   if (str1.size() < str2.size())
 363     return false;
 364   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 365 }
 366
 367 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 368 {
 369   size_t len2 = strlen(s2);
 370   if (str1.size() < len2)
 371     return false;
 372   return str1.compare(str1.size() - len2, len2, s2) == 0;
 373 }
 374
 375 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 376 {
 377   if (str1.size() < str2.size())
 378     return false;
 379   const char *s1 = str1.c_str() + str1.size() - str2.size();
 380   const char *s2 = str2.c_str();
 381   while (*s2 != '\0')
 382   {
 383     if (::tolower(*s1) != ::tolower(*s2))
 384       return false;
 385     s1++;
 386     s2++;
 387   }
 388   return true;
 389 }
 390
 391 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 392 {
 393   size_t len2 = strlen(s2);
 394   if (str1.size() < len2)
 395     return false;
 396   const char *s1 = str1.c_str() + str1.size() - len2;
 397   while (*s2 != '\0')
 398   {
 399     if (::tolower(*s1) != ::tolower(*s2))
 400       return false;
 401     s1++;
 402     s2++;
 403   }
 404   return true;
 405 }
 406
 407 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 408 {
 409   result = "";
 410   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 411     result += (*it) + delimiter;
 412
 413   if(result != "")
 414     result.Delete(result.size()-delimiter.size(), delimiter.size());
 415 }
 416
 417 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 418 {
 419   CStdString result;
 420   JoinString(strings, delimiter, result);
 421   return result;
 422 }
 423
 424 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 425 {
 426   CStdStringArray strArray;
 427   for (unsigned int index = 0; index < strings.size(); index++)
 428     strArray.push_back(strings.at(index));
 429
 430   return JoinString(strArray, delimiter);
 431 }
 432
// Splits the string input into pieces delimited by delimiter and stores them
// in results (which is cleared first). Returns the number of pieces stored.
// If 2 delimiters are in a row, the empty string between them is included.
// If the delimiter does not occur at all, results holds input as its only entry.
// iMaxStrings > 0 restricts the number of returned substrings (like perl and
// python); the last piece then holds the remaining, unsplit rest of input.
// NOTE(review): relies on CStdString::Find returning a negative value when
// nothing is found - confirm against the CStdString implementation.
int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
{
  int iPos = -1;
  int newPos = -1;
  int sizeS2 = delimiter.GetLength();
  int isize = input.GetLength();

  results.clear();

  // start offsets of every delimiter occurrence found in input
  vector<unsigned int> positions;

  newPos = input.Find (delimiter, 0);

  // no delimiter at all: the whole input is the single piece
  if ( newPos < 0 )
  {
    results.push_back(input);
    return 1;
  }

  // Collect delimiter positions. The loop ends as soon as Find stops
  // advancing, which covers both "not found" and a search that returns the
  // same position again.
  while ( newPos > iPos )
  {
    positions.push_back(newPos);
    iPos = newPos;
    newPos = input.Find (delimiter, iPos + sizeS2);
  }

  // numFound is the number of delimiters which is one less
  // than the number of substrings
  unsigned int numFound = positions.size();
  if (iMaxStrings > 0 && numFound >= iMaxStrings)
    numFound = iMaxStrings - 1;

  for ( unsigned int i = 0; i <= numFound; i++ )
  {
    CStdString s;
    if ( i == 0 )
    {
      // first piece: everything before the first delimiter, or the whole
      // input when the limit allows only one piece
      if ( i == numFound )
        s = input;
      else
        s = input.Mid( i, positions[i] );
    }
    else
    {
      // later pieces start right after the previous delimiter; when that
      // offset is at or past the end of input the piece stays empty
      int offset = positions[i - 1] + sizeS2;
      if ( offset < isize )
      {
        if ( i == numFound )
          s = input.Mid(offset); // last piece: the rest of input
        else if ( i > 0 )
          s = input.Mid( positions[i - 1] + sizeS2,
                         positions[i] - positions[i - 1] - sizeS2 );
      }
    }
    results.push_back(s);
  }
  // return the number of substrings
  return results.size();
}
 495
 496 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 497 {
 498   CStdStringArray result;
 499   SplitString(input, delimiter, result, iMaxStrings);
 500   return result;
 501 }
 502
 503 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 504 {
 505   CStdStringArray result;
 506   SplitString(input, delimiter, result, iMaxStrings);
 507
 508   vector<string> strArray;
 509   for (unsigned int index = 0; index < result.size(); index++)
 510     strArray.push_back(result.at(index));
 511
 512   return strArray;
 513 }
 514
 515 // returns the number of occurrences of strFind in strInput.
 516 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 517 {
 518   int pos = strInput.Find(strFind, 0);
 519   int numfound = 0;
 520   while (pos >= 0)
 521   {
 522     numfound++;
 523     pos = strInput.Find(strFind, pos + 1);
 524   }
 525   return numfound;
 526 }
 527
 528 // Compares separately the numeric and alphabetic parts of a string.
 529 // returns negative if left < right, positive if left > right
 530 // and 0 if they are identical (essentially calculates left - right)
 531 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 532 {
 533   wchar_t *l = (wchar_t *)left;
 534   wchar_t *r = (wchar_t *)right;
 535   wchar_t *ld, *rd;
 536   wchar_t lc, rc;
 537   int64_t lnum, rnum;
 538   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 539   int cmp_res = 0;
 540   while (*l != 0 && *r != 0)
 541   {
 542     // check if we have a numerical value
 543     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 544     {
 545       ld = l;
 546       lnum = 0;
 547       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 548       { // compare only up to 15 digits
 549         lnum *= 10;
 550         lnum += *ld++ - '0';
 551       }
 552       rd = r;
 553       rnum = 0;
 554       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 555       { // compare only up to 15 digits
 556         rnum *= 10;
 557         rnum += *rd++ - L'0';
 558       }
 559       // do we have numbers?
 560       if (lnum != rnum)
 561       { // yes - and they're different!
 562         return lnum - rnum;
 563       }
 564       l = ld;
 565       r = rd;
 566       continue;
 567     }
 568     // do case less comparison
 569     lc = *l;
 570     if (lc >= L'A' && lc <= L'Z')
 571       lc += L'a'-L'A';
 572     rc = *r;
 573     if (rc >= L'A' && rc <= L'Z')
 574       rc += L'a'- L'A';
 575
 576     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 577     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 578     {
 579       return cmp_res;
 580     }
 581     l++; r++;
 582   }
 583   if (*r)
 584   { // r is longer
 585     return -1;
 586   }
 587   else if (*l)
 588   { // l is longer
 589     return 1;
 590   }
 591   return 0; // files are the same
 592 }
 593
 594 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 595 {
 596   CStdStringArray days;
 597   int splitCount = StringUtils::SplitString(dateString, "-", days);
 598   if (splitCount == 1)
 599     return atoi(days[0].c_str());
 600   else if (splitCount == 2)
 601     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 602   else if (splitCount == 3)
 603     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 604   else
 605     return -1;
 606 }
 607
 608 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 609 {
 610   CStdString strCopy(timeString);
 611   StringUtils::Trim(strCopy);
 612   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 613   {
 614     // this is imdb format of "XXX min"
 615     return 60 * atoi(strCopy.c_str());
 616   }
 617   else
 618   {
 619     CStdStringArray secs;
 620     StringUtils::SplitString(strCopy, ":", secs);
 621     int timeInSecs = 0;
 622     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 623     {
 624       timeInSecs *= 60;
 625       timeInSecs += atoi(secs[i]);
 626     }
 627     return timeInSecs;
 628   }
 629 }
 630
 631 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 632 {
 633   int hh = lSeconds / 3600;
 634   lSeconds = lSeconds % 3600;
 635   int mm = lSeconds / 60;
 636   int ss = lSeconds % 60;
 637
 638   if (format == TIME_FORMAT_GUESS)
 639     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 640   CStdString strHMS;
 641   if (format & TIME_FORMAT_HH)
 642     strHMS.AppendFormat("%02.2i", hh);
 643   else if (format & TIME_FORMAT_H)
 644     strHMS.AppendFormat("%i", hh);
 645   if (format & TIME_FORMAT_MM)
 646     strHMS.AppendFormat(strHMS.empty() ? "%02.2i" : ":%02.2i", mm);
 647   if (format & TIME_FORMAT_SS)
 648     strHMS.AppendFormat(strHMS.empty() ? "%02.2i" : ":%02.2i", ss);
 649   return strHMS;
 650 }
 651
 652 bool StringUtils::IsNaturalNumber(const CStdString& str)
 653 {
 654   size_t i = 0, n = 0;
 655   // allow whitespace,digits,whitespace
 656   while (i < str.size() && isspace((unsigned char) str[i]))
 657     i++;
 658   while (i < str.size() && isdigit((unsigned char) str[i]))
 659   {
 660     i++; n++;
 661   }
 662   while (i < str.size() && isspace((unsigned char) str[i]))
 663     i++;
 664   return i == str.size() && n > 0;
 665 }
 666
 667 bool StringUtils::IsInteger(const CStdString& str)
 668 {
 669   size_t i = 0, n = 0;
 670   // allow whitespace,-,digits,whitespace
 671   while (i < str.size() && isspace((unsigned char) str[i]))
 672     i++;
 673   if (i < str.size() && str[i] == '-')
 674     i++;
 675   while (i < str.size() && isdigit((unsigned char) str[i]))
 676   {
 677     i++; n++;
 678   }
 679   while (i < str.size() && isspace((unsigned char) str[i]))
 680     i++;
 681   return i == str.size() && n > 0;
 682 }
 683
 684 void StringUtils::RemoveCRLF(CStdString& strLine)
 685 {
 686   while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
 687   {
 688     strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
 689   }
 690 }
 691
 692 CStdString StringUtils::SizeToString(int64_t size)
 693 {
 694   CStdString strLabel;
 695   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 696   unsigned int i = 0;
 697   double s = (double)size;
 698   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 699   {
 700     s /= 1024.0;
 701     i++;
 702   }
 703
 704   if (!i)
 705     strLabel = StringUtils::Format("%.0lf %cB ", s, prefixes[i]);
 706   else if (s >= 100.0)
 707     strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
 708   else
 709     strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
 710
 711   return strLabel;
 712 }
 713
 714 // return -1 if not, else return the utf8 char length.
 715 int IsUTF8Letter(const unsigned char *str)
 716 {
 717   // reference:
 718   // unicode -> utf8 table: http://www.utf8-chartable.de/
 719   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 720   unsigned char ch = str[0];
 721   if (!ch)
 722     return -1;
 723   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 724     return 1;
 725   if (!(ch & 0x80))
 726     return -1;
 727   unsigned char ch2 = str[1];
 728   if (!ch2)
 729     return -1;
 730   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 731   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 732     return 2;
 733   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 734   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 735     return 2;
 736   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 737   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 738   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 739       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 740     return 2;
 741   return -1;
 742 }
 743
 744 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 745 {
 746   // NOTE: This assumes word is lowercase!
 747   unsigned char *s = (unsigned char *)str;
 748   do
 749   {
 750     // start with a compare
 751     unsigned char *c = s;
 752     unsigned char *w = (unsigned char *)wordLowerCase;
 753     bool same = true;
 754     while (same && *c && *w)
 755     {
 756       unsigned char lc = *c++;
 757       if (lc >= 'A' && lc <= 'Z')
 758         lc += 'a'-'A';
 759
 760       if (lc != *w++) // different
 761         same = false;
 762     }
 763     if (same && *w == 0)  // only the same if word has been exhausted
 764       return (const char *)s - str;
 765
 766     // otherwise, skip current word (composed by latin letters) or number
 767     int l;
 768     if (*s >= '0' && *s <= '9')
 769     {
 770       ++s;
 771       while (*s >= '0' && *s <= '9') ++s;
 772     }
 773     else if ((l = IsUTF8Letter(s)) > 0)
 774     {
 775       s += l;
 776       while ((l = IsUTF8Letter(s)) > 0) s += l;
 777     }
 778     else
 779       ++s;
 780     while (*s && *s == ' ') s++;
 781
 782     // and repeat until we're done
 783   } while (*s);
 784
 785   return CStdString::npos;
 786 }
 787
 788 // assumes it is called from after the first open bracket is found
 789 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 790 {
 791   int blocks = 1;
 792   for (unsigned int i = startPos; i < str.size(); i++)
 793   {
 794     if (str[i] == opener)
 795       blocks++;
 796     else if (str[i] == closer)
 797     {
 798       blocks--;
 799       if (!blocks)
 800         return i;
 801     }
 802   }
 803
 804   return (int)CStdString::npos;
 805 }
 806
 807 void StringUtils::WordToDigits(CStdString &word)
 808 {
 809   static const char word_to_letter[] = "22233344455566677778889999";
 810   word.ToLower();
 811   for (unsigned int i = 0; i < word.size(); ++i)
 812   { // NB: This assumes ascii, which probably needs extending at some  point.
 813     char letter = word[i];
 814     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 815     {
 816       word[i] = word_to_letter[letter-'a'];
 817     }
 818     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 819     {
 820       word[i] = ' ';  // replace everything else with a space
 821     }
 822   }
 823 }
 824
 825 CStdString StringUtils::CreateUUID()
 826 {
 827   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 828   * Version 4 conform local unique UUID based upon random number generation.
 829   */
 830   char UuidStrTmp[40];
 831   char *pUuidStr = UuidStrTmp;
 832   int i;
 833
 834   static bool m_uuidInitialized = false;
 835   if (!m_uuidInitialized)
 836   {
 837     /* use current time as the seed for rand()*/
 838     srand(time(NULL));
 839     m_uuidInitialized = true;
 840   }
 841
 842   /*Data1 - 8 characters.*/
 843   for(i = 0; i < 8; i++, pUuidStr++)
 844     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 845
 846   /*Data2 - 4 characters.*/
 847   *pUuidStr++ = '-';
 848   for(i = 0; i < 4; i++, pUuidStr++)
 849     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 850
 851   /*Data3 - 4 characters.*/
 852   *pUuidStr++ = '-';
 853   for(i = 0; i < 4; i++, pUuidStr++)
 854     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 855
 856   /*Data4 - 4 characters.*/
 857   *pUuidStr++ = '-';
 858   for(i = 0; i < 4; i++, pUuidStr++)
 859     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 860
 861   /*Data5 - 12 characters.*/
 862   *pUuidStr++ = '-';
 863   for(i = 0; i < 12; i++, pUuidStr++)
 864     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 865
 866   *pUuidStr = '\0';
 867
 868   m_lastUUID = UuidStrTmp;
 869   return UuidStrTmp;
 870 }
 871
 872 bool StringUtils::ValidateUUID(const CStdString &uuid)
 873 {
 874   CRegExp guidRE;
 875   guidRE.RegComp(ADDON_GUID_RE);
 876   return (guidRE.RegFind(uuid.c_str()) == 0);
 877 }
 878
 879 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 880 {
 881   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 882 }
 883
 884 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 885 {
 886   int best = -1;
 887   matchscore = 0;
 888
 889   int i = 0;
 890   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 891   {
 892     int maxlength = max(str.length(), it->length());
 893     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 894     if (score > matchscore)
 895     {
 896       matchscore = score;
 897       best = i;
 898     }
 899   }
 900   return best;
 901 }
 902
 903 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 904 {
 905   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 906   {
 907     if (str.find(*it) != str.npos)
 908       return true;
 909   }
 910   return false;
 911 }
 912
 913 size_t StringUtils::utf8_strlen(const char *s)
 914 {
 915   size_t length = 0;
 916   while (*s)
 917   {
 918     if ((*s++ & 0xC0) != 0x80)
 919       length++;
 920   }
 921   return length;
 922 }
 923
 924 std::string StringUtils::Paramify(const std::string &param)
 925 {
 926   std::string result = param;
 927   // escape backspaces
 928   StringUtils::Replace(result, "\\", "\\\\");
 929   // escape double quotes
 930   StringUtils::Replace(result, "\"", "\\\"");
 931
 932   // add double quotes around the whole string
 933   return "\"" + result + "\"";
 934 }
 935
 936 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 937 {
 938   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 939   // Skip delimiters at beginning.
 940   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 941   // Find first "non-delimiter".
 942   string::size_type pos = input.find_first_of(delimiters, lastPos);
 943
 944   while (string::npos != pos || string::npos != lastPos)
 945   {
 946     // Found a token, add it to the vector.
 947     tokens.push_back(input.substr(lastPos, pos - lastPos));
 948     // Skip delimiters.  Note the "not_of"
 949     lastPos = input.find_first_not_of(delimiters, pos);
 950     // Find next "non-delimiter"
 951     pos = input.find_first_of(delimiters, lastPos);
 952   }
 953 }