code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
// Initial buffer size used by the FormatV() overloads below; the buffer is
// enlarged from there whenever the formatted output does not fit.
#define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try

using namespace std;

// Pattern used by ValidateUUID(): five groups of 8-4-4-4-12 hexadecimal
// digits separated by '-', optionally prefixed by '{' and/or suffixed by '}'.
const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";

/* empty string for use in returns by ref */
const CStdString StringUtils::EmptyString = "";
const std::string StringUtils::Empty = "";
// Holds the value most recently produced by CreateUUID().
CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToLower(string &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::tolower);
 167 }
 168
 169 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 170 {
 171   return EqualsNoCase(str1.c_str(), str2.c_str());
 172 }
 173
 174 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 175 {
 176   return EqualsNoCase(str1.c_str(), s2);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 180 {
 181   char c2; // we need only one char outside the loop
 182   do
 183   {
 184     const char c1 = *s1++; // const local variable should help compiler to optimize
 185     c2 = *s2++;
 186     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 187       return false;
 188   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 189   return true;
 190 }
 191
 192 string StringUtils::Left(const string &str, size_t count)
 193 {
 194   count = max((size_t)0, min(count, str.size()));
 195   return str.substr(0, count);
 196 }
 197
 198 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 199 {
 200   if (first + count > str.size())
 201     count = str.size() - first;
 202
 203   if (first > str.size())
 204     return string();
 205
 206   ASSERT(first + count <= str.size());
 207
 208   return str.substr(first, count);
 209 }
 210
 211 string StringUtils::Right(const string &str, size_t count)
 212 {
 213   count = max((size_t)0, min(count, str.size()));
 214   return str.substr(str.size() - count);
 215 }
 216
 217 std::string& StringUtils::Trim(std::string &str)
 218 {
 219   TrimLeft(str);
 220   return TrimRight(str);
 221 }
 222
 223 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 224 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 225 static int isspace_c(char c)
 226 {
 227   return ::isspace((unsigned char)c);
 228 }
 229
 230 std::string& StringUtils::TrimLeft(std::string &str)
 231 {
 232   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 233   return str;
 234 }
 235
 236 std::string& StringUtils::TrimRight(std::string &str)
 237 {
 238   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 239   return str;
 240 }
 241
 242 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 243 {
 244   std::string::iterator it = str.begin();
 245   bool onSpace = false;
 246   while(it != str.end())
 247   {
 248     if (*it == '\t')
 249       *it = ' ';
 250
 251     if (*it == ' ')
 252     {
 253       if (onSpace)
 254       {
 255         it = str.erase(it);
 256         continue;
 257       }
 258       else
 259         onSpace = true;
 260     }
 261     else
 262       onSpace = false;
 263
 264     ++it;
 265   }
 266   return str;
 267 }
 268
 269 int StringUtils::Replace(string &str, char oldChar, char newChar)
 270 {
 271   int replacedChars = 0;
 272   for (string::iterator it = str.begin(); it != str.end(); it++)
 273   {
 274     if (*it == oldChar)
 275     {
 276       *it = newChar;
 277       replacedChars++;
 278     }
 279   }
 280
 281   return replacedChars;
 282 }
 283
 284 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 285 {
 286   if (oldStr.empty())
 287     return 0;
 288
 289   int replacedChars = 0;
 290   size_t index = 0;
 291
 292   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 293   {
 294     str.replace(index, oldStr.size(), newStr);
 295     index += newStr.size();
 296     replacedChars++;
 297   }
 298
 299   return replacedChars;
 300 }
 301
 302 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 303 {
 304   return str1.compare(0, str2.size(), str2) == 0;
 305 }
 306
 307 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 308 {
 309   return StartsWith(str1.c_str(), s2);
 310 }
 311
 312 bool StringUtils::StartsWith(const char *s1, const char *s2)
 313 {
 314   while (*s2 != '\0')
 315   {
 316     if (*s1 != *s2)
 317       return false;
 318     s1++;
 319     s2++;
 320   }
 321   return true;
 322 }
 323
 324 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 325 {
 326   return StartsWithNoCase(str1.c_str(), str2.c_str());
 327 }
 328
 329 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 330 {
 331   return StartsWithNoCase(str1.c_str(), s2);
 332 }
 333
 334 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 335 {
 336   while (*s2 != '\0')
 337   {
 338     if (::tolower(*s1) != ::tolower(*s2))
 339       return false;
 340     s1++;
 341     s2++;
 342   }
 343   return true;
 344 }
 345
 346 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 347 {
 348   if (str1.size() < str2.size())
 349     return false;
 350   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 351 }
 352
 353 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 354 {
 355   size_t len2 = strlen(s2);
 356   if (str1.size() < len2)
 357     return false;
 358   return str1.compare(str1.size() - len2, len2, s2) == 0;
 359 }
 360
 361 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 362 {
 363   if (str1.size() < str2.size())
 364     return false;
 365   const char *s1 = str1.c_str() + str1.size() - str2.size();
 366   const char *s2 = str2.c_str();
 367   while (*s2 != '\0')
 368   {
 369     if (::tolower(*s1) != ::tolower(*s2))
 370       return false;
 371     s1++;
 372     s2++;
 373   }
 374   return true;
 375 }
 376
 377 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 378 {
 379   size_t len2 = strlen(s2);
 380   if (str1.size() < len2)
 381     return false;
 382   const char *s1 = str1.c_str() + str1.size() - len2;
 383   while (*s2 != '\0')
 384   {
 385     if (::tolower(*s1) != ::tolower(*s2))
 386       return false;
 387     s1++;
 388     s2++;
 389   }
 390   return true;
 391 }
 392
 393 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 394 {
 395   result = "";
 396   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 397     result += (*it) + delimiter;
 398
 399   if(result != "")
 400     result.Delete(result.size()-delimiter.size(), delimiter.size());
 401 }
 402
 403 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 404 {
 405   CStdString result;
 406   JoinString(strings, delimiter, result);
 407   return result;
 408 }
 409
 410 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 411 {
 412   CStdStringArray strArray;
 413   for (unsigned int index = 0; index < strings.size(); index++)
 414     strArray.push_back(strings.at(index));
 415
 416   return JoinString(strArray, delimiter);
 417 }
 418
 419 // Splits the string input into pieces delimited by delimiter.
 420 // if 2 delimiters are in a row, it will include the empty string between them.
 421 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 422 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 423 {
 424   int iPos = -1;
 425   int newPos = -1;
 426   int sizeS2 = delimiter.GetLength();
 427   int isize = input.GetLength();
 428
 429   results.clear();
 430
 431   vector<unsigned int> positions;
 432
 433   newPos = input.Find (delimiter, 0);
 434
 435   if ( newPos < 0 )
 436   {
 437     results.push_back(input);
 438     return 1;
 439   }
 440
 441   while ( newPos > iPos )
 442   {
 443     positions.push_back(newPos);
 444     iPos = newPos;
 445     newPos = input.Find (delimiter, iPos + sizeS2);
 446   }
 447
 448   // numFound is the number of delimiters which is one less
 449   // than the number of substrings
 450   unsigned int numFound = positions.size();
 451   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 452     numFound = iMaxStrings - 1;
 453
 454   for ( unsigned int i = 0; i <= numFound; i++ )
 455   {
 456     CStdString s;
 457     if ( i == 0 )
 458     {
 459       if ( i == numFound )
 460         s = input;
 461       else
 462         s = input.Mid( i, positions[i] );
 463     }
 464     else
 465     {
 466       int offset = positions[i - 1] + sizeS2;
 467       if ( offset < isize )
 468       {
 469         if ( i == numFound )
 470           s = input.Mid(offset);
 471         else if ( i > 0 )
 472           s = input.Mid( positions[i - 1] + sizeS2,
 473                          positions[i] - positions[i - 1] - sizeS2 );
 474       }
 475     }
 476     results.push_back(s);
 477   }
 478   // return the number of substrings
 479   return results.size();
 480 }
 481
 482 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 483 {
 484   CStdStringArray result;
 485   SplitString(input, delimiter, result, iMaxStrings);
 486   return result;
 487 }
 488
 489 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 490 {
 491   CStdStringArray result;
 492   SplitString(input, delimiter, result, iMaxStrings);
 493
 494   vector<string> strArray;
 495   for (unsigned int index = 0; index < result.size(); index++)
 496     strArray.push_back(result.at(index));
 497
 498   return strArray;
 499 }
 500
 501 // returns the number of occurrences of strFind in strInput.
 502 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 503 {
 504   int pos = strInput.Find(strFind, 0);
 505   int numfound = 0;
 506   while (pos >= 0)
 507   {
 508     numfound++;
 509     pos = strInput.Find(strFind, pos + 1);
 510   }
 511   return numfound;
 512 }
 513
 514 // Compares separately the numeric and alphabetic parts of a string.
 515 // returns negative if left < right, positive if left > right
 516 // and 0 if they are identical (essentially calculates left - right)
 517 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 518 {
 519   wchar_t *l = (wchar_t *)left;
 520   wchar_t *r = (wchar_t *)right;
 521   wchar_t *ld, *rd;
 522   wchar_t lc, rc;
 523   int64_t lnum, rnum;
 524   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 525   int cmp_res = 0;
 526   while (*l != 0 && *r != 0)
 527   {
 528     // check if we have a numerical value
 529     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 530     {
 531       ld = l;
 532       lnum = 0;
 533       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 534       { // compare only up to 15 digits
 535         lnum *= 10;
 536         lnum += *ld++ - '0';
 537       }
 538       rd = r;
 539       rnum = 0;
 540       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 541       { // compare only up to 15 digits
 542         rnum *= 10;
 543         rnum += *rd++ - L'0';
 544       }
 545       // do we have numbers?
 546       if (lnum != rnum)
 547       { // yes - and they're different!
 548         return lnum - rnum;
 549       }
 550       l = ld;
 551       r = rd;
 552       continue;
 553     }
 554     // do case less comparison
 555     lc = *l;
 556     if (lc >= L'A' && lc <= L'Z')
 557       lc += L'a'-L'A';
 558     rc = *r;
 559     if (rc >= L'A' && rc <= L'Z')
 560       rc += L'a'- L'A';
 561
 562     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 563     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 564     {
 565       return cmp_res;
 566     }
 567     l++; r++;
 568   }
 569   if (*r)
 570   { // r is longer
 571     return -1;
 572   }
 573   else if (*l)
 574   { // l is longer
 575     return 1;
 576   }
 577   return 0; // files are the same
 578 }
 579
 580 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 581 {
 582   CStdStringArray days;
 583   int splitCount = StringUtils::SplitString(dateString, "-", days);
 584   if (splitCount == 1)
 585     return atoi(days[0].c_str());
 586   else if (splitCount == 2)
 587     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 588   else if (splitCount == 3)
 589     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 590   else
 591     return -1;
 592 }
 593
 594 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 595 {
 596   CStdString strCopy(timeString);
 597   strCopy.TrimLeft(" \n\r\t");
 598   strCopy.TrimRight(" \n\r\t");
 599   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 600   {
 601     // this is imdb format of "XXX min"
 602     return 60 * atoi(strCopy.c_str());
 603   }
 604   else
 605   {
 606     CStdStringArray secs;
 607     StringUtils::SplitString(strCopy, ":", secs);
 608     int timeInSecs = 0;
 609     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 610     {
 611       timeInSecs *= 60;
 612       timeInSecs += atoi(secs[i]);
 613     }
 614     return timeInSecs;
 615   }
 616 }
 617
 618 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 619 {
 620   int hh = lSeconds / 3600;
 621   lSeconds = lSeconds % 3600;
 622   int mm = lSeconds / 60;
 623   int ss = lSeconds % 60;
 624
 625   if (format == TIME_FORMAT_GUESS)
 626     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 627   CStdString strHMS;
 628   if (format & TIME_FORMAT_HH)
 629     strHMS.AppendFormat("%02.2i", hh);
 630   else if (format & TIME_FORMAT_H)
 631     strHMS.AppendFormat("%i", hh);
 632   if (format & TIME_FORMAT_MM)
 633     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", mm);
 634   if (format & TIME_FORMAT_SS)
 635     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", ss);
 636   return strHMS;
 637 }
 638
 639 bool StringUtils::IsNaturalNumber(const CStdString& str)
 640 {
 641   size_t i = 0, n = 0;
 642   // allow whitespace,digits,whitespace
 643   while (i < str.size() && isspace((unsigned char) str[i]))
 644     i++;
 645   while (i < str.size() && isdigit((unsigned char) str[i]))
 646   {
 647     i++; n++;
 648   }
 649   while (i < str.size() && isspace((unsigned char) str[i]))
 650     i++;
 651   return i == str.size() && n > 0;
 652 }
 653
 654 bool StringUtils::IsInteger(const CStdString& str)
 655 {
 656   size_t i = 0, n = 0;
 657   // allow whitespace,-,digits,whitespace
 658   while (i < str.size() && isspace((unsigned char) str[i]))
 659     i++;
 660   if (i < str.size() && str[i] == '-')
 661     i++;
 662   while (i < str.size() && isdigit((unsigned char) str[i]))
 663   {
 664     i++; n++;
 665   }
 666   while (i < str.size() && isspace((unsigned char) str[i]))
 667     i++;
 668   return i == str.size() && n > 0;
 669 }
 670
 671 void StringUtils::RemoveCRLF(CStdString& strLine)
 672 {
 673   while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
 674   {
 675     strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
 676   }
 677 }
 678
 679 CStdString StringUtils::SizeToString(int64_t size)
 680 {
 681   CStdString strLabel;
 682   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 683   unsigned int i = 0;
 684   double s = (double)size;
 685   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 686   {
 687     s /= 1024.0;
 688     i++;
 689   }
 690
 691   if (!i)
 692     strLabel.Format("%.0lf %cB ", s, prefixes[i]);
 693   else if (s >= 100.0)
 694     strLabel.Format("%.1lf %cB", s, prefixes[i]);
 695   else
 696     strLabel.Format("%.2lf %cB", s, prefixes[i]);
 697
 698   return strLabel;
 699 }
 700
 701 // return -1 if not, else return the utf8 char length.
 702 int IsUTF8Letter(const unsigned char *str)
 703 {
 704   // reference:
 705   // unicode -> utf8 table: http://www.utf8-chartable.de/
 706   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 707   unsigned char ch = str[0];
 708   if (!ch)
 709     return -1;
 710   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 711     return 1;
 712   if (!(ch & 0x80))
 713     return -1;
 714   unsigned char ch2 = str[1];
 715   if (!ch2)
 716     return -1;
 717   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 718   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 719     return 2;
 720   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 721   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 722     return 2;
 723   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 724   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 725   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 726       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 727     return 2;
 728   return -1;
 729 }
 730
 731 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 732 {
 733   // NOTE: This assumes word is lowercase!
 734   unsigned char *s = (unsigned char *)str;
 735   do
 736   {
 737     // start with a compare
 738     unsigned char *c = s;
 739     unsigned char *w = (unsigned char *)wordLowerCase;
 740     bool same = true;
 741     while (same && *c && *w)
 742     {
 743       unsigned char lc = *c++;
 744       if (lc >= 'A' && lc <= 'Z')
 745         lc += 'a'-'A';
 746
 747       if (lc != *w++) // different
 748         same = false;
 749     }
 750     if (same && *w == 0)  // only the same if word has been exhausted
 751       return (const char *)s - str;
 752
 753     // otherwise, skip current word (composed by latin letters) or number
 754     int l;
 755     if (*s >= '0' && *s <= '9')
 756     {
 757       ++s;
 758       while (*s >= '0' && *s <= '9') ++s;
 759     }
 760     else if ((l = IsUTF8Letter(s)) > 0)
 761     {
 762       s += l;
 763       while ((l = IsUTF8Letter(s)) > 0) s += l;
 764     }
 765     else
 766       ++s;
 767     while (*s && *s == ' ') s++;
 768
 769     // and repeat until we're done
 770   } while (*s);
 771
 772   return CStdString::npos;
 773 }
 774
 775 // assumes it is called from after the first open bracket is found
 776 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 777 {
 778   int blocks = 1;
 779   for (unsigned int i = startPos; i < str.size(); i++)
 780   {
 781     if (str[i] == opener)
 782       blocks++;
 783     else if (str[i] == closer)
 784     {
 785       blocks--;
 786       if (!blocks)
 787         return i;
 788     }
 789   }
 790
 791   return (int)CStdString::npos;
 792 }
 793
 794 void StringUtils::WordToDigits(CStdString &word)
 795 {
 796   static const char word_to_letter[] = "22233344455566677778889999";
 797   word.ToLower();
 798   for (unsigned int i = 0; i < word.size(); ++i)
 799   { // NB: This assumes ascii, which probably needs extending at some  point.
 800     char letter = word[i];
 801     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 802     {
 803       word[i] = word_to_letter[letter-'a'];
 804     }
 805     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 806     {
 807       word[i] = ' ';  // replace everything else with a space
 808     }
 809   }
 810 }
 811
 812 CStdString StringUtils::CreateUUID()
 813 {
 814   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 815   * Version 4 conform local unique UUID based upon random number generation.
 816   */
 817   char UuidStrTmp[40];
 818   char *pUuidStr = UuidStrTmp;
 819   int i;
 820
 821   static bool m_uuidInitialized = false;
 822   if (!m_uuidInitialized)
 823   {
 824     /* use current time as the seed for rand()*/
 825     srand(time(NULL));
 826     m_uuidInitialized = true;
 827   }
 828
 829   /*Data1 - 8 characters.*/
 830   for(i = 0; i < 8; i++, pUuidStr++)
 831     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 832
 833   /*Data2 - 4 characters.*/
 834   *pUuidStr++ = '-';
 835   for(i = 0; i < 4; i++, pUuidStr++)
 836     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 837
 838   /*Data3 - 4 characters.*/
 839   *pUuidStr++ = '-';
 840   for(i = 0; i < 4; i++, pUuidStr++)
 841     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 842
 843   /*Data4 - 4 characters.*/
 844   *pUuidStr++ = '-';
 845   for(i = 0; i < 4; i++, pUuidStr++)
 846     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 847
 848   /*Data5 - 12 characters.*/
 849   *pUuidStr++ = '-';
 850   for(i = 0; i < 12; i++, pUuidStr++)
 851     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 852
 853   *pUuidStr = '\0';
 854
 855   m_lastUUID = UuidStrTmp;
 856   return UuidStrTmp;
 857 }
 858
 859 bool StringUtils::ValidateUUID(const CStdString &uuid)
 860 {
 861   CRegExp guidRE;
 862   guidRE.RegComp(ADDON_GUID_RE);
 863   return (guidRE.RegFind(uuid.c_str()) == 0);
 864 }
 865
 866 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 867 {
 868   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 869 }
 870
 871 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 872 {
 873   int best = -1;
 874   matchscore = 0;
 875
 876   int i = 0;
 877   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 878   {
 879     int maxlength = max(str.length(), it->length());
 880     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 881     if (score > matchscore)
 882     {
 883       matchscore = score;
 884       best = i;
 885     }
 886   }
 887   return best;
 888 }
 889
 890 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 891 {
 892   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 893   {
 894     if (str.find(*it) != str.npos)
 895       return true;
 896   }
 897   return false;
 898 }
 899
 900 size_t StringUtils::utf8_strlen(const char *s)
 901 {
 902   size_t length = 0;
 903   while (*s)
 904   {
 905     if ((*s++ & 0xC0) != 0x80)
 906       length++;
 907   }
 908   return length;
 909 }
 910
 911 std::string StringUtils::Paramify(const std::string &param)
 912 {
 913   std::string result = param;
 914   // escape backspaces
 915   StringUtils::Replace(result, "\\", "\\\\");
 916   // escape double quotes
 917   StringUtils::Replace(result, "\"", "\\\"");
 918
 919   // add double quotes around the whole string
 920   return "\"" + result + "\"";
 921 }
 922
 923 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 924 {
 925   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 926   // Skip delimiters at beginning.
 927   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 928   // Find first "non-delimiter".
 929   string::size_type pos = input.find_first_of(delimiters, lastPos);
 930
 931   while (string::npos != pos || string::npos != lastPos)
 932   {
 933     // Found a token, add it to the vector.
 934     tokens.push_back(input.substr(lastPos, pos - lastPos));
 935     // Skip delimiters.  Note the "not_of"
 936     lastPos = input.find_first_not_of(delimiters, pos);
 937     // Find next "non-delimiter"
 938     pos = input.find_first_of(delimiters, lastPos);
 939   }
 940 }