// Source: code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
#include "StringUtils.h"
#include "utils/RegExp.h"
#include "utils/fstrcmp.h"
#include <locale>

#include <limits.h>
#include <math.h>
#include <sstream>
#include <time.h>
  40
  41 #define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try
  42
  43 using namespace std;
  44
// Anchored pattern for a GUID written as 8-4-4-4-12 hexadecimal digits.
// A leading '{' and a trailing '}' are each optional. Used by ValidateUUID().
const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";

/* empty string for use in returns by ref */
const CStdString StringUtils::EmptyString = "";
const std::string StringUtils::Empty = "";
// Holds the value most recently generated by CreateUUID().
CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToUpper(wstring &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::towupper);
 167 }
 168
 169 void StringUtils::ToLower(string &str)
 170 {
 171   transform(str.begin(), str.end(), str.begin(), ::tolower);
 172 }
 173
 174 void StringUtils::ToLower(wstring &str)
 175 {
 176   transform(str.begin(), str.end(), str.begin(), ::towlower);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 180 {
 181   return EqualsNoCase(str1.c_str(), str2.c_str());
 182 }
 183
 184 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 185 {
 186   return EqualsNoCase(str1.c_str(), s2);
 187 }
 188
 189 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 190 {
 191   char c2; // we need only one char outside the loop
 192   do
 193   {
 194     const char c1 = *s1++; // const local variable should help compiler to optimize
 195     c2 = *s2++;
 196     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 197       return false;
 198   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 199   return true;
 200 }
 201
 202 string StringUtils::Left(const string &str, size_t count)
 203 {
 204   count = max((size_t)0, min(count, str.size()));
 205   return str.substr(0, count);
 206 }
 207
 208 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 209 {
 210   if (first + count > str.size())
 211     count = str.size() - first;
 212
 213   if (first > str.size())
 214     return string();
 215
 216   ASSERT(first + count <= str.size());
 217
 218   return str.substr(first, count);
 219 }
 220
 221 string StringUtils::Right(const string &str, size_t count)
 222 {
 223   count = max((size_t)0, min(count, str.size()));
 224   return str.substr(str.size() - count);
 225 }
 226
 227 std::string& StringUtils::Trim(std::string &str)
 228 {
 229   TrimLeft(str);
 230   return TrimRight(str);
 231 }
 232
 233 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 234 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 235 static int isspace_c(char c)
 236 {
 237   return ::isspace((unsigned char)c);
 238 }
 239
 240 std::string& StringUtils::TrimLeft(std::string &str)
 241 {
 242   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 243   return str;
 244 }
 245
 246 std::string& StringUtils::TrimLeft(std::string &str, const std::string& chars)
 247 {
 248   size_t nidx = str.find_first_not_of(chars);
 249   str.substr(nidx == str.npos ? 0 : nidx).swap(str);
 250   return str;
 251 }
 252
 253 std::string& StringUtils::TrimRight(std::string &str)
 254 {
 255   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 256   return str;
 257 }
 258
 259 std::string& StringUtils::TrimRight(std::string &str, const std::string& chars)
 260 {
 261   size_t nidx = str.find_last_not_of(chars);
 262   str.erase(str.npos == nidx ? 0 : ++nidx);
 263   return str;
 264 }
 265
 266 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 267 {
 268   std::string::iterator it = str.begin();
 269   bool onSpace = false;
 270   while(it != str.end())
 271   {
 272     if (*it == '\t')
 273       *it = ' ';
 274
 275     if (*it == ' ')
 276     {
 277       if (onSpace)
 278       {
 279         it = str.erase(it);
 280         continue;
 281       }
 282       else
 283         onSpace = true;
 284     }
 285     else
 286       onSpace = false;
 287
 288     ++it;
 289   }
 290   return str;
 291 }
 292
 293 int StringUtils::Replace(string &str, char oldChar, char newChar)
 294 {
 295   int replacedChars = 0;
 296   for (string::iterator it = str.begin(); it != str.end(); it++)
 297   {
 298     if (*it == oldChar)
 299     {
 300       *it = newChar;
 301       replacedChars++;
 302     }
 303   }
 304
 305   return replacedChars;
 306 }
 307
 308 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 309 {
 310   if (oldStr.empty())
 311     return 0;
 312
 313   int replacedChars = 0;
 314   size_t index = 0;
 315
 316   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 317   {
 318     str.replace(index, oldStr.size(), newStr);
 319     index += newStr.size();
 320     replacedChars++;
 321   }
 322
 323   return replacedChars;
 324 }
 325
 326 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 327 {
 328   return str1.compare(0, str2.size(), str2) == 0;
 329 }
 330
 331 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 332 {
 333   return StartsWith(str1.c_str(), s2);
 334 }
 335
 336 bool StringUtils::StartsWith(const char *s1, const char *s2)
 337 {
 338   while (*s2 != '\0')
 339   {
 340     if (*s1 != *s2)
 341       return false;
 342     s1++;
 343     s2++;
 344   }
 345   return true;
 346 }
 347
 348 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 349 {
 350   return StartsWithNoCase(str1.c_str(), str2.c_str());
 351 }
 352
 353 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 354 {
 355   return StartsWithNoCase(str1.c_str(), s2);
 356 }
 357
 358 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 359 {
 360   while (*s2 != '\0')
 361   {
 362     if (::tolower(*s1) != ::tolower(*s2))
 363       return false;
 364     s1++;
 365     s2++;
 366   }
 367   return true;
 368 }
 369
 370 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 371 {
 372   if (str1.size() < str2.size())
 373     return false;
 374   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 375 }
 376
 377 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 378 {
 379   size_t len2 = strlen(s2);
 380   if (str1.size() < len2)
 381     return false;
 382   return str1.compare(str1.size() - len2, len2, s2) == 0;
 383 }
 384
 385 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 386 {
 387   if (str1.size() < str2.size())
 388     return false;
 389   const char *s1 = str1.c_str() + str1.size() - str2.size();
 390   const char *s2 = str2.c_str();
 391   while (*s2 != '\0')
 392   {
 393     if (::tolower(*s1) != ::tolower(*s2))
 394       return false;
 395     s1++;
 396     s2++;
 397   }
 398   return true;
 399 }
 400
 401 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 402 {
 403   size_t len2 = strlen(s2);
 404   if (str1.size() < len2)
 405     return false;
 406   const char *s1 = str1.c_str() + str1.size() - len2;
 407   while (*s2 != '\0')
 408   {
 409     if (::tolower(*s1) != ::tolower(*s2))
 410       return false;
 411     s1++;
 412     s2++;
 413   }
 414   return true;
 415 }
 416
 417 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 418 {
 419   result = "";
 420   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 421     result += (*it) + delimiter;
 422
 423   if(result != "")
 424     result.Delete(result.size()-delimiter.size(), delimiter.size());
 425 }
 426
 427 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 428 {
 429   CStdString result;
 430   JoinString(strings, delimiter, result);
 431   return result;
 432 }
 433
 434 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 435 {
 436   CStdStringArray strArray;
 437   for (unsigned int index = 0; index < strings.size(); index++)
 438     strArray.push_back(strings.at(index));
 439
 440   return JoinString(strArray, delimiter);
 441 }
 442
 443 // Splits the string input into pieces delimited by delimiter.
 444 // if 2 delimiters are in a row, it will include the empty string between them.
 445 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 446 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 447 {
 448   int iPos = -1;
 449   int newPos = -1;
 450   int sizeS2 = delimiter.GetLength();
 451   int isize = input.GetLength();
 452
 453   results.clear();
 454
 455   vector<unsigned int> positions;
 456
 457   newPos = input.Find (delimiter, 0);
 458
 459   if ( newPos < 0 )
 460   {
 461     results.push_back(input);
 462     return 1;
 463   }
 464
 465   while ( newPos > iPos )
 466   {
 467     positions.push_back(newPos);
 468     iPos = newPos;
 469     newPos = input.Find (delimiter, iPos + sizeS2);
 470   }
 471
 472   // numFound is the number of delimiters which is one less
 473   // than the number of substrings
 474   unsigned int numFound = positions.size();
 475   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 476     numFound = iMaxStrings - 1;
 477
 478   for ( unsigned int i = 0; i <= numFound; i++ )
 479   {
 480     CStdString s;
 481     if ( i == 0 )
 482     {
 483       if ( i == numFound )
 484         s = input;
 485       else
 486         s = input.Mid( i, positions[i] );
 487     }
 488     else
 489     {
 490       int offset = positions[i - 1] + sizeS2;
 491       if ( offset < isize )
 492       {
 493         if ( i == numFound )
 494           s = input.Mid(offset);
 495         else if ( i > 0 )
 496           s = input.Mid( positions[i - 1] + sizeS2,
 497                          positions[i] - positions[i - 1] - sizeS2 );
 498       }
 499     }
 500     results.push_back(s);
 501   }
 502   // return the number of substrings
 503   return results.size();
 504 }
 505
 506 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 507 {
 508   CStdStringArray result;
 509   SplitString(input, delimiter, result, iMaxStrings);
 510   return result;
 511 }
 512
 513 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 514 {
 515   CStdStringArray result;
 516   SplitString(input, delimiter, result, iMaxStrings);
 517
 518   vector<string> strArray;
 519   for (unsigned int index = 0; index < result.size(); index++)
 520     strArray.push_back(result.at(index));
 521
 522   return strArray;
 523 }
 524
 525 // returns the number of occurrences of strFind in strInput.
 526 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 527 {
 528   int pos = strInput.Find(strFind, 0);
 529   int numfound = 0;
 530   while (pos >= 0)
 531   {
 532     numfound++;
 533     pos = strInput.Find(strFind, pos + 1);
 534   }
 535   return numfound;
 536 }
 537
 538 // Compares separately the numeric and alphabetic parts of a string.
 539 // returns negative if left < right, positive if left > right
 540 // and 0 if they are identical (essentially calculates left - right)
 541 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 542 {
 543   wchar_t *l = (wchar_t *)left;
 544   wchar_t *r = (wchar_t *)right;
 545   wchar_t *ld, *rd;
 546   wchar_t lc, rc;
 547   int64_t lnum, rnum;
 548   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 549   int cmp_res = 0;
 550   while (*l != 0 && *r != 0)
 551   {
 552     // check if we have a numerical value
 553     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 554     {
 555       ld = l;
 556       lnum = 0;
 557       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 558       { // compare only up to 15 digits
 559         lnum *= 10;
 560         lnum += *ld++ - '0';
 561       }
 562       rd = r;
 563       rnum = 0;
 564       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 565       { // compare only up to 15 digits
 566         rnum *= 10;
 567         rnum += *rd++ - L'0';
 568       }
 569       // do we have numbers?
 570       if (lnum != rnum)
 571       { // yes - and they're different!
 572         return lnum - rnum;
 573       }
 574       l = ld;
 575       r = rd;
 576       continue;
 577     }
 578     // do case less comparison
 579     lc = *l;
 580     if (lc >= L'A' && lc <= L'Z')
 581       lc += L'a'-L'A';
 582     rc = *r;
 583     if (rc >= L'A' && rc <= L'Z')
 584       rc += L'a'- L'A';
 585
 586     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 587     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 588     {
 589       return cmp_res;
 590     }
 591     l++; r++;
 592   }
 593   if (*r)
 594   { // r is longer
 595     return -1;
 596   }
 597   else if (*l)
 598   { // l is longer
 599     return 1;
 600   }
 601   return 0; // files are the same
 602 }
 603
 604 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 605 {
 606   CStdStringArray days;
 607   int splitCount = StringUtils::SplitString(dateString, "-", days);
 608   if (splitCount == 1)
 609     return atoi(days[0].c_str());
 610   else if (splitCount == 2)
 611     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 612   else if (splitCount == 3)
 613     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 614   else
 615     return -1;
 616 }
 617
 618 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 619 {
 620   CStdString strCopy(timeString);
 621   StringUtils::Trim(strCopy);
 622   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 623   {
 624     // this is imdb format of "XXX min"
 625     return 60 * atoi(strCopy.c_str());
 626   }
 627   else
 628   {
 629     CStdStringArray secs;
 630     StringUtils::SplitString(strCopy, ":", secs);
 631     int timeInSecs = 0;
 632     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 633     {
 634       timeInSecs *= 60;
 635       timeInSecs += atoi(secs[i]);
 636     }
 637     return timeInSecs;
 638   }
 639 }
 640
 641 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 642 {
 643   int hh = lSeconds / 3600;
 644   lSeconds = lSeconds % 3600;
 645   int mm = lSeconds / 60;
 646   int ss = lSeconds % 60;
 647
 648   if (format == TIME_FORMAT_GUESS)
 649     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 650   CStdString strHMS;
 651   if (format & TIME_FORMAT_HH)
 652     strHMS.AppendFormat("%02.2i", hh);
 653   else if (format & TIME_FORMAT_H)
 654     strHMS.AppendFormat("%i", hh);
 655   if (format & TIME_FORMAT_MM)
 656     strHMS.AppendFormat(strHMS.empty() ? "%02.2i" : ":%02.2i", mm);
 657   if (format & TIME_FORMAT_SS)
 658     strHMS.AppendFormat(strHMS.empty() ? "%02.2i" : ":%02.2i", ss);
 659   return strHMS;
 660 }
 661
 662 bool StringUtils::IsNaturalNumber(const CStdString& str)
 663 {
 664   size_t i = 0, n = 0;
 665   // allow whitespace,digits,whitespace
 666   while (i < str.size() && isspace((unsigned char) str[i]))
 667     i++;
 668   while (i < str.size() && isdigit((unsigned char) str[i]))
 669   {
 670     i++; n++;
 671   }
 672   while (i < str.size() && isspace((unsigned char) str[i]))
 673     i++;
 674   return i == str.size() && n > 0;
 675 }
 676
 677 bool StringUtils::IsInteger(const CStdString& str)
 678 {
 679   size_t i = 0, n = 0;
 680   // allow whitespace,-,digits,whitespace
 681   while (i < str.size() && isspace((unsigned char) str[i]))
 682     i++;
 683   if (i < str.size() && str[i] == '-')
 684     i++;
 685   while (i < str.size() && isdigit((unsigned char) str[i]))
 686   {
 687     i++; n++;
 688   }
 689   while (i < str.size() && isspace((unsigned char) str[i]))
 690     i++;
 691   return i == str.size() && n > 0;
 692 }
 693
 694 void StringUtils::RemoveCRLF(CStdString& strLine)
 695 {
 696   while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
 697   {
 698     strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
 699   }
 700 }
 701
 702 CStdString StringUtils::SizeToString(int64_t size)
 703 {
 704   CStdString strLabel;
 705   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 706   unsigned int i = 0;
 707   double s = (double)size;
 708   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 709   {
 710     s /= 1024.0;
 711     i++;
 712   }
 713
 714   if (!i)
 715     strLabel = StringUtils::Format("%.0lf %cB ", s, prefixes[i]);
 716   else if (s >= 100.0)
 717     strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
 718   else
 719     strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
 720
 721   return strLabel;
 722 }
 723
 724 // return -1 if not, else return the utf8 char length.
 725 int IsUTF8Letter(const unsigned char *str)
 726 {
 727   // reference:
 728   // unicode -> utf8 table: http://www.utf8-chartable.de/
 729   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 730   unsigned char ch = str[0];
 731   if (!ch)
 732     return -1;
 733   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 734     return 1;
 735   if (!(ch & 0x80))
 736     return -1;
 737   unsigned char ch2 = str[1];
 738   if (!ch2)
 739     return -1;
 740   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 741   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 742     return 2;
 743   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 744   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 745     return 2;
 746   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 747   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 748   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 749       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 750     return 2;
 751   return -1;
 752 }
 753
 754 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 755 {
 756   // NOTE: This assumes word is lowercase!
 757   unsigned char *s = (unsigned char *)str;
 758   do
 759   {
 760     // start with a compare
 761     unsigned char *c = s;
 762     unsigned char *w = (unsigned char *)wordLowerCase;
 763     bool same = true;
 764     while (same && *c && *w)
 765     {
 766       unsigned char lc = *c++;
 767       if (lc >= 'A' && lc <= 'Z')
 768         lc += 'a'-'A';
 769
 770       if (lc != *w++) // different
 771         same = false;
 772     }
 773     if (same && *w == 0)  // only the same if word has been exhausted
 774       return (const char *)s - str;
 775
 776     // otherwise, skip current word (composed by latin letters) or number
 777     int l;
 778     if (*s >= '0' && *s <= '9')
 779     {
 780       ++s;
 781       while (*s >= '0' && *s <= '9') ++s;
 782     }
 783     else if ((l = IsUTF8Letter(s)) > 0)
 784     {
 785       s += l;
 786       while ((l = IsUTF8Letter(s)) > 0) s += l;
 787     }
 788     else
 789       ++s;
 790     while (*s && *s == ' ') s++;
 791
 792     // and repeat until we're done
 793   } while (*s);
 794
 795   return CStdString::npos;
 796 }
 797
 798 // assumes it is called from after the first open bracket is found
 799 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 800 {
 801   int blocks = 1;
 802   for (unsigned int i = startPos; i < str.size(); i++)
 803   {
 804     if (str[i] == opener)
 805       blocks++;
 806     else if (str[i] == closer)
 807     {
 808       blocks--;
 809       if (!blocks)
 810         return i;
 811     }
 812   }
 813
 814   return (int)CStdString::npos;
 815 }
 816
 817 void StringUtils::WordToDigits(CStdString &word)
 818 {
 819   static const char word_to_letter[] = "22233344455566677778889999";
 820   StringUtils::ToLower(word);
 821   for (unsigned int i = 0; i < word.size(); ++i)
 822   { // NB: This assumes ascii, which probably needs extending at some  point.
 823     char letter = word[i];
 824     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 825     {
 826       word[i] = word_to_letter[letter-'a'];
 827     }
 828     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 829     {
 830       word[i] = ' ';  // replace everything else with a space
 831     }
 832   }
 833 }
 834
 835 CStdString StringUtils::CreateUUID()
 836 {
 837   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 838   * Version 4 conform local unique UUID based upon random number generation.
 839   */
 840   char UuidStrTmp[40];
 841   char *pUuidStr = UuidStrTmp;
 842   int i;
 843
 844   static bool m_uuidInitialized = false;
 845   if (!m_uuidInitialized)
 846   {
 847     /* use current time as the seed for rand()*/
 848     srand(time(NULL));
 849     m_uuidInitialized = true;
 850   }
 851
 852   /*Data1 - 8 characters.*/
 853   for(i = 0; i < 8; i++, pUuidStr++)
 854     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 855
 856   /*Data2 - 4 characters.*/
 857   *pUuidStr++ = '-';
 858   for(i = 0; i < 4; i++, pUuidStr++)
 859     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 860
 861   /*Data3 - 4 characters.*/
 862   *pUuidStr++ = '-';
 863   for(i = 0; i < 4; i++, pUuidStr++)
 864     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 865
 866   /*Data4 - 4 characters.*/
 867   *pUuidStr++ = '-';
 868   for(i = 0; i < 4; i++, pUuidStr++)
 869     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 870
 871   /*Data5 - 12 characters.*/
 872   *pUuidStr++ = '-';
 873   for(i = 0; i < 12; i++, pUuidStr++)
 874     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 875
 876   *pUuidStr = '\0';
 877
 878   m_lastUUID = UuidStrTmp;
 879   return UuidStrTmp;
 880 }
 881
 882 bool StringUtils::ValidateUUID(const CStdString &uuid)
 883 {
 884   CRegExp guidRE;
 885   guidRE.RegComp(ADDON_GUID_RE);
 886   return (guidRE.RegFind(uuid.c_str()) == 0);
 887 }
 888
 889 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 890 {
 891   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 892 }
 893
 894 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 895 {
 896   int best = -1;
 897   matchscore = 0;
 898
 899   int i = 0;
 900   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 901   {
 902     int maxlength = max(str.length(), it->length());
 903     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 904     if (score > matchscore)
 905     {
 906       matchscore = score;
 907       best = i;
 908     }
 909   }
 910   return best;
 911 }
 912
 913 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 914 {
 915   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 916   {
 917     if (str.find(*it) != str.npos)
 918       return true;
 919   }
 920   return false;
 921 }
 922
 923 size_t StringUtils::utf8_strlen(const char *s)
 924 {
 925   size_t length = 0;
 926   while (*s)
 927   {
 928     if ((*s++ & 0xC0) != 0x80)
 929       length++;
 930   }
 931   return length;
 932 }
 933
 934 std::string StringUtils::Paramify(const std::string &param)
 935 {
 936   std::string result = param;
 937   // escape backspaces
 938   StringUtils::Replace(result, "\\", "\\\\");
 939   // escape double quotes
 940   StringUtils::Replace(result, "\"", "\\\"");
 941
 942   // add double quotes around the whole string
 943   return "\"" + result + "\"";
 944 }
 945
 946 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 947 {
 948   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 949   // Skip delimiters at beginning.
 950   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 951   // Find first "non-delimiter".
 952   string::size_type pos = input.find_first_of(delimiters, lastPos);
 953
 954   while (string::npos != pos || string::npos != lastPos)
 955   {
 956     // Found a token, add it to the vector.
 957     tokens.push_back(input.substr(lastPos, pos - lastPos));
 958     // Skip delimiters.  Note the "not_of"
 959     lastPos = input.find_first_not_of(delimiters, pos);
 960     // Find next "non-delimiter"
 961     pos = input.find_first_of(delimiters, lastPos);
 962   }
 963 }