code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
  41 #define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try
  42
  43 using namespace std;
  44
  45 const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  46
  47 /* empty string for use in returns by ref */
  48 const CStdString StringUtils::EmptyString = "";
  49 const std::string StringUtils::Empty = "";
  50 CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToUpper(wstring &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::towupper);
 167 }
 168
 169 void StringUtils::ToLower(string &str)
 170 {
 171   transform(str.begin(), str.end(), str.begin(), ::tolower);
 172 }
 173
 174 void StringUtils::ToLower(wstring &str)
 175 {
 176   transform(str.begin(), str.end(), str.begin(), ::towlower);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 180 {
 181   return EqualsNoCase(str1.c_str(), str2.c_str());
 182 }
 183
 184 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 185 {
 186   return EqualsNoCase(str1.c_str(), s2);
 187 }
 188
 189 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 190 {
 191   char c2; // we need only one char outside the loop
 192   do
 193   {
 194     const char c1 = *s1++; // const local variable should help compiler to optimize
 195     c2 = *s2++;
 196     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 197       return false;
 198   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 199   return true;
 200 }
 201
 202 string StringUtils::Left(const string &str, size_t count)
 203 {
 204   count = max((size_t)0, min(count, str.size()));
 205   return str.substr(0, count);
 206 }
 207
 208 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 209 {
 210   if (first + count > str.size())
 211     count = str.size() - first;
 212
 213   if (first > str.size())
 214     return string();
 215
 216   ASSERT(first + count <= str.size());
 217
 218   return str.substr(first, count);
 219 }
 220
 221 string StringUtils::Right(const string &str, size_t count)
 222 {
 223   count = max((size_t)0, min(count, str.size()));
 224   return str.substr(str.size() - count);
 225 }
 226
 227 std::string& StringUtils::Trim(std::string &str)
 228 {
 229   TrimLeft(str);
 230   return TrimRight(str);
 231 }
 232
 233 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 234 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 235 static int isspace_c(char c)
 236 {
 237   return ::isspace((unsigned char)c);
 238 }
 239
 240 std::string& StringUtils::TrimLeft(std::string &str)
 241 {
 242   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 243   return str;
 244 }
 245
 246 std::string& StringUtils::TrimLeft(std::string &str, const std::string& chars)
 247 {
 248   size_t nidx = str.find_first_not_of(chars);
 249   str.substr(nidx == str.npos ? 0 : nidx).swap(str);
 250   return str;
 251 }
 252
 253 std::string& StringUtils::TrimRight(std::string &str)
 254 {
 255   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 256   return str;
 257 }
 258
 259 std::string& StringUtils::TrimRight(std::string &str, const std::string& chars)
 260 {
 261   size_t nidx = str.find_last_not_of(chars);
 262   str.erase(str.npos == nidx ? 0 : ++nidx);
 263   return str;
 264 }
 265
 266 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 267 {
 268   std::string::iterator it = str.begin();
 269   bool onSpace = false;
 270   while(it != str.end())
 271   {
 272     if (*it == '\t')
 273       *it = ' ';
 274
 275     if (*it == ' ')
 276     {
 277       if (onSpace)
 278       {
 279         it = str.erase(it);
 280         continue;
 281       }
 282       else
 283         onSpace = true;
 284     }
 285     else
 286       onSpace = false;
 287
 288     ++it;
 289   }
 290   return str;
 291 }
 292
 293 int StringUtils::Replace(string &str, char oldChar, char newChar)
 294 {
 295   int replacedChars = 0;
 296   for (string::iterator it = str.begin(); it != str.end(); it++)
 297   {
 298     if (*it == oldChar)
 299     {
 300       *it = newChar;
 301       replacedChars++;
 302     }
 303   }
 304
 305   return replacedChars;
 306 }
 307
 308 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 309 {
 310   if (oldStr.empty())
 311     return 0;
 312
 313   int replacedChars = 0;
 314   size_t index = 0;
 315
 316   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 317   {
 318     str.replace(index, oldStr.size(), newStr);
 319     index += newStr.size();
 320     replacedChars++;
 321   }
 322
 323   return replacedChars;
 324 }
 325
 326 int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
 327 {
 328   if (oldStr.empty())
 329     return 0;
 330
 331   int replacedChars = 0;
 332   size_t index = 0;
 333
 334   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 335   {
 336     str.replace(index, oldStr.size(), newStr);
 337     index += newStr.size();
 338     replacedChars++;
 339   }
 340
 341   return replacedChars;
 342 }
 343
 344 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 345 {
 346   return str1.compare(0, str2.size(), str2) == 0;
 347 }
 348
 349 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 350 {
 351   return StartsWith(str1.c_str(), s2);
 352 }
 353
 354 bool StringUtils::StartsWith(const char *s1, const char *s2)
 355 {
 356   while (*s2 != '\0')
 357   {
 358     if (*s1 != *s2)
 359       return false;
 360     s1++;
 361     s2++;
 362   }
 363   return true;
 364 }
 365
 366 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 367 {
 368   return StartsWithNoCase(str1.c_str(), str2.c_str());
 369 }
 370
 371 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 372 {
 373   return StartsWithNoCase(str1.c_str(), s2);
 374 }
 375
 376 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 377 {
 378   while (*s2 != '\0')
 379   {
 380     if (::tolower(*s1) != ::tolower(*s2))
 381       return false;
 382     s1++;
 383     s2++;
 384   }
 385   return true;
 386 }
 387
 388 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 389 {
 390   if (str1.size() < str2.size())
 391     return false;
 392   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 393 }
 394
 395 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 396 {
 397   size_t len2 = strlen(s2);
 398   if (str1.size() < len2)
 399     return false;
 400   return str1.compare(str1.size() - len2, len2, s2) == 0;
 401 }
 402
 403 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 404 {
 405   if (str1.size() < str2.size())
 406     return false;
 407   const char *s1 = str1.c_str() + str1.size() - str2.size();
 408   const char *s2 = str2.c_str();
 409   while (*s2 != '\0')
 410   {
 411     if (::tolower(*s1) != ::tolower(*s2))
 412       return false;
 413     s1++;
 414     s2++;
 415   }
 416   return true;
 417 }
 418
 419 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 420 {
 421   size_t len2 = strlen(s2);
 422   if (str1.size() < len2)
 423     return false;
 424   const char *s1 = str1.c_str() + str1.size() - len2;
 425   while (*s2 != '\0')
 426   {
 427     if (::tolower(*s1) != ::tolower(*s2))
 428       return false;
 429     s1++;
 430     s2++;
 431   }
 432   return true;
 433 }
 434
 435 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 436 {
 437   result = "";
 438   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 439     result += (*it) + delimiter;
 440
 441   if(result != "")
 442     result.erase(result.size()-delimiter.size(), delimiter.size());
 443 }
 444
 445 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 446 {
 447   CStdString result;
 448   JoinString(strings, delimiter, result);
 449   return result;
 450 }
 451
 452 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 453 {
 454   CStdStringArray strArray;
 455   for (unsigned int index = 0; index < strings.size(); index++)
 456     strArray.push_back(strings.at(index));
 457
 458   return JoinString(strArray, delimiter);
 459 }
 460
 461 // Splits the string input into pieces delimited by delimiter.
 462 // if 2 delimiters are in a row, it will include the empty string between them.
 463 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 464 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 465 {
 466   size_t iPos = std::string::npos;
 467   size_t newPos = std::string::npos;
 468   size_t sizeS2 = delimiter.size();
 469   size_t isize = input.size();
 470
 471   results.clear();
 472
 473   vector<unsigned int> positions;
 474
 475   newPos = input.find(delimiter, 0);
 476
 477   if (newPos == std::string::npos)
 478   {
 479     results.push_back(input);
 480     return 1;
 481   }
 482
 483   while (newPos != std::string::npos)
 484   {
 485     positions.push_back(newPos);
 486     iPos = newPos;
 487     newPos = input.find(delimiter, iPos + sizeS2);
 488   }
 489
 490   // numFound is the number of delimiters which is one less
 491   // than the number of substrings
 492   unsigned int numFound = positions.size();
 493   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 494     numFound = iMaxStrings - 1;
 495
 496   for ( unsigned int i = 0; i <= numFound; i++ )
 497   {
 498     CStdString s;
 499     if ( i == 0 )
 500     {
 501       if ( i == numFound )
 502         s = input;
 503       else
 504         s = input.substr(i, positions[i]);
 505     }
 506     else
 507     {
 508       size_t offset = positions[i - 1] + sizeS2;
 509       if ( offset < isize )
 510       {
 511         if ( i == numFound )
 512           s = input.substr(offset);
 513         else if ( i > 0 )
 514           s = input.substr( positions[i - 1] + sizeS2,
 515                          positions[i] - positions[i - 1] - sizeS2 );
 516       }
 517     }
 518     results.push_back(s);
 519   }
 520   // return the number of substrings
 521   return results.size();
 522 }
 523
 524 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 525 {
 526   CStdStringArray result;
 527   SplitString(input, delimiter, result, iMaxStrings);
 528   return result;
 529 }
 530
 531 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 532 {
 533   CStdStringArray result;
 534   SplitString(input, delimiter, result, iMaxStrings);
 535
 536   vector<string> strArray;
 537   for (unsigned int index = 0; index < result.size(); index++)
 538     strArray.push_back(result.at(index));
 539
 540   return strArray;
 541 }
 542
 543 // returns the number of occurrences of strFind in strInput.
 544 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 545 {
 546   size_t pos = strInput.find(strFind, 0);
 547   int numfound = 0;
 548   while (pos != std::string::npos)
 549   {
 550     numfound++;
 551     pos = strInput.find(strFind, pos + 1);
 552   }
 553   return numfound;
 554 }
 555
 556 // Compares separately the numeric and alphabetic parts of a string.
 557 // returns negative if left < right, positive if left > right
 558 // and 0 if they are identical (essentially calculates left - right)
 559 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 560 {
 561   wchar_t *l = (wchar_t *)left;
 562   wchar_t *r = (wchar_t *)right;
 563   wchar_t *ld, *rd;
 564   wchar_t lc, rc;
 565   int64_t lnum, rnum;
 566   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 567   int cmp_res = 0;
 568   while (*l != 0 && *r != 0)
 569   {
 570     // check if we have a numerical value
 571     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 572     {
 573       ld = l;
 574       lnum = 0;
 575       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 576       { // compare only up to 15 digits
 577         lnum *= 10;
 578         lnum += *ld++ - '0';
 579       }
 580       rd = r;
 581       rnum = 0;
 582       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 583       { // compare only up to 15 digits
 584         rnum *= 10;
 585         rnum += *rd++ - L'0';
 586       }
 587       // do we have numbers?
 588       if (lnum != rnum)
 589       { // yes - and they're different!
 590         return lnum - rnum;
 591       }
 592       l = ld;
 593       r = rd;
 594       continue;
 595     }
 596     // do case less comparison
 597     lc = *l;
 598     if (lc >= L'A' && lc <= L'Z')
 599       lc += L'a'-L'A';
 600     rc = *r;
 601     if (rc >= L'A' && rc <= L'Z')
 602       rc += L'a'- L'A';
 603
 604     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 605     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 606     {
 607       return cmp_res;
 608     }
 609     l++; r++;
 610   }
 611   if (*r)
 612   { // r is longer
 613     return -1;
 614   }
 615   else if (*l)
 616   { // l is longer
 617     return 1;
 618   }
 619   return 0; // files are the same
 620 }
 621
 622 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 623 {
 624   CStdStringArray days;
 625   int splitCount = StringUtils::SplitString(dateString, "-", days);
 626   if (splitCount == 1)
 627     return atoi(days[0].c_str());
 628   else if (splitCount == 2)
 629     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 630   else if (splitCount == 3)
 631     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 632   else
 633     return -1;
 634 }
 635
 636 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 637 {
 638   CStdString strCopy(timeString);
 639   StringUtils::Trim(strCopy);
 640   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 641   {
 642     // this is imdb format of "XXX min"
 643     return 60 * atoi(strCopy.c_str());
 644   }
 645   else
 646   {
 647     CStdStringArray secs;
 648     StringUtils::SplitString(strCopy, ":", secs);
 649     int timeInSecs = 0;
 650     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 651     {
 652       timeInSecs *= 60;
 653       timeInSecs += atoi(secs[i]);
 654     }
 655     return timeInSecs;
 656   }
 657 }
 658
 659 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 660 {
 661   int hh = lSeconds / 3600;
 662   lSeconds = lSeconds % 3600;
 663   int mm = lSeconds / 60;
 664   int ss = lSeconds % 60;
 665
 666   if (format == TIME_FORMAT_GUESS)
 667     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 668   CStdString strHMS;
 669   if (format & TIME_FORMAT_HH)
 670     strHMS += StringUtils::Format("%02.2i", hh);
 671   else if (format & TIME_FORMAT_H)
 672     strHMS += StringUtils::Format("%i", hh);
 673   if (format & TIME_FORMAT_MM)
 674     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", mm);
 675   if (format & TIME_FORMAT_SS)
 676     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", ss);
 677   return strHMS;
 678 }
 679
 680 bool StringUtils::IsNaturalNumber(const CStdString& str)
 681 {
 682   size_t i = 0, n = 0;
 683   // allow whitespace,digits,whitespace
 684   while (i < str.size() && isspace((unsigned char) str[i]))
 685     i++;
 686   while (i < str.size() && isdigit((unsigned char) str[i]))
 687   {
 688     i++; n++;
 689   }
 690   while (i < str.size() && isspace((unsigned char) str[i]))
 691     i++;
 692   return i == str.size() && n > 0;
 693 }
 694
 695 bool StringUtils::IsInteger(const CStdString& str)
 696 {
 697   size_t i = 0, n = 0;
 698   // allow whitespace,-,digits,whitespace
 699   while (i < str.size() && isspace((unsigned char) str[i]))
 700     i++;
 701   if (i < str.size() && str[i] == '-')
 702     i++;
 703   while (i < str.size() && isdigit((unsigned char) str[i]))
 704   {
 705     i++; n++;
 706   }
 707   while (i < str.size() && isspace((unsigned char) str[i]))
 708     i++;
 709   return i == str.size() && n > 0;
 710 }
 711
 712 void StringUtils::RemoveCRLF(CStdString& strLine)
 713 {
 714   StringUtils::TrimRight(strLine, "\n\r");
 715 }
 716
 717 CStdString StringUtils::SizeToString(int64_t size)
 718 {
 719   CStdString strLabel;
 720   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 721   unsigned int i = 0;
 722   double s = (double)size;
 723   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 724   {
 725     s /= 1024.0;
 726     i++;
 727   }
 728
 729   if (!i)
 730     strLabel = StringUtils::Format("%.0lf %cB ", s, prefixes[i]);
 731   else if (s >= 100.0)
 732     strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
 733   else
 734     strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
 735
 736   return strLabel;
 737 }
 738
 739 // return -1 if not, else return the utf8 char length.
 740 int IsUTF8Letter(const unsigned char *str)
 741 {
 742   // reference:
 743   // unicode -> utf8 table: http://www.utf8-chartable.de/
 744   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 745   unsigned char ch = str[0];
 746   if (!ch)
 747     return -1;
 748   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 749     return 1;
 750   if (!(ch & 0x80))
 751     return -1;
 752   unsigned char ch2 = str[1];
 753   if (!ch2)
 754     return -1;
 755   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 756   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 757     return 2;
 758   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 759   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 760     return 2;
 761   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 762   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 763   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 764       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 765     return 2;
 766   return -1;
 767 }
 768
 769 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 770 {
 771   // NOTE: This assumes word is lowercase!
 772   unsigned char *s = (unsigned char *)str;
 773   do
 774   {
 775     // start with a compare
 776     unsigned char *c = s;
 777     unsigned char *w = (unsigned char *)wordLowerCase;
 778     bool same = true;
 779     while (same && *c && *w)
 780     {
 781       unsigned char lc = *c++;
 782       if (lc >= 'A' && lc <= 'Z')
 783         lc += 'a'-'A';
 784
 785       if (lc != *w++) // different
 786         same = false;
 787     }
 788     if (same && *w == 0)  // only the same if word has been exhausted
 789       return (const char *)s - str;
 790
 791     // otherwise, skip current word (composed by latin letters) or number
 792     int l;
 793     if (*s >= '0' && *s <= '9')
 794     {
 795       ++s;
 796       while (*s >= '0' && *s <= '9') ++s;
 797     }
 798     else if ((l = IsUTF8Letter(s)) > 0)
 799     {
 800       s += l;
 801       while ((l = IsUTF8Letter(s)) > 0) s += l;
 802     }
 803     else
 804       ++s;
 805     while (*s && *s == ' ') s++;
 806
 807     // and repeat until we're done
 808   } while (*s);
 809
 810   return CStdString::npos;
 811 }
 812
 813 // assumes it is called from after the first open bracket is found
 814 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 815 {
 816   int blocks = 1;
 817   for (unsigned int i = startPos; i < str.size(); i++)
 818   {
 819     if (str[i] == opener)
 820       blocks++;
 821     else if (str[i] == closer)
 822     {
 823       blocks--;
 824       if (!blocks)
 825         return i;
 826     }
 827   }
 828
 829   return (int)CStdString::npos;
 830 }
 831
 832 void StringUtils::WordToDigits(CStdString &word)
 833 {
 834   static const char word_to_letter[] = "22233344455566677778889999";
 835   StringUtils::ToLower(word);
 836   for (unsigned int i = 0; i < word.size(); ++i)
 837   { // NB: This assumes ascii, which probably needs extending at some  point.
 838     char letter = word[i];
 839     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 840     {
 841       word[i] = word_to_letter[letter-'a'];
 842     }
 843     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 844     {
 845       word[i] = ' ';  // replace everything else with a space
 846     }
 847   }
 848 }
 849
 850 CStdString StringUtils::CreateUUID()
 851 {
 852   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 853   * Version 4 conform local unique UUID based upon random number generation.
 854   */
 855   char UuidStrTmp[40];
 856   char *pUuidStr = UuidStrTmp;
 857   int i;
 858
 859   static bool m_uuidInitialized = false;
 860   if (!m_uuidInitialized)
 861   {
 862     /* use current time as the seed for rand()*/
 863     srand(time(NULL));
 864     m_uuidInitialized = true;
 865   }
 866
 867   /*Data1 - 8 characters.*/
 868   for(i = 0; i < 8; i++, pUuidStr++)
 869     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 870
 871   /*Data2 - 4 characters.*/
 872   *pUuidStr++ = '-';
 873   for(i = 0; i < 4; i++, pUuidStr++)
 874     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 875
 876   /*Data3 - 4 characters.*/
 877   *pUuidStr++ = '-';
 878   for(i = 0; i < 4; i++, pUuidStr++)
 879     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 880
 881   /*Data4 - 4 characters.*/
 882   *pUuidStr++ = '-';
 883   for(i = 0; i < 4; i++, pUuidStr++)
 884     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 885
 886   /*Data5 - 12 characters.*/
 887   *pUuidStr++ = '-';
 888   for(i = 0; i < 12; i++, pUuidStr++)
 889     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 890
 891   *pUuidStr = '\0';
 892
 893   m_lastUUID = UuidStrTmp;
 894   return UuidStrTmp;
 895 }
 896
 897 bool StringUtils::ValidateUUID(const CStdString &uuid)
 898 {
 899   CRegExp guidRE;
 900   guidRE.RegComp(ADDON_GUID_RE);
 901   return (guidRE.RegFind(uuid.c_str()) == 0);
 902 }
 903
 904 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 905 {
 906   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 907 }
 908
 909 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 910 {
 911   int best = -1;
 912   matchscore = 0;
 913
 914   int i = 0;
 915   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 916   {
 917     int maxlength = max(str.length(), it->length());
 918     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 919     if (score > matchscore)
 920     {
 921       matchscore = score;
 922       best = i;
 923     }
 924   }
 925   return best;
 926 }
 927
 928 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 929 {
 930   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 931   {
 932     if (str.find(*it) != str.npos)
 933       return true;
 934   }
 935   return false;
 936 }
 937
 938 size_t StringUtils::utf8_strlen(const char *s)
 939 {
 940   size_t length = 0;
 941   while (*s)
 942   {
 943     if ((*s++ & 0xC0) != 0x80)
 944       length++;
 945   }
 946   return length;
 947 }
 948
 949 std::string StringUtils::Paramify(const std::string &param)
 950 {
 951   std::string result = param;
 952   // escape backspaces
 953   StringUtils::Replace(result, "\\", "\\\\");
 954   // escape double quotes
 955   StringUtils::Replace(result, "\"", "\\\"");
 956
 957   // add double quotes around the whole string
 958   return "\"" + result + "\"";
 959 }
 960
 961 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 962 {
 963   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 964   // Skip delimiters at beginning.
 965   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 966   // Find first "non-delimiter".
 967   string::size_type pos = input.find_first_of(delimiters, lastPos);
 968
 969   while (string::npos != pos || string::npos != lastPos)
 970   {
 971     // Found a token, add it to the vector.
 972     tokens.push_back(input.substr(lastPos, pos - lastPos));
 973     // Skip delimiters.  Note the "not_of"
 974     lastPos = input.find_first_not_of(delimiters, pos);
 975     // Find next "non-delimiter"
 976     pos = input.find_first_of(delimiters, lastPos);
 977   }
 978 }