code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
   #include "StringUtils.h"
   #include "utils/RegExp.h"
   #include "utils/fstrcmp.h"
   #include <locale>

   #include <limits.h>
   #include <math.h>
   #include <sstream>
   #include <time.h>
  40
  41 #define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try
  42
  43 using namespace std;
  44
  45 const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  46
  47 /* empty string for use in returns by ref */
  48 const CStdString StringUtils::EmptyString = "";
  49 const std::string StringUtils::Empty = "";
  50 CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 void StringUtils::ToUpper(string &str)
 107 {
 108   transform(str.begin(), str.end(), str.begin(), ::toupper);
 109 }
 110
 111 void StringUtils::ToLower(string &str)
 112 {
 113   transform(str.begin(), str.end(), str.begin(), ::tolower);
 114 }
 115
 116 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 117 {
 118   return EqualsNoCase(str1.c_str(), str2.c_str());
 119 }
 120
 121 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 122 {
 123   return EqualsNoCase(str1.c_str(), s2);
 124 }
 125
 126 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 127 {
 128   int c1, c2; // Yes, because the return type of tolower() is int.
 129               // To make these chars would be to introduce an unnecesary extra bitmask/zero-extend (effectively caller-narowing) into the binary.
 130   do
 131   {
 132     c1 = ::tolower(*s1++);
 133     c2 = ::tolower(*s2++);
 134     if (c1 != c2) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 135       return false;
 136   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 137   return true;
 138 }
 139
 140 string StringUtils::Left(const string &str, size_t count)
 141 {
 142   count = max((size_t)0, min(count, str.size()));
 143   return str.substr(0, count);
 144 }
 145
 146 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 147 {
 148   if (first + count > str.size())
 149     count = str.size() - first;
 150
 151   if (first > str.size())
 152     return string();
 153
 154   ASSERT(first + count <= str.size());
 155
 156   return str.substr(first, count);
 157 }
 158
 159 string StringUtils::Right(const string &str, size_t count)
 160 {
 161   count = max((size_t)0, min(count, str.size()));
 162   return str.substr(str.size() - count);
 163 }
 164
 165 std::string& StringUtils::Trim(std::string &str)
 166 {
 167   TrimLeft(str);
 168   return TrimRight(str);
 169 }
 170
 171 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 172 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 173 static int isspace_c(char c)
 174 {
 175   return ::isspace((unsigned char)c);
 176 }
 177
 178 std::string& StringUtils::TrimLeft(std::string &str)
 179 {
 180   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 181   return str;
 182 }
 183
 184 std::string& StringUtils::TrimRight(std::string &str)
 185 {
 186   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 187   return str;
 188 }
 189
 190 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 191 {
 192   std::string::iterator it = str.begin();
 193   bool onSpace = false;
 194   while(it != str.end())
 195   {
 196     if (*it == '\t')
 197       *it = ' ';
 198
 199     if (*it == ' ')
 200     {
 201       if (onSpace)
 202       {
 203         it = str.erase(it);
 204         continue;
 205       }
 206       else
 207         onSpace = true;
 208     }
 209     else
 210       onSpace = false;
 211
 212     ++it;
 213   }
 214   return str;
 215 }
 216
 217 int StringUtils::Replace(string &str, char oldChar, char newChar)
 218 {
 219   int replacedChars = 0;
 220   for (string::iterator it = str.begin(); it != str.end(); it++)
 221   {
 222     if (*it == oldChar)
 223     {
 224       *it = newChar;
 225       replacedChars++;
 226     }
 227   }
 228
 229   return replacedChars;
 230 }
 231
 232 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 233 {
 234   if (oldStr.empty())
 235     return 0;
 236
 237   int replacedChars = 0;
 238   size_t index = 0;
 239
 240   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 241   {
 242     str.replace(index, oldStr.size(), newStr);
 243     index += newStr.size();
 244     replacedChars++;
 245   }
 246
 247   return replacedChars;
 248 }
 249
 250 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 251 {
 252   return str1.compare(0, str2.size(), str2) == 0;
 253 }
 254
 255 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 256 {
 257   return StartsWith(str1.c_str(), s2);
 258 }
 259
 260 bool StringUtils::StartsWith(const char *s1, const char *s2)
 261 {
 262   while (*s2 != '\0')
 263   {
 264     if (*s1 != *s2)
 265       return false;
 266     s1++;
 267     s2++;
 268   }
 269   return true;
 270 }
 271
 272 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 273 {
 274   return StartsWithNoCase(str1.c_str(), str2.c_str());
 275 }
 276
 277 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 278 {
 279   return StartsWithNoCase(str1.c_str(), s2);
 280 }
 281
 282 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 283 {
 284   while (*s2 != '\0')
 285   {
 286     if (::tolower(*s1) != ::tolower(*s2))
 287       return false;
 288     s1++;
 289     s2++;
 290   }
 291   return true;
 292 }
 293
 294 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 295 {
 296   if (str1.size() < str2.size())
 297     return false;
 298   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 299 }
 300
 301 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 302 {
 303   size_t len2 = strlen(s2);
 304   if (str1.size() < len2)
 305     return false;
 306   return str1.compare(str1.size() - len2, len2, s2) == 0;
 307 }
 308
 309 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 310 {
 311   if (str1.size() < str2.size())
 312     return false;
 313   const char *s1 = str1.c_str() + str1.size() - str2.size();
 314   const char *s2 = str2.c_str();
 315   while (*s2 != '\0')
 316   {
 317     if (::tolower(*s1) != ::tolower(*s2))
 318       return false;
 319     s1++;
 320     s2++;
 321   }
 322   return true;
 323 }
 324
 325 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 326 {
 327   size_t len2 = strlen(s2);
 328   if (str1.size() < len2)
 329     return false;
 330   const char *s1 = str1.c_str() + str1.size() - len2;
 331   while (*s2 != '\0')
 332   {
 333     if (::tolower(*s1) != ::tolower(*s2))
 334       return false;
 335     s1++;
 336     s2++;
 337   }
 338   return true;
 339 }
 340
 341 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 342 {
 343   result = "";
 344   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 345     result += (*it) + delimiter;
 346
 347   if(result != "")
 348     result.Delete(result.size()-delimiter.size(), delimiter.size());
 349 }
 350
 351 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 352 {
 353   CStdString result;
 354   JoinString(strings, delimiter, result);
 355   return result;
 356 }
 357
 358 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 359 {
 360   CStdStringArray strArray;
 361   for (unsigned int index = 0; index < strings.size(); index++)
 362     strArray.push_back(strings.at(index));
 363
 364   return JoinString(strArray, delimiter);
 365 }
 366
 367 // Splits the string input into pieces delimited by delimiter.
 368 // if 2 delimiters are in a row, it will include the empty string between them.
 369 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 370 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 371 {
 372   int iPos = -1;
 373   int newPos = -1;
 374   int sizeS2 = delimiter.GetLength();
 375   int isize = input.GetLength();
 376
 377   results.clear();
 378
 379   vector<unsigned int> positions;
 380
 381   newPos = input.Find (delimiter, 0);
 382
 383   if ( newPos < 0 )
 384   {
 385     results.push_back(input);
 386     return 1;
 387   }
 388
 389   while ( newPos > iPos )
 390   {
 391     positions.push_back(newPos);
 392     iPos = newPos;
 393     newPos = input.Find (delimiter, iPos + sizeS2);
 394   }
 395
 396   // numFound is the number of delimiters which is one less
 397   // than the number of substrings
 398   unsigned int numFound = positions.size();
 399   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 400     numFound = iMaxStrings - 1;
 401
 402   for ( unsigned int i = 0; i <= numFound; i++ )
 403   {
 404     CStdString s;
 405     if ( i == 0 )
 406     {
 407       if ( i == numFound )
 408         s = input;
 409       else
 410         s = input.Mid( i, positions[i] );
 411     }
 412     else
 413     {
 414       int offset = positions[i - 1] + sizeS2;
 415       if ( offset < isize )
 416       {
 417         if ( i == numFound )
 418           s = input.Mid(offset);
 419         else if ( i > 0 )
 420           s = input.Mid( positions[i - 1] + sizeS2,
 421                          positions[i] - positions[i - 1] - sizeS2 );
 422       }
 423     }
 424     results.push_back(s);
 425   }
 426   // return the number of substrings
 427   return results.size();
 428 }
 429
 430 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 431 {
 432   CStdStringArray result;
 433   SplitString(input, delimiter, result, iMaxStrings);
 434   return result;
 435 }
 436
 437 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 438 {
 439   CStdStringArray result;
 440   SplitString(input, delimiter, result, iMaxStrings);
 441
 442   vector<string> strArray;
 443   for (unsigned int index = 0; index < result.size(); index++)
 444     strArray.push_back(result.at(index));
 445
 446   return strArray;
 447 }
 448
 449 // returns the number of occurrences of strFind in strInput.
 450 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 451 {
 452   int pos = strInput.Find(strFind, 0);
 453   int numfound = 0;
 454   while (pos >= 0)
 455   {
 456     numfound++;
 457     pos = strInput.Find(strFind, pos + 1);
 458   }
 459   return numfound;
 460 }
 461
 462 // Compares separately the numeric and alphabetic parts of a string.
 463 // returns negative if left < right, positive if left > right
 464 // and 0 if they are identical (essentially calculates left - right)
 465 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 466 {
 467   wchar_t *l = (wchar_t *)left;
 468   wchar_t *r = (wchar_t *)right;
 469   wchar_t *ld, *rd;
 470   wchar_t lc, rc;
 471   int64_t lnum, rnum;
 472   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 473   int cmp_res = 0;
 474   while (*l != 0 && *r != 0)
 475   {
 476     // check if we have a numerical value
 477     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 478     {
 479       ld = l;
 480       lnum = 0;
 481       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 482       { // compare only up to 15 digits
 483         lnum *= 10;
 484         lnum += *ld++ - '0';
 485       }
 486       rd = r;
 487       rnum = 0;
 488       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 489       { // compare only up to 15 digits
 490         rnum *= 10;
 491         rnum += *rd++ - L'0';
 492       }
 493       // do we have numbers?
 494       if (lnum != rnum)
 495       { // yes - and they're different!
 496         return lnum - rnum;
 497       }
 498       l = ld;
 499       r = rd;
 500       continue;
 501     }
 502     // do case less comparison
 503     lc = *l;
 504     if (lc >= L'A' && lc <= L'Z')
 505       lc += L'a'-L'A';
 506     rc = *r;
 507     if (rc >= L'A' && rc <= L'Z')
 508       rc += L'a'- L'A';
 509
 510     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 511     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 512     {
 513       return cmp_res;
 514     }
 515     l++; r++;
 516   }
 517   if (*r)
 518   { // r is longer
 519     return -1;
 520   }
 521   else if (*l)
 522   { // l is longer
 523     return 1;
 524   }
 525   return 0; // files are the same
 526 }
 527
 528 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 529 {
 530   CStdStringArray days;
 531   int splitCount = StringUtils::SplitString(dateString, "-", days);
 532   if (splitCount == 1)
 533     return atoi(days[0].c_str());
 534   else if (splitCount == 2)
 535     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 536   else if (splitCount == 3)
 537     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 538   else
 539     return -1;
 540 }
 541
 542 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 543 {
 544   CStdString strCopy(timeString);
 545   strCopy.TrimLeft(" \n\r\t");
 546   strCopy.TrimRight(" \n\r\t");
 547   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 548   {
 549     // this is imdb format of "XXX min"
 550     return 60 * atoi(strCopy.c_str());
 551   }
 552   else
 553   {
 554     CStdStringArray secs;
 555     StringUtils::SplitString(strCopy, ":", secs);
 556     int timeInSecs = 0;
 557     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 558     {
 559       timeInSecs *= 60;
 560       timeInSecs += atoi(secs[i]);
 561     }
 562     return timeInSecs;
 563   }
 564 }
 565
 566 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 567 {
 568   int hh = lSeconds / 3600;
 569   lSeconds = lSeconds % 3600;
 570   int mm = lSeconds / 60;
 571   int ss = lSeconds % 60;
 572
 573   if (format == TIME_FORMAT_GUESS)
 574     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 575   CStdString strHMS;
 576   if (format & TIME_FORMAT_HH)
 577     strHMS.AppendFormat("%02.2i", hh);
 578   else if (format & TIME_FORMAT_H)
 579     strHMS.AppendFormat("%i", hh);
 580   if (format & TIME_FORMAT_MM)
 581     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", mm);
 582   if (format & TIME_FORMAT_SS)
 583     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", ss);
 584   return strHMS;
 585 }
 586
 587 bool StringUtils::IsNaturalNumber(const CStdString& str)
 588 {
 589   size_t i = 0, n = 0;
 590   // allow whitespace,digits,whitespace
 591   while (i < str.size() && isspace((unsigned char) str[i]))
 592     i++;
 593   while (i < str.size() && isdigit((unsigned char) str[i]))
 594   {
 595     i++; n++;
 596   }
 597   while (i < str.size() && isspace((unsigned char) str[i]))
 598     i++;
 599   return i == str.size() && n > 0;
 600 }
 601
 602 bool StringUtils::IsInteger(const CStdString& str)
 603 {
 604   size_t i = 0, n = 0;
 605   // allow whitespace,-,digits,whitespace
 606   while (i < str.size() && isspace((unsigned char) str[i]))
 607     i++;
 608   if (i < str.size() && str[i] == '-')
 609     i++;
 610   while (i < str.size() && isdigit((unsigned char) str[i]))
 611   {
 612     i++; n++;
 613   }
 614   while (i < str.size() && isspace((unsigned char) str[i]))
 615     i++;
 616   return i == str.size() && n > 0;
 617 }
 618
 619 void StringUtils::RemoveCRLF(CStdString& strLine)
 620 {
 621   while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
 622   {
 623     strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
 624   }
 625 }
 626
 627 CStdString StringUtils::SizeToString(int64_t size)
 628 {
 629   CStdString strLabel;
 630   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 631   unsigned int i = 0;
 632   double s = (double)size;
 633   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 634   {
 635     s /= 1024.0;
 636     i++;
 637   }
 638
 639   if (!i)
 640     strLabel.Format("%.0lf %cB ", s, prefixes[i]);
 641   else if (s >= 100.0)
 642     strLabel.Format("%.1lf %cB", s, prefixes[i]);
 643   else
 644     strLabel.Format("%.2lf %cB", s, prefixes[i]);
 645
 646   return strLabel;
 647 }
 648
 649 // return -1 if not, else return the utf8 char length.
 650 int IsUTF8Letter(const unsigned char *str)
 651 {
 652   // reference:
 653   // unicode -> utf8 table: http://www.utf8-chartable.de/
 654   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 655   unsigned char ch = str[0];
 656   if (!ch)
 657     return -1;
 658   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 659     return 1;
 660   if (!(ch & 0x80))
 661     return -1;
 662   unsigned char ch2 = str[1];
 663   if (!ch2)
 664     return -1;
 665   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 666   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 667     return 2;
 668   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 669   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 670     return 2;
 671   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 672   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 673   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 674       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 675     return 2;
 676   return -1;
 677 }
 678
 679 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 680 {
 681   // NOTE: This assumes word is lowercase!
 682   unsigned char *s = (unsigned char *)str;
 683   do
 684   {
 685     // start with a compare
 686     unsigned char *c = s;
 687     unsigned char *w = (unsigned char *)wordLowerCase;
 688     bool same = true;
 689     while (same && *c && *w)
 690     {
 691       unsigned char lc = *c++;
 692       if (lc >= 'A' && lc <= 'Z')
 693         lc += 'a'-'A';
 694
 695       if (lc != *w++) // different
 696         same = false;
 697     }
 698     if (same && *w == 0)  // only the same if word has been exhausted
 699       return (const char *)s - str;
 700
 701     // otherwise, skip current word (composed by latin letters) or number
 702     int l;
 703     if (*s >= '0' && *s <= '9')
 704     {
 705       ++s;
 706       while (*s >= '0' && *s <= '9') ++s;
 707     }
 708     else if ((l = IsUTF8Letter(s)) > 0)
 709     {
 710       s += l;
 711       while ((l = IsUTF8Letter(s)) > 0) s += l;
 712     }
 713     else
 714       ++s;
 715     while (*s && *s == ' ') s++;
 716
 717     // and repeat until we're done
 718   } while (*s);
 719
 720   return CStdString::npos;
 721 }
 722
 723 // assumes it is called from after the first open bracket is found
 724 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 725 {
 726   int blocks = 1;
 727   for (unsigned int i = startPos; i < str.size(); i++)
 728   {
 729     if (str[i] == opener)
 730       blocks++;
 731     else if (str[i] == closer)
 732     {
 733       blocks--;
 734       if (!blocks)
 735         return i;
 736     }
 737   }
 738
 739   return (int)CStdString::npos;
 740 }
 741
 742 void StringUtils::WordToDigits(CStdString &word)
 743 {
 744   static const char word_to_letter[] = "22233344455566677778889999";
 745   word.ToLower();
 746   for (unsigned int i = 0; i < word.size(); ++i)
 747   { // NB: This assumes ascii, which probably needs extending at some  point.
 748     char letter = word[i];
 749     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 750     {
 751       word[i] = word_to_letter[letter-'a'];
 752     }
 753     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 754     {
 755       word[i] = ' ';  // replace everything else with a space
 756     }
 757   }
 758 }
 759
 760 CStdString StringUtils::CreateUUID()
 761 {
 762   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 763   * Version 4 conform local unique UUID based upon random number generation.
 764   */
 765   char UuidStrTmp[40];
 766   char *pUuidStr = UuidStrTmp;
 767   int i;
 768
 769   static bool m_uuidInitialized = false;
 770   if (!m_uuidInitialized)
 771   {
 772     /* use current time as the seed for rand()*/
 773     srand(time(NULL));
 774     m_uuidInitialized = true;
 775   }
 776
 777   /*Data1 - 8 characters.*/
 778   for(i = 0; i < 8; i++, pUuidStr++)
 779     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 780
 781   /*Data2 - 4 characters.*/
 782   *pUuidStr++ = '-';
 783   for(i = 0; i < 4; i++, pUuidStr++)
 784     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 785
 786   /*Data3 - 4 characters.*/
 787   *pUuidStr++ = '-';
 788   for(i = 0; i < 4; i++, pUuidStr++)
 789     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 790
 791   /*Data4 - 4 characters.*/
 792   *pUuidStr++ = '-';
 793   for(i = 0; i < 4; i++, pUuidStr++)
 794     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 795
 796   /*Data5 - 12 characters.*/
 797   *pUuidStr++ = '-';
 798   for(i = 0; i < 12; i++, pUuidStr++)
 799     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 800
 801   *pUuidStr = '\0';
 802
 803   m_lastUUID = UuidStrTmp;
 804   return UuidStrTmp;
 805 }
 806
 807 bool StringUtils::ValidateUUID(const CStdString &uuid)
 808 {
 809   CRegExp guidRE;
 810   guidRE.RegComp(ADDON_GUID_RE);
 811   return (guidRE.RegFind(uuid.c_str()) == 0);
 812 }
 813
 814 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 815 {
 816   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 817 }
 818
 819 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 820 {
 821   int best = -1;
 822   matchscore = 0;
 823
 824   int i = 0;
 825   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 826   {
 827     int maxlength = max(str.length(), it->length());
 828     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 829     if (score > matchscore)
 830     {
 831       matchscore = score;
 832       best = i;
 833     }
 834   }
 835   return best;
 836 }
 837
 838 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 839 {
 840   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 841   {
 842     if (str.find(*it) != str.npos)
 843       return true;
 844   }
 845   return false;
 846 }
 847
 848 size_t StringUtils::utf8_strlen(const char *s)
 849 {
 850   size_t length = 0;
 851   while (*s)
 852   {
 853     if ((*s++ & 0xC0) != 0x80)
 854       length++;
 855   }
 856   return length;
 857 }
 858
 859 std::string StringUtils::Paramify(const std::string &param)
 860 {
 861   std::string result = param;
 862   // escape backspaces
 863   StringUtils::Replace(result, "\\", "\\\\");
 864   // escape double quotes
 865   StringUtils::Replace(result, "\"", "\\\"");
 866
 867   // add double quotes around the whole string
 868   return "\"" + result + "\"";
 869 }
 870
 871 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 872 {
 873   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 874   // Skip delimiters at beginning.
 875   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 876   // Find first "non-delimiter".
 877   string::size_type pos = input.find_first_of(delimiters, lastPos);
 878
 879   while (string::npos != pos || string::npos != lastPos)
 880   {
 881     // Found a token, add it to the vector.
 882     tokens.push_back(input.substr(lastPos, pos - lastPos));
 883     // Skip delimiters.  Note the "not_of"
 884     lastPos = input.find_first_not_of(delimiters, pos);
 885     // Find next "non-delimiter"
 886     pos = input.find_first_of(delimiters, lastPos);
 887   }
 888 }