code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
  41 #define FORMAT_BLOCK_SIZE 2048 // initial size in bytes of the buffer FormatV() formats into
  42
  43 using namespace std;
  44
     // Pattern passed to CRegExp by ValidateUUID(): five groups of 8-4-4-4-12 hexadecimal
     // digits separated by '-', optionally wrapped in a pair of curly braces.
  45 const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  46
  47 /* empty strings for use in returns by ref */
  48 const CStdString StringUtils::EmptyString = "";
  49 const std::string StringUtils::Empty = "";
     /* overwritten by every call to CreateUUID() with the value it returns */
  50 CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 void StringUtils::ToUpper(string &str)
 107 {
 108   transform(str.begin(), str.end(), str.begin(), ::toupper);
 109 }
 110
 111 void StringUtils::ToLower(string &str)
 112 {
 113   transform(str.begin(), str.end(), str.begin(), ::tolower);
 114 }
 115
 116 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 117 {
 118   return EqualsNoCase(str1.c_str(), str2.c_str());
 119 }
 120
 121 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 122 {
 123   return EqualsNoCase(str1.c_str(), s2);
 124 }
 125
 126 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 127 {
 128   char c2; // we need only one char outside the loop
 129   do
 130   {
 131     const char c1 = *s1++; // const local variable should help compiler to optimize
 132     c2 = *s2++;
 133     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 134       return false;
 135   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 136   return true;
 137 }
 138
 139 string StringUtils::Left(const string &str, size_t count)
 140 {
 141   count = max((size_t)0, min(count, str.size()));
 142   return str.substr(0, count);
 143 }
 144
 145 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 146 {
 147   if (first + count > str.size())
 148     count = str.size() - first;
 149
 150   if (first > str.size())
 151     return string();
 152
 153   ASSERT(first + count <= str.size());
 154
 155   return str.substr(first, count);
 156 }
 157
 158 string StringUtils::Right(const string &str, size_t count)
 159 {
 160   count = max((size_t)0, min(count, str.size()));
 161   return str.substr(str.size() - count);
 162 }
 163
 164 std::string& StringUtils::Trim(std::string &str)
 165 {
 166   TrimLeft(str);
 167   return TrimRight(str);
 168 }
 169
 170 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 171 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 172 static int isspace_c(char c)
 173 {
 174   return ::isspace((unsigned char)c);
 175 }
 176
 177 std::string& StringUtils::TrimLeft(std::string &str)
 178 {
 179   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 180   return str;
 181 }
 182
 183 std::string& StringUtils::TrimRight(std::string &str)
 184 {
 185   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 186   return str;
 187 }
 188
 189 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 190 {
 191   std::string::iterator it = str.begin();
 192   bool onSpace = false;
 193   while(it != str.end())
 194   {
 195     if (*it == '\t')
 196       *it = ' ';
 197
 198     if (*it == ' ')
 199     {
 200       if (onSpace)
 201       {
 202         it = str.erase(it);
 203         continue;
 204       }
 205       else
 206         onSpace = true;
 207     }
 208     else
 209       onSpace = false;
 210
 211     ++it;
 212   }
 213   return str;
 214 }
 215
 216 int StringUtils::Replace(string &str, char oldChar, char newChar)
 217 {
 218   int replacedChars = 0;
 219   for (string::iterator it = str.begin(); it != str.end(); it++)
 220   {
 221     if (*it == oldChar)
 222     {
 223       *it = newChar;
 224       replacedChars++;
 225     }
 226   }
 227
 228   return replacedChars;
 229 }
 230
 231 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 232 {
 233   if (oldStr.empty())
 234     return 0;
 235
 236   int replacedChars = 0;
 237   size_t index = 0;
 238
 239   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 240   {
 241     str.replace(index, oldStr.size(), newStr);
 242     index += newStr.size();
 243     replacedChars++;
 244   }
 245
 246   return replacedChars;
 247 }
 248
 249 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 250 {
 251   return str1.compare(0, str2.size(), str2) == 0;
 252 }
 253
 254 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 255 {
 256   return StartsWith(str1.c_str(), s2);
 257 }
 258
 259 bool StringUtils::StartsWith(const char *s1, const char *s2)
 260 {
 261   while (*s2 != '\0')
 262   {
 263     if (*s1 != *s2)
 264       return false;
 265     s1++;
 266     s2++;
 267   }
 268   return true;
 269 }
 270
 271 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 272 {
 273   return StartsWithNoCase(str1.c_str(), str2.c_str());
 274 }
 275
 276 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 277 {
 278   return StartsWithNoCase(str1.c_str(), s2);
 279 }
 280
 281 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 282 {
 283   while (*s2 != '\0')
 284   {
 285     if (::tolower(*s1) != ::tolower(*s2))
 286       return false;
 287     s1++;
 288     s2++;
 289   }
 290   return true;
 291 }
 292
 293 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 294 {
 295   if (str1.size() < str2.size())
 296     return false;
 297   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 298 }
 299
 300 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 301 {
 302   size_t len2 = strlen(s2);
 303   if (str1.size() < len2)
 304     return false;
 305   return str1.compare(str1.size() - len2, len2, s2) == 0;
 306 }
 307
 308 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 309 {
 310   if (str1.size() < str2.size())
 311     return false;
 312   const char *s1 = str1.c_str() + str1.size() - str2.size();
 313   const char *s2 = str2.c_str();
 314   while (*s2 != '\0')
 315   {
 316     if (::tolower(*s1) != ::tolower(*s2))
 317       return false;
 318     s1++;
 319     s2++;
 320   }
 321   return true;
 322 }
 323
 324 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 325 {
 326   size_t len2 = strlen(s2);
 327   if (str1.size() < len2)
 328     return false;
 329   const char *s1 = str1.c_str() + str1.size() - len2;
 330   while (*s2 != '\0')
 331   {
 332     if (::tolower(*s1) != ::tolower(*s2))
 333       return false;
 334     s1++;
 335     s2++;
 336   }
 337   return true;
 338 }
 339
 340 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 341 {
 342   result = "";
 343   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 344     result += (*it) + delimiter;
 345
 346   if(result != "")
 347     result.Delete(result.size()-delimiter.size(), delimiter.size());
 348 }
 349
 350 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 351 {
 352   CStdString result;
 353   JoinString(strings, delimiter, result);
 354   return result;
 355 }
 356
 357 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 358 {
 359   CStdStringArray strArray;
 360   for (unsigned int index = 0; index < strings.size(); index++)
 361     strArray.push_back(strings.at(index));
 362
 363   return JoinString(strArray, delimiter);
 364 }
 365
 366 // Splits the string input into pieces delimited by delimiter.
 367 // if 2 delimiters are in a row, it will include the empty string between them.
 368 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 369 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 370 {
 371   int iPos = -1;
 372   int newPos = -1;
 373   int sizeS2 = delimiter.GetLength();
 374   int isize = input.GetLength();
 375
 376   results.clear();
 377
 378   vector<unsigned int> positions;
 379
 380   newPos = input.Find (delimiter, 0);
 381
 382   if ( newPos < 0 )
 383   {
 384     results.push_back(input);
 385     return 1;
 386   }
 387
 388   while ( newPos > iPos )
 389   {
 390     positions.push_back(newPos);
 391     iPos = newPos;
 392     newPos = input.Find (delimiter, iPos + sizeS2);
 393   }
 394
 395   // numFound is the number of delimiters which is one less
 396   // than the number of substrings
 397   unsigned int numFound = positions.size();
 398   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 399     numFound = iMaxStrings - 1;
 400
 401   for ( unsigned int i = 0; i <= numFound; i++ )
 402   {
 403     CStdString s;
 404     if ( i == 0 )
 405     {
 406       if ( i == numFound )
 407         s = input;
 408       else
 409         s = input.Mid( i, positions[i] );
 410     }
 411     else
 412     {
 413       int offset = positions[i - 1] + sizeS2;
 414       if ( offset < isize )
 415       {
 416         if ( i == numFound )
 417           s = input.Mid(offset);
 418         else if ( i > 0 )
 419           s = input.Mid( positions[i - 1] + sizeS2,
 420                          positions[i] - positions[i - 1] - sizeS2 );
 421       }
 422     }
 423     results.push_back(s);
 424   }
 425   // return the number of substrings
 426   return results.size();
 427 }
 428
 429 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 430 {
 431   CStdStringArray result;
 432   SplitString(input, delimiter, result, iMaxStrings);
 433   return result;
 434 }
 435
 436 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 437 {
 438   CStdStringArray result;
 439   SplitString(input, delimiter, result, iMaxStrings);
 440
 441   vector<string> strArray;
 442   for (unsigned int index = 0; index < result.size(); index++)
 443     strArray.push_back(result.at(index));
 444
 445   return strArray;
 446 }
 447
 448 // returns the number of occurrences of strFind in strInput.
 449 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 450 {
 451   int pos = strInput.Find(strFind, 0);
 452   int numfound = 0;
 453   while (pos >= 0)
 454   {
 455     numfound++;
 456     pos = strInput.Find(strFind, pos + 1);
 457   }
 458   return numfound;
 459 }
 460
 461 // Compares separately the numeric and alphabetic parts of a string.
 462 // returns negative if left < right, positive if left > right
 463 // and 0 if they are identical (essentially calculates left - right)
 464 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 465 {
 466   wchar_t *l = (wchar_t *)left;
 467   wchar_t *r = (wchar_t *)right;
 468   wchar_t *ld, *rd;
 469   wchar_t lc, rc;
 470   int64_t lnum, rnum;
 471   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 472   int cmp_res = 0;
 473   while (*l != 0 && *r != 0)
 474   {
 475     // check if we have a numerical value
 476     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 477     {
 478       ld = l;
 479       lnum = 0;
 480       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 481       { // compare only up to 15 digits
 482         lnum *= 10;
 483         lnum += *ld++ - '0';
 484       }
 485       rd = r;
 486       rnum = 0;
 487       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 488       { // compare only up to 15 digits
 489         rnum *= 10;
 490         rnum += *rd++ - L'0';
 491       }
 492       // do we have numbers?
 493       if (lnum != rnum)
 494       { // yes - and they're different!
 495         return lnum - rnum;
 496       }
 497       l = ld;
 498       r = rd;
 499       continue;
 500     }
 501     // do case less comparison
 502     lc = *l;
 503     if (lc >= L'A' && lc <= L'Z')
 504       lc += L'a'-L'A';
 505     rc = *r;
 506     if (rc >= L'A' && rc <= L'Z')
 507       rc += L'a'- L'A';
 508
 509     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 510     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 511     {
 512       return cmp_res;
 513     }
 514     l++; r++;
 515   }
 516   if (*r)
 517   { // r is longer
 518     return -1;
 519   }
 520   else if (*l)
 521   { // l is longer
 522     return 1;
 523   }
 524   return 0; // files are the same
 525 }
 526
 527 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 528 {
 529   CStdStringArray days;
 530   int splitCount = StringUtils::SplitString(dateString, "-", days);
 531   if (splitCount == 1)
 532     return atoi(days[0].c_str());
 533   else if (splitCount == 2)
 534     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 535   else if (splitCount == 3)
 536     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 537   else
 538     return -1;
 539 }
 540
 541 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 542 {
 543   CStdString strCopy(timeString);
 544   strCopy.TrimLeft(" \n\r\t");
 545   strCopy.TrimRight(" \n\r\t");
 546   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 547   {
 548     // this is imdb format of "XXX min"
 549     return 60 * atoi(strCopy.c_str());
 550   }
 551   else
 552   {
 553     CStdStringArray secs;
 554     StringUtils::SplitString(strCopy, ":", secs);
 555     int timeInSecs = 0;
 556     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 557     {
 558       timeInSecs *= 60;
 559       timeInSecs += atoi(secs[i]);
 560     }
 561     return timeInSecs;
 562   }
 563 }
 564
 565 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 566 {
 567   int hh = lSeconds / 3600;
 568   lSeconds = lSeconds % 3600;
 569   int mm = lSeconds / 60;
 570   int ss = lSeconds % 60;
 571
 572   if (format == TIME_FORMAT_GUESS)
 573     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 574   CStdString strHMS;
 575   if (format & TIME_FORMAT_HH)
 576     strHMS.AppendFormat("%02.2i", hh);
 577   else if (format & TIME_FORMAT_H)
 578     strHMS.AppendFormat("%i", hh);
 579   if (format & TIME_FORMAT_MM)
 580     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", mm);
 581   if (format & TIME_FORMAT_SS)
 582     strHMS.AppendFormat(strHMS.IsEmpty() ? "%02.2i" : ":%02.2i", ss);
 583   return strHMS;
 584 }
 585
 586 bool StringUtils::IsNaturalNumber(const CStdString& str)
 587 {
 588   size_t i = 0, n = 0;
 589   // allow whitespace,digits,whitespace
 590   while (i < str.size() && isspace((unsigned char) str[i]))
 591     i++;
 592   while (i < str.size() && isdigit((unsigned char) str[i]))
 593   {
 594     i++; n++;
 595   }
 596   while (i < str.size() && isspace((unsigned char) str[i]))
 597     i++;
 598   return i == str.size() && n > 0;
 599 }
 600
 601 bool StringUtils::IsInteger(const CStdString& str)
 602 {
 603   size_t i = 0, n = 0;
 604   // allow whitespace,-,digits,whitespace
 605   while (i < str.size() && isspace((unsigned char) str[i]))
 606     i++;
 607   if (i < str.size() && str[i] == '-')
 608     i++;
 609   while (i < str.size() && isdigit((unsigned char) str[i]))
 610   {
 611     i++; n++;
 612   }
 613   while (i < str.size() && isspace((unsigned char) str[i]))
 614     i++;
 615   return i == str.size() && n > 0;
 616 }
 617
 618 void StringUtils::RemoveCRLF(CStdString& strLine)
 619 {
 620   while ( strLine.size() && (strLine.Right(1) == "\n" || strLine.Right(1) == "\r") )
 621   {
 622     strLine = strLine.Left(std::max(0, (int)strLine.size() - 1));
 623   }
 624 }
 625
 626 CStdString StringUtils::SizeToString(int64_t size)
 627 {
 628   CStdString strLabel;
 629   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 630   unsigned int i = 0;
 631   double s = (double)size;
 632   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 633   {
 634     s /= 1024.0;
 635     i++;
 636   }
 637
 638   if (!i)
 639     strLabel.Format("%.0lf %cB ", s, prefixes[i]);
 640   else if (s >= 100.0)
 641     strLabel.Format("%.1lf %cB", s, prefixes[i]);
 642   else
 643     strLabel.Format("%.2lf %cB", s, prefixes[i]);
 644
 645   return strLabel;
 646 }
 647
 648 // return -1 if not, else return the utf8 char length.
 649 int IsUTF8Letter(const unsigned char *str)
 650 {
 651   // reference:
 652   // unicode -> utf8 table: http://www.utf8-chartable.de/
 653   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 654   unsigned char ch = str[0];
 655   if (!ch)
 656     return -1;
 657   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 658     return 1;
 659   if (!(ch & 0x80))
 660     return -1;
 661   unsigned char ch2 = str[1];
 662   if (!ch2)
 663     return -1;
 664   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 665   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 666     return 2;
 667   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 668   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 669     return 2;
 670   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 671   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 672   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 673       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 674     return 2;
 675   return -1;
 676 }
 677
 678 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 679 {
 680   // NOTE: This assumes word is lowercase!
 681   unsigned char *s = (unsigned char *)str;
 682   do
 683   {
 684     // start with a compare
 685     unsigned char *c = s;
 686     unsigned char *w = (unsigned char *)wordLowerCase;
 687     bool same = true;
 688     while (same && *c && *w)
 689     {
 690       unsigned char lc = *c++;
 691       if (lc >= 'A' && lc <= 'Z')
 692         lc += 'a'-'A';
 693
 694       if (lc != *w++) // different
 695         same = false;
 696     }
 697     if (same && *w == 0)  // only the same if word has been exhausted
 698       return (const char *)s - str;
 699
 700     // otherwise, skip current word (composed by latin letters) or number
 701     int l;
 702     if (*s >= '0' && *s <= '9')
 703     {
 704       ++s;
 705       while (*s >= '0' && *s <= '9') ++s;
 706     }
 707     else if ((l = IsUTF8Letter(s)) > 0)
 708     {
 709       s += l;
 710       while ((l = IsUTF8Letter(s)) > 0) s += l;
 711     }
 712     else
 713       ++s;
 714     while (*s && *s == ' ') s++;
 715
 716     // and repeat until we're done
 717   } while (*s);
 718
 719   return CStdString::npos;
 720 }
 721
 722 // assumes it is called from after the first open bracket is found
 723 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 724 {
 725   int blocks = 1;
 726   for (unsigned int i = startPos; i < str.size(); i++)
 727   {
 728     if (str[i] == opener)
 729       blocks++;
 730     else if (str[i] == closer)
 731     {
 732       blocks--;
 733       if (!blocks)
 734         return i;
 735     }
 736   }
 737
 738   return (int)CStdString::npos;
 739 }
 740
 741 void StringUtils::WordToDigits(CStdString &word)
 742 {
 743   static const char word_to_letter[] = "22233344455566677778889999";
 744   word.ToLower();
 745   for (unsigned int i = 0; i < word.size(); ++i)
 746   { // NB: This assumes ascii, which probably needs extending at some  point.
 747     char letter = word[i];
 748     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 749     {
 750       word[i] = word_to_letter[letter-'a'];
 751     }
 752     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 753     {
 754       word[i] = ' ';  // replace everything else with a space
 755     }
 756   }
 757 }
 758
 759 CStdString StringUtils::CreateUUID()
 760 {
 761   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 762   * Version 4 conform local unique UUID based upon random number generation.
 763   */
 764   char UuidStrTmp[40];
 765   char *pUuidStr = UuidStrTmp;
 766   int i;
 767
 768   static bool m_uuidInitialized = false;
 769   if (!m_uuidInitialized)
 770   {
 771     /* use current time as the seed for rand()*/
 772     srand(time(NULL));
 773     m_uuidInitialized = true;
 774   }
 775
 776   /*Data1 - 8 characters.*/
 777   for(i = 0; i < 8; i++, pUuidStr++)
 778     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 779
 780   /*Data2 - 4 characters.*/
 781   *pUuidStr++ = '-';
 782   for(i = 0; i < 4; i++, pUuidStr++)
 783     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 784
 785   /*Data3 - 4 characters.*/
 786   *pUuidStr++ = '-';
 787   for(i = 0; i < 4; i++, pUuidStr++)
 788     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 789
 790   /*Data4 - 4 characters.*/
 791   *pUuidStr++ = '-';
 792   for(i = 0; i < 4; i++, pUuidStr++)
 793     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 794
 795   /*Data5 - 12 characters.*/
 796   *pUuidStr++ = '-';
 797   for(i = 0; i < 12; i++, pUuidStr++)
 798     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 799
 800   *pUuidStr = '\0';
 801
 802   m_lastUUID = UuidStrTmp;
 803   return UuidStrTmp;
 804 }
 805
 806 bool StringUtils::ValidateUUID(const CStdString &uuid)
 807 {
 808   CRegExp guidRE;
 809   guidRE.RegComp(ADDON_GUID_RE);
 810   return (guidRE.RegFind(uuid.c_str()) == 0);
 811 }
 812
 813 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 814 {
 815   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 816 }
 817
 818 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 819 {
 820   int best = -1;
 821   matchscore = 0;
 822
 823   int i = 0;
 824   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 825   {
 826     int maxlength = max(str.length(), it->length());
 827     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 828     if (score > matchscore)
 829     {
 830       matchscore = score;
 831       best = i;
 832     }
 833   }
 834   return best;
 835 }
 836
 837 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 838 {
 839   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 840   {
 841     if (str.find(*it) != str.npos)
 842       return true;
 843   }
 844   return false;
 845 }
 846
 847 size_t StringUtils::utf8_strlen(const char *s)
 848 {
 849   size_t length = 0;
 850   while (*s)
 851   {
 852     if ((*s++ & 0xC0) != 0x80)
 853       length++;
 854   }
 855   return length;
 856 }
 857
 858 std::string StringUtils::Paramify(const std::string &param)
 859 {
 860   std::string result = param;
 861   // escape backspaces
 862   StringUtils::Replace(result, "\\", "\\\\");
 863   // escape double quotes
 864   StringUtils::Replace(result, "\"", "\\\"");
 865
 866   // add double quotes around the whole string
 867   return "\"" + result + "\"";
 868 }
 869
 870 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 871 {
 872   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 873   // Skip delimiters at beginning.
 874   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 875   // Find first "non-delimiter".
 876   string::size_type pos = input.find_first_of(delimiters, lastPos);
 877
 878   while (string::npos != pos || string::npos != lastPos)
 879   {
 880     // Found a token, add it to the vector.
 881     tokens.push_back(input.substr(lastPos, pos - lastPos));
 882     // Skip delimiters.  Note the "not_of"
 883     lastPos = input.find_first_not_of(delimiters, pos);
 884     // Find next "non-delimiter"
 885     pos = input.find_first_of(delimiters, lastPos);
 886   }
 887 }