code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
// Initial size, in characters, of the scratch buffer allocated by FormatV().
// The buffer is regrown when the formatted output does not fit.
#define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try

using namespace std;

// Pattern compiled by ValidateUUID(): hexadecimal groups of 8-4-4-4-12 digits
// separated by dashes, with an optional leading '{' and an optional trailing '}'.
const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";

/* empty string for use in returns by ref */
const CStdString StringUtils::EmptyString = "";
const std::string StringUtils::Empty = "";
// Holds the most recent value produced by CreateUUID().
CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToUpper(wstring &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::towupper);
 167 }
 168
 169 void StringUtils::ToLower(string &str)
 170 {
 171   transform(str.begin(), str.end(), str.begin(), ::tolower);
 172 }
 173
 174 void StringUtils::ToLower(wstring &str)
 175 {
 176   transform(str.begin(), str.end(), str.begin(), ::towlower);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 180 {
 181   return EqualsNoCase(str1.c_str(), str2.c_str());
 182 }
 183
 184 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 185 {
 186   return EqualsNoCase(str1.c_str(), s2);
 187 }
 188
 189 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 190 {
 191   char c2; // we need only one char outside the loop
 192   do
 193   {
 194     const char c1 = *s1++; // const local variable should help compiler to optimize
 195     c2 = *s2++;
 196     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 197       return false;
 198   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 199   return true;
 200 }
 201
 202 int StringUtils::CompareNoCase(const std::string &str1, const std::string &str2)
 203 {
 204   return CompareNoCase(str1.c_str(), str2.c_str());
 205 }
 206
 207 int StringUtils::CompareNoCase(const char *s1, const char *s2)
 208 {
 209   char c2; // we need only one char outside the loop
 210   do
 211   {
 212     const char c1 = *s1++; // const local variable should help compiler to optimize
 213     c2 = *s2++;
 214     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 215       return ::tolower(c1) - ::tolower(c2);
 216   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 217   return 0;
 218 }
 219
 220 string StringUtils::Left(const string &str, size_t count)
 221 {
 222   count = max((size_t)0, min(count, str.size()));
 223   return str.substr(0, count);
 224 }
 225
 226 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 227 {
 228   if (first + count > str.size())
 229     count = str.size() - first;
 230
 231   if (first > str.size())
 232     return string();
 233
 234   ASSERT(first + count <= str.size());
 235
 236   return str.substr(first, count);
 237 }
 238
 239 string StringUtils::Right(const string &str, size_t count)
 240 {
 241   count = max((size_t)0, min(count, str.size()));
 242   return str.substr(str.size() - count);
 243 }
 244
 245 std::string& StringUtils::Trim(std::string &str)
 246 {
 247   TrimLeft(str);
 248   return TrimRight(str);
 249 }
 250
 251 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 252 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 253 static int isspace_c(char c)
 254 {
 255   return ::isspace((unsigned char)c);
 256 }
 257
 258 std::string& StringUtils::TrimLeft(std::string &str)
 259 {
 260   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 261   return str;
 262 }
 263
 264 std::string& StringUtils::TrimLeft(std::string &str, const std::string& chars)
 265 {
 266   size_t nidx = str.find_first_not_of(chars);
 267   str.erase(0, nidx);
 268   return str;
 269 }
 270
 271 std::string& StringUtils::TrimRight(std::string &str)
 272 {
 273   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 274   return str;
 275 }
 276
 277 std::string& StringUtils::TrimRight(std::string &str, const std::string& chars)
 278 {
 279   size_t nidx = str.find_last_not_of(chars);
 280   str.erase(str.npos == nidx ? 0 : ++nidx);
 281   return str;
 282 }
 283
 284 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 285 {
 286   std::string::iterator it = str.begin();
 287   bool onSpace = false;
 288   while(it != str.end())
 289   {
 290     if (*it == '\t')
 291       *it = ' ';
 292
 293     if (*it == ' ')
 294     {
 295       if (onSpace)
 296       {
 297         it = str.erase(it);
 298         continue;
 299       }
 300       else
 301         onSpace = true;
 302     }
 303     else
 304       onSpace = false;
 305
 306     ++it;
 307   }
 308   return str;
 309 }
 310
 311 int StringUtils::Replace(string &str, char oldChar, char newChar)
 312 {
 313   int replacedChars = 0;
 314   for (string::iterator it = str.begin(); it != str.end(); it++)
 315   {
 316     if (*it == oldChar)
 317     {
 318       *it = newChar;
 319       replacedChars++;
 320     }
 321   }
 322
 323   return replacedChars;
 324 }
 325
 326 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 327 {
 328   if (oldStr.empty())
 329     return 0;
 330
 331   int replacedChars = 0;
 332   size_t index = 0;
 333
 334   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 335   {
 336     str.replace(index, oldStr.size(), newStr);
 337     index += newStr.size();
 338     replacedChars++;
 339   }
 340
 341   return replacedChars;
 342 }
 343
 344 int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
 345 {
 346   if (oldStr.empty())
 347     return 0;
 348
 349   int replacedChars = 0;
 350   size_t index = 0;
 351
 352   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 353   {
 354     str.replace(index, oldStr.size(), newStr);
 355     index += newStr.size();
 356     replacedChars++;
 357   }
 358
 359   return replacedChars;
 360 }
 361
 362 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 363 {
 364   return str1.compare(0, str2.size(), str2) == 0;
 365 }
 366
 367 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 368 {
 369   return StartsWith(str1.c_str(), s2);
 370 }
 371
 372 bool StringUtils::StartsWith(const char *s1, const char *s2)
 373 {
 374   while (*s2 != '\0')
 375   {
 376     if (*s1 != *s2)
 377       return false;
 378     s1++;
 379     s2++;
 380   }
 381   return true;
 382 }
 383
 384 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 385 {
 386   return StartsWithNoCase(str1.c_str(), str2.c_str());
 387 }
 388
 389 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 390 {
 391   return StartsWithNoCase(str1.c_str(), s2);
 392 }
 393
 394 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 395 {
 396   while (*s2 != '\0')
 397   {
 398     if (::tolower(*s1) != ::tolower(*s2))
 399       return false;
 400     s1++;
 401     s2++;
 402   }
 403   return true;
 404 }
 405
 406 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 407 {
 408   if (str1.size() < str2.size())
 409     return false;
 410   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 411 }
 412
 413 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 414 {
 415   size_t len2 = strlen(s2);
 416   if (str1.size() < len2)
 417     return false;
 418   return str1.compare(str1.size() - len2, len2, s2) == 0;
 419 }
 420
 421 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 422 {
 423   if (str1.size() < str2.size())
 424     return false;
 425   const char *s1 = str1.c_str() + str1.size() - str2.size();
 426   const char *s2 = str2.c_str();
 427   while (*s2 != '\0')
 428   {
 429     if (::tolower(*s1) != ::tolower(*s2))
 430       return false;
 431     s1++;
 432     s2++;
 433   }
 434   return true;
 435 }
 436
 437 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 438 {
 439   size_t len2 = strlen(s2);
 440   if (str1.size() < len2)
 441     return false;
 442   const char *s1 = str1.c_str() + str1.size() - len2;
 443   while (*s2 != '\0')
 444   {
 445     if (::tolower(*s1) != ::tolower(*s2))
 446       return false;
 447     s1++;
 448     s2++;
 449   }
 450   return true;
 451 }
 452
 453 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 454 {
 455   result = "";
 456   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 457     result += (*it) + delimiter;
 458
 459   if(result != "")
 460     result.erase(result.size()-delimiter.size(), delimiter.size());
 461 }
 462
 463 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 464 {
 465   CStdString result;
 466   JoinString(strings, delimiter, result);
 467   return result;
 468 }
 469
 470 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 471 {
 472   CStdStringArray strArray;
 473   for (unsigned int index = 0; index < strings.size(); index++)
 474     strArray.push_back(strings.at(index));
 475
 476   return JoinString(strArray, delimiter);
 477 }
 478
 479 // Splits the string input into pieces delimited by delimiter.
 480 // if 2 delimiters are in a row, it will include the empty string between them.
 481 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 482 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 483 {
 484   size_t iPos = std::string::npos;
 485   size_t newPos = std::string::npos;
 486   size_t sizeS2 = delimiter.size();
 487   size_t isize = input.size();
 488
 489   results.clear();
 490
 491   vector<unsigned int> positions;
 492
 493   newPos = input.find(delimiter, 0);
 494
 495   if (newPos == std::string::npos)
 496   {
 497     results.push_back(input);
 498     return 1;
 499   }
 500
 501   while (newPos != std::string::npos)
 502   {
 503     positions.push_back(newPos);
 504     iPos = newPos;
 505     newPos = input.find(delimiter, iPos + sizeS2);
 506   }
 507
 508   // numFound is the number of delimiters which is one less
 509   // than the number of substrings
 510   unsigned int numFound = positions.size();
 511   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 512     numFound = iMaxStrings - 1;
 513
 514   for ( unsigned int i = 0; i <= numFound; i++ )
 515   {
 516     CStdString s;
 517     if ( i == 0 )
 518     {
 519       if ( i == numFound )
 520         s = input;
 521       else
 522         s = input.substr(i, positions[i]);
 523     }
 524     else
 525     {
 526       size_t offset = positions[i - 1] + sizeS2;
 527       if ( offset < isize )
 528       {
 529         if ( i == numFound )
 530           s = input.substr(offset);
 531         else if ( i > 0 )
 532           s = input.substr( positions[i - 1] + sizeS2,
 533                          positions[i] - positions[i - 1] - sizeS2 );
 534       }
 535     }
 536     results.push_back(s);
 537   }
 538   // return the number of substrings
 539   return results.size();
 540 }
 541
 542 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 543 {
 544   CStdStringArray result;
 545   SplitString(input, delimiter, result, iMaxStrings);
 546   return result;
 547 }
 548
 549 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 550 {
 551   CStdStringArray result;
 552   SplitString(input, delimiter, result, iMaxStrings);
 553
 554   vector<string> strArray;
 555   for (unsigned int index = 0; index < result.size(); index++)
 556     strArray.push_back(result.at(index));
 557
 558   return strArray;
 559 }
 560
 561 // returns the number of occurrences of strFind in strInput.
 562 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 563 {
 564   size_t pos = strInput.find(strFind, 0);
 565   int numfound = 0;
 566   while (pos != std::string::npos)
 567   {
 568     numfound++;
 569     pos = strInput.find(strFind, pos + 1);
 570   }
 571   return numfound;
 572 }
 573
 574 // Compares separately the numeric and alphabetic parts of a string.
 575 // returns negative if left < right, positive if left > right
 576 // and 0 if they are identical (essentially calculates left - right)
 577 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 578 {
 579   wchar_t *l = (wchar_t *)left;
 580   wchar_t *r = (wchar_t *)right;
 581   wchar_t *ld, *rd;
 582   wchar_t lc, rc;
 583   int64_t lnum, rnum;
 584   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 585   int cmp_res = 0;
 586   while (*l != 0 && *r != 0)
 587   {
 588     // check if we have a numerical value
 589     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 590     {
 591       ld = l;
 592       lnum = 0;
 593       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 594       { // compare only up to 15 digits
 595         lnum *= 10;
 596         lnum += *ld++ - '0';
 597       }
 598       rd = r;
 599       rnum = 0;
 600       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 601       { // compare only up to 15 digits
 602         rnum *= 10;
 603         rnum += *rd++ - L'0';
 604       }
 605       // do we have numbers?
 606       if (lnum != rnum)
 607       { // yes - and they're different!
 608         return lnum - rnum;
 609       }
 610       l = ld;
 611       r = rd;
 612       continue;
 613     }
 614     // do case less comparison
 615     lc = *l;
 616     if (lc >= L'A' && lc <= L'Z')
 617       lc += L'a'-L'A';
 618     rc = *r;
 619     if (rc >= L'A' && rc <= L'Z')
 620       rc += L'a'- L'A';
 621
 622     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 623     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 624     {
 625       return cmp_res;
 626     }
 627     l++; r++;
 628   }
 629   if (*r)
 630   { // r is longer
 631     return -1;
 632   }
 633   else if (*l)
 634   { // l is longer
 635     return 1;
 636   }
 637   return 0; // files are the same
 638 }
 639
 640 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 641 {
 642   CStdStringArray days;
 643   int splitCount = StringUtils::SplitString(dateString, "-", days);
 644   if (splitCount == 1)
 645     return atoi(days[0].c_str());
 646   else if (splitCount == 2)
 647     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 648   else if (splitCount == 3)
 649     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 650   else
 651     return -1;
 652 }
 653
 654 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 655 {
 656   CStdString strCopy(timeString);
 657   StringUtils::Trim(strCopy);
 658   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 659   {
 660     // this is imdb format of "XXX min"
 661     return 60 * atoi(strCopy.c_str());
 662   }
 663   else
 664   {
 665     CStdStringArray secs;
 666     StringUtils::SplitString(strCopy, ":", secs);
 667     int timeInSecs = 0;
 668     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 669     {
 670       timeInSecs *= 60;
 671       timeInSecs += atoi(secs[i]);
 672     }
 673     return timeInSecs;
 674   }
 675 }
 676
 677 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 678 {
 679   int hh = lSeconds / 3600;
 680   lSeconds = lSeconds % 3600;
 681   int mm = lSeconds / 60;
 682   int ss = lSeconds % 60;
 683
 684   if (format == TIME_FORMAT_GUESS)
 685     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 686   CStdString strHMS;
 687   if (format & TIME_FORMAT_HH)
 688     strHMS += StringUtils::Format("%02.2i", hh);
 689   else if (format & TIME_FORMAT_H)
 690     strHMS += StringUtils::Format("%i", hh);
 691   if (format & TIME_FORMAT_MM)
 692     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", mm);
 693   if (format & TIME_FORMAT_SS)
 694     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", ss);
 695   return strHMS;
 696 }
 697
 698 bool StringUtils::IsNaturalNumber(const CStdString& str)
 699 {
 700   size_t i = 0, n = 0;
 701   // allow whitespace,digits,whitespace
 702   while (i < str.size() && isspace((unsigned char) str[i]))
 703     i++;
 704   while (i < str.size() && isdigit((unsigned char) str[i]))
 705   {
 706     i++; n++;
 707   }
 708   while (i < str.size() && isspace((unsigned char) str[i]))
 709     i++;
 710   return i == str.size() && n > 0;
 711 }
 712
 713 bool StringUtils::IsInteger(const CStdString& str)
 714 {
 715   size_t i = 0, n = 0;
 716   // allow whitespace,-,digits,whitespace
 717   while (i < str.size() && isspace((unsigned char) str[i]))
 718     i++;
 719   if (i < str.size() && str[i] == '-')
 720     i++;
 721   while (i < str.size() && isdigit((unsigned char) str[i]))
 722   {
 723     i++; n++;
 724   }
 725   while (i < str.size() && isspace((unsigned char) str[i]))
 726     i++;
 727   return i == str.size() && n > 0;
 728 }
 729
 730 void StringUtils::RemoveCRLF(CStdString& strLine)
 731 {
 732   StringUtils::TrimRight(strLine, "\n\r");
 733 }
 734
 735 CStdString StringUtils::SizeToString(int64_t size)
 736 {
 737   CStdString strLabel;
 738   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 739   unsigned int i = 0;
 740   double s = (double)size;
 741   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 742   {
 743     s /= 1024.0;
 744     i++;
 745   }
 746
 747   if (!i)
 748     strLabel = StringUtils::Format("%.0lf %cB ", s, prefixes[i]);
 749   else if (s >= 100.0)
 750     strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
 751   else
 752     strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
 753
 754   return strLabel;
 755 }
 756
 757 // return -1 if not, else return the utf8 char length.
 758 int IsUTF8Letter(const unsigned char *str)
 759 {
 760   // reference:
 761   // unicode -> utf8 table: http://www.utf8-chartable.de/
 762   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 763   unsigned char ch = str[0];
 764   if (!ch)
 765     return -1;
 766   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 767     return 1;
 768   if (!(ch & 0x80))
 769     return -1;
 770   unsigned char ch2 = str[1];
 771   if (!ch2)
 772     return -1;
 773   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 774   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 775     return 2;
 776   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 777   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 778     return 2;
 779   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 780   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 781   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 782       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 783     return 2;
 784   return -1;
 785 }
 786
 787 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 788 {
 789   // NOTE: This assumes word is lowercase!
 790   unsigned char *s = (unsigned char *)str;
 791   do
 792   {
 793     // start with a compare
 794     unsigned char *c = s;
 795     unsigned char *w = (unsigned char *)wordLowerCase;
 796     bool same = true;
 797     while (same && *c && *w)
 798     {
 799       unsigned char lc = *c++;
 800       if (lc >= 'A' && lc <= 'Z')
 801         lc += 'a'-'A';
 802
 803       if (lc != *w++) // different
 804         same = false;
 805     }
 806     if (same && *w == 0)  // only the same if word has been exhausted
 807       return (const char *)s - str;
 808
 809     // otherwise, skip current word (composed by latin letters) or number
 810     int l;
 811     if (*s >= '0' && *s <= '9')
 812     {
 813       ++s;
 814       while (*s >= '0' && *s <= '9') ++s;
 815     }
 816     else if ((l = IsUTF8Letter(s)) > 0)
 817     {
 818       s += l;
 819       while ((l = IsUTF8Letter(s)) > 0) s += l;
 820     }
 821     else
 822       ++s;
 823     while (*s && *s == ' ') s++;
 824
 825     // and repeat until we're done
 826   } while (*s);
 827
 828   return CStdString::npos;
 829 }
 830
 831 // assumes it is called from after the first open bracket is found
 832 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 833 {
 834   int blocks = 1;
 835   for (unsigned int i = startPos; i < str.size(); i++)
 836   {
 837     if (str[i] == opener)
 838       blocks++;
 839     else if (str[i] == closer)
 840     {
 841       blocks--;
 842       if (!blocks)
 843         return i;
 844     }
 845   }
 846
 847   return (int)CStdString::npos;
 848 }
 849
 850 void StringUtils::WordToDigits(CStdString &word)
 851 {
 852   static const char word_to_letter[] = "22233344455566677778889999";
 853   StringUtils::ToLower(word);
 854   for (unsigned int i = 0; i < word.size(); ++i)
 855   { // NB: This assumes ascii, which probably needs extending at some  point.
 856     char letter = word[i];
 857     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 858     {
 859       word[i] = word_to_letter[letter-'a'];
 860     }
 861     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 862     {
 863       word[i] = ' ';  // replace everything else with a space
 864     }
 865   }
 866 }
 867
 868 CStdString StringUtils::CreateUUID()
 869 {
 870   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 871   * Version 4 conform local unique UUID based upon random number generation.
 872   */
 873   char UuidStrTmp[40];
 874   char *pUuidStr = UuidStrTmp;
 875   int i;
 876
 877   static bool m_uuidInitialized = false;
 878   if (!m_uuidInitialized)
 879   {
 880     /* use current time as the seed for rand()*/
 881     srand(time(NULL));
 882     m_uuidInitialized = true;
 883   }
 884
 885   /*Data1 - 8 characters.*/
 886   for(i = 0; i < 8; i++, pUuidStr++)
 887     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 888
 889   /*Data2 - 4 characters.*/
 890   *pUuidStr++ = '-';
 891   for(i = 0; i < 4; i++, pUuidStr++)
 892     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 893
 894   /*Data3 - 4 characters.*/
 895   *pUuidStr++ = '-';
 896   for(i = 0; i < 4; i++, pUuidStr++)
 897     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 898
 899   /*Data4 - 4 characters.*/
 900   *pUuidStr++ = '-';
 901   for(i = 0; i < 4; i++, pUuidStr++)
 902     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 903
 904   /*Data5 - 12 characters.*/
 905   *pUuidStr++ = '-';
 906   for(i = 0; i < 12; i++, pUuidStr++)
 907     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 908
 909   *pUuidStr = '\0';
 910
 911   m_lastUUID = UuidStrTmp;
 912   return UuidStrTmp;
 913 }
 914
 915 bool StringUtils::ValidateUUID(const CStdString &uuid)
 916 {
 917   CRegExp guidRE;
 918   guidRE.RegComp(ADDON_GUID_RE);
 919   return (guidRE.RegFind(uuid.c_str()) == 0);
 920 }
 921
 922 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 923 {
 924   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 925 }
 926
 927 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 928 {
 929   int best = -1;
 930   matchscore = 0;
 931
 932   int i = 0;
 933   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 934   {
 935     int maxlength = max(str.length(), it->length());
 936     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 937     if (score > matchscore)
 938     {
 939       matchscore = score;
 940       best = i;
 941     }
 942   }
 943   return best;
 944 }
 945
 946 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 947 {
 948   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 949   {
 950     if (str.find(*it) != str.npos)
 951       return true;
 952   }
 953   return false;
 954 }
 955
 956 size_t StringUtils::utf8_strlen(const char *s)
 957 {
 958   size_t length = 0;
 959   while (*s)
 960   {
 961     if ((*s++ & 0xC0) != 0x80)
 962       length++;
 963   }
 964   return length;
 965 }
 966
 967 std::string StringUtils::Paramify(const std::string &param)
 968 {
 969   std::string result = param;
 970   // escape backspaces
 971   StringUtils::Replace(result, "\\", "\\\\");
 972   // escape double quotes
 973   StringUtils::Replace(result, "\"", "\\\"");
 974
 975   // add double quotes around the whole string
 976   return "\"" + result + "\"";
 977 }
 978
 979 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 980 {
 981   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 982   // Skip delimiters at beginning.
 983   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 984   // Find first "non-delimiter".
 985   string::size_type pos = input.find_first_of(delimiters, lastPos);
 986
 987   while (string::npos != pos || string::npos != lastPos)
 988   {
 989     // Found a token, add it to the vector.
 990     tokens.push_back(input.substr(lastPos, pos - lastPos));
 991     // Skip delimiters.  Note the "not_of"
 992     lastPos = input.find_first_not_of(delimiters, pos);
 993     // Find next "non-delimiter"
 994     pos = input.find_first_of(delimiters, lastPos);
 995   }
 996 }