code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/StringUtils.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 //-----------------------------------------------------------------------
  21 //
  22 //  File:      StringUtils.cpp
  23 //
  24 //  Purpose:   ATL split string utility
  25 //  Author:    Paul J. Weiss
  26 //
  27 //  Modified to use J O'Leary's CStdString class by kraqh3d
  28 //
  29 //------------------------------------------------------------------------
  30
  31
  32 #include "StringUtils.h"
  33 #include "utils/RegExp.h"
  34 #include "utils/fstrcmp.h"
  35 #include <locale>
  36
  37 #include <math.h>
  38 #include <sstream>
  39 #include <time.h>
  40
  41 #define FORMAT_BLOCK_SIZE 2048 // # of bytes to increment per try
  42
  43 using namespace std;
  44
  45 const char* ADDON_GUID_RE = "^(\\{){0,1}[0-9a-fA-F]{8}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{4}\\-[0-9a-fA-F]{12}(\\}){0,1}$";
  46
  47 /* empty string for use in returns by ref */
  48 const CStdString StringUtils::EmptyString = "";
  49 const std::string StringUtils::Empty = "";
  50 CStdString StringUtils::m_lastUUID = "";
  51
  52 string StringUtils::Format(const char *fmt, ...)
  53 {
  54   va_list args;
  55   va_start(args, fmt);
  56   string str = FormatV(fmt, args);
  57   va_end(args);
  58
  59   return str;
  60 }
  61
  62 string StringUtils::FormatV(const char *fmt, va_list args)
  63 {
  64   if (fmt == NULL)
  65     return "";
  66
  67   int size = FORMAT_BLOCK_SIZE;
  68   va_list argCopy;
  69
  70   char *cstr = reinterpret_cast<char*>(malloc(sizeof(char) * size));
  71   if (cstr == NULL)
  72     return "";
  73
  74   while (1)
  75   {
  76     va_copy(argCopy, args);
  77
  78     int nActual = vsnprintf(cstr, size, fmt, argCopy);
  79     va_end(argCopy);
  80
  81     if (nActual > -1 && nActual < size) // We got a valid result
  82     {
  83       string str(cstr, nActual);
  84       free(cstr);
  85       return str;
  86     }
  87     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
  88       size = nActual + 1;
  89     else                                // Let's try to double the size (glibc 2.0)
  90       size *= 2;
  91
  92     char *new_cstr = reinterpret_cast<char*>(realloc(cstr, sizeof(char) * size));
  93     if (new_cstr == NULL)
  94     {
  95       free(cstr);
  96       return "";
  97     }
  98
  99     cstr = new_cstr;
 100   }
 101
 102   free(cstr);
 103   return "";
 104 }
 105
 106 wstring StringUtils::Format(const wchar_t *fmt, ...)
 107 {
 108   va_list args;
 109   va_start(args, fmt);
 110   wstring str = FormatV(fmt, args);
 111   va_end(args);
 112
 113   return str;
 114 }
 115
 116 wstring StringUtils::FormatV(const wchar_t *fmt, va_list args)
 117 {
 118   if (fmt == NULL)
 119     return L"";
 120
 121   int size = FORMAT_BLOCK_SIZE;
 122   va_list argCopy;
 123
 124   wchar_t *cstr = reinterpret_cast<wchar_t*>(malloc(sizeof(wchar_t) * size));
 125   if (cstr == NULL)
 126     return L"";
 127
 128   while (1)
 129   {
 130     va_copy(argCopy, args);
 131
 132     int nActual = vswprintf(cstr, size, fmt, argCopy);
 133     va_end(argCopy);
 134
 135     if (nActual > -1 && nActual < size) // We got a valid result
 136     {
 137       wstring str(cstr, nActual);
 138       free(cstr);
 139       return str;
 140     }
 141     if (nActual > -1)                   // Exactly what we will need (glibc 2.1)
 142       size = nActual + 1;
 143     else                                // Let's try to double the size (glibc 2.0)
 144       size *= 2;
 145
 146     wchar_t *new_cstr = reinterpret_cast<wchar_t*>(realloc(cstr, sizeof(wchar_t) * size));
 147     if (new_cstr == NULL)
 148     {
 149       free(cstr);
 150       return L"";
 151     }
 152
 153     cstr = new_cstr;
 154   }
 155
 156   return L"";
 157 }
 158
 159 void StringUtils::ToUpper(string &str)
 160 {
 161   transform(str.begin(), str.end(), str.begin(), ::toupper);
 162 }
 163
 164 void StringUtils::ToUpper(wstring &str)
 165 {
 166   transform(str.begin(), str.end(), str.begin(), ::towupper);
 167 }
 168
 169 void StringUtils::ToLower(string &str)
 170 {
 171   transform(str.begin(), str.end(), str.begin(), ::tolower);
 172 }
 173
 174 void StringUtils::ToLower(wstring &str)
 175 {
 176   transform(str.begin(), str.end(), str.begin(), ::towlower);
 177 }
 178
 179 bool StringUtils::EqualsNoCase(const std::string &str1, const std::string &str2)
 180 {
 181   return EqualsNoCase(str1.c_str(), str2.c_str());
 182 }
 183
 184 bool StringUtils::EqualsNoCase(const std::string &str1, const char *s2)
 185 {
 186   return EqualsNoCase(str1.c_str(), s2);
 187 }
 188
 189 bool StringUtils::EqualsNoCase(const char *s1, const char *s2)
 190 {
 191   char c2; // we need only one char outside the loop
 192   do
 193   {
 194     const char c1 = *s1++; // const local variable should help compiler to optimize
 195     c2 = *s2++;
 196     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 197       return false;
 198   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 199   return true;
 200 }
 201
 202 int StringUtils::CompareNoCase(const std::string &str1, const std::string &str2)
 203 {
 204   return CompareNoCase(str1.c_str(), str2.c_str());
 205 }
 206
 207 int StringUtils::CompareNoCase(const char *s1, const char *s2)
 208 {
 209   char c2; // we need only one char outside the loop
 210   do
 211   {
 212     const char c1 = *s1++; // const local variable should help compiler to optimize
 213     c2 = *s2++;
 214     if (c1 != c2 && ::tolower(c1) != ::tolower(c2)) // This includes the possibility that one of the characters is the null-terminator, which implies a string mismatch.
 215       return ::tolower(c1) - ::tolower(c2);
 216   } while (c2 != '\0'); // At this point, we know c1 == c2, so there's no need to test them both.
 217   return 0;
 218 }
 219
 220 string StringUtils::Left(const string &str, size_t count)
 221 {
 222   count = max((size_t)0, min(count, str.size()));
 223   return str.substr(0, count);
 224 }
 225
 226 string StringUtils::Mid(const string &str, size_t first, size_t count /* = string::npos */)
 227 {
 228   if (first + count > str.size())
 229     count = str.size() - first;
 230
 231   if (first > str.size())
 232     return string();
 233
 234   ASSERT(first + count <= str.size());
 235
 236   return str.substr(first, count);
 237 }
 238
 239 string StringUtils::Right(const string &str, size_t count)
 240 {
 241   count = max((size_t)0, min(count, str.size()));
 242   return str.substr(str.size() - count);
 243 }
 244
 245 std::string& StringUtils::Trim(std::string &str)
 246 {
 247   TrimLeft(str);
 248   return TrimRight(str);
 249 }
 250
 251 std::string& StringUtils::Trim(std::string &str, const char* const chars)
 252 {
 253   TrimLeft(str, chars);
 254   return TrimRight(str, chars);
 255 }
 256
 257 // hack to ensure that std::string::iterator will be dereferenced as _unsigned_ char
 258 // without this hack "TrimX" functions failed on Win32 with UTF-8 strings
 259 static int isspace_c(char c)
 260 {
 261   return ::isspace((unsigned char)c);
 262 }
 263
 264 std::string& StringUtils::TrimLeft(std::string &str)
 265 {
 266   str.erase(str.begin(), ::find_if(str.begin(), str.end(), ::not1(::ptr_fun(isspace_c))));
 267   return str;
 268 }
 269
 270 std::string& StringUtils::TrimLeft(std::string &str, const char* const chars)
 271 {
 272   size_t nidx = str.find_first_not_of(chars);
 273   str.erase(0, nidx);
 274   return str;
 275 }
 276
 277 std::string& StringUtils::TrimRight(std::string &str)
 278 {
 279   str.erase(::find_if(str.rbegin(), str.rend(), ::not1(::ptr_fun(isspace_c))).base(), str.end());
 280   return str;
 281 }
 282
 283 std::string& StringUtils::TrimRight(std::string &str, const char* const chars)
 284 {
 285   size_t nidx = str.find_last_not_of(chars);
 286   str.erase(str.npos == nidx ? 0 : ++nidx);
 287   return str;
 288 }
 289
 290 std::string& StringUtils::RemoveDuplicatedSpacesAndTabs(std::string& str)
 291 {
 292   std::string::iterator it = str.begin();
 293   bool onSpace = false;
 294   while(it != str.end())
 295   {
 296     if (*it == '\t')
 297       *it = ' ';
 298
 299     if (*it == ' ')
 300     {
 301       if (onSpace)
 302       {
 303         it = str.erase(it);
 304         continue;
 305       }
 306       else
 307         onSpace = true;
 308     }
 309     else
 310       onSpace = false;
 311
 312     ++it;
 313   }
 314   return str;
 315 }
 316
 317 int StringUtils::Replace(string &str, char oldChar, char newChar)
 318 {
 319   int replacedChars = 0;
 320   for (string::iterator it = str.begin(); it != str.end(); it++)
 321   {
 322     if (*it == oldChar)
 323     {
 324       *it = newChar;
 325       replacedChars++;
 326     }
 327   }
 328
 329   return replacedChars;
 330 }
 331
 332 int StringUtils::Replace(std::string &str, const std::string &oldStr, const std::string &newStr)
 333 {
 334   if (oldStr.empty())
 335     return 0;
 336
 337   int replacedChars = 0;
 338   size_t index = 0;
 339
 340   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 341   {
 342     str.replace(index, oldStr.size(), newStr);
 343     index += newStr.size();
 344     replacedChars++;
 345   }
 346
 347   return replacedChars;
 348 }
 349
 350 int StringUtils::Replace(std::wstring &str, const std::wstring &oldStr, const std::wstring &newStr)
 351 {
 352   if (oldStr.empty())
 353     return 0;
 354
 355   int replacedChars = 0;
 356   size_t index = 0;
 357
 358   while (index < str.size() && (index = str.find(oldStr, index)) != string::npos)
 359   {
 360     str.replace(index, oldStr.size(), newStr);
 361     index += newStr.size();
 362     replacedChars++;
 363   }
 364
 365   return replacedChars;
 366 }
 367
 368 bool StringUtils::StartsWith(const std::string &str1, const std::string &str2)
 369 {
 370   return str1.compare(0, str2.size(), str2) == 0;
 371 }
 372
 373 bool StringUtils::StartsWith(const std::string &str1, const char *s2)
 374 {
 375   return StartsWith(str1.c_str(), s2);
 376 }
 377
 378 bool StringUtils::StartsWith(const char *s1, const char *s2)
 379 {
 380   while (*s2 != '\0')
 381   {
 382     if (*s1 != *s2)
 383       return false;
 384     s1++;
 385     s2++;
 386   }
 387   return true;
 388 }
 389
 390 bool StringUtils::StartsWithNoCase(const std::string &str1, const std::string &str2)
 391 {
 392   return StartsWithNoCase(str1.c_str(), str2.c_str());
 393 }
 394
 395 bool StringUtils::StartsWithNoCase(const std::string &str1, const char *s2)
 396 {
 397   return StartsWithNoCase(str1.c_str(), s2);
 398 }
 399
 400 bool StringUtils::StartsWithNoCase(const char *s1, const char *s2)
 401 {
 402   while (*s2 != '\0')
 403   {
 404     if (::tolower(*s1) != ::tolower(*s2))
 405       return false;
 406     s1++;
 407     s2++;
 408   }
 409   return true;
 410 }
 411
 412 bool StringUtils::EndsWith(const std::string &str1, const std::string &str2)
 413 {
 414   if (str1.size() < str2.size())
 415     return false;
 416   return str1.compare(str1.size() - str2.size(), str2.size(), str2) == 0;
 417 }
 418
 419 bool StringUtils::EndsWith(const std::string &str1, const char *s2)
 420 {
 421   size_t len2 = strlen(s2);
 422   if (str1.size() < len2)
 423     return false;
 424   return str1.compare(str1.size() - len2, len2, s2) == 0;
 425 }
 426
 427 bool StringUtils::EndsWithNoCase(const std::string &str1, const std::string &str2)
 428 {
 429   if (str1.size() < str2.size())
 430     return false;
 431   const char *s1 = str1.c_str() + str1.size() - str2.size();
 432   const char *s2 = str2.c_str();
 433   while (*s2 != '\0')
 434   {
 435     if (::tolower(*s1) != ::tolower(*s2))
 436       return false;
 437     s1++;
 438     s2++;
 439   }
 440   return true;
 441 }
 442
 443 bool StringUtils::EndsWithNoCase(const std::string &str1, const char *s2)
 444 {
 445   size_t len2 = strlen(s2);
 446   if (str1.size() < len2)
 447     return false;
 448   const char *s1 = str1.c_str() + str1.size() - len2;
 449   while (*s2 != '\0')
 450   {
 451     if (::tolower(*s1) != ::tolower(*s2))
 452       return false;
 453     s1++;
 454     s2++;
 455   }
 456   return true;
 457 }
 458
 459 void StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter, CStdString& result)
 460 {
 461   result = "";
 462   for(CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++ )
 463     result += (*it) + delimiter;
 464
 465   if(result != "")
 466     result.erase(result.size()-delimiter.size(), delimiter.size());
 467 }
 468
 469 CStdString StringUtils::JoinString(const CStdStringArray &strings, const CStdString& delimiter)
 470 {
 471   CStdString result;
 472   JoinString(strings, delimiter, result);
 473   return result;
 474 }
 475
 476 CStdString StringUtils::Join(const vector<string> &strings, const CStdString& delimiter)
 477 {
 478   CStdStringArray strArray;
 479   for (unsigned int index = 0; index < strings.size(); index++)
 480     strArray.push_back(strings.at(index));
 481
 482   return JoinString(strArray, delimiter);
 483 }
 484
 485 // Splits the string input into pieces delimited by delimiter.
 486 // if 2 delimiters are in a row, it will include the empty string between them.
 487 // added MaxStrings parameter to restrict the number of returned substrings (like perl and python)
 488 int StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, CStdStringArray &results, unsigned int iMaxStrings /* = 0 */)
 489 {
 490   size_t iPos = std::string::npos;
 491   size_t newPos = std::string::npos;
 492   size_t sizeS2 = delimiter.size();
 493   size_t isize = input.size();
 494
 495   results.clear();
 496
 497   vector<unsigned int> positions;
 498
 499   newPos = input.find(delimiter, 0);
 500
 501   if (newPos == std::string::npos)
 502   {
 503     results.push_back(input);
 504     return 1;
 505   }
 506
 507   while (newPos != std::string::npos)
 508   {
 509     positions.push_back(newPos);
 510     iPos = newPos;
 511     newPos = input.find(delimiter, iPos + sizeS2);
 512   }
 513
 514   // numFound is the number of delimiters which is one less
 515   // than the number of substrings
 516   unsigned int numFound = positions.size();
 517   if (iMaxStrings > 0 && numFound >= iMaxStrings)
 518     numFound = iMaxStrings - 1;
 519
 520   for ( unsigned int i = 0; i <= numFound; i++ )
 521   {
 522     CStdString s;
 523     if ( i == 0 )
 524     {
 525       if ( i == numFound )
 526         s = input;
 527       else
 528         s = input.substr(i, positions[i]);
 529     }
 530     else
 531     {
 532       size_t offset = positions[i - 1] + sizeS2;
 533       if ( offset < isize )
 534       {
 535         if ( i == numFound )
 536           s = input.substr(offset);
 537         else if ( i > 0 )
 538           s = input.substr( positions[i - 1] + sizeS2,
 539                          positions[i] - positions[i - 1] - sizeS2 );
 540       }
 541     }
 542     results.push_back(s);
 543   }
 544   // return the number of substrings
 545   return results.size();
 546 }
 547
 548 CStdStringArray StringUtils::SplitString(const CStdString& input, const CStdString& delimiter, unsigned int iMaxStrings /* = 0 */)
 549 {
 550   CStdStringArray result;
 551   SplitString(input, delimiter, result, iMaxStrings);
 552   return result;
 553 }
 554
 555 vector<string> StringUtils::Split(const std::string& input, const std::string& delimiter, unsigned int iMaxStrings /* = 0 */)
 556 {
 557   CStdStringArray result;
 558   SplitString(input, delimiter, result, iMaxStrings);
 559
 560   vector<string> strArray;
 561   for (unsigned int index = 0; index < result.size(); index++)
 562     strArray.push_back(result.at(index));
 563
 564   return strArray;
 565 }
 566
 567 // returns the number of occurrences of strFind in strInput.
 568 int StringUtils::FindNumber(const CStdString& strInput, const CStdString &strFind)
 569 {
 570   size_t pos = strInput.find(strFind, 0);
 571   int numfound = 0;
 572   while (pos != std::string::npos)
 573   {
 574     numfound++;
 575     pos = strInput.find(strFind, pos + 1);
 576   }
 577   return numfound;
 578 }
 579
 580 // Compares separately the numeric and alphabetic parts of a string.
 581 // returns negative if left < right, positive if left > right
 582 // and 0 if they are identical (essentially calculates left - right)
 583 int64_t StringUtils::AlphaNumericCompare(const wchar_t *left, const wchar_t *right)
 584 {
 585   wchar_t *l = (wchar_t *)left;
 586   wchar_t *r = (wchar_t *)right;
 587   wchar_t *ld, *rd;
 588   wchar_t lc, rc;
 589   int64_t lnum, rnum;
 590   const collate<wchar_t>& coll = use_facet< collate<wchar_t> >( locale() );
 591   int cmp_res = 0;
 592   while (*l != 0 && *r != 0)
 593   {
 594     // check if we have a numerical value
 595     if (*l >= L'0' && *l <= L'9' && *r >= L'0' && *r <= L'9')
 596     {
 597       ld = l;
 598       lnum = 0;
 599       while (*ld >= L'0' && *ld <= L'9' && ld < l + 15)
 600       { // compare only up to 15 digits
 601         lnum *= 10;
 602         lnum += *ld++ - '0';
 603       }
 604       rd = r;
 605       rnum = 0;
 606       while (*rd >= L'0' && *rd <= L'9' && rd < r + 15)
 607       { // compare only up to 15 digits
 608         rnum *= 10;
 609         rnum += *rd++ - L'0';
 610       }
 611       // do we have numbers?
 612       if (lnum != rnum)
 613       { // yes - and they're different!
 614         return lnum - rnum;
 615       }
 616       l = ld;
 617       r = rd;
 618       continue;
 619     }
 620     // do case less comparison
 621     lc = *l;
 622     if (lc >= L'A' && lc <= L'Z')
 623       lc += L'a'-L'A';
 624     rc = *r;
 625     if (rc >= L'A' && rc <= L'Z')
 626       rc += L'a'- L'A';
 627
 628     // ok, do a normal comparison, taking current locale into account. Add special case stuff (eg '(' characters)) in here later
 629     if ((cmp_res = coll.compare(&lc, &lc + 1, &rc, &rc + 1)) != 0)
 630     {
 631       return cmp_res;
 632     }
 633     l++; r++;
 634   }
 635   if (*r)
 636   { // r is longer
 637     return -1;
 638   }
 639   else if (*l)
 640   { // l is longer
 641     return 1;
 642   }
 643   return 0; // files are the same
 644 }
 645
 646 int StringUtils::DateStringToYYYYMMDD(const CStdString &dateString)
 647 {
 648   CStdStringArray days;
 649   int splitCount = StringUtils::SplitString(dateString, "-", days);
 650   if (splitCount == 1)
 651     return atoi(days[0].c_str());
 652   else if (splitCount == 2)
 653     return atoi(days[0].c_str())*100+atoi(days[1].c_str());
 654   else if (splitCount == 3)
 655     return atoi(days[0].c_str())*10000+atoi(days[1].c_str())*100+atoi(days[2].c_str());
 656   else
 657     return -1;
 658 }
 659
 660 long StringUtils::TimeStringToSeconds(const CStdString &timeString)
 661 {
 662   CStdString strCopy(timeString);
 663   StringUtils::Trim(strCopy);
 664   if(StringUtils::EndsWithNoCase(strCopy, " min"))
 665   {
 666     // this is imdb format of "XXX min"
 667     return 60 * atoi(strCopy.c_str());
 668   }
 669   else
 670   {
 671     CStdStringArray secs;
 672     StringUtils::SplitString(strCopy, ":", secs);
 673     int timeInSecs = 0;
 674     for (unsigned int i = 0; i < 3 && i < secs.size(); i++)
 675     {
 676       timeInSecs *= 60;
 677       timeInSecs += atoi(secs[i]);
 678     }
 679     return timeInSecs;
 680   }
 681 }
 682
 683 CStdString StringUtils::SecondsToTimeString(long lSeconds, TIME_FORMAT format)
 684 {
 685   int hh = lSeconds / 3600;
 686   lSeconds = lSeconds % 3600;
 687   int mm = lSeconds / 60;
 688   int ss = lSeconds % 60;
 689
 690   if (format == TIME_FORMAT_GUESS)
 691     format = (hh >= 1) ? TIME_FORMAT_HH_MM_SS : TIME_FORMAT_MM_SS;
 692   CStdString strHMS;
 693   if (format & TIME_FORMAT_HH)
 694     strHMS += StringUtils::Format("%02.2i", hh);
 695   else if (format & TIME_FORMAT_H)
 696     strHMS += StringUtils::Format("%i", hh);
 697   if (format & TIME_FORMAT_MM)
 698     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", mm);
 699   if (format & TIME_FORMAT_SS)
 700     strHMS += StringUtils::Format(strHMS.empty() ? "%02.2i" : ":%02.2i", ss);
 701   return strHMS;
 702 }
 703
 704 bool StringUtils::IsNaturalNumber(const CStdString& str)
 705 {
 706   size_t i = 0, n = 0;
 707   // allow whitespace,digits,whitespace
 708   while (i < str.size() && isspace((unsigned char) str[i]))
 709     i++;
 710   while (i < str.size() && isdigit((unsigned char) str[i]))
 711   {
 712     i++; n++;
 713   }
 714   while (i < str.size() && isspace((unsigned char) str[i]))
 715     i++;
 716   return i == str.size() && n > 0;
 717 }
 718
 719 bool StringUtils::IsInteger(const CStdString& str)
 720 {
 721   size_t i = 0, n = 0;
 722   // allow whitespace,-,digits,whitespace
 723   while (i < str.size() && isspace((unsigned char) str[i]))
 724     i++;
 725   if (i < str.size() && str[i] == '-')
 726     i++;
 727   while (i < str.size() && isdigit((unsigned char) str[i]))
 728   {
 729     i++; n++;
 730   }
 731   while (i < str.size() && isspace((unsigned char) str[i]))
 732     i++;
 733   return i == str.size() && n > 0;
 734 }
 735
 736 void StringUtils::RemoveCRLF(CStdString& strLine)
 737 {
 738   StringUtils::TrimRight(strLine, "\n\r");
 739 }
 740
 741 CStdString StringUtils::SizeToString(int64_t size)
 742 {
 743   CStdString strLabel;
 744   const char prefixes[] = {' ','k', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'};
 745   unsigned int i = 0;
 746   double s = (double)size;
 747   while (i < sizeof(prefixes)/sizeof(prefixes[0]) && s >= 1000.0)
 748   {
 749     s /= 1024.0;
 750     i++;
 751   }
 752
 753   if (!i)
 754     strLabel = StringUtils::Format("%.0lf %cB ", s, prefixes[i]);
 755   else if (s >= 100.0)
 756     strLabel = StringUtils::Format("%.1lf %cB", s, prefixes[i]);
 757   else
 758     strLabel = StringUtils::Format("%.2lf %cB", s, prefixes[i]);
 759
 760   return strLabel;
 761 }
 762
 763 // return -1 if not, else return the utf8 char length.
 764 int IsUTF8Letter(const unsigned char *str)
 765 {
 766   // reference:
 767   // unicode -> utf8 table: http://www.utf8-chartable.de/
 768   // latin characters in unicode: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
 769   unsigned char ch = str[0];
 770   if (!ch)
 771     return -1;
 772   if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
 773     return 1;
 774   if (!(ch & 0x80))
 775     return -1;
 776   unsigned char ch2 = str[1];
 777   if (!ch2)
 778     return -1;
 779   // check latin 1 letter table: http://en.wikipedia.org/wiki/C1_Controls_and_Latin-1_Supplement
 780   if (ch == 0xC3 && ch2 >= 0x80 && ch2 <= 0xBF && ch2 != 0x97 && ch2 != 0xB7)
 781     return 2;
 782   // check latin extended A table: http://en.wikipedia.org/wiki/Latin_Extended-A
 783   if (ch >= 0xC4 && ch <= 0xC7 && ch2 >= 0x80 && ch2 <= 0xBF)
 784     return 2;
 785   // check latin extended B table: http://en.wikipedia.org/wiki/Latin_Extended-B
 786   // and International Phonetic Alphabet: http://en.wikipedia.org/wiki/IPA_Extensions_(Unicode_block)
 787   if (((ch == 0xC8 || ch == 0xC9) && ch2 >= 0x80 && ch2 <= 0xBF)
 788       || (ch == 0xCA && ch2 >= 0x80 && ch2 <= 0xAF))
 789     return 2;
 790   return -1;
 791 }
 792
 793 size_t StringUtils::FindWords(const char *str, const char *wordLowerCase)
 794 {
 795   // NOTE: This assumes word is lowercase!
 796   unsigned char *s = (unsigned char *)str;
 797   do
 798   {
 799     // start with a compare
 800     unsigned char *c = s;
 801     unsigned char *w = (unsigned char *)wordLowerCase;
 802     bool same = true;
 803     while (same && *c && *w)
 804     {
 805       unsigned char lc = *c++;
 806       if (lc >= 'A' && lc <= 'Z')
 807         lc += 'a'-'A';
 808
 809       if (lc != *w++) // different
 810         same = false;
 811     }
 812     if (same && *w == 0)  // only the same if word has been exhausted
 813       return (const char *)s - str;
 814
 815     // otherwise, skip current word (composed by latin letters) or number
 816     int l;
 817     if (*s >= '0' && *s <= '9')
 818     {
 819       ++s;
 820       while (*s >= '0' && *s <= '9') ++s;
 821     }
 822     else if ((l = IsUTF8Letter(s)) > 0)
 823     {
 824       s += l;
 825       while ((l = IsUTF8Letter(s)) > 0) s += l;
 826     }
 827     else
 828       ++s;
 829     while (*s && *s == ' ') s++;
 830
 831     // and repeat until we're done
 832   } while (*s);
 833
 834   return CStdString::npos;
 835 }
 836
 837 // assumes it is called from after the first open bracket is found
 838 int StringUtils::FindEndBracket(const CStdString &str, char opener, char closer, int startPos)
 839 {
 840   int blocks = 1;
 841   for (unsigned int i = startPos; i < str.size(); i++)
 842   {
 843     if (str[i] == opener)
 844       blocks++;
 845     else if (str[i] == closer)
 846     {
 847       blocks--;
 848       if (!blocks)
 849         return i;
 850     }
 851   }
 852
 853   return (int)CStdString::npos;
 854 }
 855
 856 void StringUtils::WordToDigits(CStdString &word)
 857 {
 858   static const char word_to_letter[] = "22233344455566677778889999";
 859   StringUtils::ToLower(word);
 860   for (unsigned int i = 0; i < word.size(); ++i)
 861   { // NB: This assumes ascii, which probably needs extending at some  point.
 862     char letter = word[i];
 863     if ((letter >= 'a' && letter <= 'z')) // assume contiguous letter range
 864     {
 865       word[i] = word_to_letter[letter-'a'];
 866     }
 867     else if (letter < '0' || letter > '9') // We want to keep 0-9!
 868     {
 869       word[i] = ' ';  // replace everything else with a space
 870     }
 871   }
 872 }
 873
 874 CStdString StringUtils::CreateUUID()
 875 {
 876   /* This function generate a DCE 1.1, ISO/IEC 11578:1996 and IETF RFC-4122
 877   * Version 4 conform local unique UUID based upon random number generation.
 878   */
 879   char UuidStrTmp[40];
 880   char *pUuidStr = UuidStrTmp;
 881   int i;
 882
 883   static bool m_uuidInitialized = false;
 884   if (!m_uuidInitialized)
 885   {
 886     /* use current time as the seed for rand()*/
 887     srand(time(NULL));
 888     m_uuidInitialized = true;
 889   }
 890
 891   /*Data1 - 8 characters.*/
 892   for(i = 0; i < 8; i++, pUuidStr++)
 893     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 894
 895   /*Data2 - 4 characters.*/
 896   *pUuidStr++ = '-';
 897   for(i = 0; i < 4; i++, pUuidStr++)
 898     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 899
 900   /*Data3 - 4 characters.*/
 901   *pUuidStr++ = '-';
 902   for(i = 0; i < 4; i++, pUuidStr++)
 903     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 904
 905   /*Data4 - 4 characters.*/
 906   *pUuidStr++ = '-';
 907   for(i = 0; i < 4; i++, pUuidStr++)
 908     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 909
 910   /*Data5 - 12 characters.*/
 911   *pUuidStr++ = '-';
 912   for(i = 0; i < 12; i++, pUuidStr++)
 913     ((*pUuidStr = (rand() % 16)) < 10) ? *pUuidStr += 48 : *pUuidStr += 55;
 914
 915   *pUuidStr = '\0';
 916
 917   m_lastUUID = UuidStrTmp;
 918   return UuidStrTmp;
 919 }
 920
 921 bool StringUtils::ValidateUUID(const CStdString &uuid)
 922 {
 923   CRegExp guidRE;
 924   guidRE.RegComp(ADDON_GUID_RE);
 925   return (guidRE.RegFind(uuid.c_str()) == 0);
 926 }
 927
 928 double StringUtils::CompareFuzzy(const CStdString &left, const CStdString &right)
 929 {
 930   return (0.5 + fstrcmp(left.c_str(), right.c_str(), 0.0) * (left.length() + right.length())) / 2.0;
 931 }
 932
 933 int StringUtils::FindBestMatch(const CStdString &str, const CStdStringArray &strings, double &matchscore)
 934 {
 935   int best = -1;
 936   matchscore = 0;
 937
 938   int i = 0;
 939   for (CStdStringArray::const_iterator it = strings.begin(); it != strings.end(); it++, i++)
 940   {
 941     int maxlength = max(str.length(), it->length());
 942     double score = StringUtils::CompareFuzzy(str, *it) / maxlength;
 943     if (score > matchscore)
 944     {
 945       matchscore = score;
 946       best = i;
 947     }
 948   }
 949   return best;
 950 }
 951
 952 bool StringUtils::ContainsKeyword(const CStdString &str, const CStdStringArray &keywords)
 953 {
 954   for (CStdStringArray::const_iterator it = keywords.begin(); it != keywords.end(); it++)
 955   {
 956     if (str.find(*it) != str.npos)
 957       return true;
 958   }
 959   return false;
 960 }
 961
 962 size_t StringUtils::utf8_strlen(const char *s)
 963 {
 964   size_t length = 0;
 965   while (*s)
 966   {
 967     if ((*s++ & 0xC0) != 0x80)
 968       length++;
 969   }
 970   return length;
 971 }
 972
 973 std::string StringUtils::Paramify(const std::string &param)
 974 {
 975   std::string result = param;
 976   // escape backspaces
 977   StringUtils::Replace(result, "\\", "\\\\");
 978   // escape double quotes
 979   StringUtils::Replace(result, "\"", "\\\"");
 980
 981   // add double quotes around the whole string
 982   return "\"" + result + "\"";
 983 }
 984
 985 void StringUtils::Tokenize(const std::string& input, std::vector<std::string>& tokens, const std::string& delimiters)
 986 {
 987   // Tokenize ripped from http://www.linuxselfhelp.com/HOWTO/C++Programming-HOWTO-7.html
 988   // Skip delimiters at beginning.
 989   string::size_type lastPos = input.find_first_not_of(delimiters, 0);
 990   // Find first "non-delimiter".
 991   string::size_type pos = input.find_first_of(delimiters, lastPos);
 992
 993   while (string::npos != pos || string::npos != lastPos)
 994   {
 995     // Found a token, add it to the vector.
 996     tokens.push_back(input.substr(lastPos, pos - lastPos));
 997     // Skip delimiters.  Note the "not_of"
 998     lastPos = input.find_first_not_of(delimiters, pos);
 999     // Find next "non-delimiter"
1000     pos = input.find_first_of(delimiters, lastPos);
1001   }
1002 }