code.vuplus.com Git - vuplus_xbmc/blob - xbmc/addons/Scraper.cpp

   1 /*
   2 *      Copyright (C) 2005-2013 Team XBMC
   3 *      http://www.xbmc.org
   4 *
   5 *  This Program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2, or (at your option)
   8 *  any later version.
   9 *
  10 *  This Program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with XBMC; see the file COPYING.  If not, see
  17 *  <http://www.gnu.org/licenses/>.
  18 *
  19 */
  20 #include "Scraper.h"
  21 #include "filesystem/File.h"
  22 #include "filesystem/Directory.h"
  23 #include "filesystem/CurlFile.h"
  24 #include "AddonManager.h"
  25 #include "utils/ScraperParser.h"
  26 #include "utils/ScraperUrl.h"
  27 #include "utils/CharsetConverter.h"
  28 #include "utils/log.h"
  29 #include "music/infoscanner/MusicAlbumInfo.h"
  30 #include "music/infoscanner/MusicArtistInfo.h"
  31 #include "utils/fstrcmp.h"
  32 #include "settings/AdvancedSettings.h"
  33 #include "FileItem.h"
  34 #include "utils/URIUtils.h"
  35 #include "utils/XMLUtils.h"
  36 #include "utils/StringUtils.h"
  37 #include "music/MusicDatabase.h"
  38 #include "video/VideoDatabase.h"
  39 #include "music/Album.h"
  40 #include "music/Artist.h"
  41 #include "Util.h"
  42 #include "URL.h"
  43
  44 #include <sstream>
  45
  46 using namespace std;
  47 using namespace XFILE;
  48 using namespace MUSIC_GRABBER;
  49 using namespace VIDEO;
  50
  51 namespace ADDON
  52 {
  53
  54 typedef struct
  55 {
  56   const char*  name;
  57   CONTENT_TYPE type;
  58   int          pretty;
  59 } ContentMapping;
  60
  61 static const ContentMapping content[] =
  62   {{"unknown",       CONTENT_NONE,          231 },
  63    {"albums",        CONTENT_ALBUMS,        132 },
  64    {"music",         CONTENT_ALBUMS,        132 },
  65    {"artists",       CONTENT_ARTISTS,       133 },
  66    {"movies",        CONTENT_MOVIES,      20342 },
  67    {"tvshows",       CONTENT_TVSHOWS,     20343 },
  68    {"musicvideos",   CONTENT_MUSICVIDEOS, 20389 }};
  69
  70 CStdString TranslateContent(const CONTENT_TYPE &type, bool pretty/*=false*/)
  71 {
  72   for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
  73   {
  74     const ContentMapping &map = content[index];
  75     if (type == map.type)
  76     {
  77       if (pretty && map.pretty)
  78         return g_localizeStrings.Get(map.pretty);
  79       else
  80         return map.name;
  81     }
  82   }
  83   return "";
  84 }
  85
  86 CONTENT_TYPE TranslateContent(const CStdString &string)
  87 {
  88   for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
  89   {
  90     const ContentMapping &map = content[index];
  91     if (string.Equals(map.name))
  92       return map.type;
  93   }
  94   return CONTENT_NONE;
  95 }
  96
  97 TYPE ScraperTypeFromContent(const CONTENT_TYPE &content)
  98 {
  99   switch (content)
 100   {
 101   case CONTENT_ALBUMS:
 102     return ADDON_SCRAPER_ALBUMS;
 103   case CONTENT_ARTISTS:
 104     return ADDON_SCRAPER_ARTISTS;
 105   case CONTENT_MOVIES:
 106     return ADDON_SCRAPER_MOVIES;
 107   case CONTENT_MUSICVIDEOS:
 108     return ADDON_SCRAPER_MUSICVIDEOS;
 109   case CONTENT_TVSHOWS:
 110     return ADDON_SCRAPER_TVSHOWS;
 111   default:
 112     return ADDON_UNKNOWN;
 113   }
 114 }
 115
 116 // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
 117 static void CheckScraperError(const TiXmlElement *pxeRoot)
 118 {
 119   if (!pxeRoot || stricmp(pxeRoot->Value(), "error"))
 120     return;
 121   CStdString sTitle;
 122   CStdString sMessage;
 123   XMLUtils::GetString(pxeRoot, "title", sTitle);
 124   XMLUtils::GetString(pxeRoot, "message", sMessage);
 125   throw CScraperError(sTitle, sMessage);
 126 }
 127
 128 CScraper::CScraper(const cp_extension_t *ext) : CAddon(ext), m_fLoaded(false)
 129 {
 130   if (ext)
 131   {
 132     m_language = CAddonMgr::Get().GetExtValue(ext->configuration, "@language");
 133     m_requiressettings = CAddonMgr::Get().GetExtValue(ext->configuration,"@requiressettings").Equals("true");
 134     CStdString persistence = CAddonMgr::Get().GetExtValue(ext->configuration, "@cachepersistence");
 135     if (!persistence.IsEmpty())
 136       m_persistence.SetFromTimeString(persistence);
 137   }
 138   switch (Type())
 139   {
 140     case ADDON_SCRAPER_ALBUMS:
 141       m_pathContent = CONTENT_ALBUMS;
 142       break;
 143     case ADDON_SCRAPER_ARTISTS:
 144       m_pathContent = CONTENT_ARTISTS;
 145       break;
 146     case ADDON_SCRAPER_MOVIES:
 147       m_pathContent = CONTENT_MOVIES;
 148       break;
 149     case ADDON_SCRAPER_MUSICVIDEOS:
 150       m_pathContent = CONTENT_MUSICVIDEOS;
 151       break;
 152     case ADDON_SCRAPER_TVSHOWS:
 153       m_pathContent = CONTENT_TVSHOWS;
 154       break;
 155     default:
 156       m_pathContent = CONTENT_NONE;
 157       break;
 158   }
 159 }
 160
 161 AddonPtr CScraper::Clone(const AddonPtr &self) const
 162 {
 163   return AddonPtr(new CScraper(*this, self));
 164 }
 165
 166 CScraper::CScraper(const CScraper &rhs, const AddonPtr &self)
 167   : CAddon(rhs, self), m_fLoaded(false)
 168 {
 169   m_pathContent = rhs.m_pathContent;
 170   m_persistence = rhs.m_persistence;
 171   m_requiressettings = rhs.m_requiressettings;
 172   m_language = rhs.m_language;
 173 }
 174
 175 bool CScraper::Supports(const CONTENT_TYPE &content) const
 176 {
 177   return Type() == ScraperTypeFromContent(content);
 178 }
 179
 180 bool CScraper::SetPathSettings(CONTENT_TYPE content, const CStdString& xml)
 181 {
 182   m_pathContent = content;
 183   if (!LoadSettings())
 184     return false;
 185
 186   if (xml.IsEmpty())
 187     return true;
 188
 189   CXBMCTinyXML doc;
 190   doc.Parse(xml.c_str());
 191   m_userSettingsLoaded = SettingsFromXML(doc);
 192
 193   return m_userSettingsLoaded;
 194 }
 195
 196 CStdString CScraper::GetPathSettings()
 197 {
 198   if (!LoadSettings())
 199     return "";
 200
 201   stringstream stream;
 202   CXBMCTinyXML doc;
 203   SettingsToXML(doc);
 204   if (doc.RootElement())
 205     stream << *doc.RootElement();
 206
 207   return stream.str();
 208 }
 209
 210 void CScraper::ClearCache()
 211 {
 212   CStdString strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath, "scrapers");
 213
 214   // create scraper cache dir if needed
 215   if (!CDirectory::Exists(strCachePath))
 216     CDirectory::Create(strCachePath);
 217
 218   strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
 219   URIUtils::AddSlashAtEnd(strCachePath);
 220
 221   if (CDirectory::Exists(strCachePath))
 222   {
 223     CFileItemList items;
 224     CDirectory::GetDirectory(strCachePath,items);
 225     for (int i=0;i<items.Size();++i)
 226     {
 227       // wipe cache
 228       if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
 229         CFile::Delete(items[i]->GetPath());
 230     }
 231   }
 232   else
 233     CDirectory::Create(strCachePath);
 234 }
 235
 236 // returns a vector of strings: the first is the XML output by the function; the rest
 237 // is XML output by chained functions, possibly recursively
 238 // the CCurlFile object is passed in so that URL fetches can be canceled from other threads
 239 // throws CScraperError abort on internal failures (e.g., parse errors)
 240 vector<CStdString> CScraper::Run(const CStdString& function,
 241                                  const CScraperUrl& scrURL,
 242                                  CCurlFile& http,
 243                                  const vector<CStdString>* extras)
 244 {
 245   if (!Load())
 246     throw CScraperError();
 247
 248   CStdString strXML = InternalRun(function,scrURL,http,extras);
 249   if (strXML.IsEmpty())
 250   {
 251     if (function != "NfoUrl")
 252       CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
 253     throw CScraperError();
 254   }
 255
 256   CLog::Log(LOGDEBUG,"scraper: %s returned %s",function.c_str(),strXML.c_str());
 257
 258   if (!XMLUtils::HasUTF8Declaration(strXML))
 259     g_charsetConverter.unknownToUTF8(strXML);
 260
 261   CXBMCTinyXML doc;
 262   doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8);
 263   if (!doc.RootElement())
 264   {
 265     CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
 266     throw CScraperError();
 267   }
 268
 269   vector<CStdString> result;
 270   result.push_back(strXML);
 271   TiXmlElement* xchain = doc.RootElement()->FirstChildElement();
 272   // skip children of the root element until <url> or <chain>
 273   while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
 274       xchain = xchain->NextSiblingElement();
 275   while (xchain)
 276   {
 277     // <chain|url function="...">param</>
 278     const char* szFunction = xchain->Attribute("function");
 279     if (szFunction)
 280     {
 281       CScraperUrl scrURL2;
 282       vector<CStdString> extras;
 283       // for <chain>, pass the contained text as a parameter; for <url>, as URL content
 284       if (strcmp(xchain->Value(),"chain")==0)
 285       {
 286         if (xchain->FirstChild())
 287           extras.push_back(xchain->FirstChild()->Value());
 288       }
 289       else
 290         scrURL2.ParseElement(xchain);
 291       // Fix for empty chains. $$1 would still contain the
 292       // previous value as there is no child of the xml node.
 293       // since $$1 will always either contain the data from an
 294       // url or the parameters to a chain, we can safely clear it here
 295       // to fix this issue
 296       m_parser.m_param[0].clear();
 297       vector<CStdString> result2 = RunNoThrow(szFunction,scrURL2,http,&extras);
 298       result.insert(result.end(),result2.begin(),result2.end());
 299     }
 300     xchain = xchain->NextSiblingElement();
 301     // continue to skip past non-<url> or <chain> elements
 302     while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
 303       xchain = xchain->NextSiblingElement();
 304   }
 305
 306   return result;
 307 }
 308
 309 // just like Run, but returns an empty list instead of throwing in case of error
 310 // don't use in new code; errors should be handled appropriately
 311 std::vector<CStdString> CScraper::RunNoThrow(const CStdString& function,
 312   const CScraperUrl& url,
 313   XFILE::CCurlFile& http,
 314   const std::vector<CStdString>* extras)
 315 {
 316   std::vector<CStdString> vcs;
 317   try
 318   {
 319     vcs = Run(function, url, http, extras);
 320   }
 321   catch (const CScraperError &sce)
 322   {
 323     ASSERT(sce.FAborted());  // the only kind we should get
 324   }
 325   return vcs;
 326 }
 327
 328 CStdString CScraper::InternalRun(const CStdString& function,
 329                                  const CScraperUrl& scrURL,
 330                                  CCurlFile& http,
 331                                  const vector<CStdString>* extras)
 332 {
 333   // walk the list of input URLs and fetch each into parser parameters
 334   unsigned int i;
 335   for (i=0;i<scrURL.m_url.size();++i)
 336   {
 337     CStdString strCurrHTML;
 338     if (!CScraperUrl::Get(scrURL.m_url[i],m_parser.m_param[i],http,ID()) || m_parser.m_param[i].size() == 0)
 339       return "";
 340   }
 341   // put the 'extra' parameterts into the parser parameter list too
 342   if (extras)
 343   {
 344     for (unsigned int j=0;j<extras->size();++j)
 345       m_parser.m_param[j+i] = (*extras)[j];
 346   }
 347
 348   return m_parser.Parse(function,this);
 349 }
 350
 351 bool CScraper::Load()
 352 {
 353   if (m_fLoaded)
 354     return true;
 355
 356   bool result=m_parser.Load(LibPath());
 357   if (result)
 358   {
 359     // TODO: this routine assumes that deps are a single level, and assumes the dep is installed.
 360     //       1. Does it make sense to have recursive dependencies?
 361     //       2. Should we be checking the dep versions or do we assume it is ok?
 362     ADDONDEPS deps = GetDeps();
 363     ADDONDEPS::iterator itr = deps.begin();
 364     while (itr != deps.end())
 365     {
 366       if (itr->first.Equals("xbmc.metadata"))
 367       {
 368         ++itr;
 369         continue;
 370       }
 371       AddonPtr dep;
 372
 373       bool bOptional = itr->second.second;
 374
 375       if (CAddonMgr::Get().GetAddon((*itr).first, dep))
 376       {
 377         CXBMCTinyXML doc;
 378         if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
 379           m_parser.AddDocument(&doc);
 380       }
 381       else
 382       {
 383         if (!bOptional)
 384         {
 385           result = false;
 386           break;
 387         }
 388       }
 389       itr++;
 390     }
 391   }
 392
 393   if (!result)
 394     CLog::Log(LOGWARNING, "failed to load scraper XML");
 395   return m_fLoaded = result;
 396 }
 397
 398 bool CScraper::IsInUse() const
 399 {
 400   if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
 401   { // music scraper
 402     CMusicDatabase db;
 403     if (db.Open() && db.ScraperInUse(ID()))
 404       return true;
 405   }
 406   else
 407   { // video scraper
 408     CVideoDatabase db;
 409     if (db.Open() && db.ScraperInUse(ID()))
 410       return true;
 411   }
 412   return false;
 413 }
 414
 415 // pass in contents of .nfo file; returns URL (possibly empty if none found)
 416 // and may populate strId, or throws CScraperError on error
 417 CScraperUrl CScraper::NfoUrl(const CStdString &sNfoContent)
 418 {
 419   CScraperUrl scurlRet;
 420
 421   // scraper function takes contents of .nfo file, returns XML (see below)
 422   vector<CStdString> vcsIn;
 423   vcsIn.push_back(sNfoContent);
 424   CScraperUrl scurl;
 425   CCurlFile fcurl;
 426   vector<CStdString> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
 427   if (vcsOut.empty() || vcsOut[0].empty())
 428     return scurlRet;
 429   if (vcsOut.size() > 1)
 430     CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
 431
 432   // parse returned XML: either <error> element on error, blank on failure,
 433   // or <url>...</url> or <url>...</url><id>...</id> on success
 434   for (unsigned int i=0; i < vcsOut.size(); ++i)
 435   {
 436     CXBMCTinyXML doc;
 437     doc.Parse(vcsOut[i], 0, TIXML_ENCODING_UTF8);
 438     CheckScraperError(doc.RootElement());
 439
 440     if (doc.RootElement())
 441     {
 442       /*
 443        NOTE: Scrapers might return invalid xml with some loose
 444        elements (eg. '<url>http://some.url</url><id>123</id>').
 445        Since XMLUtils::GetString() is assuming well formed xml
 446        with start and end-tags we're not able to use it.
 447        Check for the desired Elements instead.
 448       */
 449       TiXmlElement* pxeUrl=NULL;
 450       TiXmlElement* pId=NULL;
 451       if (!strcmp(doc.RootElement()->Value(),"details"))
 452       {
 453         pxeUrl = doc.RootElement()->FirstChildElement("url");
 454         pId = doc.RootElement()->FirstChildElement("id");
 455       }
 456       else
 457       {
 458         pId = doc.FirstChildElement("id");
 459         pxeUrl = doc.FirstChildElement("url");
 460       }
 461       if (pId && pId->FirstChild())
 462         scurlRet.strId = pId->FirstChild()->Value();
 463
 464       if (pxeUrl && pxeUrl->Attribute("function"))
 465         continue;
 466
 467       if (pxeUrl)
 468         scurlRet.ParseElement(pxeUrl);
 469       else if (!strcmp(doc.RootElement()->Value(), "url"))
 470         scurlRet.ParseElement(doc.RootElement());
 471       else
 472         continue;
 473       break;
 474     }
 475   }
 476   return scurlRet;
 477 }
 478
 479 static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
 480 {
 481   return left.relevance > right.relevance;
 482 }
 483
 484 // fetch list of matching movies sorted by relevance (may be empty);
 485 // throws CScraperError on error; first called with fFirst set, then unset if first try fails
 486 std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl, const CStdString &sMovie,
 487   bool fFirst)
 488 {
 489   // prepare parameters for URL creation
 490   CStdString sTitle, sTitleYear, sYear;
 491   CUtil::CleanString(sMovie, sTitle, sTitleYear, sYear, true/*fRemoveExt*/, fFirst);
 492
 493   if (!fFirst || Content() == CONTENT_MUSICVIDEOS)
 494     sTitle.Replace("-"," ");
 495
 496   CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
 497     "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sTitle.c_str(),
 498     Name().c_str(), Path().c_str(),
 499     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 500
 501   sTitle.ToLower();
 502
 503   vector<CStdString> vcsIn(1);
 504   g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
 505   CURL::Encode(vcsIn[0]);
 506   if (!sYear.IsEmpty())
 507     vcsIn.push_back(sYear);
 508
 509   // request a search URL from the title/filename/etc.
 510   CScraperUrl scurl;
 511   vector<CStdString> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
 512   std::vector<CScraperUrl> vcscurl;
 513   if (vcsOut.empty())
 514   {
 515     CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
 516     throw CScraperError();
 517   }
 518   scurl.ParseString(vcsOut[0]);
 519
 520   // do the search, and parse the result into a list
 521   vcsIn.clear();
 522   vcsIn.push_back(scurl.m_url[0].m_url);
 523   vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);
 524
 525   bool fSort(true);
 526   std::set<CStdString> stsDupeCheck;
 527   bool fResults(false);
 528   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 529   {
 530     CXBMCTinyXML doc;
 531     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 532     if (!doc.RootElement())
 533     {
 534       CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
 535       continue;  // might have more valid results later
 536     }
 537
 538     CheckScraperError(doc.RootElement());
 539
 540     TiXmlHandle xhDoc(&doc);
 541     TiXmlHandle xhResults = xhDoc.FirstChild("results");
 542     if (!xhResults.Element())
 543       continue;
 544     fResults = true;  // even if empty
 545
 546     // we need to sort if returned results don't specify 'sorted="yes"'
 547     if (fSort)
 548       fSort = CStdString(xhResults.Element()->Attribute("sorted")).CompareNoCase("yes") != 0;
 549
 550     for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element();
 551       pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
 552     {
 553       CScraperUrl scurlMovie;
 554       TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
 555       TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
 556       if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
 557       {
 558         scurlMovie.strTitle = pxnTitle->FirstChild()->Value();
 559         XMLUtils::GetString(pxeMovie, "id", scurlMovie.strId);
 560
 561         for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
 562           scurlMovie.ParseElement(pxeLink);
 563
 564         // calculate the relavance of this hit
 565         CStdString sCompareTitle = scurlMovie.strTitle;
 566         sCompareTitle.ToLower();
 567         CStdString sMatchTitle = sTitle;
 568         sMatchTitle.ToLower();
 569
 570         /*
 571          * Identify the best match by performing a fuzzy string compare on the search term and
 572          * the result. Additionally, use the year (if available) to further refine the best match.
 573          * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
 574          * countries), otherwise it scores 0.
 575          */
 576         CStdString sCompareYear;
 577         XMLUtils::GetString(pxeMovie, "year", sCompareYear);
 578
 579         double yearScore = 0;
 580         if (!sYear.empty() && !sCompareYear.empty())
 581           yearScore = std::max(0.0, 1-0.5*abs(atoi(sYear)-atoi(sCompareYear)));
 582
 583         scurlMovie.relevance = fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str(), 0.0) + yearScore;
 584
 585         // reconstruct a title for the user
 586         if (!sCompareYear.empty())
 587           scurlMovie.strTitle.AppendFormat(" (%s)", sCompareYear.c_str());
 588
 589         CStdString sLanguage;
 590         if (XMLUtils::GetString(pxeMovie, "language", sLanguage))
 591           scurlMovie.strTitle.AppendFormat(" (%s)", sLanguage.c_str());
 592
 593         // filter for dupes from naughty scrapers
 594         if (stsDupeCheck.insert(scurlMovie.m_url[0].m_url + " " + scurlMovie.strTitle).second)
 595           vcscurl.push_back(scurlMovie);
 596       }
 597     }
 598   }
 599
 600   if (!fResults)
 601     throw CScraperError();  // scraper aborted
 602
 603   if (fSort)
 604     std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);
 605
 606   return vcscurl;
 607 }
 608
 609 // find album by artist, using fcurl for web fetches
 610 // returns a list of albums (empty if no match or failure)
 611 std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl, const CStdString &sAlbum,
 612   const CStdString &sArtist)
 613 {
 614   CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper "
 615     "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
 616     sAlbum.c_str(), Name().c_str(), Path().c_str(),
 617     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 618
 619   // scraper function is given the album and artist as parameters and
 620   // returns an XML <url> element parseable by CScraperUrl
 621   std::vector<CStdString> extras(2);
 622   g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
 623   g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
 624   CURL::Encode(extras[0]);
 625   CURL::Encode(extras[1]);
 626   CScraperUrl scurl;
 627   vector<CStdString> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
 628   if (vcsOut.size() > 1)
 629     CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
 630
 631   std::vector<CMusicAlbumInfo> vcali;
 632   if (vcsOut.empty() || vcsOut[0].empty())
 633     return vcali;
 634   scurl.ParseString(vcsOut[0]);
 635
 636   // the next function is passed the contents of the returned URL, and returns
 637   // an empty string on failure; on success, returns XML matches in the form:
 638   // <results>
 639   //  <entity>
 640   //   <title>...</title>
 641   //   <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
 642   //   <artist>...</artist>
 643   //   <year>...</year>
 644   //   <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
 645   //  </entity>
 646   //  ...
 647   // </results>
 648   vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);
 649
 650   // parse the returned XML into a vector of album objects
 651   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 652   {
 653     CXBMCTinyXML doc;
 654     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 655     TiXmlHandle xhDoc(&doc);
 656
 657     for (TiXmlElement* pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
 658       pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
 659     {
 660       CStdString sTitle;
 661       if (XMLUtils::GetString(pxeAlbum, "title", sTitle))
 662       {
 663         CStdString sArtist;
 664         CStdString sAlbumName;
 665         if (XMLUtils::GetString(pxeAlbum, "artist", sArtist))
 666           sAlbumName.Format("%s - %s", sArtist.c_str(), sTitle.c_str());
 667         else
 668           sAlbumName = sTitle;
 669
 670         CStdString sYear;
 671         if (XMLUtils::GetString(pxeAlbum, "year", sYear))
 672           sAlbumName.Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());
 673
 674         // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
 675         // (e.g., in case we only got one result back and were sent to the detail page)
 676         TiXmlElement* pxeLink = pxeAlbum->FirstChildElement("url");
 677         CScraperUrl scurlAlbum;
 678         if (!pxeLink)
 679           scurlAlbum.ParseString(scurl.m_xml);
 680         for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
 681           scurlAlbum.ParseElement(pxeLink);
 682
 683         if (!scurlAlbum.m_url.size())
 684           continue;
 685
 686         CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);
 687
 688         TiXmlElement* pxeRel = pxeAlbum->FirstChildElement("relevance");
 689         if (pxeRel && pxeRel->FirstChild())
 690         {
 691           const char* szScale = pxeRel->Attribute("scale");
 692           float flScale = szScale ? float(atof(szScale)) : 1;
 693           ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
 694         }
 695
 696         vcali.push_back(ali);
 697       }
 698     }
 699   }
 700   return vcali;
 701 }
 702
 703 // find artist, using fcurl for web fetches
 704 // returns a list of artists (empty if no match or failure)
 705 std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl,
 706   const CStdString &sArtist)
 707 {
 708   CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
 709     "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
 710     Name().c_str(), Path().c_str(),
 711     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 712
 713   // scraper function is given the artist as parameter and
 714   // returns an XML <url> element parseable by CScraperUrl
 715   std::vector<CStdString> extras(1);
 716   g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
 717   CURL::Encode(extras[0]);
 718   CScraperUrl scurl;
 719   vector<CStdString> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);
 720
 721   std::vector<CMusicArtistInfo> vcari;
 722   if (vcsOut.empty() || vcsOut[0].empty())
 723     return vcari;
 724   scurl.ParseString(vcsOut[0]);
 725
 726   // the next function is passed the contents of the returned URL, and returns
 727   // an empty string on failure; on success, returns XML matches in the form:
 728   // <results>
 729   //  <entity>
 730   //   <title>...</title>
 731   //   <year>...</year>
 732   //   <genre>...</genre>
 733   //   <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
 734   //  </entity>
 735   //  ...
 736   // </results>
 737   vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);
 738
 739   // parse the returned XML into a vector of artist objects
 740   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 741   {
 742     CXBMCTinyXML doc;
 743     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 744     if (!doc.RootElement())
 745     {
 746       CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
 747       return vcari;
 748     }
 749     TiXmlHandle xhDoc(&doc);
 750     for (TiXmlElement* pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
 751       pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
 752     {
 753       TiXmlNode* pxnTitle = pxeArtist->FirstChild("title");
 754       if (pxnTitle && pxnTitle->FirstChild())
 755       {
 756         CScraperUrl scurlArtist;
 757
 758         TiXmlElement* pxeLink = pxeArtist->FirstChildElement("url");
 759         if (!pxeLink)
 760           scurlArtist.ParseString(scurl.m_xml);
 761         for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
 762           scurlArtist.ParseElement(pxeLink);
 763
 764         if (!scurlArtist.m_url.size())
 765           continue;
 766
 767         CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
 768         CStdString genre;
 769         XMLUtils::GetString(pxeArtist, "genre", genre);
 770         if (!genre.empty())
 771           ari.GetArtist().genre = StringUtils::Split(genre, g_advancedSettings.m_musicItemSeparator);
 772         XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);
 773
 774         vcari.push_back(ari);
 775       }
 776     }
 777   }
 778   return vcari;
 779 }
 780
 781 // fetch list of episodes from URL (from video database)
 782 EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl)
 783 {
 784   EPISODELIST vcep;
 785   if (scurl.m_url.empty())
 786     return vcep;
 787
 788   CLog::Log(LOGDEBUG, "%s: Searching '%s' using %s scraper "
 789     "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
 790     scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
 791     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 792
 793   vector<CStdString> vcsIn;
 794   vcsIn.push_back(scurl.m_url[0].m_url);
 795   vector<CStdString> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);
 796
 797   // parse the XML response
 798   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 799   {
 800     CXBMCTinyXML doc;
 801     doc.Parse(*i);
 802     if (!doc.RootElement())
 803     {
 804       CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
 805       continue;
 806     }
 807
 808     TiXmlHandle xhDoc(&doc);
 809     for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").
 810       Element(); pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
 811     {
 812       EPISODE ep;
 813       TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
 814       CStdString strEpNum;
 815       if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
 816         XMLUtils::GetString(pxeMovie, "epnum", strEpNum))
 817       {
 818         CScraperUrl &scurlEp(ep.cScraperUrl);
 819         int dot = strEpNum.Find(".");
 820         ep.iEpisode = atoi(strEpNum.c_str());
 821         ep.iSubepisode = (dot > -1) ? atoi(strEpNum.Mid(dot + 1).c_str()) : 0;
 822         if (!XMLUtils::GetString(pxeMovie, "title", scurlEp.strTitle))
 823             scurlEp.strTitle = g_localizeStrings.Get(416);
 824         XMLUtils::GetString(pxeMovie, "id", scurlEp.strId);
 825
 826         for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
 827           scurlEp.ParseElement(pxeLink);
 828
 829         // date must be the format of yyyy-mm-dd
 830         ep.cDate.SetValid(FALSE);
 831         CStdString sDate;
 832         if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
 833         {
 834           tm tm;
 835           if (strptime(sDate, "%Y-%m-%d", &tm))
 836             ep.cDate.SetDate(1900+tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
 837         }
 838         vcep.push_back(ep);
 839       }
 840     }
 841   }
 842
 843   return vcep;
 844 }
 845
 846 // takes URL; returns true and populates video details on success, false otherwise
 847 bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl,
 848   bool fMovie/*else episode*/, CVideoInfoTag &video)
 849 {
 850   CLog::Log(LOGDEBUG, "%s: Reading %s '%s' using %s scraper "
 851     "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
 852     fMovie ? "movie" : "episode", scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
 853     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 854
 855   video.Reset();
 856   CStdString sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
 857   vector<CStdString> vcsIn;
 858   vcsIn.push_back(scurl.strId);
 859   vcsIn.push_back(scurl.m_url[0].m_url);
 860   vector<CStdString> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);
 861
 862   // parse XML output
 863   bool fRet(false);
 864   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 865   {
 866     CXBMCTinyXML doc;
 867     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 868     if (!doc.RootElement())
 869     {
 870       CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
 871       continue;
 872     }
 873
 874     TiXmlHandle xhDoc(&doc);
 875     TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
 876     if (!pxeDetails)
 877     {
 878       CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__);
 879       continue;
 880     }
 881     video.Load(pxeDetails, true/*fChain*/);
 882     fRet = true;  // but don't exit in case of chaining
 883   }
 884   return fRet;
 885 }
 886
 887 // takes a URL; returns true and populates album on success, false otherwise
 888 bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
 889 {
 890   CLog::Log(LOGDEBUG, "%s: Reading '%s' using %s scraper "
 891     "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
 892     scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
 893     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 894
 895   vector<CStdString> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);
 896
 897   // parse the returned XML into an album object (see CAlbum::Load for details)
 898   bool fRet(false);
 899   for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 900   {
 901     CXBMCTinyXML doc;
 902     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 903     if (!doc.RootElement())
 904     {
 905       CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
 906       return false;
 907     }
 908     fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
 909   }
 910   return fRet;
 911 }
 912
 913 // takes a URL (one returned from FindArtist), the original search string, and
 914 // returns true and populates artist on success, false on failure
 915 bool CScraper::GetArtistDetails(CCurlFile &fcurl, const CScraperUrl &scurl,
 916   const CStdString &sSearch, CArtist &artist)
 917 {
 918   if (!scurl.m_url.size())
 919     return false;
 920
 921   CLog::Log(LOGDEBUG, "%s: Reading '%s' ('%s') using %s scraper "
 922     "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
 923     scurl.m_url[0].m_url.c_str(), sSearch.c_str(), Name().c_str(), Path().c_str(),
 924     ADDON::TranslateContent(Content()).c_str(), Version().c_str());
 925
 926   // pass in the original search string for chaining to search other sites
 927   vector<CStdString> vcIn;
 928   vcIn.push_back(sSearch);
 929   CURL::Encode(vcIn[0]);
 930
 931   vector<CStdString> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);
 932
 933   // ok, now parse the xml file
 934   bool fRet(false);
 935   for (vector<CStdString>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
 936   {
 937     CXBMCTinyXML doc;
 938     doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
 939     if (!doc.RootElement())
 940     {
 941       CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
 942       return false;
 943     }
 944
 945     fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());
 946   }
 947   return fRet;
 948 }
 949
 950 }
 951