2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #include "filesystem/File.h"
22 #include "filesystem/Directory.h"
23 #include "filesystem/CurlFile.h"
24 #include "AddonManager.h"
25 #include "utils/ScraperParser.h"
26 #include "utils/ScraperUrl.h"
27 #include "utils/CharsetConverter.h"
28 #include "utils/log.h"
29 #include "music/infoscanner/MusicAlbumInfo.h"
30 #include "music/infoscanner/MusicArtistInfo.h"
31 #include "utils/fstrcmp.h"
32 #include "settings/AdvancedSettings.h"
34 #include "utils/URIUtils.h"
35 #include "utils/XMLUtils.h"
36 #include "utils/StringUtils.h"
37 #include "music/MusicDatabase.h"
38 #include "video/VideoDatabase.h"
39 #include "music/Album.h"
40 #include "music/Artist.h"
47 using namespace XFILE;
48 using namespace MUSIC_GRABBER;
49 using namespace VIDEO;
// Lookup table shared by both TranslateContent() overloads. Each row pairs a
// content name with its CONTENT_TYPE and a numeric id.
// NOTE(review): the third column is presumably the `pretty` label id that
// TranslateContent() hands to g_localizeStrings.Get() — confirm against the
// ContentMapping declaration.
// "albums" and "music" both resolve to CONTENT_ALBUMS.
61 static const ContentMapping content[] =
62 {{"unknown", CONTENT_NONE, 231 },
63 {"albums", CONTENT_ALBUMS, 132 },
64 {"music", CONTENT_ALBUMS, 132 },
65 {"artists", CONTENT_ARTISTS, 133 },
66 {"movies", CONTENT_MOVIES, 20342 },
67 {"tvshows", CONTENT_TVSHOWS, 20343 },
68 {"musicvideos", CONTENT_MUSICVIDEOS, 20389 }};
// Converts a content type into a string by walking the content table.
// When `pretty` is true and the row carries a non-zero label id, the localized
// label obtained from g_localizeStrings is returned.
// NOTE(review): rows are presumably matched on `type`, and the row's plain name
// is presumably returned when `pretty` is false — confirm.
70 CStdString TranslateContent(const CONTENT_TYPE &type, bool pretty/*=false*/)
// sizeof(content)/sizeof(content[0]) is the number of rows in the table
72 for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
74 const ContentMapping &map = content[index];
77 if (pretty && map.pretty)
78 return g_localizeStrings.Get(map.pretty);
// Converts a content name back into its CONTENT_TYPE by walking the content
// table and comparing the input against each row's name with CStdString::Equals.
// NOTE(review): presumably returns the matching row's type and a fallback value
// when nothing matches — confirm.
86 CONTENT_TYPE TranslateContent(const CStdString &string)
88 for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
90 const ContentMapping &map = content[index];
91 if (string.Equals(map.name))
// Maps a content type to the add-on TYPE of the scraper that handles it
// (ADDON_SCRAPER_ALBUMS / ARTISTS / MOVIES / MUSICVIDEOS / TVSHOWS).
// ADDON_UNKNOWN is the remaining return value, presumably for content types
// without a dedicated scraper type — confirm.
97 TYPE ScraperTypeFromContent(const CONTENT_TYPE &content)
102 return ADDON_SCRAPER_ALBUMS;
103 case CONTENT_ARTISTS:
104 return ADDON_SCRAPER_ARTISTS;
106 return ADDON_SCRAPER_MOVIES;
107 case CONTENT_MUSICVIDEOS:
108 return ADDON_SCRAPER_MUSICVIDEOS;
109 case CONTENT_TVSHOWS:
110 return ADDON_SCRAPER_TVSHOWS;
112 return ADDON_UNKNOWN;
116 // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
// pxeRoot may be NULL. The element name is compared case-insensitively
// (stricmp), so a non-zero result means "not an <error> element".
117 static void CheckScraperError(const TiXmlElement *pxeRoot)
119 if (!pxeRoot || stricmp(pxeRoot->Value(), "error"))
// read the two child values that are passed to the exception
123 XMLUtils::GetString(pxeRoot, "title", sTitle);
124 XMLUtils::GetString(pxeRoot, "message", sMessage);
125 throw CScraperError(sTitle, sMessage);
// Constructs a scraper add-on from its extension descriptor.
// Reads the "@language", "@requiressettings" and "@cachepersistence"
// attributes from the extension configuration and derives the default path
// content from the scraper type. m_fLoaded starts out false.
128 CScraper::CScraper(const cp_extension_t *ext) : CAddon(ext), m_fLoaded(false)
132 m_language = CAddonMgr::Get().GetExtValue(ext->configuration, "@language");
// settings are required only when the attribute equals "true"
133 m_requiressettings = CAddonMgr::Get().GetExtValue(ext->configuration,"@requiressettings").Equals("true");
134 CStdString persistence = CAddonMgr::Get().GetExtValue(ext->configuration, "@cachepersistence");
// an empty attribute leaves m_persistence untouched
135 if (!persistence.empty())
136 m_persistence.SetFromTimeString(persistence);
// NOTE(review): the switch below presumably dispatches on the add-on type — confirm.
140 case ADDON_SCRAPER_ALBUMS:
141 m_pathContent = CONTENT_ALBUMS;
143 case ADDON_SCRAPER_ARTISTS:
144 m_pathContent = CONTENT_ARTISTS;
146 case ADDON_SCRAPER_MOVIES:
147 m_pathContent = CONTENT_MOVIES;
149 case ADDON_SCRAPER_MUSICVIDEOS:
150 m_pathContent = CONTENT_MUSICVIDEOS;
152 case ADDON_SCRAPER_TVSHOWS:
153 m_pathContent = CONTENT_TVSHOWS;
156 m_pathContent = CONTENT_NONE;
// Returns a new CScraper, created with the copy constructor, wrapped in an AddonPtr.
161 AddonPtr CScraper::Clone() const
163 return AddonPtr(new CScraper(*this));
// Copy constructor: copies the path content, cache persistence, settings
// requirement and language from rhs. m_fLoaded is not copied; it starts out
// false on the new object.
166 CScraper::CScraper(const CScraper &rhs)
167 : CAddon(rhs), m_fLoaded(false)
169 m_pathContent = rhs.m_pathContent;
170 m_persistence = rhs.m_persistence;
171 m_requiressettings = rhs.m_requiressettings;
172 m_language = rhs.m_language;
// Returns true when this add-on's type is the scraper type that
// ScraperTypeFromContent() associates with the given content.
175 bool CScraper::Supports(const CONTENT_TYPE &content) const
177 return Type() == ScraperTypeFromContent(content);
// Sets the content type for the current path and loads per-path settings.
// Returns the result of SettingsFromXML(), which is also stored in
// m_userSettingsLoaded.
// NOTE(review): `doc` is presumably built by parsing the `xml` argument — confirm.
180 bool CScraper::SetPathSettings(CONTENT_TYPE content, const CStdString& xml)
182 m_pathContent = content;
191 m_userSettingsLoaded = SettingsFromXML(doc);
193 return m_userSettingsLoaded;
// Serialises the current settings document to a string.
// The root element is streamed out only when the document has one.
// NOTE(review): `doc` is presumably filled from the current settings and the
// stream contents returned — confirm.
196 CStdString CScraper::GetPathSettings()
204 if (doc.RootElement())
205 stream << *doc.RootElement();
// Prunes this scraper's cache directory, "<cache path>/scrapers/<add-on id>/".
// Entries whose timestamp plus m_persistence is not later than the current
// time are deleted.
210 void CScraper::ClearCache()
212 CStdString strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath, "scrapers");
214 // create scraper cache dir if needed
215 if (!CDirectory::Exists(strCachePath))
216 CDirectory::Create(strCachePath);
// descend into the per-scraper subdirectory, named after the add-on id
218 strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
219 URIUtils::AddSlashAtEnd(strCachePath);
221 if (CDirectory::Exists(strCachePath))
224 CDirectory::GetDirectory(strCachePath,items);
225 for (int i=0;i<items.Size();++i)
// an entry has expired once its timestamp + persistence is <= now
228 if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
229 CFile::Delete(items[i]->GetPath());
// NOTE(review): presumably the "directory does not exist yet" branch — confirm.
233 CDirectory::Create(strCachePath);
// Runs the scraper function `function` against `scrURL` and follows any
// <url>/<chain> children of the returned XML by running the function they name.
236 // returns a vector of strings: the first is the XML output by the function; the rest
237 // is XML output by chained functions, possibly recursively
238 // the CCurlFile object is passed in so that URL fetches can be canceled from other threads
239 // throws CScraperError abort on internal failures (e.g., parse errors)
// `extras` is an optional list of additional parser parameters; it is passed
// through to InternalRun().
240 vector<CStdString> CScraper::Run(const CStdString& function,
241 const CScraperUrl& scrURL,
243 const vector<CStdString>* extras)
// NOTE(review): presumably thrown when the scraper cannot be loaded — confirm.
246 throw CScraperError();
248 CStdString strXML = InternalRun(function,scrURL,http,extras);
// the error is logged for every function except NfoUrl and ResolveIDToUrl
251 if (function != "NfoUrl" && function != "ResolveIDToUrl")
252 CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
253 throw CScraperError();
256 CLog::Log(LOGDEBUG,"scraper: %s returned %s",function.c_str(),strXML.c_str());
259 doc.Parse(strXML, TIXML_ENCODING_UNKNOWN);
260 if (!doc.RootElement())
262 CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
263 throw CScraperError();
// the function's own output always comes first in the result
266 vector<CStdString> result;
267 result.push_back(strXML);
268 TiXmlElement* xchain = doc.RootElement()->FirstChildElement();
269 // skip children of the root element until <url> or <chain>
270 while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
271 xchain = xchain->NextSiblingElement();
274 // <chain|url function="...">param</>
275 const char* szFunction = xchain->Attribute("function");
// this local list is what gets passed to the chained call below; it shadows
// the `extras` parameter
279 vector<CStdString> extras;
280 // for <chain>, pass the contained text as a parameter; for <url>, as URL content
281 if (strcmp(xchain->Value(),"chain")==0)
283 if (xchain->FirstChild())
284 extras.push_back(xchain->FirstChild()->Value());
287 scrURL2.ParseElement(xchain);
288 // Fix for empty chains. $$1 would still contain the
289 // previous value as there is no child of the xml node.
290 // since $$1 will always either contain the data from an
291 // url or the parameters to a chain, we can safely clear it here
293 m_parser.m_param[0].clear();
// chained calls go through RunNoThrow and their output is appended in order
294 vector<CStdString> result2 = RunNoThrow(szFunction,scrURL2,http,&extras);
295 result.insert(result.end(),result2.begin(),result2.end());
297 xchain = xchain->NextSiblingElement();
298 // continue to skip past non-<url> or <chain> elements
299 while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
300 xchain = xchain->NextSiblingElement();
306 // just like Run, but returns an empty list instead of throwing in case of error
307 // don't use in new code; errors should be handled appropriately
// Parameters are forwarded unchanged to Run().
308 std::vector<CStdString> CScraper::RunNoThrow(const CStdString& function,
309 const CScraperUrl& url,
310 XFILE::CCurlFile& http,
311 const std::vector<CStdString>* extras)
313 std::vector<CStdString> vcs;
316 vcs = Run(function, url, http, extras);
// only CScraperError is caught here
318 catch (const CScraperError &sce)
320 ASSERT(sce.FAborted()); // the only kind we should get
// Fills the parser parameters and runs one scraper function.
// Each URL in scrURL is fetched into m_parser.m_param[i]; the `extras` strings
// are then stored in the slots that follow the fetched pages. Returns whatever
// m_parser.Parse() produces for `function`.
// NOTE(review): m_param is indexed by i and j+i with no bound check visible
// here — confirm that the URL count plus extras count cannot exceed the
// parser's parameter capacity.
325 CStdString CScraper::InternalRun(const CStdString& function,
326 const CScraperUrl& scrURL,
328 const vector<CStdString>* extras)
330 // walk the list of input URLs and fetch each into parser parameters
332 for (i=0;i<scrURL.m_url.size();++i)
334 CStdString strCurrHTML;
// a failed fetch and an empty page are treated alike
335 if (!CScraperUrl::Get(scrURL.m_url[i],m_parser.m_param[i],http,ID()) || m_parser.m_param[i].size() == 0)
338 // put the 'extra' parameters into the parser parameter list too
341 for (unsigned int j=0;j<extras->size();++j)
342 m_parser.m_param[j+i] = (*extras)[j];
345 return m_parser.Parse(function,this);
// Loads the scraper definition from LibPath() into m_parser, then walks the
// add-on's dependencies and adds the XML of every dependency that is a scraper
// library (ADDON_SCRAPER_LIBRARY) to the parser.
// The outcome is stored in m_fLoaded and returned.
348 bool CScraper::Load()
353 bool result=m_parser.Load(LibPath());
356 // TODO: this routine assumes that deps are a single level, and assumes the dep is installed.
357 // 1. Does it make sense to have recursive dependencies?
358 // 2. Should we be checking the dep versions or do we assume it is ok?
359 ADDONDEPS deps = GetDeps();
360 ADDONDEPS::iterator itr = deps.begin();
361 while (itr != deps.end())
// NOTE(review): the "xbmc.metadata" dependency is presumably skipped — confirm.
363 if (itr->first.Equals("xbmc.metadata"))
// second.second of the dependency entry is read as the "optional" flag
370 bool bOptional = itr->second.second;
372 if (CAddonMgr::Get().GetAddon((*itr).first, dep))
// only scraper libraries whose XML file loads are merged into the parser
375 if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
376 m_parser.AddDocument(&doc);
391 CLog::Log(LOGWARNING, "failed to load scraper XML");
392 return m_fLoaded = result;
// Reports whether this scraper is referenced in a library database, by opening
// a database and asking ScraperInUse() for this add-on id.
// NOTE(review): presumably the music database is queried for album/artist
// scrapers and the video database otherwise — confirm.
395 bool CScraper::IsInUse() const
397 if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
400 if (db.Open() && db.ScraperInUse(ID()))
406 if (db.Open() && db.ScraperInUse(ID()))
// Returns m_parser.IsNoop().
// Can throw CScraperError.
// NOTE(review): the throw presumably happens when the scraper cannot be
// loaded — confirm.
412 bool CScraper::IsNoop()
415 throw CScraperError();
417 return m_parser.IsNoop();
420 // pass in contents of .nfo file; returns URL (possibly empty if none found)
421 // and may populate strId, or throws CScraperError on error
// Implemented by running the scraper's "NfoUrl" function with the .nfo contents
// as its only extra parameter and parsing the XML it returns.
422 CScraperUrl CScraper::NfoUrl(const CStdString &sNfoContent)
424 CScraperUrl scurlRet;
429 // scraper function takes contents of .nfo file, returns XML (see below)
430 vector<CStdString> vcsIn;
431 vcsIn.push_back(sNfoContent);
434 vector<CStdString> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
435 if (vcsOut.empty() || vcsOut[0].empty())
// NOTE(review): the warning says "using first", yet the loop below walks
// every returned result — confirm which behaviour is intended.
437 if (vcsOut.size() > 1)
438 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
440 // parse returned XML: either <error> element on error, blank on failure,
441 // or <url>...</url> or <url>...</url><id>...</id> on success
442 for (unsigned int i=0; i < vcsOut.size(); ++i)
445 doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
// throws if the scraper reported an <error> root element
446 CheckScraperError(doc.RootElement());
448 if (doc.RootElement())
451 NOTE: Scrapers might return invalid xml with some loose
452 elements (eg. '<url>http://some.url</url><id>123</id>').
453 Since XMLUtils::GetString() is assuming well formed xml
454 with start and end-tags we're not able to use it.
455 Check for the desired Elements instead.
457 TiXmlElement* pxeUrl=NULL;
458 TiXmlElement* pId=NULL;
// <url>/<id> are looked up under a <details> root, or directly on the document
459 if (!strcmp(doc.RootElement()->Value(),"details"))
461 pxeUrl = doc.RootElement()->FirstChildElement("url");
462 pId = doc.RootElement()->FirstChildElement("id");
466 pId = doc.FirstChildElement("id");
467 pxeUrl = doc.FirstChildElement("url");
469 if (pId && pId->FirstChild())
470 scurlRet.strId = pId->FirstChild()->Value();
// a <url> carrying a function attribute is tested for separately from a plain <url>
472 if (pxeUrl && pxeUrl->Attribute("function"))
476 scurlRet.ParseElement(pxeUrl);
477 else if (!strcmp(doc.RootElement()->Value(), "url"))
478 scurlRet.ParseElement(doc.RootElement());
// Resolves an external ID to a scraper URL by running the scraper's
// "ResolveIDToUrl" function with the ID as its only extra parameter.
// The returned XML is parsed with the same steps as in NfoUrl(); strId may be
// populated as well. CheckScraperError() throws CScraperError when the scraper
// returns an <error> root element.
487 CScraperUrl CScraper::ResolveIDToUrl(const CStdString& externalID)
489 CScraperUrl scurlRet;
491 // scraper function takes an external ID, returns XML (see below)
492 vector<CStdString> vcsIn;
493 vcsIn.push_back(externalID);
496 vector<CStdString> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn);
497 if (vcsOut.empty() || vcsOut[0].empty())
// NOTE(review): the warning says "using first", yet the loop below walks
// every returned result — confirm which behaviour is intended.
499 if (vcsOut.size() > 1)
500 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
502 // parse returned XML: either <error> element on error, blank on failure,
503 // or <url>...</url> or <url>...</url><id>...</id> on success
504 for (unsigned int i=0; i < vcsOut.size(); ++i)
507 doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
// throws if the scraper reported an <error> root element
508 CheckScraperError(doc.RootElement());
510 if (doc.RootElement())
513 NOTE: Scrapers might return invalid xml with some loose
514 elements (eg. '<url>http://some.url</url><id>123</id>').
515 Since XMLUtils::GetString() is assuming well formed xml
516 with start and end-tags we're not able to use it.
517 Check for the desired Elements instead.
519 TiXmlElement* pxeUrl=NULL;
520 TiXmlElement* pId=NULL;
// <url>/<id> are looked up under a <details> root, or directly on the document
521 if (!strcmp(doc.RootElement()->Value(),"details"))
523 pxeUrl = doc.RootElement()->FirstChildElement("url");
524 pId = doc.RootElement()->FirstChildElement("id");
528 pId = doc.FirstChildElement("id");
529 pxeUrl = doc.FirstChildElement("url");
531 if (pId && pId->FirstChild())
532 scurlRet.strId = pId->FirstChild()->Value();
// a <url> carrying a function attribute is tested for separately from a plain <url>
534 if (pxeUrl && pxeUrl->Attribute("function"))
538 scurlRet.ParseElement(pxeUrl);
539 else if (!strcmp(doc.RootElement()->Value(), "url"))
540 scurlRet.ParseElement(doc.RootElement());
// Ordering predicate for search results: `left` sorts before `right` when its
// relevance is strictly greater, i.e. descending relevance.
549 static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
551 return left.relevance > right.relevance;
554 // fetch list of matching movies sorted by relevance (may be empty);
555 // throws CScraperError on error; first called with fFirst set, then unset if first try fails
// Works in two scraper steps: "CreateSearchUrl" turns the cleaned title (and
// year) into a search URL, then "GetSearchResults" turns the fetched page into
// <results>/<entity> XML, which is parsed into CScraperUrl entries.
556 std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl, const CStdString &sMovie,
559 // prepare parameters for URL creation
560 CStdString sTitle, sTitleYear, sYear;
561 CUtil::CleanString(sMovie, sTitle, sTitleYear, sYear, true/*fRemoveExt*/, fFirst);
563 CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
564 "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sTitle.c_str(),
565 Name().c_str(), Path().c_str(),
566 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
568 std::vector<CScraperUrl> vcscurl;
// normalise the title: dashes become spaces, everything lower-case
573 StringUtils::Replace(sTitle, '-',' ');
575 StringUtils::ToLower(sTitle);
// first parameter: the title converted to the scraper's search encoding and
// URL-encoded; second parameter: the year
577 vector<CStdString> vcsIn(1);
578 g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
579 CURL::Encode(vcsIn[0]);
581 vcsIn.push_back(sYear);
583 // request a search URL from the title/filename/etc.
585 vector<CStdString> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
588 CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
589 throw CScraperError();
590 scurl.ParseString(vcsOut[0]);
593 // do the search, and parse the result into a list
// NOTE(review): m_url[0] is read with no emptiness check visible here —
// confirm ParseString() always yields at least one URL at this point.
595 vcsIn.push_back(scurl.m_url[0].m_url);
596 vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);
// stsDupeCheck holds "<first url> <title>" keys of the entries already accepted
599 std::set<CStdString> stsDupeCheck;
600 bool fResults(false);
601 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
604 doc.Parse(*i, TIXML_ENCODING_UTF8);
605 if (!doc.RootElement())
607 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
608 continue; // might have more valid results later
611 CheckScraperError(doc.RootElement());
613 TiXmlHandle xhDoc(&doc);
614 TiXmlHandle xhResults = xhDoc.FirstChild("results");
615 if (!xhResults.Element())
617 fResults = true; // even if empty
619 // we need to sort if returned results don't specify 'sorted="yes"'
622 const char *sorted = xhResults.Element()->Attribute("sorted");
624 fSort = !StringUtils::EqualsNoCase(sorted, "yes");
627 for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element();
628 pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
630 CScraperUrl scurlMovie;
631 TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
632 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
// an entity is used only when both its title and its first url are non-empty
633 if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
635 scurlMovie.strTitle = pxnTitle->FirstChild()->Value();
636 XMLUtils::GetString(pxeMovie, "id", scurlMovie.strId);
// collect consecutive non-empty <url> elements of this entity
638 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
639 scurlMovie.ParseElement(pxeLink);
641 // calculate the relevance of this hit
642 CStdString sCompareTitle = scurlMovie.strTitle;
643 StringUtils::ToLower(sCompareTitle);
644 CStdString sMatchTitle = sTitle;
645 StringUtils::ToLower(sMatchTitle);
648 * Identify the best match by performing a fuzzy string compare on the search term and
649 * the result. Additionally, use the year (if available) to further refine the best match.
650 * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
651 * countries), otherwise it scores 0.
653 CStdString sCompareYear;
654 XMLUtils::GetString(pxeMovie, "year", sCompareYear);
656 double yearScore = 0;
657 if (!sYear.empty() && !sCompareYear.empty())
658 yearScore = std::max(0.0, 1-0.5*abs(atoi(sYear)-atoi(sCompareYear)));
// relevance = fuzzy title similarity + year score
660 scurlMovie.relevance = fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str(), 0.0) + yearScore;
662 // reconstruct a title for the user
663 if (!sCompareYear.empty())
664 scurlMovie.strTitle += StringUtils::Format(" (%s)", sCompareYear.c_str());
666 CStdString sLanguage;
667 if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty())
668 scurlMovie.strTitle += StringUtils::Format(" (%s)", sLanguage.c_str());
670 // filter for dupes from naughty scrapers
671 if (stsDupeCheck.insert(scurlMovie.m_url[0].m_url + " " + scurlMovie.strTitle).second)
672 vcscurl.push_back(scurlMovie);
678 throw CScraperError(); // scraper aborted
// stable_sort keeps the scraper's order among results of equal relevance
681 std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);
686 // find album by artist, using fcurl for web fetches
687 // returns a list of albums (empty if no match or failure)
// Works in two scraper steps: "CreateAlbumSearchUrl" builds the search URL from
// the album and artist, then "GetAlbumSearchResults" returns
// <results>/<entity> XML that is parsed into CMusicAlbumInfo objects.
// Both steps go through RunNoThrow().
688 std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl, const CStdString &sAlbum,
689 const CStdString &sArtist)
691 CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper "
692 "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
693 sAlbum.c_str(), Name().c_str(), Path().c_str(),
694 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
696 std::vector<CMusicAlbumInfo> vcali;
700 // scraper function is given the album and artist as parameters and
701 // returns an XML <url> element parseable by CScraperUrl
702 std::vector<CStdString> extras(2);
// both parameters are converted to the scraper's search encoding, then URL-encoded
703 g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
704 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
705 CURL::Encode(extras[0]);
706 CURL::Encode(extras[1]);
708 vector<CStdString> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
709 if (vcsOut.size() > 1)
710 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
712 if (vcsOut.empty() || vcsOut[0].empty())
714 scurl.ParseString(vcsOut[0]);
716 // the next function is passed the contents of the returned URL, and returns
717 // an empty string on failure; on success, returns XML matches in the form:
720 // <title>...</title>
721 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
722 // <artist>...</artist>
724 // <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
728 vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);
730 // parse the returned XML into a vector of album objects
731 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
734 doc.Parse(*i, TIXML_ENCODING_UTF8);
735 TiXmlHandle xhDoc(&doc);
737 for (TiXmlElement* pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
738 pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
// entities without a non-empty <title> are not used
741 if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty())
// display name is built as "artist - title" and, when a year is given,
// extended to "<name> (year)"
744 CStdString sAlbumName;
745 if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty())
746 sAlbumName = StringUtils::Format("%s - %s", sArtist.c_str(), sTitle.c_str());
751 if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty())
752 sAlbumName = StringUtils::Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());
754 // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
755 // (e.g., in case we only got one result back and were sent to the detail page)
756 TiXmlElement* pxeLink = pxeAlbum->FirstChildElement("url");
757 CScraperUrl scurlAlbum;
759 scurlAlbum.ParseString(scurl.m_xml);
760 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
761 scurlAlbum.ParseElement(pxeLink);
763 if (!scurlAlbum.m_url.size())
766 CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);
768 TiXmlElement* pxeRel = pxeAlbum->FirstChildElement("relevance");
769 if (pxeRel && pxeRel->FirstChild())
771 const char* szScale = pxeRel->Attribute("scale");
// NOTE(review): atof() yields 0 for scale="0" or a non-numeric scale, which
// makes the division below a division by zero — confirm whether this needs a guard.
772 float flScale = szScale ? float(atof(szScale)) : 1;
773 ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
776 vcali.push_back(ali);
783 // find artist, using fcurl for web fetches
784 // returns a list of artists (empty if no match or failure)
// Works in two scraper steps: "CreateArtistSearchUrl" builds the search URL
// from the artist name, then "GetArtistSearchResults" returns
// <results>/<entity> XML that is parsed into CMusicArtistInfo objects.
// Both steps go through RunNoThrow().
785 std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl,
786 const CStdString &sArtist)
788 CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
789 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
790 Name().c_str(), Path().c_str(),
791 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
793 std::vector<CMusicArtistInfo> vcari;
797 // scraper function is given the artist as parameter and
798 // returns an XML <url> element parseable by CScraperUrl
799 std::vector<CStdString> extras(1);
// the artist name is converted to the scraper's search encoding, then URL-encoded
800 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
801 CURL::Encode(extras[0]);
803 vector<CStdString> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);
805 if (vcsOut.empty() || vcsOut[0].empty())
807 scurl.ParseString(vcsOut[0]);
809 // the next function is passed the contents of the returned URL, and returns
810 // an empty string on failure; on success, returns XML matches in the form:
813 // <title>...</title>
815 // <genre>...</genre>
816 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
820 vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);
822 // parse the returned XML into a vector of artist objects
823 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
826 doc.Parse(*i, TIXML_ENCODING_UTF8);
827 if (!doc.RootElement())
829 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
832 TiXmlHandle xhDoc(&doc);
833 for (TiXmlElement* pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
834 pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
// entities without a non-empty <title> are not used
836 TiXmlNode* pxnTitle = pxeArtist->FirstChild("title");
837 if (pxnTitle && pxnTitle->FirstChild())
839 CScraperUrl scurlArtist;
841 TiXmlElement* pxeLink = pxeArtist->FirstChildElement("url");
// NOTE(review): the search URL is presumably reused when the entity has no
// <url> of its own, as in FindAlbum — confirm.
843 scurlArtist.ParseString(scurl.m_xml);
844 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
845 scurlArtist.ParseElement(pxeLink);
847 if (!scurlArtist.m_url.size())
850 CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
852 XMLUtils::GetString(pxeArtist, "genre", genre);
// the genre string is split on the configured music item separator;
// the entity's <year> is stored in strBorn
854 ari.GetArtist().genre = StringUtils::Split(genre, g_advancedSettings.m_musicItemSeparator);
855 XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);
857 vcari.push_back(ari);
864 // fetch list of episodes from URL (from video database)
// Runs the scraper's "GetEpisodeList" function (via RunNoThrow) with the first
// URL as parameter and parses each <episodeguide>/<episode> element of the
// output into an episode entry: season, episode, sub-episode, title, id, URLs
// and air date.
865 EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl)
// the emptiness test comes before the m_url[0] accesses below
868 if (scurl.m_url.empty())
871 CLog::Log(LOGDEBUG, "%s: Searching '%s' using %s scraper "
872 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
873 scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
874 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
876 vector<CStdString> vcsIn;
877 vcsIn.push_back(scurl.m_url[0].m_url);
878 vector<CStdString> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);
880 // parse the XML response
881 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
885 if (!doc.RootElement())
887 CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
891 TiXmlHandle xhDoc(&doc);
892 for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").
893 Element(); pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
896 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
// an episode needs a <url>, a <season> and a non-empty <epnum>
898 if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
899 XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty())
901 CScraperUrl &scurlEp(ep.cScraperUrl);
// <epnum> may be "E" or "E.S": the part before the dot is the episode number,
// the part after it the sub-episode (0 when there is no dot)
902 size_t dot = strEpNum.find(".");
903 ep.iEpisode = atoi(strEpNum.c_str());
904 ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0;
// a missing or empty title is replaced by localized string 416
905 if (!XMLUtils::GetString(pxeMovie, "title", scurlEp.strTitle) || scurlEp.strTitle.empty() )
906 scurlEp.strTitle = g_localizeStrings.Get(416);
907 XMLUtils::GetString(pxeMovie, "id", scurlEp.strId);
909 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
910 scurlEp.ParseElement(pxeLink);
912 // date must be the format of yyyy-mm-dd
913 ep.cDate.SetValid(FALSE);
// only 10-character <aired> values are handed to strptime
915 if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
// struct tm stores years since 1900 and zero-based months, hence the offsets
918 if (strptime(sDate, "%Y-%m-%d", &tm))
919 ep.cDate.SetDate(1900+tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
929 // takes URL; returns true and populates video details on success, false otherwise
// Runs "GetDetails" (fMovie true) or "GetEpisodeDetails" (fMovie false) via
// RunNoThrow with the id and first URL as parameters, then loads every
// returned <details> document into `video`.
// NOTE(review): scurl.m_url[0] is read with no emptiness check visible here —
// confirm callers always pass at least one URL.
930 bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl,
931 bool fMovie/*else episode*/, CVideoInfoTag &video)
933 CLog::Log(LOGDEBUG, "%s: Reading %s '%s' using %s scraper "
934 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
935 fMovie ? "movie" : "episode", scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
936 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
939 CStdString sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
940 vector<CStdString> vcsIn;
941 vcsIn.push_back(scurl.strId);
942 vcsIn.push_back(scurl.m_url[0].m_url);
943 vector<CStdString> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);
947 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
950 doc.Parse(*i, TIXML_ENCODING_UTF8);
951 if (!doc.RootElement())
953 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
957 TiXmlHandle xhDoc(&doc);
958 TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
961 CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__);
964 video.Load(pxeDetails, true/*fChain*/);
965 fRet = true; // but don't exit in case of chaining
970 // takes a URL; returns true and populates album on success, false otherwise
// Runs the scraper's "GetAlbumDetails" function via RunNoThrow and feeds each
// returned document to CAlbum::Load().
// NOTE(review): scurl.m_url[0] is read with no emptiness check visible here —
// confirm callers always pass at least one URL.
971 bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
973 CLog::Log(LOGDEBUG, "%s: Reading '%s' using %s scraper "
974 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
975 scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
976 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
978 vector<CStdString> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);
980 // parse the returned XML into an album object (see CAlbum::Load for details)
982 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
985 doc.Parse(*i, TIXML_ENCODING_UTF8);
986 if (!doc.RootElement())
988 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
// the second argument is false for the first document and true for the ones
// after it; fRet is overwritten by each Load() call, so it ends up holding the
// result of the last one
991 fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
996 // takes a URL (one returned from FindArtist), the original search string, and
997 // returns true and populates artist on success, false on failure
// Runs the scraper's "GetArtistDetails" function via RunNoThrow and feeds each
// returned document to CArtist::Load().
998 bool CScraper::GetArtistDetails(CCurlFile &fcurl, const CScraperUrl &scurl,
999 const CStdString &sSearch, CArtist &artist)
// the emptiness test comes before the m_url[0] access in the log call below
1001 if (!scurl.m_url.size())
1004 CLog::Log(LOGDEBUG, "%s: Reading '%s' ('%s') using %s scraper "
1005 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
1006 scurl.m_url[0].m_url.c_str(), sSearch.c_str(), Name().c_str(), Path().c_str(),
1007 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
1009 // pass in the original search string for chaining to search other sites
1010 vector<CStdString> vcIn;
1011 vcIn.push_back(sSearch);
// the search string is URL-encoded before being handed to the scraper
1012 CURL::Encode(vcIn[0]);
1014 vector<CStdString> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);
1016 // ok, now parse the xml file
1018 for (vector<CStdString>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1021 doc.Parse(*i, TIXML_ENCODING_UTF8);
1022 if (!doc.RootElement())
1024 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
// the second argument is false for the first document and true for the ones
// after it; fRet is overwritten by each Load() call
1028 fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());