2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #include "filesystem/File.h"
22 #include "filesystem/Directory.h"
23 #include "filesystem/CurlFile.h"
24 #include "AddonManager.h"
25 #include "utils/ScraperParser.h"
26 #include "utils/ScraperUrl.h"
27 #include "utils/CharsetConverter.h"
28 #include "utils/log.h"
29 #include "music/infoscanner/MusicAlbumInfo.h"
30 #include "music/infoscanner/MusicArtistInfo.h"
31 #include "utils/fstrcmp.h"
32 #include "settings/AdvancedSettings.h"
34 #include "utils/URIUtils.h"
35 #include "utils/XMLUtils.h"
36 #include "utils/StringUtils.h"
37 #include "music/MusicDatabase.h"
38 #include "video/VideoDatabase.h"
39 #include "music/Album.h"
40 #include "music/Artist.h"
47 using namespace XFILE;
48 using namespace MUSIC_GRABBER;
49 using namespace VIDEO;
// Lookup table used by both TranslateContent() overloads.
// Each row is { content name, CONTENT_TYPE value, numeric id }; the numeric id
// appears to be the `pretty` member that TranslateContent() hands to
// g_localizeStrings.Get() -- the ContentMapping declaration is not visible here.
// Note that "albums" and "music" both resolve to CONTENT_ALBUMS.
61 static const ContentMapping content[] =
62 {{"unknown", CONTENT_NONE, 231 },
63 {"albums", CONTENT_ALBUMS, 132 },
64 {"music", CONTENT_ALBUMS, 132 },
65 {"artists", CONTENT_ARTISTS, 133 },
66 {"movies", CONTENT_MOVIES, 20342 },
67 {"tvshows", CONTENT_TVSHOWS, 20343 },
68 {"musicvideos", CONTENT_MUSICVIDEOS, 20389 }};
// Converts a content type into a string by walking the `content` table.
// When `pretty` is true and the table row has a non-zero `pretty` id, the
// localized string for that id is returned via g_localizeStrings.Get().
// NOTE(review): the test that matches a row against `type`, and the returns used
// for the non-pretty and not-found cases, are not visible in this listing.
70 CStdString TranslateContent(const CONTENT_TYPE &type, bool pretty/*=false*/)
// sizeof(content)/sizeof(content[0]) is the number of rows in the table
72 for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
74 const ContentMapping &map = content[index];
// a zero `pretty` id means "no localized label available"
77 if (pretty && map.pretty)
78 return g_localizeStrings.Get(map.pretty);
// Converts a content name back into a CONTENT_TYPE by comparing `string`
// against the name of every row in the `content` table (CStdString::Equals).
// NOTE(review): the return statements (match / no match) are not visible in
// this listing.
86 CONTENT_TYPE TranslateContent(const CStdString &string)
88 for (unsigned int index=0; index < sizeof(content)/sizeof(content[0]); ++index)
90 const ContentMapping &map = content[index];
91 if (string.Equals(map.name))
// Maps a content type onto the add-on TYPE of the scraper that handles it
// (albums, artists, movies, music videos, TV shows); ADDON_UNKNOWN is the
// remaining result.
// NOTE(review): the switch header and some case labels are not visible in this
// listing; the pairing below is read off the visible label/return pairs.
97 TYPE ScraperTypeFromContent(const CONTENT_TYPE &content)
102 return ADDON_SCRAPER_ALBUMS;
103 case CONTENT_ARTISTS:
104 return ADDON_SCRAPER_ARTISTS;
106 return ADDON_SCRAPER_MOVIES;
107 case CONTENT_MUSICVIDEOS:
108 return ADDON_SCRAPER_MUSICVIDEOS;
109 case CONTENT_TVSHOWS:
110 return ADDON_SCRAPER_TVSHOWS;
// presumably the default branch
112 return ADDON_UNKNOWN;
116 // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
117 static void CheckScraperError(const TiXmlElement *pxeRoot)
// stricmp() is non-zero when the names differ, so this condition is true for a
// null root or for any root that is not (case-insensitively) "error"
119 if (!pxeRoot || stricmp(pxeRoot->Value(), "error"))
// pull the optional <title> and <message> children into the exception
123 XMLUtils::GetString(pxeRoot, "title", sTitle);
124 XMLUtils::GetString(pxeRoot, "message", sMessage);
125 throw CScraperError(sTitle, sMessage);
// Builds a scraper from its extension descriptor. Reads the "@language",
// "@requiressettings" and "@cachepersistence" values from the extension
// configuration and picks the path content that matches the scraper type.
// The scraper starts out not loaded (m_fLoaded is false).
128 CScraper::CScraper(const cp_extension_t *ext) : CAddon(ext), m_fLoaded(false)
132 m_language = CAddonMgr::Get().GetExtValue(ext->configuration, "@language");
// only the exact value "true" (per CStdString::Equals) enables the flag
133 m_requiressettings = CAddonMgr::Get().GetExtValue(ext->configuration,"@requiressettings").Equals("true");
134 CStdString persistence = CAddonMgr::Get().GetExtValue(ext->configuration, "@cachepersistence");
// leave m_persistence untouched when the attribute is absent/empty
135 if (!persistence.IsEmpty())
136 m_persistence.SetFromTimeString(persistence);
// scraper add-on type -> content type handled by this scraper
140 case ADDON_SCRAPER_ALBUMS:
141 m_pathContent = CONTENT_ALBUMS;
143 case ADDON_SCRAPER_ARTISTS:
144 m_pathContent = CONTENT_ARTISTS;
146 case ADDON_SCRAPER_MOVIES:
147 m_pathContent = CONTENT_MOVIES;
149 case ADDON_SCRAPER_MUSICVIDEOS:
150 m_pathContent = CONTENT_MUSICVIDEOS;
152 case ADDON_SCRAPER_TVSHOWS:
153 m_pathContent = CONTENT_TVSHOWS;
// fallback for any other type (presumably the default branch)
156 m_pathContent = CONTENT_NONE;
161 AddonPtr CScraper::Clone(const AddonPtr &self) const
163 return AddonPtr(new CScraper(*this, self));
166 CScraper::CScraper(const CScraper &rhs, const AddonPtr &self)
167 : CAddon(rhs, self), m_fLoaded(false)
169 m_pathContent = rhs.m_pathContent;
170 m_persistence = rhs.m_persistence;
171 m_requiressettings = rhs.m_requiressettings;
172 m_language = rhs.m_language;
175 bool CScraper::Supports(const CONTENT_TYPE &content) const
177 return Type() == ScraperTypeFromContent(content);
// Records `content` as the path content, parses `xml` and loads the user
// settings from it. Returns m_userSettingsLoaded, i.e. the result of
// SettingsFromXML() on the parsed document.
// NOTE(review): the declaration of `doc` and any early-return guards are not
// visible in this listing.
180 bool CScraper::SetPathSettings(CONTENT_TYPE content, const CStdString& xml)
182 m_pathContent = content;
190 doc.Parse(xml.c_str());
191 m_userSettingsLoaded = SettingsFromXML(doc);
193 return m_userSettingsLoaded;
// Produces the path settings as a string: when `doc` has a root element, that
// element is written to `stream`.
// NOTE(review): how `doc` and `stream` are set up, and the value returned, are
// not visible in this listing.
196 CStdString CScraper::GetPathSettings()
204 if (doc.RootElement())
205 stream << *doc.RootElement();
// Prunes this scraper's cache directory, <m_cachePath>/scrapers/<ID()>/.
// Cached items whose timestamp plus m_persistence is not later than the current
// time are deleted.
210 void CScraper::ClearCache()
212 CStdString strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath, "scrapers");
214 // create scraper cache dir if needed
215 if (!CDirectory::Exists(strCachePath))
216 CDirectory::Create(strCachePath);
// descend into the per-scraper folder, named after the add-on id
218 strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
219 URIUtils::AddSlashAtEnd(strCachePath);
221 if (CDirectory::Exists(strCachePath))
224 CDirectory::GetDirectory(strCachePath,items);
225 for (int i=0;i<items.Size();++i)
// expired: item time + persistence window has already passed
228 if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
229 CFile::Delete(items[i]->GetPath());
// presumably the branch taken when the per-scraper folder does not exist yet
233 CDirectory::Create(strCachePath);
236 // returns a vector of strings: the first is the XML output by the function; the rest
237 // is XML output by chained functions, possibly recursively
238 // the CCurlFile object is passed in so that URL fetches can be canceled from other threads
239 // throws CScraperError abort on internal failures (e.g., parse errors)
240 vector<CStdString> CScraper::Run(const CStdString& function,
241 const CScraperUrl& scrURL,
243 const vector<CStdString>* extras)
246 throw CScraperError();
// run the requested scraper function; an empty result is treated as failure
248 CStdString strXML = InternalRun(function,scrURL,http,extras);
249 if (strXML.IsEmpty())
// an empty NfoUrl result is not logged as an error, but still throws
251 if (function != "NfoUrl")
252 CLog::Log(LOGERROR, "%s: Unable to parse web site",__FUNCTION__);
253 throw CScraperError();
256 CLog::Log(LOGDEBUG,"scraper: %s returned %s",function.c_str(),strXML.c_str());
// output without a UTF-8 declaration is converted before parsing
258 if (!XMLUtils::HasUTF8Declaration(strXML))
259 g_charsetConverter.unknownToUTF8(strXML);
262 doc.Parse(strXML.c_str(),0,TIXML_ENCODING_UTF8);
263 if (!doc.RootElement())
265 CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
266 throw CScraperError();
// element 0 of the result is always this function's own XML
269 vector<CStdString> result;
270 result.push_back(strXML);
271 TiXmlElement* xchain = doc.RootElement()->FirstChildElement();
272 // skip children of the root element until <url> or <chain>
273 while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
274 xchain = xchain->NextSiblingElement();
277 // <chain|url function="...">param</>
278 const char* szFunction = xchain->Attribute("function");
// this local `extras` shadows the parameter and carries the chain's argument
282 vector<CStdString> extras;
283 // for <chain>, pass the contained text as a parameter; for <url>, as URL content
284 if (strcmp(xchain->Value(),"chain")==0)
286 if (xchain->FirstChild())
287 extras.push_back(xchain->FirstChild()->Value());
290 scrURL2.ParseElement(xchain);
291 // Fix for empty chains. $$1 would still contain the
292 // previous value as there is no child of the xml node.
293 // since $$1 will always either contain the data from an
294 // url or the parameters to a chain, we can safely clear it here
296 m_parser.m_param[0].clear();
// chained calls use the non-throwing variant; their output is appended in order
297 vector<CStdString> result2 = RunNoThrow(szFunction,scrURL2,http,&extras);
298 result.insert(result.end(),result2.begin(),result2.end());
300 xchain = xchain->NextSiblingElement();
301 // continue to skip past non-<url> or <chain> elements
302 while (xchain && strcmp(xchain->Value(),"url") && strcmp(xchain->Value(),"chain"))
303 xchain = xchain->NextSiblingElement();
309 // just like Run, but returns an empty list instead of throwing in case of error
310 // don't use in new code; errors should be handled appropriately
311 std::vector<CStdString> CScraper::RunNoThrow(const CStdString& function,
312 const CScraperUrl& url,
313 XFILE::CCurlFile& http,
314 const std::vector<CStdString>* extras)
// stays empty unless Run() completes without throwing
316 std::vector<CStdString> vcs;
319 vcs = Run(function, url, http, extras);
// only CScraperError is caught here; other exception types propagate
321 catch (const CScraperError &sce)
323 ASSERT(sce.FAborted()); // the only kind we should get
// Fills the parser's parameter slots and runs one scraper function.
// Slots [0, scrURL.m_url.size()) receive the fetched content of each URL; the
// `extras` strings are placed in the slots that follow. Returns the string
// produced by m_parser.Parse(function, this).
// NOTE(review): m_parser.m_param is indexed with i and j+i without a bounds
// check visible in this listing -- confirm its capacity covers URLs + extras.
328 CStdString CScraper::InternalRun(const CStdString& function,
329 const CScraperUrl& scrURL,
331 const vector<CStdString>* extras)
333 // walk the list of input URLs and fetch each into parser parameters
335 for (i=0;i<scrURL.m_url.size();++i)
337 CStdString strCurrHTML;
// a failed fetch or an empty body both count as failure for this URL
338 if (!CScraperUrl::Get(scrURL.m_url[i],m_parser.m_param[i],http,ID()) || m_parser.m_param[i].size() == 0)
341 // put the 'extra' parameters into the parser parameter list too
// `i` now equals the number of URLs, so extras start right after them
344 for (unsigned int j=0;j<extras->size();++j)
345 m_parser.m_param[j+i] = (*extras)[j];
348 return m_parser.Parse(function,this);
// Loads the scraper definition from LibPath() into the parser and then walks
// the add-on's dependencies, adding the XML of every installed dependency of
// type ADDON_SCRAPER_LIBRARY to the parser. The outcome is stored in m_fLoaded
// and returned.
351 bool CScraper::Load()
356 bool result=m_parser.Load(LibPath());
359 // TODO: this routine assumes that deps are a single level, and assumes the dep is installed.
360 // 1. Does it make sense to have recursive dependencies?
361 // 2. Should we be checking the dep versions or do we assume it is ok?
362 ADDONDEPS deps = GetDeps();
363 ADDONDEPS::iterator itr = deps.begin();
364 while (itr != deps.end())
// the "xbmc.metadata" dependency is special-cased (handling not visible here)
366 if (itr->first.Equals("xbmc.metadata"))
373 bool bOptional = itr->second.second;
375 if (CAddonMgr::Get().GetAddon((*itr).first, dep))
// only scraper-library dependencies whose file loads are merged into the parser
378 if (dep->Type() == ADDON_SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
379 m_parser.AddDocument(&doc);
394 CLog::Log(LOGWARNING, "failed to load scraper XML");
// remember the outcome and return it in one step
395 return m_fLoaded = result;
// Reports whether a database references this scraper, identified by ID().
// Scrapers that support album or artist content take the first check.
// NOTE(review): the two `db` objects are declared on lines not visible here;
// presumably the first is the music database and the second the video database.
398 bool CScraper::IsInUse() const
400 if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
403 if (db.Open() && db.ScraperInUse(ID()))
409 if (db.Open() && db.ScraperInUse(ID()))
415 // pass in contents of .nfo file; returns URL (possibly empty if none found)
416 // and may populate strId, or throws CScraperError on error
417 CScraperUrl CScraper::NfoUrl(const CStdString &sNfoContent)
419 CScraperUrl scurlRet;
421 // scraper function takes contents of .nfo file, returns XML (see below)
422 vector<CStdString> vcsIn;
423 vcsIn.push_back(sNfoContent);
// Run() throws CScraperError on failure, which propagates to the caller
426 vector<CStdString> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
427 if (vcsOut.empty() || vcsOut[0].empty())
429 if (vcsOut.size() > 1)
430 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
432 // parse returned XML: either <error> element on error, blank on failure,
433 // or <url>...</url> or <url>...</url><id>...</id> on success
434 for (unsigned int i=0; i < vcsOut.size(); ++i)
437 doc.Parse(vcsOut[i], 0, TIXML_ENCODING_UTF8);
// throws when the root element is <error>
438 CheckScraperError(doc.RootElement());
440 if (doc.RootElement())
443 NOTE: Scrapers might return invalid xml with some loose
444 elements (eg. '<url>http://some.url</url><id>123</id>').
445 Since XMLUtils::GetString() is assuming well formed xml
446 with start and end-tags we're not able to use it.
447 Check for the desired Elements instead.
449 TiXmlElement* pxeUrl=NULL;
450 TiXmlElement* pId=NULL;
// well-formed case: <url>/<id> are children of a <details> root
451 if (!strcmp(doc.RootElement()->Value(),"details"))
453 pxeUrl = doc.RootElement()->FirstChildElement("url");
454 pId = doc.RootElement()->FirstChildElement("id");
// loose-element case: look for <id>/<url> directly under the document
458 pId = doc.FirstChildElement("id");
459 pxeUrl = doc.FirstChildElement("url");
461 if (pId && pId->FirstChild())
462 scurlRet.strId = pId->FirstChild()->Value();
// a <url> carrying a function attribute is handled separately (body not visible)
464 if (pxeUrl && pxeUrl->Attribute("function"))
468 scurlRet.ParseElement(pxeUrl);
469 else if (!strcmp(doc.RootElement()->Value(), "url"))
470 scurlRet.ParseElement(doc.RootElement());
479 static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
481 return left.relevance > right.relevance;
484 // fetch list of matching movies sorted by relevance (may be empty);
485 // throws CScraperError on error; first called with fFirst set, then unset if first try fails
486 std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl, const CStdString &sMovie,
489 // prepare parameters for URL creation
490 CStdString sTitle, sTitleYear, sYear;
491 CUtil::CleanString(sMovie, sTitle, sTitleYear, sYear, true/*fRemoveExt*/, fFirst);
// on the retry pass, and always for music videos, dashes become spaces
493 if (!fFirst || Content() == CONTENT_MUSICVIDEOS)
494 sTitle.Replace("-"," ");
496 CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
497 "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sTitle.c_str(),
498 Name().c_str(), Path().c_str(),
499 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
// parameter 1: title converted to the scraper's search encoding and URL-encoded;
// parameter 2 (optional): the year
503 vector<CStdString> vcsIn(1);
504 g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
505 CURL::Encode(vcsIn[0]);
506 if (!sYear.IsEmpty())
507 vcsIn.push_back(sYear);
509 // request a search URL from the title/filename/etc.
511 vector<CStdString> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
512 std::vector<CScraperUrl> vcscurl;
515 CLog::Log(LOGDEBUG, "%s: CreateSearchUrl failed", __FUNCTION__);
516 throw CScraperError();
518 scurl.ParseString(vcsOut[0]);
520 // do the search, and parse the result into a list
522 vcsIn.push_back(scurl.m_url[0].m_url);
523 vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);
// set of "<first url> <title>" keys already emitted, used to drop duplicates
526 std::set<CStdString> stsDupeCheck;
// becomes true once any output contains a <results> element
527 bool fResults(false);
528 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
531 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
532 if (!doc.RootElement())
534 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
535 continue; // might have more valid results later
// throws when the scraper returned an <error> document
538 CheckScraperError(doc.RootElement());
540 TiXmlHandle xhDoc(&doc);
541 TiXmlHandle xhResults = xhDoc.FirstChild("results");
542 if (!xhResults.Element())
544 fResults = true; // even if empty
546 // we need to sort if returned results don't specify 'sorted="yes"'
548 fSort = CStdString(xhResults.Element()->Attribute("sorted")).CompareNoCase("yes") != 0;
550 for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element();
551 pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
553 CScraperUrl scurlMovie;
554 TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
555 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
// an entity is only used when it has both a non-empty title and a non-empty url
556 if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
558 scurlMovie.strTitle = pxnTitle->FirstChild()->Value();
559 XMLUtils::GetString(pxeMovie, "id", scurlMovie.strId);
// collect every consecutive non-empty <url> sibling
561 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
562 scurlMovie.ParseElement(pxeLink);
564 // calculate the relevance of this hit
565 CStdString sCompareTitle = scurlMovie.strTitle;
566 sCompareTitle.ToLower();
567 CStdString sMatchTitle = sTitle;
568 sMatchTitle.ToLower();
571 * Identify the best match by performing a fuzzy string compare on the search term and
572 * the result. Additionally, use the year (if available) to further refine the best match.
573 * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
574 * countries), otherwise it scores 0.
576 CStdString sCompareYear;
577 XMLUtils::GetString(pxeMovie, "year", sCompareYear);
579 double yearScore = 0;
580 if (!sYear.empty() && !sCompareYear.empty())
// 1 - 0.5*|year difference|, clamped so it never drops below 0
581 yearScore = std::max(0.0, 1-0.5*abs(atoi(sYear)-atoi(sCompareYear)));
// final relevance = fuzzy title score + year score
583 scurlMovie.relevance = fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str(), 0.0) + yearScore;
585 // reconstruct a title for the user
586 if (!sCompareYear.empty())
587 scurlMovie.strTitle.AppendFormat(" (%s)", sCompareYear.c_str());
589 CStdString sLanguage;
590 if (XMLUtils::GetString(pxeMovie, "language", sLanguage))
591 scurlMovie.strTitle.AppendFormat(" (%s)", sLanguage.c_str());
593 // filter for dupes from naughty scrapers
// NOTE(review): m_url[0] assumes ParseElement() above stored at least one URL -- confirm
594 if (stsDupeCheck.insert(scurlMovie.m_url[0].m_url + " " + scurlMovie.strTitle).second)
595 vcscurl.push_back(scurlMovie);
601 throw CScraperError(); // scraper aborted
// stable sort keeps the scraper's own order among equally relevant hits
604 std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);
609 // find album by artist, using fcurl for web fetches
610 // returns a list of albums (empty if no match or failure)
611 std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl, const CStdString &sAlbum,
612 const CStdString &sArtist)
614 CLog::Log(LOGDEBUG, "%s: Searching for '%s - %s' using %s scraper "
615 "(path: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
616 sAlbum.c_str(), Name().c_str(), Path().c_str(),
617 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
619 // scraper function is given the album and artist as parameters and
620 // returns an XML <url> element parseable by CScraperUrl
// both parameters are converted to the search encoding, then URL-encoded
621 std::vector<CStdString> extras(2);
622 g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
623 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
624 CURL::Encode(extras[0]);
625 CURL::Encode(extras[1]);
// non-throwing run: a scraper failure yields an empty vcsOut
627 vector<CStdString> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
628 if (vcsOut.size() > 1)
629 CLog::Log(LOGWARNING, "%s: scraper returned multiple results; using first", __FUNCTION__);
631 std::vector<CMusicAlbumInfo> vcali;
632 if (vcsOut.empty() || vcsOut[0].empty())
634 scurl.ParseString(vcsOut[0]);
636 // the next function is passed the contents of the returned URL, and returns
637 // an empty string on failure; on success, returns XML matches in the form:
640 // <title>...</title>
641 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
642 // <artist>...</artist>
644 // <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
648 vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);
650 // parse the returned XML into a vector of album objects
651 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
654 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
655 TiXmlHandle xhDoc(&doc);
// visit every element that follows the first <results>/<entity>
657 for (TiXmlElement* pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
658 pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
661 if (XMLUtils::GetString(pxeAlbum, "title", sTitle))
// display name: "<artist> - <title>" when the entity names an artist
664 CStdString sAlbumName;
665 if (XMLUtils::GetString(pxeAlbum, "artist", sArtist))
666 sAlbumName.Format("%s - %s", sArtist.c_str(), sTitle.c_str());
// the year, when present, is appended in parentheses
671 if (XMLUtils::GetString(pxeAlbum, "year", sYear))
672 sAlbumName.Format("%s (%s)", sAlbumName.c_str(), sYear.c_str());
674 // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
675 // (e.g., in case we only got one result back and were sent to the detail page)
676 TiXmlElement* pxeLink = pxeAlbum->FirstChildElement("url");
677 CScraperUrl scurlAlbum;
679 scurlAlbum.ParseString(scurl.m_xml);
680 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
681 scurlAlbum.ParseElement(pxeLink);
// checks for an album that ended up with no URL (handling not visible here)
683 if (!scurlAlbum.m_url.size())
686 CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);
688 TiXmlElement* pxeRel = pxeAlbum->FirstChildElement("relevance");
689 if (pxeRel && pxeRel->FirstChild())
// relevance is normalised by the optional scale attribute (1 when absent)
691 const char* szScale = pxeRel->Attribute("scale");
692 float flScale = szScale ? float(atof(szScale)) : 1;
693 ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
696 vcali.push_back(ali);
703 // find artist, using fcurl for web fetches
704 // returns a list of artists (empty if no match or failure)
705 std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl,
706 const CStdString &sArtist)
708 CLog::Log(LOGDEBUG, "%s: Searching for '%s' using %s scraper "
709 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__, sArtist.c_str(),
710 Name().c_str(), Path().c_str(),
711 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
713 // scraper function is given the artist as parameter and
714 // returns an XML <url> element parseable by CScraperUrl
// the artist name is converted to the search encoding, then URL-encoded
715 std::vector<CStdString> extras(1);
716 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
717 CURL::Encode(extras[0]);
// non-throwing run: a scraper failure yields an empty vcsOut
719 vector<CStdString> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);
721 std::vector<CMusicArtistInfo> vcari;
722 if (vcsOut.empty() || vcsOut[0].empty())
724 scurl.ParseString(vcsOut[0]);
726 // the next function is passed the contents of the returned URL, and returns
727 // an empty string on failure; on success, returns XML matches in the form:
730 // <title>...</title>
732 // <genre>...</genre>
733 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
737 vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);
739 // parse the returned XML into a vector of artist objects
740 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
743 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
744 if (!doc.RootElement())
746 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
749 TiXmlHandle xhDoc(&doc);
750 for (TiXmlElement* pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
751 pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
// entities without a non-empty <title> are ignored
753 TiXmlNode* pxnTitle = pxeArtist->FirstChild("title");
754 if (pxnTitle && pxnTitle->FirstChild())
756 CScraperUrl scurlArtist;
758 TiXmlElement* pxeLink = pxeArtist->FirstChildElement("url");
// reuses the search URL's XML (the condition guarding this is not visible here)
760 scurlArtist.ParseString(scurl.m_xml);
761 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
762 scurlArtist.ParseElement(pxeLink);
// checks for an artist that ended up with no URL (handling not visible here)
764 if (!scurlArtist.m_url.size())
767 CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
769 XMLUtils::GetString(pxeArtist, "genre", genre);
// the genre text is split on the configured music item separator
771 ari.GetArtist().genre = StringUtils::Split(genre, g_advancedSettings.m_musicItemSeparator);
// the entity's <year> is stored as the artist's strBorn
772 XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);
774 vcari.push_back(ari);
781 // fetch list of episodes from URL (from video database)
782 EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl)
// guard: m_url[0] is dereferenced below
785 if (scurl.m_url.empty())
788 CLog::Log(LOGDEBUG, "%s: Searching '%s' using %s scraper "
789 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
790 scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
791 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
// the first URL is also passed to the scraper function as an extra parameter
793 vector<CStdString> vcsIn;
794 vcsIn.push_back(scurl.m_url[0].m_url);
795 vector<CStdString> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);
797 // parse the XML response
798 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
802 if (!doc.RootElement())
804 CLog::Log(LOGERROR, "%s: Unable to parse XML",__FUNCTION__);
808 TiXmlHandle xhDoc(&doc);
809 for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").
810 Element(); pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
813 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
// an episode needs a <url>, an integer <season> and an <epnum> to be accepted
815 if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
816 XMLUtils::GetString(pxeMovie, "epnum", strEpNum))
818 CScraperUrl &scurlEp(ep.cScraperUrl);
// <epnum> may look like "N.M": atoi() of the whole text gives the episode
// number, the text after the dot gives the sub-episode (0 when there is no dot)
819 int dot = strEpNum.Find(".");
820 ep.iEpisode = atoi(strEpNum.c_str());
821 ep.iSubepisode = (dot > -1) ? atoi(strEpNum.Mid(dot + 1).c_str()) : 0;
// fall back to localized string 416 when the episode has no <title>
822 if (!XMLUtils::GetString(pxeMovie, "title", scurlEp.strTitle))
823 scurlEp.strTitle = g_localizeStrings.Get(416);
824 XMLUtils::GetString(pxeMovie, "id", scurlEp.strId);
826 for ( ; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
827 scurlEp.ParseElement(pxeLink);
829 // date must be the format of yyyy-mm-dd
830 ep.cDate.SetValid(FALSE);
832 if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
// struct tm counts years from 1900 and months from 0, hence the adjustments
835 if (strptime(sDate, "%Y-%m-%d", &tm))
836 ep.cDate.SetDate(1900+tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
846 // takes URL; returns true and populates video details on success, false otherwise
847 bool CScraper::GetVideoDetails(XFILE::CCurlFile &fcurl, const CScraperUrl &scurl,
848 bool fMovie/*else episode*/, CVideoInfoTag &video)
// NOTE(review): scurl.m_url[0] is dereferenced below and no emptiness check is
// visible in this function (GetEpisodeList/GetArtistDetails have one) -- confirm
// that callers always pass a non-empty URL list.
850 CLog::Log(LOGDEBUG, "%s: Reading %s '%s' using %s scraper "
851 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
852 fMovie ? "movie" : "episode", scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
853 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
// movies and episodes use different scraper functions
856 CStdString sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
// extra parameters handed to the scraper: the id, then the first URL
857 vector<CStdString> vcsIn;
858 vcsIn.push_back(scurl.strId);
859 vcsIn.push_back(scurl.m_url[0].m_url);
860 vector<CStdString> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);
864 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
867 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
868 if (!doc.RootElement())
870 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
874 TiXmlHandle xhDoc(&doc);
875 TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
878 CLog::Log(LOGERROR, "%s: Invalid XML file (want <details>)", __FUNCTION__);
// each output document is loaded into the same tag with the chain flag set
881 video.Load(pxeDetails, true/*fChain*/);
882 fRet = true; // but don't exit in case of chaining
887 // takes a URL; returns true and populates album on success, false otherwise
888 bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
// NOTE(review): scurl.m_url[0] is dereferenced below and no emptiness check is
// visible in this function (GetArtistDetails has one) -- confirm that callers
// always pass a non-empty URL list.
890 CLog::Log(LOGDEBUG, "%s: Reading '%s' using %s scraper "
891 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
892 scurl.m_url[0].m_url.c_str(), Name().c_str(), Path().c_str(),
893 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
// non-throwing run: a scraper failure yields an empty vcsOut
895 vector<CStdString> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);
897 // parse the returned XML into an album object (see CAlbum::Load for details)
899 for (CStdStringArray::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
902 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
903 if (!doc.RootElement())
905 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
// the second argument is false for the first document and true for later ones
// (presumably an append/chain flag -- see CAlbum::Load); fRet holds the result
// of the most recent Load()
908 fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
913 // takes a URL (one returned from FindArtist), the original search string, and
914 // returns true and populates artist on success, false on failure
915 bool CScraper::GetArtistDetails(CCurlFile &fcurl, const CScraperUrl &scurl,
916 const CStdString &sSearch, CArtist &artist)
// guard: m_url[0] is dereferenced in the log call below
918 if (!scurl.m_url.size())
921 CLog::Log(LOGDEBUG, "%s: Reading '%s' ('%s') using %s scraper "
922 "(file: '%s', content: '%s', version: '%s')", __FUNCTION__,
923 scurl.m_url[0].m_url.c_str(), sSearch.c_str(), Name().c_str(), Path().c_str(),
924 ADDON::TranslateContent(Content()).c_str(), Version().c_str());
926 // pass in the original search string for chaining to search other sites
// the search string is URL-encoded before being handed to the scraper
927 vector<CStdString> vcIn;
928 vcIn.push_back(sSearch);
929 CURL::Encode(vcIn[0]);
931 vector<CStdString> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);
933 // ok, now parse the xml file
935 for (vector<CStdString>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
938 doc.Parse(*i, 0, TIXML_ENCODING_UTF8);
939 if (!doc.RootElement())
941 CLog::Log(LOGERROR, "%s: Unable to parse XML", __FUNCTION__);
// the second argument is false for the first document and true for later ones
// (presumably an append/chain flag -- see CArtist::Load); fRet holds the result
// of the most recent Load()
945 fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());