2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
22 #include "ScraperUrl.h"
23 #include "settings/AdvancedSettings.h"
25 #include "CharsetConverter.h"
27 #include "filesystem/CurlFile.h"
28 #include "filesystem/ZipFile.h"
36 CScraperUrl::CScraperUrl(const CStdString& strUrl)
42 CScraperUrl::CScraperUrl(const TiXmlElement* element)
45 ParseElement(element);
48 CScraperUrl::CScraperUrl()
53 CScraperUrl::~CScraperUrl()
57 void CScraperUrl::Clear()
65 bool CScraperUrl::Parse()
67 CStdString strToParse = m_xml;
69 return ParseString(strToParse);
72 bool CScraperUrl::ParseElement(const TiXmlElement* element)
74 if (!element || !element->FirstChild() ||
75 !element->FirstChild()->Value()) return false;
79 m_xml += stream.str();
82 url.m_url = element->FirstChild()->Value();
83 const char* pSpoof = element->Attribute("spoof");
86 const char* szPost=element->Attribute("post");
87 if (szPost && stricmp(szPost,"yes") == 0)
91 const char* szIsGz=element->Attribute("gzip");
92 if (szIsGz && stricmp(szIsGz,"yes") == 0)
96 const char* pCache = element->Attribute("cache");
100 const char* szType = element->Attribute("type");
101 url.m_type = URL_TYPE_GENERAL;
103 if (szType && stricmp(szType,"season") == 0)
105 url.m_type = URL_TYPE_SEASON;
106 const char* szSeason = element->Attribute("season");
108 url.m_season = atoi(szSeason);
110 const char *aspect = element->Attribute("aspect");
112 url.m_aspect = aspect;
114 m_url.push_back(url);
119 bool CScraperUrl::ParseString(CStdString strUrl)
121 if (strUrl.IsEmpty())
125 doc.Parse(strUrl, TIXML_ENCODING_UNKNOWN);
127 TiXmlElement* pElement = doc.RootElement();
132 url.m_type = URL_TYPE_GENERAL;
136 m_url.push_back(url);
143 ParseElement(pElement);
144 pElement = pElement->NextSiblingElement(pElement->Value());
151 const CScraperUrl::SUrlEntry CScraperUrl::GetFirstThumb(const std::string &type) const
153 for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
155 if (iter->m_type == URL_TYPE_GENERAL && (type.empty() || type == "thumb" || iter->m_aspect == type))
160 result.m_type = URL_TYPE_GENERAL;
161 result.m_post = false;
162 result.m_isgz = false;
163 result.m_season = -1;
167 const CScraperUrl::SUrlEntry CScraperUrl::GetSeasonThumb(int season, const std::string &type) const
169 for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
171 if (iter->m_type == URL_TYPE_SEASON && iter->m_season == season &&
172 (type.empty() || type == "thumb" || iter->m_aspect == type))
177 result.m_type = URL_TYPE_GENERAL;
178 result.m_post = false;
179 result.m_isgz = false;
180 result.m_season = -1;
184 unsigned int CScraperUrl::GetMaxSeasonThumb() const
186 unsigned int maxSeason = 0;
187 for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
189 if (iter->m_type == URL_TYPE_SEASON && iter->m_season > 0 && (unsigned int)iter->m_season > maxSeason)
190 maxSeason = iter->m_season;
195 bool CScraperUrl::Get(const SUrlEntry& scrURL, std::string& strHTML, XFILE::CCurlFile& http, const CStdString& cacheContext)
197 CURL url(scrURL.m_url);
198 http.SetReferer(scrURL.m_spoof);
199 CStdString strCachePath;
202 http.SetContentEncoding("gzip");
204 if (!scrURL.m_cache.IsEmpty())
206 strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
207 "scrapers/" + cacheContext + "/" + scrURL.m_cache);
208 if (XFILE::CFile::Exists(strCachePath))
211 XFILE::auto_buffer buffer;
212 if (file.LoadFile(strCachePath, buffer))
214 strHTML.assign(buffer.get(), buffer.length());
220 CStdString strHTML1(strHTML);
224 CStdString strOptions = url.GetOptions();
225 strOptions = strOptions.substr(1);
228 if (!http.Post(url.Get(), strOptions, strHTML1))
232 if (!http.Get(url.Get(), strHTML1))
236 std::string fileCharset(http.GetServerReportedCharset());
238 if (scrURL.m_url.Find(".zip") > -1 )
240 XFILE::CZipFile file;
241 CStdString strBuffer;
242 int iSize = file.UnpackFromMemory(strBuffer,strHTML,scrURL.m_isgz);
247 strHTML.append(strBuffer.c_str(),strBuffer.data()+iSize);
251 if (!fileCharset.empty() && fileCharset != "UTF-8")
253 std::string converted;
254 if (g_charsetConverter.ToUtf8(fileCharset, strHTML, converted) && !converted.empty())
258 if (!scrURL.m_cache.IsEmpty())
260 CStdString strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
261 "scrapers/" + cacheContext + "/" + scrURL.m_cache);
263 if (file.OpenForWrite(strCachePath,true))
264 file.Write(strHTML.data(),strHTML.size());
270 // XML format of strUrls is:
271 // <TAG><url>...</url>...</TAG> (parsed by ParseElement) or <url>...</url> (ditto)
272 bool CScraperUrl::ParseEpisodeGuide(CStdString strUrls)
274 if (strUrls.IsEmpty())
277 // ok, now parse the xml file
279 doc.Parse(strUrls, TIXML_ENCODING_UNKNOWN);
280 if (doc.RootElement())
282 TiXmlHandle docHandle( &doc );
283 TiXmlElement *link = docHandle.FirstChild("episodeguide").Element();
284 if (link->FirstChildElement("url"))
286 for (link = link->FirstChildElement("url"); link; link = link->NextSiblingElement("url"))
289 else if (link->FirstChild() && link->FirstChild()->Value())
298 CStdString CScraperUrl::GetThumbURL(const CScraperUrl::SUrlEntry &entry)
300 if (entry.m_spoof.IsEmpty())
302 CStdString spoof = entry.m_spoof;
304 return entry.m_url + "|Referer=" + spoof;
307 void CScraperUrl::GetThumbURLs(std::vector<CStdString> &thumbs, const std::string &type, int season) const
309 for (vector<SUrlEntry>::const_iterator iter = m_url.begin(); iter != m_url.end(); ++iter)
311 if (iter->m_aspect == type || type.empty() || type == "thumb" || iter->m_aspect.empty())
313 if ((iter->m_type == CScraperUrl::URL_TYPE_GENERAL && season == -1)
314 || (iter->m_type == CScraperUrl::URL_TYPE_SEASON && iter->m_season == season))
316 thumbs.push_back(GetThumbURL(*iter));