code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/ScraperUrl.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20
  21 #include "XMLUtils.h"
  22 #include "ScraperUrl.h"
  23 #include "settings/AdvancedSettings.h"
  24 #include "HTMLUtil.h"
  25 #include "CharsetConverter.h"
  26 #include "URL.h"
  27 #include "filesystem/CurlFile.h"
  28 #include "filesystem/ZipFile.h"
  29 #include "URIUtils.h"
  30
  31 #include <cstring>
  32 #include <sstream>
  33
  34 using namespace std;
  35
  36 CScraperUrl::CScraperUrl(const CStdString& strUrl)
  37 {
  38   relevance = 0;
  39   ParseString(strUrl);
  40 }
  41
  42 CScraperUrl::CScraperUrl(const TiXmlElement* element)
  43 {
  44   relevance = 0;
  45   ParseElement(element);
  46 }
  47
  48 CScraperUrl::CScraperUrl()
  49 {
  50   relevance = 0;
  51 }
  52
  53 CScraperUrl::~CScraperUrl()
  54 {
  55 }
  56
  57 void CScraperUrl::Clear()
  58 {
  59   m_url.clear();
  60   m_spoof.clear();
  61   m_xml.clear();
  62   relevance = 0;
  63 }
  64
  65 bool CScraperUrl::Parse()
  66 {
  67   CStdString strToParse = m_xml;
  68   m_xml.clear();
  69   return ParseString(strToParse);
  70 }
  71
  72 bool CScraperUrl::ParseElement(const TiXmlElement* element)
  73 {
  74   if (!element || !element->FirstChild() ||
  75       !element->FirstChild()->Value()) return false;
  76
  77   stringstream stream;
  78   stream << *element;
  79   m_xml += stream.str();
  80
  81   SUrlEntry url;
  82   url.m_url = element->FirstChild()->Value();
  83   const char* pSpoof = element->Attribute("spoof");
  84   if (pSpoof)
  85     url.m_spoof = pSpoof;
  86   const char* szPost=element->Attribute("post");
  87   if (szPost && stricmp(szPost,"yes") == 0)
  88     url.m_post = true;
  89   else
  90     url.m_post = false;
  91   const char* szIsGz=element->Attribute("gzip");
  92   if (szIsGz && stricmp(szIsGz,"yes") == 0)
  93     url.m_isgz = true;
  94   else
  95     url.m_isgz = false;
  96   const char* pCache = element->Attribute("cache");
  97   if (pCache)
  98     url.m_cache = pCache;
  99
 100   const char* szType = element->Attribute("type");
 101   url.m_type = URL_TYPE_GENERAL;
 102   url.m_season = -1;
 103   if (szType && stricmp(szType,"season") == 0)
 104   {
 105     url.m_type = URL_TYPE_SEASON;
 106     const char* szSeason = element->Attribute("season");
 107     if (szSeason)
 108       url.m_season = atoi(szSeason);
 109   }
 110   const char *aspect = element->Attribute("aspect");
 111   if (aspect)
 112     url.m_aspect = aspect;
 113
 114   m_url.push_back(url);
 115
 116   return true;
 117 }
 118
 119 bool CScraperUrl::ParseString(CStdString strUrl)
 120 {
 121   if (strUrl.IsEmpty())
 122     return false;
 123
 124   CXBMCTinyXML doc;
 125   doc.Parse(strUrl, TIXML_ENCODING_UNKNOWN);
 126
 127   TiXmlElement* pElement = doc.RootElement();
 128   if (!pElement)
 129   {
 130     SUrlEntry url;
 131     url.m_url = strUrl;
 132     url.m_type = URL_TYPE_GENERAL;
 133     url.m_season = -1;
 134     url.m_post = false;
 135     url.m_isgz = false;
 136     m_url.push_back(url);
 137     m_xml = strUrl;
 138   }
 139   else
 140   {
 141     while (pElement)
 142     {
 143       ParseElement(pElement);
 144       pElement = pElement->NextSiblingElement(pElement->Value());
 145     }
 146   }
 147
 148   return true;
 149 }
 150
 151 const CScraperUrl::SUrlEntry CScraperUrl::GetFirstThumb(const std::string &type) const
 152 {
 153   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 154   {
 155     if (iter->m_type == URL_TYPE_GENERAL && (type.empty() || type == "thumb" || iter->m_aspect == type))
 156       return *iter;
 157   }
 158
 159   SUrlEntry result;
 160   result.m_type = URL_TYPE_GENERAL;
 161   result.m_post = false;
 162   result.m_isgz = false;
 163   result.m_season = -1;
 164   return result;
 165 }
 166
 167 const CScraperUrl::SUrlEntry CScraperUrl::GetSeasonThumb(int season, const std::string &type) const
 168 {
 169   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 170   {
 171     if (iter->m_type == URL_TYPE_SEASON && iter->m_season == season &&
 172        (type.empty() || type == "thumb" || iter->m_aspect == type))
 173       return *iter;
 174   }
 175
 176   SUrlEntry result;
 177   result.m_type = URL_TYPE_GENERAL;
 178   result.m_post = false;
 179   result.m_isgz = false;
 180   result.m_season = -1;
 181   return result;
 182 }
 183
 184 unsigned int CScraperUrl::GetMaxSeasonThumb() const
 185 {
 186   unsigned int maxSeason = 0;
 187   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 188   {
 189     if (iter->m_type == URL_TYPE_SEASON && iter->m_season > 0 && (unsigned int)iter->m_season > maxSeason)
 190       maxSeason = iter->m_season;
 191   }
 192   return maxSeason;
 193 }
 194
 195 bool CScraperUrl::Get(const SUrlEntry& scrURL, std::string& strHTML, XFILE::CCurlFile& http, const CStdString& cacheContext)
 196 {
 197   CURL url(scrURL.m_url);
 198   http.SetReferer(scrURL.m_spoof);
 199   CStdString strCachePath;
 200
 201   if (scrURL.m_isgz)
 202     http.SetContentEncoding("gzip");
 203
 204   if (!scrURL.m_cache.IsEmpty())
 205   {
 206     strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
 207                               "scrapers/" + cacheContext + "/" + scrURL.m_cache);
 208     if (XFILE::CFile::Exists(strCachePath))
 209     {
 210       XFILE::CFile file;
 211       XFILE::auto_buffer buffer;
 212       if (file.LoadFile(strCachePath, buffer))
 213       {
 214         strHTML.assign(buffer.get(), buffer.length());
 215         return true;
 216       }
 217     }
 218   }
 219
 220   CStdString strHTML1(strHTML);
 221
 222   if (scrURL.m_post)
 223   {
 224     CStdString strOptions = url.GetOptions();
 225     strOptions = strOptions.substr(1);
 226     url.SetOptions("");
 227
 228     if (!http.Post(url.Get(), strOptions, strHTML1))
 229       return false;
 230   }
 231   else
 232     if (!http.Get(url.Get(), strHTML1))
 233       return false;
 234
 235   strHTML = strHTML1;
 236   std::string fileCharset(http.GetServerReportedCharset());
 237
 238   if (scrURL.m_url.Find(".zip") > -1 )
 239   {
 240     XFILE::CZipFile file;
 241     CStdString strBuffer;
 242     int iSize = file.UnpackFromMemory(strBuffer,strHTML,scrURL.m_isgz);
 243     if (iSize)
 244     {
 245       fileCharset.clear();
 246       strHTML.clear();
 247       strHTML.append(strBuffer.c_str(),strBuffer.data()+iSize);
 248     }
 249   }
 250
 251   if (!fileCharset.empty() && fileCharset != "UTF-8")
 252   {
 253     std::string converted;
 254     if (g_charsetConverter.ToUtf8(fileCharset, strHTML, converted) && !converted.empty())
 255       strHTML = converted;
 256   }
 257
 258   if (!scrURL.m_cache.IsEmpty())
 259   {
 260     CStdString strCachePath = URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
 261                               "scrapers/" + cacheContext + "/" + scrURL.m_cache);
 262     XFILE::CFile file;
 263     if (file.OpenForWrite(strCachePath,true))
 264       file.Write(strHTML.data(),strHTML.size());
 265     file.Close();
 266   }
 267   return true;
 268 }
 269
 270 // XML format is of strUrls is:
 271 // <TAG><url>...</url>...</TAG> (parsed by ParseElement) or <url>...</url> (ditto)
 272 bool CScraperUrl::ParseEpisodeGuide(CStdString strUrls)
 273 {
 274   if (strUrls.IsEmpty())
 275     return false;
 276
 277   // ok, now parse the xml file
 278   CXBMCTinyXML doc;
 279   doc.Parse(strUrls, TIXML_ENCODING_UNKNOWN);
 280   if (doc.RootElement())
 281   {
 282     TiXmlHandle docHandle( &doc );
 283     TiXmlElement *link = docHandle.FirstChild("episodeguide").Element();
 284     if (link->FirstChildElement("url"))
 285     {
 286       for (link = link->FirstChildElement("url"); link; link = link->NextSiblingElement("url"))
 287         ParseElement(link);
 288     }
 289     else if (link->FirstChild() && link->FirstChild()->Value())
 290       ParseElement(link);
 291   }
 292   else
 293     return false;
 294
 295   return true;
 296 }
 297
 298 CStdString CScraperUrl::GetThumbURL(const CScraperUrl::SUrlEntry &entry)
 299 {
 300   if (entry.m_spoof.IsEmpty())
 301     return entry.m_url;
 302   CStdString spoof = entry.m_spoof;
 303   CURL::Encode(spoof);
 304   return entry.m_url + "|Referer=" + spoof;
 305 }
 306
 307 void CScraperUrl::GetThumbURLs(std::vector<CStdString> &thumbs, const std::string &type, int season) const
 308 {
 309   for (vector<SUrlEntry>::const_iterator iter = m_url.begin(); iter != m_url.end(); ++iter)
 310   {
 311     if (iter->m_aspect == type || type.empty() || type == "thumb" || iter->m_aspect.empty())
 312     {
 313       if ((iter->m_type == CScraperUrl::URL_TYPE_GENERAL && season == -1)
 314        || (iter->m_type == CScraperUrl::URL_TYPE_SEASON && iter->m_season == season))
 315       {
 316         thumbs.push_back(GetThumbURL(*iter));
 317       }
 318     }
 319   }
 320 }