code.vuplus.com Git - vuplus_xbmc/blob - xbmc/utils/ScraperUrl.cpp

   1 /*
   2  *      Copyright (C) 2005-2012 Team XBMC
   3  *      http://www.xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20
  21 #include "XMLUtils.h"
  22 #include "ScraperUrl.h"
  23 #include "settings/AdvancedSettings.h"
  24 #include "HTMLUtil.h"
  25 #include "CharsetConverter.h"
  26 #include "URL.h"
  27 #include "filesystem/CurlFile.h"
  28 #include "filesystem/ZipFile.h"
  29 #include "URIUtils.h"
  30
  31 #include <cstring>
  32 #include <sstream>
  33
  34 using namespace std;
  35
  36 CScraperUrl::CScraperUrl(const CStdString& strUrl)
  37 {
  38   relevance = 0;
  39   ParseString(strUrl);
  40 }
  41
  42 CScraperUrl::CScraperUrl(const TiXmlElement* element)
  43 {
  44   relevance = 0;
  45   ParseElement(element);
  46 }
  47
  48 CScraperUrl::CScraperUrl()
  49 {
  50   relevance = 0;
  51 }
  52
  53 CScraperUrl::~CScraperUrl()
  54 {
  55 }
  56
  57 void CScraperUrl::Clear()
  58 {
  59   m_url.clear();
  60   m_spoof.clear();
  61   m_xml.clear();
  62   relevance = 0;
  63 }
  64
  65 bool CScraperUrl::Parse()
  66 {
  67   CStdString strToParse = m_xml;
  68   m_xml.Empty();
  69   return ParseString(strToParse);
  70 }
  71
  72 bool CScraperUrl::ParseElement(const TiXmlElement* element)
  73 {
  74   if (!element || !element->FirstChild() ||
  75       !element->FirstChild()->Value()) return false;
  76
  77   stringstream stream;
  78   stream << *element;
  79   m_xml += stream.str();
  80
  81   SUrlEntry url;
  82   url.m_url = element->FirstChild()->Value();
  83   const char* pSpoof = element->Attribute("spoof");
  84   if (pSpoof)
  85     url.m_spoof = pSpoof;
  86   const char* szPost=element->Attribute("post");
  87   if (szPost && stricmp(szPost,"yes") == 0)
  88     url.m_post = true;
  89   else
  90     url.m_post = false;
  91   const char* szIsGz=element->Attribute("gzip");
  92   if (szIsGz && stricmp(szIsGz,"yes") == 0)
  93     url.m_isgz = true;
  94   else
  95     url.m_isgz = false;
  96   const char* pCache = element->Attribute("cache");
  97   if (pCache)
  98     url.m_cache = pCache;
  99
 100   const char* szType = element->Attribute("type");
 101   url.m_type = URL_TYPE_GENERAL;
 102   url.m_season = -1;
 103   if (szType && stricmp(szType,"season") == 0)
 104   {
 105     url.m_type = URL_TYPE_SEASON;
 106     const char* szSeason = element->Attribute("season");
 107     if (szSeason)
 108       url.m_season = atoi(szSeason);
 109   }
 110   const char *aspect = element->Attribute("aspect");
 111   if (aspect)
 112     url.m_aspect = aspect;
 113
 114   m_url.push_back(url);
 115
 116   return true;
 117 }
 118
 119 bool CScraperUrl::ParseString(CStdString strUrl)
 120 {
 121   if (strUrl.IsEmpty())
 122     return false;
 123
 124   // ok, now parse the xml file
 125   if (!XMLUtils::HasUTF8Declaration(strUrl))
 126     g_charsetConverter.unknownToUTF8(strUrl);
 127
 128   CXBMCTinyXML doc;
 129   doc.Parse(strUrl.c_str(),0,TIXML_ENCODING_UTF8);
 130
 131   TiXmlElement* pElement = doc.RootElement();
 132   if (!pElement)
 133   {
 134     SUrlEntry url;
 135     url.m_url = strUrl;
 136     url.m_type = URL_TYPE_GENERAL;
 137     url.m_season = -1;
 138     url.m_post = false;
 139     url.m_isgz = false;
 140     m_url.push_back(url);
 141     m_xml = strUrl;
 142   }
 143   else
 144   {
 145     while (pElement)
 146     {
 147       ParseElement(pElement);
 148       pElement = pElement->NextSiblingElement(pElement->Value());
 149     }
 150   }
 151
 152   return true;
 153 }
 154
 155 const CScraperUrl::SUrlEntry CScraperUrl::GetFirstThumb(const std::string &type) const
 156 {
 157   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 158   {
 159     if (iter->m_type == URL_TYPE_GENERAL && (type.empty() || type == "thumb" || iter->m_aspect == type))
 160       return *iter;
 161   }
 162
 163   SUrlEntry result;
 164   result.m_type = URL_TYPE_GENERAL;
 165   result.m_post = false;
 166   result.m_isgz = false;
 167   result.m_season = -1;
 168   return result;
 169 }
 170
 171 const CScraperUrl::SUrlEntry CScraperUrl::GetSeasonThumb(int season, const std::string &type) const
 172 {
 173   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 174   {
 175     if (iter->m_type == URL_TYPE_SEASON && iter->m_season == season &&
 176        (type.empty() || type == "thumb" || iter->m_aspect == type))
 177       return *iter;
 178   }
 179
 180   SUrlEntry result;
 181   result.m_type = URL_TYPE_GENERAL;
 182   result.m_post = false;
 183   result.m_isgz = false;
 184   result.m_season = -1;
 185   return result;
 186 }
 187
 188 unsigned int CScraperUrl::GetMaxSeasonThumb() const
 189 {
 190   unsigned int maxSeason = 0;
 191   for (vector<SUrlEntry>::const_iterator iter=m_url.begin();iter != m_url.end();++iter)
 192   {
 193     if (iter->m_type == URL_TYPE_SEASON && iter->m_season > 0 && (unsigned int)iter->m_season > maxSeason)
 194       maxSeason = iter->m_season;
 195   }
 196   return maxSeason;
 197 }
 198
 199 bool CScraperUrl::Get(const SUrlEntry& scrURL, std::string& strHTML, XFILE::CCurlFile& http, const CStdString& cacheContext)
 200 {
 201   CURL url(scrURL.m_url);
 202   http.SetReferer(scrURL.m_spoof);
 203   CStdString strCachePath;
 204
 205   if (scrURL.m_isgz)
 206     http.SetContentEncoding("gzip");
 207
 208   if (!scrURL.m_cache.IsEmpty())
 209   {
 210     URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
 211                               "scrapers/"+cacheContext+"/"+scrURL.m_cache,
 212                               strCachePath);
 213     if (XFILE::CFile::Exists(strCachePath))
 214     {
 215       XFILE::CFile file;
 216       if (file.Open(strCachePath))
 217       {
 218         char* temp = new char[(int)file.GetLength()];
 219         file.Read(temp,file.GetLength());
 220         strHTML.clear();
 221         strHTML.append(temp,temp+file.GetLength());
 222         file.Close();
 223         delete[] temp;
 224         return true;
 225       }
 226     }
 227   }
 228
 229   CStdString strHTML1(strHTML);
 230
 231   if (scrURL.m_post)
 232   {
 233     CStdString strOptions = url.GetOptions();
 234     strOptions = strOptions.substr(1);
 235     url.SetOptions("");
 236
 237     if (!http.Post(url.Get(), strOptions, strHTML1))
 238       return false;
 239   }
 240   else
 241     if (!http.Get(url.Get(), strHTML1))
 242       return false;
 243
 244   strHTML = strHTML1;
 245
 246   if (scrURL.m_url.Find(".zip") > -1 )
 247   {
 248     XFILE::CZipFile file;
 249     CStdString strBuffer;
 250     int iSize = file.UnpackFromMemory(strBuffer,strHTML,scrURL.m_isgz);
 251     if (iSize)
 252     {
 253       strHTML.clear();
 254       strHTML.append(strBuffer.c_str(),strBuffer.data()+iSize);
 255     }
 256   }
 257
 258   if (!scrURL.m_cache.IsEmpty())
 259   {
 260     CStdString strCachePath;
 261     URIUtils::AddFileToFolder(g_advancedSettings.m_cachePath,
 262                               "scrapers/"+cacheContext+"/"+scrURL.m_cache,
 263                               strCachePath);
 264     XFILE::CFile file;
 265     if (file.OpenForWrite(strCachePath,true))
 266       file.Write(strHTML.data(),strHTML.size());
 267     file.Close();
 268   }
 269   return true;
 270 }
 271
 272 // XML format is of strUrls is:
 273 // <TAG><url>...</url>...</TAG> (parsed by ParseElement) or <url>...</url> (ditto)
 274 bool CScraperUrl::ParseEpisodeGuide(CStdString strUrls)
 275 {
 276   if (strUrls.IsEmpty())
 277     return false;
 278
 279   // ok, now parse the xml file
 280   if (!XMLUtils::HasUTF8Declaration(strUrls))
 281     g_charsetConverter.unknownToUTF8(strUrls);
 282
 283   CXBMCTinyXML doc;
 284   doc.Parse(strUrls.c_str(),0,TIXML_ENCODING_UTF8);
 285   if (doc.RootElement())
 286   {
 287     TiXmlHandle docHandle( &doc );
 288     TiXmlElement *link = docHandle.FirstChild("episodeguide").Element();
 289     if (link->FirstChildElement("url"))
 290     {
 291       for (link = link->FirstChildElement("url"); link; link = link->NextSiblingElement("url"))
 292         ParseElement(link);
 293     }
 294     else if (link->FirstChild() && link->FirstChild()->Value())
 295       ParseElement(link);
 296   }
 297   else
 298     return false;
 299
 300   return true;
 301 }
 302
 303 CStdString CScraperUrl::GetThumbURL(const CScraperUrl::SUrlEntry &entry)
 304 {
 305   if (entry.m_spoof.IsEmpty())
 306     return entry.m_url;
 307   CStdString spoof = entry.m_spoof;
 308   CURL::Encode(spoof);
 309   return entry.m_url + "|Referer=" + spoof;
 310 }
 311
 312 void CScraperUrl::GetThumbURLs(std::vector<CStdString> &thumbs, const std::string &type, int season) const
 313 {
 314   for (vector<SUrlEntry>::const_iterator iter = m_url.begin(); iter != m_url.end(); ++iter)
 315   {
 316     if (iter->m_aspect == type || type.empty() || type == "thumb" || iter->m_aspect.empty())
 317     {
 318       if ((iter->m_type == CScraperUrl::URL_TYPE_GENERAL && season == -1)
 319        || (iter->m_type == CScraperUrl::URL_TYPE_SEASON && iter->m_season == season))
 320       {
 321         thumbs.push_back(GetThumbURL(*iter));
 322       }
 323     }
 324   }
 325 }