2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
// Default constructor; takes no arguments. The body is not visible in this view.
27 CHTMLUtil::CHTMLUtil(void)
// Destructor. The body is not visible in this view.
30 CHTMLUtil::~CHTMLUtil(void)
// Looks for strTag inside strHTML, starting the search at offset iPos.
// On a hit, strtagFound receives the text from the match up to and including
// the next '>' (or up to the end of the string when there is no '>').
// Returns -1 when strTag is not found; the return statement for the success
// path is not visible in this view.
33 int CHTMLUtil::FindTag(const CStdString& strHTML, const CStdString& strTag, CStdString& strtagFound, int iPos) const
// Working copies of both inputs.
// NOTE(review): the lines between these copies and the search are not visible
// here; the "Low" suffix suggests the copies get lower-cased - confirm.
35 CStdString strHTMLLow = strHTML;
36 CStdString strTagLow = strTag;
// NOTE(review): the search below uses strTag, not the strTagLow copy made
// above. If the copies are lower-cased, a strTag containing upper-case
// letters can never match - confirm whether strTagLow was intended.
40 int iStart = strHTMLLow.Find(strTag, iPos);
41 if (iStart < 0) return -1;
// No closing '>' after the match: treat the end of the string as the tag end.
42 int iEnd = strHTMLLow.Find(">", iStart);
43 if (iEnd < 0) iEnd = (int)strHTMLLow.size();
// The result is cut from the working copy strHTMLLow, not from strHTML.
44 strtagFound = strHTMLLow.Mid(iStart, (iEnd + 1) - iStart);
// Looks for the closing tag "</" + strTag in strHTML, starting at offset iPos,
// stepping over closing tags that belong to nested openings of the same tag.
// On a hit, strtagFound receives the text from the match up to and including
// the next '>' (or up to the end of the string when there is no '>').
// Returns -1 when no closing tag is found at all; the return statement for the
// success path is not visible in this view.
48 int CHTMLUtil::FindClosingTag(const CStdString& strHTML, const CStdString& strTag, CStdString& strtagFound, int iPos) const
// Working copies of both inputs.
// NOTE(review): the lines between these copies and the search are not visible
// here; the "Low" suffix suggests the copies get lower-cased - confirm.
// strTagLow is not used by any visible line.
50 CStdString strHTMLLow = strHTML;
51 CStdString strTagLow = strTag;
55 int iStart = strHTMLLow.Find("</" + strTag, iPos);
56 if (iStart < 0) return -1;
// First opening tag at or after iPos. This is a plain substring search, so
// "<" + strTag also matches longer tag names that begin with strTag.
57 int iOpenStart = strHTMLLow.Find("<" + strTag, iPos);
// While an opening tag sits in front of the current closing candidate, that
// candidate closes the nested tag: move on to the next closing tag and the
// next opening tag.
58 while (iOpenStart < iStart && iOpenStart != -1)
60 iStart = strHTMLLow.Find("</" + strTag, iStart + 1);
61 iOpenStart = strHTMLLow.Find("<" + strTag, iOpenStart + 1);
// NOTE(review): no visible line re-checks iStart after the loop, although the
// Find inside the loop can fail - confirm that unbalanced input is handled.
// No closing '>' after the match: treat the end of the string as the tag end.
64 int iEnd = strHTMLLow.Find(">", iStart);
65 if (iEnd < 0) iEnd = (int)strHTMLLow.size();
// The result is cut from the working copy strHTMLLow, not from strHTML.
66 strtagFound = strHTMLLow.Mid(iStart, (iEnd + 1) - iStart);
// Extracts the text between the first '>' and the following '<' of
// strTagAndValue into strValue. When either character is missing, strValue is
// left as a copy of the whole input.
70 void CHTMLUtil::getValueOfTag(const CStdString& strTagAndValue, CStdString& strValue)
72 // strTagAndValue contains:
73 // like <a href=blablabla.....>value</a>
// Default result: the unmodified input.
74 strValue = strTagAndValue;
75 int iStart = strTagAndValue.Find(">");
// When no '>' was found iStart is -1, so this search starts at offset 0.
76 int iEnd = strTagAndValue.Find("<", iStart + 1);
77 if (iStart >= 0 && iEnd >= 0)
// NOTE(review): going by the visible lines alone, iStart still indexes the '>'
// itself, which would put '>' at the front of the result. The lines between
// the condition and this assignment are not visible and presumably advance
// iStart past it - confirm.
80 strValue = strTagAndValue.Mid(iStart, iEnd - iStart);
84 void CHTMLUtil::getAttributeOfTag(const CStdString& strTagAndValue, const CStdString& strTag, CStdString& strValue)
86 // strTagAndValue contains:
87 // like <a href=""value".....
88 strValue = strTagAndValue;
89 int iStart = strTagAndValue.Find(strTag);
90 if (iStart < 0) return ;
91 iStart += (int)strTag.size();
92 while (strTagAndValue[iStart + 1] == 0x20 || strTagAndValue[iStart + 1] == 0x27 || strTagAndValue[iStart + 1] == 34) iStart++;
93 int iEnd = iStart + 1;
94 while (strTagAndValue[iEnd] != 0x27 && strTagAndValue[iEnd] != 0x20 && strTagAndValue[iEnd] != 34 && strTagAndValue[iEnd] != '>') iEnd++;
95 if (iStart >= 0 && iEnd >= 0)
97 strValue = strTagAndValue.Mid(iStart, iEnd - iStart);
// Walks strHTML one character at a time, tracks how deep the scan is inside
// '<' ... '>' pairs, and collects characters into strReturn.
// NOTE(review): the declaration of iNested, the condition that guards the
// append and the write-back into strHTML are not visible in this view;
// presumably only characters outside of tags are kept - confirm.
101 void CHTMLUtil::RemoveTags(CStdString& strHTML)
104 CStdString strReturn = "";
105 for (int i = 0; i < (int) strHTML.size(); ++i)
// '<' opens one more nesting level, '>' closes one.
107 if (strHTML[i] == '<') iNested++;
108 else if (strHTML[i] == '>') iNested--;
113 strReturn += strHTML[i];
// Lookup table used by ConvertHTMLToW: every entry pairs a wide string (member
// html) with the single wide character (member w) that replaces it.
// ConvertHTMLToW walks the table until it reaches an entry whose html member
// is null; that terminating entry and the HTMLMapping declaration are not
// visible in this view.
// NOTE(review): in this view every key is the literal character whose code
// point stands next to it, so replacing the key with the value changes
// nothing. Presumably the keys are meant to be HTML entity names (such as the
// name for the acute accent next to 0x00B4) - confirm against the repository.
127 static const HTMLMapping mappings[] =
130 {L"´", 0x00B4},
131 {L"à", 0x00E0},
132 {L"á", 0x00E1},
133 {L"â", 0x00E2},
134 {L"ã", 0x00E3},
136 {L"å", 0x00E5},
137 {L"æ", 0x00E6},
138 {L"À", 0x00C0},
139 {L"Á", 0x00C1},
140 {L"Â", 0x00C2},
141 {L"Ã", 0x00C3},
143 {L"Å", 0x00C5},
144 {L"Æ", 0x00C6},
145 {L"„", 0x201E},
146 {L"¦", 0x00A6},
148 {L"•", 0x2022},
151 {L"¤", 0x00A4},
153 {L"¸", 0x00B8},
154 {L"Ç", 0x00C7},
155 {L"ç", 0x00E7},
156 {L"†", 0x2020},
158 {L"÷", 0x00F7},
159 {L"‡", 0x2021},
160 {L"è", 0x00E8},
161 {L"é", 0x00E9},
162 {L"ê", 0x00EA},
168 {L"È", 0x00C8},
169 {L"É", 0x00C9},
170 {L"Ê", 0x00CA},
174 {L"⁄", 0x2044},
175 {L"¼", 0x00BC},
176 {L"½", 0x00BD},
177 {L"¾", 0x00BE},
179 {L"…", 0x2026},
180 {L"¡", 0x00A1},
181 {L"¿", 0x00BF},
182 {L"ì", 0x00EC},
183 {L"í", 0x00ED},
184 {L"î", 0x00EE},
186 {L"Ì", 0x00CC},
187 {L"Í", 0x00CD},
188 {L"Î", 0x00CE},
192 {L"«", 0x00AB},
193 {L"“", 0x201C},
194 {L"‹", 0x2039},
195 {L"‘", 0x2018},
197 {L"µ", 0x00B5},
198 {L"·", 0x00B7},
199 {L"—", 0x2014},
201 {L"–", 0x2013},
202 {L"ñ", 0x00F1},
204 {L"Ñ", 0x00D1},
207 {L"œ", 0x0153},
208 {L"ò", 0x00F2},
209 {L"ó", 0x00F3},
210 {L"ô", 0x00F4},
211 {L"õ", 0x00F5},
213 {L"ø", 0x00F8},
214 {L"Œ", 0x0152},
215 {L"Ò", 0x00D2},
216 {L"Ó", 0x00D3},
217 {L"Ô", 0x00D4},
218 {L"Õ", 0x00D5},
220 {L"Ø", 0x00D8},
222 {L"‰", 0x2030},
223 {L"±", 0x00B1},
224 {L"£", 0x00A3},
225 {L"»", 0x00BB},
226 {L"”", 0x201D},
229 {L"›", 0x203A},
230 {L"’", 0x2019},
231 {L"‚", 0x201A},
232 {L"š", 0x0161},
238 {L"ß", 0x00DF},
239 {L"Š", 0x0160},
240 {L" ", 0x2009},
241 {L"þ", 0x00FE},
242 {L"˜", 0x02DC},
243 {L"×", 0x00D7},
244 {L"™", 0x2122},
245 {L"Þ", 0x00DE},
247 {L"ù", 0x00F9},
248 {L"ú", 0x00FA},
249 {L"û", 0x00FB},
251 {L"Ù", 0x00D9},
252 {L"Ú", 0x00DA},
253 {L"Û", 0x00DB},
257 {L"ý", 0x00FD},
258 {L"Ý", 0x00DD},
// Copies strHTML into strStripped and replaces HTML escapes in the copy:
// first every string listed in the mappings table, then numeric character
// references of the form "&#<decimal>;" and "&#x<hex>;".
// Several lines of this function (declarations of iPos, i, base and num,
// braces and some branches) are not visible in this view.
264 void CHTMLUtil::ConvertHTMLToW(const CStdStringW& strHTML, CStdStringW& strStripped)
// Empty input is handled separately; the body of this branch is not visible.
266 if (strHTML.size() == 0)
272 strStripped = strHTML;
// Pass 1: walk the table until the entry whose html member is null and
// replace each html string by its one-character equivalent.
273 while (mappings[iPos].html)
275 strStripped.Replace(mappings[iPos].html,CStdStringW(1, mappings[iPos].w));
// Pass 2: numeric character references.
279 iPos = strStripped.Find(L"&#");
// NOTE(review): "iPos > 0" also rejects a match at index 0, so a reference at
// the very start of the string is never converted - confirm whether ">= 0"
// was intended.
280 while (iPos > 0 && iPos < (int)strStripped.size()-4)
// Offset at which the search for the next reference resumes.
282 int iStart = iPos + 1;
// An 'x' in front of the digits marks a hexadecimal reference.
286 if (strStripped[iPos+1] == L'x')
// Advance iPos over the digits (hex digits when base is 16, decimal otherwise).
293 while ( iPos < (int)strStripped.size() &&
294 (base==16?iswxdigit(strStripped[iPos]):iswdigit(strStripped[iPos])))
// num holds the digit run; val is the character it encodes.
297 num = strStripped.Mid(i,iPos-i);
298 wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base);
// Rebuild the full reference text as it appears in the string. The condition
// that selects between the two formats is not visible here; presumably the
// decimal form is used for base 10 and the "x" form for base 16 - confirm.
300 num.Format(L"&#%ls;",num.c_str());
302 num.Format(L"&#x%ls;",num.c_str());
// Swap the reference text for the decoded character, then look for the next one.
304 strStripped.Replace(num,CStdStringW(1,val));
305 iPos = strStripped.Find(L"&#", iStart);