CLog::Log(LOGNOTICE, "The executable running is: %s", executable.c_str());
CLog::Log(LOGNOTICE, "Local hostname: %s", m_network->GetHostName().c_str());
CLog::Log(LOGNOTICE, "Log File is located: %sxbmc.log", g_advancedSettings.m_logFolder.c_str());
+ CRegExp::LogCheckUtf8Support();
CLog::Log(LOGNOTICE, "-----------------------------------------------------------------------");
CStdString strExecutablePath;
{
// Precompile our REs
VECCREGEXP folderRegExps;
- CRegExp folderRegExp(true);
+ CRegExp folderRegExp(true, true);
const CStdStringArray& strFolderRegExps = g_advancedSettings.m_folderStackRegExps;
CStdStringArray::const_iterator strExpression = strFolderRegExps.begin();
{
// Precompile our REs
VECCREGEXP stackRegExps;
- CRegExp tmpRegExp(true);
+ CRegExp tmpRegExp(true, true);
const CStdStringArray& strStackRegExps = g_advancedSettings.m_videoStackRegExps;
CStdStringArray::const_iterator strRegExp = strStackRegExps.begin();
while (strRegExp != strStackRegExps.end())
// Precompile our REs
VECCREGEXP matchRegExps;
- CRegExp tmpRegExp(true);
+ CRegExp tmpRegExp(true, true);
const CStdStringArray& strMatchRegExps = g_advancedSettings.m_trailerMatchRegExps;
CStdStringArray::const_iterator strRegExp = strMatchRegExps.begin();
const CStdStringArray &regexps = g_advancedSettings.m_videoCleanStringRegExps;
- CRegExp reTags(true);
- CRegExp reYear;
+ CRegExp reTags(true, true);
+ CRegExp reYear(false, true);
if (!reYear.RegComp(g_advancedSettings.m_videoCleanDateTimeRegExp))
{
if (strFileOrFolder.IsEmpty())
return false;
- CRegExp regExExcludes(true); // case insensitive regex
+ CRegExp regExExcludes(true, true); // case insensitive regex
for (unsigned int i = 0; i < regexps.size(); i++)
{
CStdString strMatch = vecSplit[0];
strMatch.Replace(",,",",");
bool bCaseless = vecSplit[3].Find('i') > -1;
- CRegExp regExp(bCaseless);
+ CRegExp regExp(bCaseless, true);
if (!regExp.RegComp(strMatch.c_str()))
{ // invalid regexp - complain in logs
if (m_tDVDFile >= 0 && (m_tDVDFile > 0) != item.IsDVDFile()) return;
if (m_tDVDImage >= 0 && (m_tDVDImage > 0) != item.IsDVDImage()) return;
- CRegExp regExp;
+ CRegExp regExp(false, true);
if (m_bStreamDetails)
{
{
// Load up our REs
VECCREGEXP RegExps;
- CRegExp tempRE(true);
+ CRegExp tempRE(true, true);
const CStdStringArray& strRegExps = g_advancedSettings.m_videoStackRegExps;
CStdStringArray::const_iterator itRegExp = strRegExps.begin();
vector<pair<int, CStdString> > badStacks;
using namespace PCRE;
-CRegExp::CRegExp(bool caseless)
+#ifndef PCRE_UCP
+#define PCRE_UCP 0 // fallback when PCRE_UCP is not already defined; OR-ing 0 into the options changes nothing
+#endif // PCRE_UCP
+
+int CRegExp::m_Utf8Supported = -1; // -1 = not queried yet; IsUtf8Supported() fills it in via pcre_config()
+int CRegExp::m_UcpSupported = -1; // -1 = not queried yet; AreUnicodePropertiesSupported() fills it in via pcre_config()
+
+
+CRegExp::CRegExp(bool caseless /*= false*/, bool utf8 /*= false*/)
{
m_re = NULL;
m_iOptions = PCRE_DOTALL | PCRE_NEWLINE_ANY;
if(caseless)
m_iOptions |= PCRE_CASELESS;
+ if (utf8)
+ {
+ if (IsUtf8Supported())
+ m_iOptions |= PCRE_UTF8;
+ if (AreUnicodePropertiesSupported())
+ m_iOptions |= PCRE_UCP;
+ }
m_offset = 0;
m_bMatched = false;
CLog::Log(LOGERROR, "PCRE: Match limit reached");
return -1;
+#ifdef PCRE_ERROR_SHORTUTF8
+ case PCRE_ERROR_SHORTUTF8:
+#endif
+ case PCRE_ERROR_BADUTF8:
+ CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character");
+ return -1;
+
+ case PCRE_ERROR_BADUTF8_OFFSET:
+ CLog::Log(LOGERROR, "PCRE: Offset (%d) is pointing to the middle of UTF-8 character", startoffset);
+ return -1;
+
default:
CLog::Log(LOGERROR, "PCRE: Unknown error: %d", rc);
return -1;
return iSub >= 0 && iSub <= m_iMatchCount && iSub <= m_MaxNumOfBackrefrences;
}
+
+// Reports whether the PCRE library claims UTF-8 support.
+// The answer is obtained from pcre_config() on the first call and cached in
+// m_Utf8Supported; later calls only read the cached value.
+bool CRegExp::IsUtf8Supported(void)
+{
+  // Anything other than -1 means the library was already asked.
+  if (m_Utf8Supported != -1)
+    return m_Utf8Supported == 1;
+
+  const int queryResult = pcre_config(PCRE_CONFIG_UTF8, &m_Utf8Supported);
+  if (queryResult != 0)
+    m_Utf8Supported = 0; // query failed: treat as "not supported"
+
+  return m_Utf8Supported == 1;
+}
+
+// Reports whether the PCRE library claims Unicode properties support.
+// The library is only queried when both PCRE_CONFIG_UNICODE_PROPERTIES and a
+// non-zero PCRE_UCP are available at compile time; otherwise the cached value
+// keeps its initial state and the function returns false.
+bool CRegExp::AreUnicodePropertiesSupported(void)
+{
+#if defined(PCRE_CONFIG_UNICODE_PROPERTIES) && PCRE_UCP != 0
+  // Ask the library once; a failed query is recorded as "not supported".
+  if (m_UcpSupported == -1 &&
+      pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &m_UcpSupported) != 0)
+  {
+    m_UcpSupported = 0;
+  }
+#endif
+
+  return m_UcpSupported == 1;
+}
+
+// Checks whether the PCRE library offers UTF-8 and Unicode properties support
+// and writes a warning to the log for each capability that is missing.
+// When anything is missing, an additional notice with the PCRE version in use
+// is logged.
+// Returns true only when both capabilities are available.
+bool CRegExp::LogCheckUtf8Support(void)
+{
+  bool utf8FullSupport = true;
+
+  if (!CRegExp::IsUtf8Supported())
+  {
+    utf8FullSupport = false;
+    CLog::Log(LOGWARNING, "UTF-8 is not supported in PCRE lib, support for national symbols is limited!");
+  }
+
+  if (!CRegExp::AreUnicodePropertiesSupported())
+  {
+    utf8FullSupport = false;
+    CLog::Log(LOGWARNING, "Unicode properties are not enabled in PCRE lib, support for national symbols may be limited!");
+  }
+
+  if (!utf8FullSupport)
+  {
+    CLog::Log(LOGNOTICE, "Consider installing PCRE lib version 8.10 or later with enabled Unicode properties and UTF-8 support. Your PCRE lib version: %s", PCRE::pcre_version());
+#if PCRE_UCP == 0
+    // PCRE_UCP evaluated to 0 at build time, so the option is compiled out of
+    // this binary. The message has no conversion specifier and therefore
+    // takes no extra arguments.
+    CLog::Log(LOGNOTICE, "You will need to rebuild XBMC after PCRE lib update.");
+#endif
+  }
+
+  return utf8FullSupport;
+}
{
public:
static const int m_MaxNumOfBackrefrences = 20;
- CRegExp(bool caseless = false);
+ CRegExp(bool caseless = false, bool utf8 = false);
CRegExp(const CRegExp& re);
~CRegExp();
int GetNamedSubPatternNumber(const char* strName) const;
void DumpOvector(int iLog);
const CRegExp& operator= (const CRegExp& re);
+ static bool IsUtf8Supported(void);
+ static bool AreUnicodePropertiesSupported(void);
+ static bool LogCheckUtf8Support(void);
private:
int PrivateRegFind(size_t bufferLen, const char *str, unsigned int startoffset = 0, int maxNumberOfCharsToTest = -1);
bool m_bMatched;
std::string m_subject;
std::string m_pattern;
+ static int m_Utf8Supported;
+ static int m_UcpSupported;
};
typedef std::vector<CRegExp> VECCREGEXP;
if (stricmp(sensitive,"yes") == 0)
bInsensitive=false; // match case sensitive
- CRegExp reg(bInsensitive);
+ CRegExp reg(bInsensitive, true);
CStdString strExpression;
if (pExpression->FirstChild())
strExpression = pExpression->FirstChild()->Value();
for (unsigned int i=0;i<expression.size();++i)
{
- CRegExp reg(true);
+ CRegExp reg(true, true);
if (!reg.RegComp(expression[i].regexp))
continue;
// add what we found by now
episodeList.push_back(episode);
- CRegExp reg2(true);
+ CRegExp reg2(true, true);
// check the remainder of the string for any further episodes.
if (!byDate && reg2.RegComp(g_advancedSettings.m_tvshowMultiPartEnumRegExp))
{