Merge pull request #3667 from Karlson2k/pcre_reports_more_01

[vuplus_xbmc] / xbmc / utils / RegExp.cpp
diff --git a/xbmc/utils/RegExp.cpp b/xbmc/utils/RegExp.cpp

index 7db40df..459deb2 100644 (file)
--- a/xbmc/utils/RegExp.cpp
+++ b/xbmc/utils/RegExp.cpp
@@ -25,6 +25,7 @@
  #include "StdString.h"
  #include "log.h"
  #include "utils/StringUtils.h"
+#include "utils/Utf8Utils.h"
  
  using namespace PCRE;
  
@@ -226,6 +227,7 @@ int CRegExp::PrivateRegFind(size_t bufferLen, const char *str, unsigned int star
  
    if (rc<1)
    {
+    static const int fragmentLen = 80; // length of excerpt before erroneous char for log
      switch(rc)
      {
      case PCRE_ERROR_NOMATCH:
@@ -237,11 +239,24 @@ int CRegExp::PrivateRegFind(size_t bufferLen, const char *str, unsigned int star
  
  #ifdef PCRE_ERROR_SHORTUTF8 
      case PCRE_ERROR_SHORTUTF8:
+      {
+        const size_t startPos = (m_subject.length() > fragmentLen) ? CUtf8Utils::RFindValidUtf8Char(m_subject, m_subject.length() - fragmentLen) : 0;
+        if (startPos != std::string::npos)
+          CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character at the end of string. Text before bad character: \"%s\"", m_subject.substr(startPos).c_str());
+        else
+          CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character at the end of string");
+        return -1;
+      }
  #endif
      case PCRE_ERROR_BADUTF8:
-      CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character");
-      return -1;
-
+      {
+        const size_t startPos = (m_iOvector[0] > fragmentLen) ? CUtf8Utils::RFindValidUtf8Char(m_subject, m_iOvector[0] - fragmentLen) : 0;
+        if (m_iOvector[0] >= 0 && startPos != std::string::npos)
+          CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character, error code: %d, position: %d. Text before bad char: \"%s\"", m_iOvector[0], m_iOvector[1], m_subject.substr(startPos, m_iOvector[0] - startPos + 1).c_str());
+        else
+          CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character, error code: %d, position: %d", m_iOvector[0], m_iOvector[1]);
+        return -1;
+      }
      case PCRE_ERROR_BADUTF8_OFFSET:
        CLog::Log(LOGERROR, "PCRE: Offset is pointing to the middle of UTF-8 character");
        return -1;