#include "StdString.h"
#include "log.h"
#include "utils/StringUtils.h"
+#include "utils/Utf8Utils.h"
using namespace PCRE;
if (rc<1)
{
+ static const int fragmentLen = 80; // length of excerpt before erroneous char for log
switch(rc)
{
case PCRE_ERROR_NOMATCH:
#ifdef PCRE_ERROR_SHORTUTF8
case PCRE_ERROR_SHORTUTF8:
+ {
+ const size_t startPos = (m_subject.length() > fragmentLen) ? CUtf8Utils::RFindValidUtf8Char(m_subject, m_subject.length() - fragmentLen) : 0;
+ if (startPos != std::string::npos)
+ CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character at the end of string. Text before bad character: \"%s\"", m_subject.substr(startPos).c_str());
+ else
+ CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character at the end of string");
+ return -1;
+ }
#endif
case PCRE_ERROR_BADUTF8:
- CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character");
- return -1;
-
+ {
+ const size_t startPos = (m_iOvector[0] > fragmentLen) ? CUtf8Utils::RFindValidUtf8Char(m_subject, m_iOvector[0] - fragmentLen) : 0;
+ if (m_iOvector[0] >= 0 && startPos != std::string::npos)
+ CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character, error code: %d, position: %d. Text before bad char: \"%s\"", m_iOvector[0], m_iOvector[1], m_subject.substr(startPos, m_iOvector[0] - startPos + 1).c_str());
+ else
+ CLog::Log(LOGERROR, "PCRE: Bad UTF-8 character, error code: %d, position: %d", m_iOvector[0], m_iOvector[1]);
+ return -1;
+ }
case PCRE_ERROR_BADUTF8_OFFSET:
CLog::Log(LOGERROR, "PCRE: Offset is pointing to the middle of UTF-8 character");
return -1;