2 * Copyright (C) 2012-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #include "utils/POUtils.h"
22 #include "filesystem/File.h"
23 #include "utils/log.h"
// Constructor.
// Leaves m_Entry.msgStrPlural holding exactly one default-constructed element.
// GetPlurMsgstr() falls back to index size()-1, which relies on at least one
// element being present in that vector.
26 CPODocument::CPODocument()
31 m_Entry.msgStrPlural.clear();
32 m_Entry.msgStrPlural.resize(1);
// Destructor: the body is empty, no explicit cleanup is done here.
35 CPODocument::~CPODocument() {}
// Reads the PO file `pofilename` into m_strBuffer, normalizes its line endings
// and tries to read the first entry, which is expected to be the PO header.
// @param pofilename  name passed to file.Open(); also used in the log messages
// @return NOTE(review): presumably false after each logged error and true once
//         the header entry was read - confirm against the return statements
37 bool CPODocument::LoadFile(const std::string &pofilename)
40 if (!file.Open(pofilename))
43 int64_t fileLength = file.GetLength();
44 if (fileLength < 18) // at least a size of a minimalistic header
47 CLog::Log(LOGERROR, "POParser: non valid length found for string file: %s", pofilename.c_str());
51 m_POfilelength = static_cast<size_t> (fileLength);
// The buffer is one byte longer than the file: index 0 holds an extra LF and the
// file data is read in starting at index 1. Presumably this lets the search
// patterns that begin with LF match on the very first line of the file as well.
53 m_strBuffer.resize(m_POfilelength+1);
54 m_strBuffer[0] = '\n';
// NOTE(review): readBytes is unsigned; if file.Read() can report an error as a
// negative value, that is only caught by the length comparison below - confirm.
56 unsigned int readBytes = file.Read(&m_strBuffer[1], m_POfilelength);
59 if (readBytes != m_POfilelength)
61 CLog::Log(LOGERROR, "POParser: actual read data differs from file size, for string file: %s",
66 ConvertLineEnds(pofilename);
68 // make sure the buffer ends with an LF
69 if (*m_strBuffer.rbegin() != '\n')
// refresh the cached length after the buffer adjustments above
74 m_POfilelength = m_strBuffer.size();
// the header is accepted only if the first entry is classified as MSGID_FOUND
76 if (GetNextEntry() && m_Entry.Type == MSGID_FOUND)
79 CLog::Log(LOGERROR, "POParser: unable to read PO file header from file: %s", pofilename.c_str());
// Moves on to the next entry of m_strBuffer (entries are separated by "\n\n"),
// copies its text into m_Entry.Content and classifies it through m_Entry.Type.
// The checks are made in this order:
//   ID_FOUND           - msgid line plus a msgctxt "#<number>" line with a parsable id
//   MSGID_PLURAL_FOUND - msgid line plus a msgid_plural line
//   MSGID_FOUND        - msgid line only
// NOTE(review): presumably returns true as soon as an entry is classified and
// false when the end of the buffer is reached without one - confirm against
// the return statements.
83 bool CPODocument::GetNextEntry()
87 // if we don't find LFLF, we reached the end of the buffer and the last entry to check
88 // we indicate this with setting m_nextEntryPos to the end of the buffer
89 if ((m_nextEntryPos = m_strBuffer.find("\n\n", m_CursorPos)) == std::string::npos)
90 m_nextEntryPos = m_POfilelength-1;
92 // now we read the actual entry into a temp string for further processing
// (the copied range runs from the cursor up to and including m_nextEntryPos)
93 m_Entry.Content.assign(m_strBuffer, m_CursorPos, m_nextEntryPos - m_CursorPos +1);
94 m_CursorPos = m_nextEntryPos+1; // jump cursor to the second LF character
// only entries that contain a msgid line are classified
96 if (FindLineStart ("\nmsgid ", m_Entry.msgID.Pos))
98 if (FindLineStart ("\nmsgctxt \"#", m_Entry.xIDPos) && ParseNumID())
100 m_Entry.Type = ID_FOUND; // we found an entry with a valid numeric id
105 if (FindLineStart ("\nmsgid_plural ", plurPos))
107 m_Entry.Type = MSGID_PLURAL_FOUND; // we found a pluralized entry
111 m_Entry.Type = MSGID_FOUND; // we found a normal entry, with no numeric id
// keep scanning until the entry that ends at the last buffer position was examined
115 while (m_nextEntryPos != m_POfilelength-1);
116 // we reached the end of buffer AND we have not found a valid entry
// Extracts the strings of the current entry (m_Entry.Content) into the m_Entry
// fields, depending on the entry type set by GetNextEntry().
// @param bisSourceLang  NOTE(review): presumably selects the short path that only
//                       deals with msgID for ID_FOUND entries - confirm, the flag
//                       is not referenced by the statements shown here
121 void CPODocument::ParseEntry(bool bisSourceLang)
// msgID is read for entries with a numeric id; the clear() below presumably
// belongs to the opposite branch
125 if (m_Entry.Type == ID_FOUND)
126 GetString(m_Entry.msgID);
128 m_Entry.msgID.Str.clear();
// entries without a numeric id: read msgid, and msgctxt when such a line exists
132 if (m_Entry.Type != ID_FOUND)
134 GetString(m_Entry.msgID);
135 if (FindLineStart ("\nmsgctxt ", m_Entry.msgCtxt.Pos))
136 GetString(m_Entry.msgCtxt);
138 m_Entry.msgCtxt.Str.clear();
// non-plural entries carry a single msgstr line
141 if (m_Entry.Type != MSGID_PLURAL_FOUND)
143 if (FindLineStart ("\nmsgstr ", m_Entry.msgStr.Pos))
145 GetString(m_Entry.msgStr);
// NOTE(review): msgID is read (again) here; for entries without a numeric id
// it was already read above - confirm whether the repetition is intended
146 GetString(m_Entry.msgID);
150 CLog::Log(LOGERROR, "POParser: missing msgstr line in entry. Failed entry: %s",
151 m_Entry.Content.c_str());
152 m_Entry.msgStr.Str.clear();
157 // We found a plural form entry. We read it into a vector of CStrEntry types
158 m_Entry.msgStrPlural.clear();
// character 8 of the pattern is the digit between the brackets; the loop
// substitutes '0'..'6' there, so at most seven plural forms are looked up
159 std::string strPattern = "\nmsgstr[0] ";
162 for (int n=0; n<7 ; n++)
164 strPattern[8] = static_cast<char>(n+'0');
165 if (FindLineStart (strPattern, strEntry.Pos))
168 if (strEntry.Str.empty())
170 m_Entry.msgStrPlural.push_back(strEntry);
// keep the vector non-empty even when no plural string could be stored
176 if (m_Entry.msgStrPlural.size() == 0)
178 CLog::Log(LOGERROR, "POParser: msgstr[] plural lines have zero valid strings. "
179 "Failed entry: %s", m_Entry.Content.c_str());
180 m_Entry.msgStrPlural.resize(1); // Put 1 element with an empty string into the vector
// Returns the string stored for the requested plural form of the current entry.
// @param plural  zero-based index into m_Entry.msgStrPlural
// @return reference to the stored string; when the index is out of range an
//         error is logged and the last stored plural form is returned instead
// The fallback index size()-1 relies on the vector holding at least one element.
186 const std::string& CPODocument::GetPlurMsgstr(size_t plural) const
188 if (m_Entry.msgStrPlural.size() < plural+1)
190 CLog::Log(LOGERROR, "POParser: msgstr[%i] plural field requested, but not found in PO file. "
191 "Failed entry: %s", static_cast<int>(plural), m_Entry.Content.c_str());
192 plural = m_Entry.msgStrPlural.size()-1;
194 return m_Entry.msgStrPlural[plural].Str;
// Builds a copy of strInput in which backslash escape sequences are replaced by
// the characters they stand for (see the case list below).
// Problems are reported through log messages that quote m_Entry.Content.
// @param strInput  text between the quotes of a PO string, still escaped
// @return the converted text, built up in strOutput
// NOTE(review): what is written to the output for a trailing backslash or an
// unknown escape is decided next to the two warning calls - confirm before
// changing that handling.
197 std::string CPODocument::UnescapeString(const std::string &strInput)
199 std::string strOutput;
200 if (strInput.empty())
// reserve the input length up front
204 strOutput.reserve(strInput.size());
205 std::string::const_iterator it = strInput.begin();
206 while (it < strInput.end())
209 if (oescchar == '\\')
// a backslash with nothing behind it cannot be decoded; a warning is logged
211 if (it == strInput.end())
214 "POParser: warning, unhandled escape character "
215 "at line-end. Problematic entry: %s",
216 m_Entry.Content.c_str());
// supported escapes; note that "\0" yields a NUL character
221 case 'a': oescchar = '\a'; break;
222 case 'b': oescchar = '\b'; break;
223 case 'v': oescchar = '\v'; break;
224 case 'n': oescchar = '\n'; break;
225 case 't': oescchar = '\t'; break;
226 case 'r': oescchar = '\r'; break;
227 case '"': oescchar = '"' ; break;
228 case '0': oescchar = '\0'; break;
229 case 'f': oescchar = '\f'; break;
230 case '?': oescchar = '\?'; break;
231 case '\'': oescchar = '\''; break;
232 case '\\': oescchar = '\\'; break;
237 "POParser: warning, unhandled escape character. Problematic entry: %s",
238 m_Entry.Content.c_str());
243 strOutput.push_back(oescchar);
// Looks for strToFind in m_Entry.Content.
// @param strToFind  pattern to search for; the callers in this file pass
//                   patterns that begin with LF
// @param FoundPos   out: on success the position just behind the pattern, i.e.
//                   the start of the data; it is overwritten on failure as well
// @return false if the pattern is missing or too close to the end of the entry.
//         NOTE(review): presumably true otherwise - confirm.
248 bool CPODocument::FindLineStart(const std::string &strToFind, size_t &FoundPos)
251 FoundPos = m_Entry.Content.find(strToFind);
// the "+ 2" demands at least two characters behind the pattern
// NOTE(review): the inline comment below speaks of one character - presumably
// the second one is the LF that ends the line; confirm
253 if (FoundPos == std::string::npos || FoundPos + strToFind.size() + 2 > m_Entry.Content.size())
254 return false; // if we don't find the string or if we don't have at least one char after it
256 FoundPos += strToFind.size(); // to set the pos marker to the exact start of the real data
// Reads the numeric id that starts at m_Entry.xIDPos in m_Entry.Content and
// stores it in m_Entry.xID. When the character at that position is not a digit
// two error messages are logged instead.
// @return NOTE(review): presumably true when an id was stored, false otherwise -
//         confirm against the return statements
// NOTE(review): isdigit() is called with a plain char; for negative char values
// the behaviour is undefined, a cast to unsigned char would avoid that.
260 bool CPODocument::ParseNumID()
262 if (isdigit(m_Entry.Content.at(m_Entry.xIDPos))) // verify if the first char is digit
264 // parse the leading decimal digits as the numeric id (intended to fit a uint32)
265 m_Entry.xID = strtol(&m_Entry.Content[m_Entry.xIDPos], NULL, 10);
269 CLog::Log(LOGERROR, "POParser: found numeric id descriptor, but no valid id can be read, "
270 "entry was handled as normal msgid entry");
271 CLog::Log(LOGERROR, "POParser: The problematic entry: %s",
272 m_Entry.Content.c_str());
// Reads the quoted string that starts at strEntry.Pos within m_Entry.Content.
// The text between the quotes of each consecutive line is appended to
// strEntry.Str and the collected text is unescaped at the end.
// @param strEntry  in: Pos marks the start of the first quoted line;
//                  out: Str receives the decoded text
276 void CPODocument::GetString(CStrEntry &strEntry)
279 size_t startPos = strEntry.Pos;
280 strEntry.Str.clear();
282 while (startPos < m_Entry.Content.size())
// the current line ends at the next LF, or at the end of the entry if none is left
284 nextLFPos = m_Entry.Content.find("\n", startPos);
285 if (nextLFPos == std::string::npos)
286 nextLFPos = m_Entry.Content.size();
288 // check syntax, if it really is a valid quoted string line
// (presumably a line failing this check ends the string - confirm)
289 if (nextLFPos-startPos < 2 || m_Entry.Content[startPos] != '\"' ||
290 m_Entry.Content[nextLFPos-1] != '\"')
// append the line without its opening and closing quote, then go to the next line
293 strEntry.Str.append(m_Entry.Content, startPos+1, nextLFPos-2-startPos);
294 startPos = nextLFPos+1;
297 strEntry.Str = UnescapeString(strEntry.Str);
// Rewrites m_strBuffer so that it only contains LF line endings and updates
// m_POfilelength to the new buffer size.
// @param filename  only used in the debug log messages
300 void CPODocument::ConvertLineEnds(const std::string &filename)
302 size_t foundPos = m_strBuffer.find_first_of("\r");
303 if (foundPos == std::string::npos)
304 return; // We have only Linux style line endings in the file, nothing to do
// the first CR alone decides which message is logged: a CR that is not followed
// by LF is reported as Mac style, otherwise Win style is reported
306 if (foundPos+1 >= m_strBuffer.size() || m_strBuffer[foundPos+1] != '\n')
307 CLog::Log(LOGDEBUG, "POParser: PO file has Mac Style Line Endings. "
308 "Converted in memory to Linux LF for file: %s", filename.c_str());
310 CLog::Log(LOGDEBUG, "POParser: PO file has Win Style Line Endings. "
311 "Converted in memory to Linux LF for file: %s", filename.c_str());
// the converted text is built in a second string of the same capacity
314 strTemp.reserve(m_strBuffer.size());
315 for (std::string::const_iterator it = m_strBuffer.begin(); it < m_strBuffer.end(); it++)
// (presumably the next two statements are only reached for a CR character - confirm)
319 if (it+1 == m_strBuffer.end() || *(it+1) != '\n')
320 strTemp.push_back('\n'); // convert Mac style line ending and continue
321 continue; // we have Win style line ending so we exclude this CR now
323 strTemp.push_back(*it);
// install the converted text and refresh the cached length
325 m_strBuffer.swap(strTemp);
326 m_POfilelength = m_strBuffer.size();