2 * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
3 * Copyright (C) 2010-2011 Patrick Gansterer <paroga@paroga.com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Library General Public License for more details.
19 * You should have received a copy of the GNU Library General Public License
20 * along with this library; see the file COPYING.LIB. If not, write to
21 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 * Boston, MA 02110-1301, USA.
26 #include "TextCodecWinCE.h"
28 #include "FontCache.h"
32 #include <wtf/HashMap.h>
33 #include <wtf/HashSet.h>
34 #include <wtf/text/CString.h>
35 #include <wtf/text/WTFString.h>
36 #include <wtf/text/StringHash.h>
42 String m_friendlyName;
44 Vector<CString> m_aliases;
// Object whose constructor fills the charset tables of this file. Only
// languageManager() is granted access, so it is the sole place an instance
// can be created.
// NOTE(review): the "private:" label and the constructor declaration were
// missing from the damaged listing and are restored here (the constructor is
// defined out of line below) — confirm against the upstream file.
class LanguageManager {
private:
    LanguageManager();

    friend LanguageManager& languageManager();
};
54 // Usage: a lookup table used to get CharsetInfo with code page ID.
55 // Key: code page ID. Value: charset information.
56 static HashMap<UINT, CString>& codePageCharsets()
58 static HashMap<UINT, CString> cc;
62 static HashMap<String, CharsetInfo>& knownCharsets()
64 static HashMap<String, CharsetInfo> kc;
68 // Usage: a map that stores charsets that are supported by system. Sorted by name.
69 // Key: charset. Value: code page ID.
70 typedef HashSet<String> CharsetSet;
71 static CharsetSet& supportedCharsets()
77 static LanguageManager& languageManager()
79 static LanguageManager lm;
83 LanguageManager::LanguageManager()
85 IEnumCodePage* enumInterface;
86 IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
87 if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
90 while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
91 if (!IsValidCodePage(cpInfo.uiCodePage))
94 HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
96 CString name(String(cpInfo.wszWebCharset).latin1());
97 if (i == codePageCharsets().end()) {
99 info.m_codePage = cpInfo.uiCodePage;
100 knownCharsets().set(name.data(), info);
101 i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
103 if (i != codePageCharsets().end()) {
104 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
105 ASSERT(j != knownCharsets().end());
106 CharsetInfo& info = j->second;
107 info.m_name = i->second.data();
108 info.m_friendlyName = cpInfo.wszDescription;
109 info.m_aliases.append(name);
110 info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
111 info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
112 String cpName = "cp" + String::number(cpInfo.uiCodePage);
113 info.m_aliases.append(cpName.latin1());
114 supportedCharsets().add(i->second.data());
117 enumInterface->Release();
121 static UINT getCodePage(const char* name)
123 // Explicitly use a "const" reference to fix the silly VS build error
124 // saying "==" is not found for const_iterator and iterator
125 const HashMap<String, CharsetInfo>& charsets = knownCharsets();
126 HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
127 return i == charsets.end() ? CP_ACP : i->second.m_codePage;
130 static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
132 return adoptPtr(new TextCodecWinCE(getCodePage(encoding.name())));
135 TextCodecWinCE::TextCodecWinCE(UINT codePage)
136 : m_codePage(codePage)
140 TextCodecWinCE::~TextCodecWinCE()
144 void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
147 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
148 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
149 if (j != knownCharsets().end()) {
150 registrar(j->second.m_name.data(), j->second.m_name.data());
151 for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
152 registrar(alias->data(), j->second.m_name.data());
157 void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
160 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
161 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
162 if (j != knownCharsets().end())
163 registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
167 static DWORD getCodePageFlags(UINT codePage)
169 if (codePage == 42) // Symbol
172 // Microsoft says the flag must be 0 for the following code pages
173 if (codePage > 50000) {
174 if ((codePage >= 50220 && codePage <= 50222)
180 || (codePage >= 57002 && codePage <= 57001)
181 || codePage == 65000 // UTF-7
186 return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
// Returns a pointer to the first byte in [bytes, bytes + length) whose high
// bit is set, or bytes + length when every byte is 7-bit ASCII.
// NOTE(review): the loop body and the return statement were missing from the
// damaged listing and are restored here — confirm against the upstream file.
static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
{
    for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) {
        if (*bytes & 0x80)
            break;
    }
    return bytes;
}
198 static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left)
201 if (!bytes || !length)
204 DWORD flags = getCodePageFlags(codePage);
206 int testLength = length;
207 int untestedLength = length;
209 int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
211 if (resultLength > 0) {
212 int oldSize = result.size();
213 result.resize(oldSize + resultLength);
215 MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
217 if (testLength == untestedLength) {
218 *left = length - testLength;
221 untestedLength -= testLength;
222 length -= testLength;
225 untestedLength = testLength - 1;
226 if (!untestedLength) {
231 testLength = (untestedLength + 1) / 2;
235 String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
237 if (!m_decodeBuffer.isEmpty()) {
238 m_decodeBuffer.append(bytes, length);
239 bytes = m_decodeBuffer.data();
240 length = m_decodeBuffer.size();
244 Vector<UChar, 8192> result;
246 decodeInternal(result, m_codePage, bytes, length, &left);
250 if (!flush && left < 16)
256 return String::adopt(result);
261 bytes += length - left + 1;
264 if (left && !flush) {
265 if (m_decodeBuffer.isEmpty())
266 m_decodeBuffer.append(bytes + length - left, left);
268 memmove(m_decodeBuffer.data(), bytes + length - left, left);
269 m_decodeBuffer.resize(left);
272 m_decodeBuffer.clear();
274 return String::adopt(result);
277 CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
279 if (!characters || !length)
282 int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
284 // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
286 if (resultLength <= 0)
289 char* characterBuffer;
290 CString result = CString::newUninitialized(resultLength, characterBuffer);
292 WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, characterBuffer, resultLength, 0, 0);
297 void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
300 for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
301 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
302 if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
307 } // namespace WebCore