code.vuplus.com Git - vuplus_webkit/blob - Source/WebCore/platform/text/wince/TextCodecWinCE.cpp

   1 /*
   2  * Copyright (C) 2007-2009 Torch Mobile, Inc. All rights reserved.
   3  * Copyright (C) 2010-2011 Patrick Gansterer <paroga@paroga.com>
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  *  This library is distributed in the hope that it will be useful,
  15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  *  Library General Public License for more details.
  18  *
  19  *  You should have received a copy of the GNU Library General Public License
  20  *  along with this library; see the file COPYING.LIB.  If not, write to
  21  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22  *  Boston, MA 02110-1301, USA.
  23  */
  24
  25 #include "config.h"
  26 #include "TextCodecWinCE.h"
  27
  28 #include "FontCache.h"
  29 #include <mlang.h>
  30 #include <winbase.h>
  31 #include <winnls.h>
  32 #include <wtf/HashMap.h>
  33 #include <wtf/HashSet.h>
  34 #include <wtf/text/CString.h>
  35 #include <wtf/text/WTFString.h>
  36 #include <wtf/text/StringHash.h>
  37
  38 namespace WebCore {
  39
  40 struct CharsetInfo {
  41     CString m_name;
  42     String m_friendlyName;
  43     UINT m_codePage;
  44     Vector<CString> m_aliases;
  45 };
  46
  47 class LanguageManager {
  48 private:
  49     LanguageManager();
  50
  51     friend LanguageManager& languageManager();
  52 };
  53
  54 // Usage: a lookup table used to get CharsetInfo with code page ID.
  55 // Key: code page ID. Value: charset information.
  56 static HashMap<UINT, CString>& codePageCharsets()
  57 {
  58     static HashMap<UINT, CString> cc;
  59     return cc;
  60 }
  61
  62 static HashMap<String, CharsetInfo>& knownCharsets()
  63 {
  64     static HashMap<String, CharsetInfo> kc;
  65     return kc;
  66 }
  67
  68 // Usage: a map that stores charsets that are supported by system. Sorted by name.
  69 // Key: charset. Value: code page ID.
  70 typedef HashSet<String> CharsetSet;
  71 static CharsetSet& supportedCharsets()
  72 {
  73     static CharsetSet sl;
  74     return sl;
  75 }
  76
  77 static LanguageManager& languageManager()
  78 {
  79     static LanguageManager lm;
  80     return lm;
  81 }
  82
  83 LanguageManager::LanguageManager()
  84 {
  85     IEnumCodePage* enumInterface;
  86     IMultiLanguage* mli = FontCache::getMultiLanguageInterface();
  87     if (mli && S_OK == mli->EnumCodePages(MIMECONTF_BROWSER, &enumInterface)) {
  88         MIMECPINFO cpInfo;
  89         ULONG ccpInfo;
  90         while (S_OK == enumInterface->Next(1, &cpInfo, &ccpInfo) && ccpInfo) {
  91             if (!IsValidCodePage(cpInfo.uiCodePage))
  92                 continue;
  93
  94             HashMap<UINT, CString>::iterator i = codePageCharsets().find(cpInfo.uiCodePage);
  95
  96             CString name(String(cpInfo.wszWebCharset).latin1());
  97             if (i == codePageCharsets().end()) {
  98                 CharsetInfo info;
  99                 info.m_codePage = cpInfo.uiCodePage;
 100                 knownCharsets().set(name.data(), info);
 101                 i = codePageCharsets().set(cpInfo.uiCodePage, name).first;
 102             }
 103             if (i != codePageCharsets().end()) {
 104                 HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(String(i->second.data(), i->second.length()));
 105                 ASSERT(j != knownCharsets().end());
 106                 CharsetInfo& info = j->second;
 107                 info.m_name = i->second.data();
 108                 info.m_friendlyName = cpInfo.wszDescription;
 109                 info.m_aliases.append(name);
 110                 info.m_aliases.append(String(cpInfo.wszHeaderCharset).latin1());
 111                 info.m_aliases.append(String(cpInfo.wszBodyCharset).latin1());
 112                 String cpName = "cp" + String::number(cpInfo.uiCodePage);
 113                 info.m_aliases.append(cpName.latin1());
 114                 supportedCharsets().add(i->second.data());
 115             }
 116         }
 117         enumInterface->Release();
 118     }
 119 }
 120
 121 static UINT getCodePage(const char* name)
 122 {
 123     // Explicitly use a "const" reference to fix the silly VS build error
 124     // saying "==" is not found for const_iterator and iterator
 125     const HashMap<String, CharsetInfo>& charsets = knownCharsets();
 126     HashMap<String, CharsetInfo>::const_iterator i = charsets.find(name);
 127     return i == charsets.end() ? CP_ACP : i->second.m_codePage;
 128 }
 129
 130 static PassOwnPtr<TextCodec> newTextCodecWinCE(const TextEncoding& encoding, const void*)
 131 {
 132     return adoptPtr(new TextCodecWinCE(getCodePage(encoding.name())));
 133 }
 134
 135 TextCodecWinCE::TextCodecWinCE(UINT codePage)
 136     : m_codePage(codePage)
 137 {
 138 }
 139
 140 TextCodecWinCE::~TextCodecWinCE()
 141 {
 142 }
 143
 144 void TextCodecWinCE::registerExtendedEncodingNames(EncodingNameRegistrar registrar)
 145 {
 146     languageManager();
 147     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
 148         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
 149         if (j != knownCharsets().end()) {
 150             registrar(j->second.m_name.data(), j->second.m_name.data());
 151             for (Vector<CString>::const_iterator alias = j->second.m_aliases.begin(); alias != j->second.m_aliases.end(); ++alias)
 152                 registrar(alias->data(), j->second.m_name.data());
 153         }
 154     }
 155 }
 156
 157 void TextCodecWinCE::registerExtendedCodecs(TextCodecRegistrar registrar)
 158 {
 159     languageManager();
 160     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
 161         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
 162         if (j != knownCharsets().end())
 163             registrar(j->second.m_name.data(), newTextCodecWinCE, 0);
 164     }
 165 }
 166
 167 static DWORD getCodePageFlags(UINT codePage)
 168 {
 169     if (codePage == 42) // Symbol
 170         return 0;
 171
 172     // Microsoft says the flag must be 0 for the following code pages
 173     if (codePage > 50000) {
 174         if ((codePage >= 50220 && codePage <= 50222)
 175             || codePage == 50225
 176             || codePage == 50227
 177             || codePage == 50229
 178             || codePage == 52936
 179             || codePage == 54936
 180             || (codePage >= 57002 && codePage <= 57001)
 181             || codePage == 65000 // UTF-7
 182             )
 183             return 0;
 184     }
 185
 186     return MB_PRECOMPOSED | MB_ERR_INVALID_CHARS;
 187 }
 188
 189 static inline const char* findFirstNonAsciiCharacter(const char* bytes, size_t length)
 190 {
 191     for (const char* bytesEnd = bytes + length; bytes < bytesEnd; ++bytes) {
 192         if (*bytes & 0x80)
 193             break;
 194     }
 195     return bytes;
 196 }
 197
 198 static void decodeInternal(Vector<UChar, 8192>& result, UINT codePage, const char* bytes, size_t length, size_t* left)
 199 {
 200     *left = length;
 201     if (!bytes || !length)
 202         return;
 203
 204     DWORD flags = getCodePageFlags(codePage);
 205
 206     int testLength = length;
 207     int untestedLength = length;
 208     for (;;) {
 209         int resultLength = MultiByteToWideChar(codePage, flags, bytes, testLength, 0, 0);
 210
 211         if (resultLength > 0) {
 212             int oldSize = result.size();
 213             result.resize(oldSize + resultLength);
 214
 215             MultiByteToWideChar(codePage, flags, bytes, testLength, result.data() + oldSize, resultLength);
 216
 217             if (testLength == untestedLength) {
 218                 *left = length - testLength;
 219                 break;
 220             }
 221             untestedLength -= testLength;
 222             length -= testLength;
 223             bytes += testLength;
 224         } else {
 225             untestedLength = testLength - 1;
 226             if (!untestedLength) {
 227                 *left = length;
 228                 break;
 229             }
 230         }
 231         testLength = (untestedLength + 1) / 2;
 232     }
 233 }
 234
 235 String TextCodecWinCE::decode(const char* bytes, size_t length, bool flush, bool stopOnError, bool& sawError)
 236 {
 237     if (!m_decodeBuffer.isEmpty()) {
 238         m_decodeBuffer.append(bytes, length);
 239         bytes = m_decodeBuffer.data();
 240         length = m_decodeBuffer.size();
 241     }
 242
 243     size_t left;
 244     Vector<UChar, 8192> result;
 245     for (;;) {
 246         decodeInternal(result, m_codePage, bytes, length, &left);
 247         if (!left)
 248             break;
 249
 250         if (!flush && left < 16)
 251             break;
 252
 253         result.append(L'?');
 254         sawError = true;
 255         if (stopOnError)
 256             return String::adopt(result);
 257
 258         if (left == 1)
 259             break;
 260
 261         bytes += length - left + 1;
 262         length = left - 1;
 263     }
 264     if (left && !flush) {
 265         if (m_decodeBuffer.isEmpty())
 266             m_decodeBuffer.append(bytes + length - left, left);
 267         else {
 268             memmove(m_decodeBuffer.data(), bytes + length - left, left);
 269             m_decodeBuffer.resize(left);
 270         }
 271     } else
 272         m_decodeBuffer.clear();
 273
 274     return String::adopt(result);
 275 }
 276
 277 CString TextCodecWinCE::encode(const UChar* characters, size_t length, UnencodableHandling)
 278 {
 279     if (!characters || !length)
 280         return CString();
 281
 282     int resultLength = WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, 0, 0, 0, 0);
 283
 284     // FIXME: We need to implement UnencodableHandling: QuestionMarksForUnencodables, EntitiesForUnencodables, and URLEncodedEntitiesForUnencodables.
 285
 286     if (resultLength <= 0)
 287         return "?";
 288
 289     char* characterBuffer;
 290     CString result = CString::newUninitialized(resultLength, characterBuffer);
 291
 292     WideCharToMultiByte(m_codePage, WC_COMPOSITECHECK, characters, length, characterBuffer, resultLength, 0, 0);
 293
 294     return result;
 295 }
 296
 297 void TextCodecWinCE::enumerateSupportedEncodings(EncodingReceiver& receiver)
 298 {
 299     languageManager();
 300     for (CharsetSet::iterator i = supportedCharsets().begin(); i != supportedCharsets().end(); ++i) {
 301         HashMap<String, CharsetInfo>::iterator j = knownCharsets().find(*i);
 302         if (j != knownCharsets().end() && !receiver.receive(j->second.m_name.data(), j->second.m_friendlyName.charactersWithNullTermination(), j->second.m_codePage))
 303             break;
 304     }
 305 }
 306
 307 } // namespace WebCore