code.vuplus.com Git - vuplus_webkit/blob - Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp

   1 /*
   2  * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserved.
   3  * Copyright (C) 2008 Holger Hans Peter Freyther
   4  * Copyright (C) Research In Motion Limited 2011. All rights reserved.
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public License
  17  * along with this library; see the file COPYING.LIB.  If not, write to
  18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19  * Boston, MA 02110-1301, USA.
  20  *
  21  */
  22
  23 #include "config.h"
  24 #include "SurrogatePairAwareTextIterator.h"
  25
  26 #if USE(ICU_UNICODE)
  27 #include <unicode/unorm.h>
  28 #endif
  29
  30 using namespace WTF;
  31 using namespace Unicode;
  32
  33 namespace WebCore {
  34
  35 SurrogatePairAwareTextIterator::SurrogatePairAwareTextIterator(const UChar* characters, int currentCharacter, int lastCharacter, int endCharacter)
  36     : m_characters(characters)
  37     , m_currentCharacter(currentCharacter)
  38     , m_lastCharacter(lastCharacter)
  39     , m_endCharacter(endCharacter)
  40 {
  41 }
  42
  43 bool SurrogatePairAwareTextIterator::consume(UChar32& character, unsigned& clusterLength)
  44 {
  45     if (m_currentCharacter >= m_lastCharacter)
  46         return false;
  47
  48     character = *m_characters;
  49     clusterLength = 1;
  50
  51     if (character < 0x3041)
  52         return true;
  53
  54     if (character <= 0x30FE) {
  55         // Deal with Hiragana and Katakana voiced and semi-voiced syllables.
  56         // Normalize into composed form, and then look for glyph with base + combined mark.
  57         // Check above for character range to minimize performance impact.
  58         if (UChar32 normalized = normalizeVoicingMarks()) {
  59             character = normalized;
  60             clusterLength = 2;
  61         }
  62         return true;
  63     }
  64
  65     if (!U16_IS_SURROGATE(character))
  66         return true;
  67
  68     // If we have a surrogate pair, make sure it starts with the high part.
  69     if (!U16_IS_SURROGATE_LEAD(character))
  70         return false;
  71
  72     // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) code point before glyph lookup.
  73     // Make sure we have another character and it's a low surrogate.
  74     if (m_currentCharacter + 1 >= m_endCharacter)
  75         return false;
  76
  77     UChar low = m_characters[1];
  78     if (!U16_IS_TRAIL(low))
  79         return false;
  80
  81     character = U16_GET_SUPPLEMENTARY(character, low);
  82     clusterLength = 2;
  83     return true;
  84 }
  85
  86 void SurrogatePairAwareTextIterator::advance(unsigned advanceLength)
  87 {
  88     m_characters += advanceLength;
  89     m_currentCharacter += advanceLength;
  90 }
  91
  92 UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
  93 {
  94     // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
  95     static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
  96
  97     if (m_currentCharacter + 1 >= m_endCharacter)
  98         return 0;
  99
 100     if (combiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
 101 #if USE(ICU_UNICODE)
 102         // Normalize into composed form using 3.2 rules.
 103         UChar normalizedCharacters[2] = { 0, 0 };
 104         UErrorCode uStatus = U_ZERO_ERROR;
 105         int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
 106         if (resultLength == 1 && !uStatus)
 107             return normalizedCharacters[0];
 108 #elif USE(QT4_UNICODE)
 109         QString tmp(reinterpret_cast<const QChar*>(m_characters), 2);
 110         QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2);
 111         if (res.length() == 1)
 112             return res.at(0).unicode();
 113 #endif
 114     }
 115
 116     return 0;
 117 }
 118
 119 }