2 * Copyright (C) 2006 George Staikos <staikos@kde.org>
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
5 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 * Copyright (C) 2010 Company 100, Inc.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
28 #include "ScriptCodesFromICU.h"
29 #include "UnicodeFromICU.h"
30 #include "UnicodeMacrosFromICU.h"
// Text-direction class enumerators. Each one is given the value of the
// correspondingly named ICU:: constant, so values can be exchanged with code
// that uses the ICU-style numbering without translation.
// NOTE(review): the opening `enum ... {` line and the closing `};` are not
// visible in this listing; the type is presumably `Direction` (the return type
// of direction(UChar32) declared later in this file) -- confirm.
36 LeftToRight = ICU::U_LEFT_TO_RIGHT,
37 RightToLeft = ICU::U_RIGHT_TO_LEFT,
38 EuropeanNumber = ICU::U_EUROPEAN_NUMBER,
39 EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR,
40 EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR,
41 ArabicNumber = ICU::U_ARABIC_NUMBER,
42 CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR,
43 BlockSeparator = ICU::U_BLOCK_SEPARATOR,
44 SegmentSeparator = ICU::U_SEGMENT_SEPARATOR,
45 WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL,
46 OtherNeutral = ICU::U_OTHER_NEUTRAL,
47 LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING,
48 LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE,
49 RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC,
50 RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING,
51 RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE,
52 PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT,
// Note the ICU-side name carries a DIR_ prefix, unlike its neighbours.
53 NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK,
54 BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL
// Decomposition kinds, the return type of decompositionType(UChar32) declared
// later in this file. Every enumerator aliases the ICU::U_DT_* constant of the
// same meaning, so the numeric values follow the ICU-style numbering.
// NOTE(review): the closing `};` is not visible in this listing, and the last
// enumerator ends with a trailing comma (accepted since C++11; some older
// compilers warn in pedantic mode).
57 enum DecompositionType {
58 DecompositionNone = ICU::U_DT_NONE,
59 DecompositionCanonical = ICU::U_DT_CANONICAL,
60 DecompositionCompat = ICU::U_DT_COMPAT,
61 DecompositionCircle = ICU::U_DT_CIRCLE,
62 DecompositionFinal = ICU::U_DT_FINAL,
63 DecompositionFont = ICU::U_DT_FONT,
64 DecompositionFraction = ICU::U_DT_FRACTION,
65 DecompositionInitial = ICU::U_DT_INITIAL,
66 DecompositionIsolated = ICU::U_DT_ISOLATED,
67 DecompositionMedial = ICU::U_DT_MEDIAL,
68 DecompositionNarrow = ICU::U_DT_NARROW,
69 DecompositionNoBreak = ICU::U_DT_NOBREAK,
70 DecompositionSmall = ICU::U_DT_SMALL,
71 DecompositionSquare = ICU::U_DT_SQUARE,
72 DecompositionSub = ICU::U_DT_SUB,
73 DecompositionSuper = ICU::U_DT_SUPER,
74 DecompositionVertical = ICU::U_DT_VERTICAL,
75 DecompositionWide = ICU::U_DT_WIDE,
// General-category enumerators. Each value is TO_MASK(<ICU category constant>)
// rather than the raw ICU constant.
// NOTE(review): TO_MASK is defined outside this listing; presumably it turns
// the category index into a single-bit mask so that several categories can be
// tested with one bitwise AND -- confirm against its definition.
// NOTE(review): the opening `enum ... {` line and the closing `};` are not
// visible here; the type is presumably `CharCategory` (the return type of
// category(unsigned int) declared later in this file) -- confirm.
80 Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES),
// Letters.
81 Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER),
82 Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER),
83 Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER),
84 Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER),
85 Letter_Other = TO_MASK(ICU::U_OTHER_LETTER),
// Marks.
87 Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK),
88 Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK),
89 Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK),
// Numbers.
91 Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER),
92 Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER),
93 Number_Other = TO_MASK(ICU::U_OTHER_NUMBER),
// Separators.
95 Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR),
96 Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR),
97 Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR),
// Control, format, private-use and surrogate code points.
99 Other_Control = TO_MASK(ICU::U_CONTROL_CHAR),
100 Other_Format = TO_MASK(ICU::U_FORMAT_CHAR),
101 Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR),
102 Other_Surrogate = TO_MASK(ICU::U_SURROGATE),
// Punctuation (the two quote categories are listed after the symbols).
104 Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION),
105 Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION),
106 Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION),
107 Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION),
108 Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION),
// Symbols.
110 Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL),
111 Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL),
112 Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL),
113 Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL),
// Opening / closing quotation punctuation.
115 Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION),
116 Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION)
// Case-conversion entry points. These are declarations only; the
// implementations live outside this listing.
//
// Two overload shapes are offered for foldCase/toLower/toUpper:
//   * UChar f(UChar)  -- maps one UTF-16 code unit and returns the result.
//   * int f(UChar* result, int resultLength,
//           const UChar* source, int sourceLength, bool* isError)
//     -- buffer form: reads `sourceLength` units from `source` and writes into
//        `result`, whose capacity is `resultLength`; a status is reported
//        through `*isError`.
// NOTE(review): the meaning of the int return value (presumably the number of
// units written or required) and whether `isError` may be null are not
// visible here -- confirm against the implementation.
// toTitleCase is only offered in the single-code-unit form.
119 UChar foldCase(UChar);
121 int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
123 int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
125 UChar toUpper(UChar);
126 UChar toLower(UChar);
130 int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
132 UChar toTitleCase(UChar);
// Returns true when `c` lies in the inclusive range U+0600..U+06FF (the
// Unicode "Arabic" block). Code points outside that single range -- including
// other Arabic-script ranges -- yield false.
// NOTE(review): the function's braces are not visible in this listing.
134 inline bool isArabicChar(UChar32 c)
136 return c >= 0x0600 && c <= 0x06FF;
// Declarations only; implemented outside this listing.
// isAlphanumeric takes a single UTF-16 code unit.
139 bool isAlphanumeric(UChar);
// category takes the code point as `unsigned int` and returns its general
// category as a CharCategory value.
141 CharCategory category(unsigned int);
// Returns true when category(c) is exactly Separator_Space. The comparison is
// an equality test, not a bitwise mask test, so it relies on category()
// returning a single category value.
// NOTE(review): the function's braces are not visible in this listing.
143 inline bool isSeparatorSpace(UChar c)
145 return category(c) == Separator_Space;
// Declaration only; takes a single UTF-16 code unit. The definition of
// "printable" is in the implementation, which is outside this listing.
148 bool isPrintableChar(UChar);
// Line-breaking "complex context" query. The parameter is unnamed, so the body
// cannot depend on the code point passed in.
// NOTE(review): the function body is not visible in this listing; presumably
// it returns a fixed value -- confirm.
154 inline bool hasLineBreakingPropertyComplexContext(UChar32)
// Line-breaking query covering "complex context" or ideographic code points;
// unlike the variant above it names its parameter `c`.
// NOTE(review): the function body is not visible in this listing, so how `c`
// is classified cannot be stated from here.
160 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
// Per-code-point property queries. Declarations only; implementations are
// outside this listing.
// NOTE(review): mirroredChar accepts a UChar32 but returns a UChar; if UChar
// is the narrower type (as the names suggest), results outside its range
// cannot be represented -- confirm the typedefs and the intended behaviour.
166 UChar mirroredChar(UChar32);
// Returns one of the text-direction enumerators defined earlier in this file.
168 Direction direction(UChar32);
// NOTE(review): the value returned for a non-digit is not visible here.
172 int digitValue(UChar);
174 unsigned char combiningClass(UChar32);
// Returns one of the DecompositionType enumerators.
176 DecompositionType decompositionType(UChar32);
// Case-insensitive comparison of two UTF-16 buffers: walks indices 0..len-1
// and case-folds a[i] and b[i] one code unit at a time with foldCase(UChar).
// Both buffers must therefore hold at least `len` readable units.
// NOTE(review): the lines that compare c1 with c2 and produce the return value
// are not visible in this listing; presumably the result follows the memcmp
// convention (zero when equal) -- confirm.
178 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
180 for (int i = 0; i < len; ++i) {
// Fold each side independently so the comparison ignores case.
181 UChar c1 = foldCase(a[i]);
182 UChar c2 = foldCase(b[i]);
// Declaration only; takes a single UTF-16 code unit. Which categories count as
// a letter is decided by the implementation, which is outside this listing.
190 bool isLetter(UChar);
192 } // namespace Unicode