2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
27 #include "JSGlobalObjectFunctions.h"
29 #include "Identifier.h"
30 #include "Operations.h"
36 #include <wtf/ASCIICType.h>
37 #include <wtf/Assertions.h>
38 #include <wtf/MathExtras.h>
39 #include <wtf/StringExtras.h>
40 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
49 using namespace WTF::Unicode;
// Compile-time check that a UString object occupies exactly the size of one pointer.
54 COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
56 // Construct a string with UTF-16 data.
// 'characters' points at the UTF-16 code units and 'length' is forwarded unchanged to
// StringImpl::create(). A null 'characters' pointer initializes m_impl with 0 instead
// of calling StringImpl::create().
57 UString::UString(const UChar* characters, unsigned length)
58 : m_impl(characters ? StringImpl::create(characters, length) : 0)
62 // Construct a string with UTF-16 data, from a null-terminated source.
// The source is scanned for the first UChar(0) to determine how many code units are
// handed to StringImpl::create().
63 UString::UString(const UChar* characters)
// Scan until the terminating zero code unit.
// NOTE(review): the declaration and increment of 'length' are not visible in this
// listing; presumably it starts at 0 and advances by one per iteration — confirm.
69 while (characters[length] != UChar(0))
72 m_impl = StringImpl::create(characters, length);
75 // Construct a string with latin1 data.
// 'characters' and 'length' are forwarded unchanged to StringImpl::create(). A null
// 'characters' pointer initializes m_impl with 0 instead of calling StringImpl::create().
76 UString::UString(const char* characters, unsigned length)
77 : m_impl(characters ? StringImpl::create(characters, length) : 0)
81 // Construct a string with latin1 data, from a null-terminated source.
// No length is passed here; the single-argument StringImpl::create() overload receives
// only the pointer. A null 'characters' pointer initializes m_impl with 0 instead.
82 UString::UString(const char* characters)
83 : m_impl(characters ? StringImpl::create(characters) : 0)
// Returns a UString holding the decimal representation of 'i'.
87 UString UString::number(int i)
// Scratch buffer sized at three UChars per byte of 'i' plus one more
// (presumably the extra slot is for a '-' sign — confirm).
89 UChar buf[1 + sizeof(i) * 3];
// One-past-the-end of the scratch buffer; the returned length is measured back from here.
90 UChar* end = buf + WTF_ARRAY_LENGTH(buf);
// INT_MIN is formatted with snprintf("%d") into a char buffer and returned directly.
// NOTE(review): presumably special-cased because INT_MIN cannot be negated without
// overflow — confirm against the sign handling, which is not fully visible here.
95 else if (i == INT_MIN) {
96 char minBuf[1 + sizeof(i) * 3];
97 snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
98 return UString(minBuf);
100 bool negative = false;
// Store the lowest decimal digit of 'i' as a character in the slot just below 'p'.
106 *--p = static_cast<unsigned short>((i % 10) + '0');
// The result is the run of UChars from 'p' up to 'end'.
113 return UString(p, static_cast<unsigned>(end - p));
// Returns a UString holding the decimal representation of 'i'.
116 UString UString::number(long long i)
// Scratch buffer sized at three UChars per byte of 'i' plus one more
// (presumably the extra slot is for a '-' sign — confirm).
118 UChar buf[1 + sizeof(i) * 3];
// One-past-the-end of the scratch buffer; the returned length is measured back from here.
119 UChar* end = buf + WTF_ARRAY_LENGTH(buf);
// The minimum long long value is formatted with snprintf into a char buffer and
// returned directly.
124 else if (i == std::numeric_limits<long long>::min()) {
125 char minBuf[1 + sizeof(i) * 3];
// NOTE(review): two format strings appear ("%I64d" and "%lld"); presumably exactly one
// is selected by a platform conditional that is not visible in this listing — confirm.
127 snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
129 snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
131 return UString(minBuf);
133 bool negative = false;
// Store the lowest decimal digit of 'i' as a character in the slot just below 'p'.
139 *--p = static_cast<unsigned short>((i % 10) + '0');
// The result is the run of UChars from 'p' up to 'end'.
146 return UString(p, static_cast<unsigned>(end - p));
// Returns a UString holding the decimal representation of 'u'.
149 UString UString::number(unsigned u)
// Scratch buffer sized at three UChars per byte of 'u'; unlike the signed overloads
// in this file there is no additional slot.
151 UChar buf[sizeof(u) * 3];
// One-past-the-end of the scratch buffer; the returned length is measured back from here.
152 UChar* end = buf + WTF_ARRAY_LENGTH(buf);
// Store the lowest decimal digit of 'u' as a character in the slot just below 'p'.
159 *--p = static_cast<unsigned short>((u % 10) + '0');
// The result is the run of UChars from 'p' up to 'end'.
164 return UString(p, static_cast<unsigned>(end - p));
// Returns a UString holding the decimal representation of 'l'.
167 UString UString::number(long l)
// Scratch buffer sized at three UChars per byte of 'l' plus one more
// (presumably the extra slot is for a '-' sign — confirm).
169 UChar buf[1 + sizeof(l) * 3];
// One-past-the-end of the scratch buffer; the returned length is measured back from here.
170 UChar* end = buf + WTF_ARRAY_LENGTH(buf);
// LONG_MIN is formatted with snprintf("%ld") into a char buffer and returned directly.
// NOTE(review): presumably special-cased because LONG_MIN cannot be negated without
// overflow — confirm against the sign handling, which is not fully visible here.
175 else if (l == LONG_MIN) {
176 char minBuf[1 + sizeof(l) * 3];
177 snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
178 return UString(minBuf);
180 bool negative = false;
// Store the lowest decimal digit of 'l' as a character in the slot just below 'p'.
186 *--p = static_cast<unsigned short>((l % 10) + '0');
// The result is the run of UChars from 'p' up to 'end'.
// NOTE(review): the pointer difference is passed without the static_cast<unsigned>
// that the int, long long and unsigned overloads apply — consider matching them.
193 return UString(p, end - p);
// Returns a UString built from the result of numberToString(d, buffer).
196 UString UString::number(double d)
// Local buffer handed to numberToString() for the conversion.
198 NumberToStringBuffer buffer;
199 return UString(numberToString(d, buffer));
// Returns the substring of at most 'length' characters starting at 'offset', created
// with StringImpl::create(m_impl, offset, length). Out-of-range arguments are clamped
// rather than rejected.
202 UString UString::substringSharingImpl(unsigned offset, unsigned length) const
204 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
206 unsigned stringLength = this->length();
// Clamp 'offset' to the string length, then clamp 'length' to what remains after it.
207 offset = min(offset, stringLength);
208 length = min(length, stringLength - offset);
// The request covers the whole string (offset 0 and full length).
// NOTE(review): the body of this branch is not visible in this listing; presumably it
// returns this string itself without creating a new StringImpl — confirm.
210 if (!offset && length == stringLength)
212 return UString(StringImpl::create(m_impl, offset, length));
// Compares the UString 's1' against the null-terminated C string 's2'.
215 bool operator==(const UString& s1, const char *s2)
// [u, uend) is the range of UChars in s1.
220 const UChar* u = s1.characters();
221 const UChar* uend = u + s1.length();
// Continue while neither s1's range nor s2 (at its null terminator) has ended.
222 while (u != uend && *s2) {
// Each char of s2 is compared as an unsigned char against the current UChar of s1.
223 if (u[0] != (unsigned char)*s2)
// True only when s1's range is exhausted and s2 is at its null terminator.
229 return u == uend && *s2 == 0;
// Less-than comparison of two UStrings based on their UChar values.
232 bool operator<(const UString& s1, const UString& s2)
234 const unsigned l1 = s1.length();
235 const unsigned l2 = s2.length();
// Length of the shorter of the two strings.
236 const unsigned lmin = l1 < l2 ? l1 : l2;
237 const UChar* c1 = s1.characters();
238 const UChar* c2 = s2.characters();
// Continue while still within the shorter length and the current UChars are equal.
240 while (l < lmin && *c1 == *c2) {
// Compare the UChars currently pointed to by c1 and c2.
// NOTE(review): the condition guarding this return, and the result used otherwise,
// are not visible in this listing.
246 return (c1[0] < c2[0]);
// Greater-than comparison of two UStrings based on their UChar values.
251 bool operator>(const UString& s1, const UString& s2)
253 const unsigned l1 = s1.length();
254 const unsigned l2 = s2.length();
// Length of the shorter of the two strings.
255 const unsigned lmin = l1 < l2 ? l1 : l2;
256 const UChar* c1 = s1.characters();
257 const UChar* c2 = s2.characters();
// Continue while still within the shorter length and the current UChars are equal.
259 while (l < lmin && *c1 == *c2) {
// Compare the UChars currently pointed to by c1 and c2.
// NOTE(review): the condition guarding this return, and the result used otherwise,
// are not visible in this listing.
265 return (c1[0] > c2[0]);
// Returns a CString with one char per UChar of this string, with everything outside
// printable 7-bit ASCII replaced by '?'.
270 CString UString::ascii() const
272 // Printable 7-bit ASCII encoding - characters 0x20..0x7e, and the zero
273 // character, are preserved; every other character is converted to '?'.
275 unsigned length = this->length();
276 const UChar* characters = this->characters();
// The result is created uninitialized with room for 'length' chars; the loop below
// fills it through characterBuffer (presumably set by newUninitialized — confirm).
278 char* characterBuffer;
279 CString result = CString::newUninitialized(length, characterBuffer);
281 for (unsigned i = 0; i < length; ++i) {
282 UChar ch = characters[i];
// Non-zero characters below 0x20 or at/above 0x7f become '?'; others are stored as-is.
283 characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch;
// Returns a CString with one char per UChar of this string, with characters above 0xff
// replaced by '?'.
289 CString UString::latin1() const
291 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
292 // preserved, characters outside of this range are converted to '?'.
294 unsigned length = this->length();
295 const UChar* characters = this->characters();
// The result is created uninitialized with room for 'length' chars; the loop below
// fills it through characterBuffer (presumably set by newUninitialized — confirm).
297 char* characterBuffer;
298 CString result = CString::newUninitialized(length, characterBuffer);
300 for (unsigned i = 0; i < length; ++i) {
301 UChar ch = characters[i];
// Characters above 0xff become '?'; all others are stored as-is.
302 characterBuffer[i] = ch > 0xff ? '?' : ch;
308 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
// 'buffer' is taken by reference and is advanced past the three bytes written.
309 static inline void putUTF8Triple(char*& buffer, UChar ch)
// Only values of 0x0800 and above are expected here.
311 ASSERT(ch >= 0x0800);
// First byte: 0xE0 combined with bits 12..15 of ch.
312 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
// Second byte: 0x80 combined with bits 6..11 of ch.
313 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
// Third byte: 0x80 combined with bits 0..5 of ch.
314 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
// Converts this string's UTF-16 data to UTF-8 with convertUTF16ToUTF8() and returns the
// converted bytes as a CString. 'strict' is forwarded to convertUTF16ToUTF8().
317 CString UString::utf8(bool strict) const
319 unsigned length = this->length();
320 const UChar* characters = this->characters();
322 // Allocate a buffer big enough to hold all the characters
323 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
324 // Optimization ideas, if we find this function is hot:
325 // * We could speculatively create a CStringBuffer to contain 'length'
326 // characters, and resize if necessary (i.e. if the buffer contains
327 // non-ascii characters). (Alternatively, scan the buffer first for
328 // ascii characters, so we know this will be sufficient).
329 // * We could allocate a CStringBuffer with an appropriate size to
330 // have a good chance of being able to write the string into the
331 // buffer without reallocing (say, 1.5 x length).
// Guard for lengths where 'length * 3' below would not fit in an unsigned.
// NOTE(review): the action taken when this triggers is not visible in this listing.
332 if (length > numeric_limits<unsigned>::max() / 3)
334 Vector<char, 1024> bufferVector(length * 3);
// 'characters' and 'buffer' are passed by address to the conversion; the code below
// relies on them marking how far the source was read and the output was written.
336 char* buffer = bufferVector.data();
337 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
338 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
340 // Only produced from strict conversion.
// NOTE(review): the action taken for sourceIllegal is not visible in this listing.
341 if (result == sourceIllegal)
344 // Check for an unconverted high surrogate.
345 if (result == sourceExhausted) {
348 // This should be one unpaired high surrogate. Treat it the same
349 // way as an unpaired high surrogate would have been handled in
350 // the middle of a string with non-strict conversion - which is
351 // to say, simply encode it to UTF-8.
352 ASSERT((characters + 1) == (this->characters() + length));
353 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
354 // There should be room left, since one UChar hasn't been converted.
// NOTE(review): both sides of this comparison are offsets from the same 'buffer'
// pointer, so it only checks 3 <= bufferVector.size() and not the room remaining
// after the current write position. Comparing against
// bufferVector.data() + bufferVector.size() looks like the intent — confirm.
355 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
356 putUTF8Triple(buffer, *characters);
// The result covers the bytes from the start of bufferVector up to 'buffer'.
359 return CString(bufferVector.data(), buffer - bufferVector.data());