2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "MarkupAccumulator.h"
30 #include "CDATASection.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
35 #include "HTMLElement.h"
36 #include "HTMLNames.h"
38 #include "ProcessingInstruction.h"
39 #include "XMLNSNames.h"
40 #include <wtf/text/StringBuilder.h>
41 #include <wtf/unicode/CharacterNames.h>
45 using namespace HTMLNames;
// Appends the |length| UChars starting at |content| to |out|, substituting an
// entity reference for every character that matches an entityMaps entry whose
// mask bit is also set in |entityMask|. Characters between two substitutions
// are copied through unchanged; positionAfterLastEntity marks where the next
// unchanged run starts, and the final run is appended by the last statement.
// NOTE(review): the reference-string literals below look as if their entity
// text was decoded somewhere upstream (the quot literal is not a well-formed
// string as written) -- confirm against the canonical file before building.
47 void appendCharactersReplacingEntities(StringBuilder& out, const UChar* content, size_t length, EntityMask entityMask)
49 DEFINE_STATIC_LOCAL(const String, ampReference, ("&"));
50 DEFINE_STATIC_LOCAL(const String, ltReference, ("<"));
51 DEFINE_STATIC_LOCAL(const String, gtReference, (">"));
52 DEFINE_STATIC_LOCAL(const String, quotReference, ("""));
53 DEFINE_STATIC_LOCAL(const String, nbspReference, (" "));
55 static const EntityDescription entityMaps[] = {
56 { '&', ampReference, EntityAmp },
57 { '<', ltReference, EntityLt },
58 { '>', gtReference, EntityGt },
59 { '"', quotReference, EntityQuot },
60 { noBreakSpace, nbspReference, EntityNbsp },
63 size_t positionAfterLastEntity = 0;
64 for (size_t i = 0; i < length; ++i) {
65 for (size_t m = 0; m < WTF_ARRAY_LENGTH(entityMaps); ++m) {
66 if (content[i] == entityMaps[m].entity && entityMaps[m].mask & entityMask) {
67 out.append(content + positionAfterLastEntity, i - positionAfterLastEntity);
68 out.append(entityMaps[m].reference);
69 positionAfterLastEntity = i + 1;
74 out.append(content + positionAfterLastEntity, length - positionAfterLastEntity);
// Constructs an accumulator. |resolveUrlsMethod| is stored in
// m_resolveURLsMethod, which resolveURLIfNeeded() switches on.
// NOTE(review): the initializers that consume |nodes| and |range| are not
// visible in this excerpt; presumably they seed m_nodes and m_range -- confirm.
77 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range)
80 , m_resolveURLsMethod(resolveUrlsMethod)
// Destructor.
// NOTE(review): the body is not visible in this excerpt; confirm it has no
// cleanup work of its own.
84 MarkupAccumulator::~MarkupAccumulator()
// Serializes |node| and returns the markup as a single String. The traversal
// is delegated to serializeNodesWithNamespaces() with a null namespace map;
// the queued pieces are then joined by concatenateMarkup().
// NOTE(review): the declaration of |out| is not visible in this excerpt.
88 String MarkupAccumulator::serializeNodes(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly)
91 serializeNodesWithNamespaces(node, nodeToSkip, childrenOnly, 0);
// Reserve the full output size up front, before the pieces are appended.
92 out.reserveCapacity(length());
93 concatenateMarkup(out);
94 return out.toString();
// Recursively serializes |node| and its children, comparing |node| against
// |nodeToSkip| first. Children are always visited with IncludeNode.
// NOTE(review): the action taken when node == nodeToSkip, the handling of
// |childrenOnly|, the null check on |namespaces| and the end-tag emission are
// not visible in this excerpt.
97 void MarkupAccumulator::serializeNodesWithNamespaces(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces)
99 if (node == nodeToSkip)
// Work on a local copy of the caller's map: bindings recorded while
// serializing this subtree are not written back to |*namespaces|.
102 Namespaces namespaceHash;
104 namespaceHash = *namespaces;
107 appendStartTag(node, &namespaceHash);
// In HTML documents, elements that cannot have an end tag do not get their
// children serialized.
109 if (!(node->document()->isHTMLDocument() && elementCannotHaveEndTag(node))) {
110 for (Node* current = node->firstChild(); current; current = current->nextSibling())
111 serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash);
// Returns |urlString|, completed against |element|'s document when the policy
// in m_resolveURLsMethod asks for it. Under ResolveNonLocalURLs the URL is
// completed only if the document's own URL is not a local file.
// NOTE(review): the label of the first case and the value returned when no
// resolution happens are not visible in this excerpt; presumably the first
// case is the resolve-everything policy and the fallback returns |urlString|
// unchanged -- confirm.
118 String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const
120 switch (m_resolveURLsMethod) {
122 return element->document()->completeURL(urlString).string();
124 case ResolveNonLocalURLs:
125 if (!element->document()->url().isLocalFile())
126 return element->document()->completeURL(urlString).string();
129 case DoNotResolveURLs:
// Queues |string| as the next piece of output by appending it to
// m_succeedingMarkup; concatenateMarkup() later joins the pieces in order.
135 void MarkupAccumulator::appendString(const String& string)
137 m_succeedingMarkup.append(string);
// Builds the start markup for |node| in a temporary StringBuilder, queues it
// through appendString(), and records |node| in the m_nodes vector.
// NOTE(review): m_nodes is a pointer; whether the append below is guarded by a
// null check is not visible in this excerpt -- confirm.
140 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
142 StringBuilder markup;
143 appendStartMarkup(markup, node, namespaces);
144 appendString(markup.toString());
146 m_nodes->append(node);
// Builds the end markup for |node| in a temporary StringBuilder and queues it
// through appendString(). appendEndMarkup() decides whether anything is
// written for the node at all.
149 void MarkupAccumulator::appendEndTag(Node* node)
151 StringBuilder markup;
152 appendEndMarkup(markup, node);
153 appendString(markup.toString());
// Adds up length() over every String in |strings|.
// NOTE(review): the declaration of the |length| accumulator and the return
// statement are not visible in this excerpt.
156 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
159 for (size_t i = 0; i < strings.size(); ++i)
160 length += strings[i].length();
164 // FIXME: This is a very inefficient way of accumulating the markup.
165 // We're converting results of appendStartMarkup and appendEndMarkup from StringBuilder to String
166 // and then back to StringBuilder and again to String here.
// Appends every queued piece in m_succeedingMarkup to |out|, in the order the
// pieces were queued.
167 void MarkupAccumulator::concatenateMarkup(StringBuilder& out)
169 for (size_t i = 0; i < m_succeedingMarkup.size(); ++i)
170 out.append(m_succeedingMarkup[i]);
// Appends the attribute value |attribute| to |result| with entity escaping.
// The escaping mask is EntityMaskInHTMLAttributeValue when |documentIsHTML| is
// true and EntityMaskInAttributeValue otherwise. No surrounding quotes are
// written here.
173 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
175 appendCharactersReplacingEntities(result, attribute.characters(), attribute.length(),
176 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
// Hook called by appendElement() after the element's own attributes so that
// subclasses can emit additional ones. The parameters are unnamed in this
// base version, so it cannot use them.
179 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*)
// Appends the value of the URL attribute |attribute| to |result|, wrapped in
// quoteChar, after passing it through resolveURLIfNeeded().
// javascript: URLs are whitespace-stripped and get only minimal quote handling
// instead of full entity escaping: when the stripped URL contains both a
// double quote and a single quote, its double quotes are replaced. All other
// URLs are escaped by appendAttributeValue() with documentIsHTML == false.
// NOTE(review): the branch taken when the URL contains double quotes but no
// single quotes is not visible in this excerpt; presumably it switches
// quoteChar to a single quote -- confirm. The replacement literal passed to
// replace() also looks entity-decoded and is not a well-formed string as
// written.
183 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute)
185 ASSERT(element->isURLAttribute(const_cast<Attribute*>(&attribute)));
186 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
187 UChar quoteChar = '\"';
188 String strippedURLString = resolvedURLString.stripWhiteSpace();
189 if (protocolIsJavaScript(strippedURLString)) {
190 // minimal escaping for javascript urls
191 if (strippedURLString.contains('"')) {
192 if (strippedURLString.contains('\''))
193 strippedURLString.replace('\"', """);
197 result.append(quoteChar);
198 result.append(strippedURLString);
199 result.append(quoteChar);
203 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
204 result.append(quoteChar);
205 appendAttributeValue(result, resolvedURLString, false);
206 result.append(quoteChar);
// Appends |node|'s nodeValue() to |out|, escaped according to |entityMask|.
// When |node| is the end container of |range| the text is cut at the range's
// end offset; when it is the start container the range's start offset is read
// as well.
// NOTE(review): the null check on |range|, the declaration of |ec|, and the
// statements that apply |start| to |characters| and |length| are not visible
// in this excerpt.
209 void MarkupAccumulator::appendNodeValue(StringBuilder& out, const Node* node, const Range* range, EntityMask entityMask)
211 String str = node->nodeValue();
212 const UChar* characters = str.characters();
213 size_t length = str.length();
217 if (node == range->endContainer(ec))
218 length = range->endOffset(ec);
219 if (node == range->startContainer(ec)) {
220 size_t start = range->startOffset(ec);
226 appendCharactersReplacingEntities(out, characters, length, entityMask);
// Returns true when |element| does not already carry the namespace
// declaration attribute that matches its prefix: the plain xmlns attribute for
// an empty prefix, otherwise the attribute named by the string xmlns: followed
// by the prefix.
229 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
231 // Don't add namespace attribute if it is already defined for this elem.
232 const AtomicString& prefix = element->prefix();
233 if (prefix.isEmpty())
234 return !element->hasAttribute(xmlnsAtom);
236 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:"));
237 return !element->hasAttribute(xmlnsWithColon + prefix);
// Records namespace declarations carried by |attribute| in |namespaces|: a
// default xmlns attribute is stored under emptyAtom's impl, and an
// xmlns-prefixed attribute is stored under its local name. In both cases the
// stored value is the attribute value's impl.
// NOTE(review): the return statements are not visible in this excerpt; going
// by the comment below, declaration attributes presumably return false so
// they are not emitted twice, and everything else returns true -- confirm.
240 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
242 namespaces.checkConsistency();
244 // Don't add namespace attributes twice
245 if (attribute.name() == XMLNSNames::xmlnsAttr) {
246 namespaces.set(emptyAtom.impl(), attribute.value().impl());
250 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
251 if (attribute.name() == xmlnsPrefixAttr) {
252 namespaces.set(attribute.localName().impl(), attribute.value().impl());
// Writes an xmlns declaration binding |prefix| to |namespaceURI| into
// |result| when |namespaces| does not already map that prefix to the same URI,
// and records the new binding in |namespaces|. The URI is escaped by
// appendAttributeValue() with documentIsHTML == false.
// NOTE(review): the action for an empty |namespaceURI| (presumably an early
// return) and the appends of the separator, equals sign and quotes are not
// visible in this excerpt.
259 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
261 namespaces.checkConsistency();
262 if (namespaceURI.isEmpty())
265 // Use emptyAtom's impl() for both null and empty strings since the HashMap can't handle 0 as a key
266 AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
267 AtomicStringImpl* foundNS = namespaces.get(pre);
// Compares impl pointers, not string contents.
268 if (foundNS != namespaceURI.impl()) {
269 namespaces.set(pre, namespaceURI.impl());
271 result.append(xmlnsAtom.string());
272 if (!prefix.isEmpty()) {
274 result.append(prefix);
279 appendAttributeValue(result, namespaceURI, false);
// Chooses the escaping mask for |text|. Text whose parent element is script,
// style or xmp gets EntityMaskInCDATA; any other text gets
// EntityMaskInHTMLPCDATA in an HTML document and EntityMaskInPCDATA otherwise.
284 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
// Stays null when the text node has no parent element.
286 const QualifiedName* parentName = 0;
287 if (text->parentElement())
288 parentName = &static_cast<Element*>(text->parentElement())->tagQName();
290 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
291 return EntityMaskInCDATA;
293 return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA;
// Appends the value of |text| to |out| via appendNodeValue(), passing m_range
// and the escaping mask chosen by entityMaskForText().
296 void MarkupAccumulator::appendText(StringBuilder& out, Text* text)
298 appendNodeValue(out, text, m_range, entityMaskForText(text));
// Serializes a comment whose text is |comment| into |out|. Per the FIXME
// below, the content is written without escaping.
// NOTE(review): the statements that write the comment are not visible in this
// excerpt.
301 void MarkupAccumulator::appendComment(StringBuilder& out, const String& comment)
303 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
// Appends a DOCTYPE declaration for |n| to |result|. The name is written
// first. A non-empty public ID is written after the PUBLIC keyword, followed
// by the system ID if that is non-empty as well; with no public ID, a
// non-empty system ID is written after the SYSTEM keyword. A non-empty
// internal subset is appended last. None of the IDs are escaped here.
// NOTE(review): the action for an empty name (presumably an early return) and
// the appends of the closing quotes and brackets are not visible in this
// excerpt.
309 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n)
311 if (n->name().isEmpty())
314 result.append("<!DOCTYPE ");
315 result.append(n->name());
316 if (!n->publicId().isEmpty()) {
317 result.append(" PUBLIC \"");
318 result.append(n->publicId());
320 if (!n->systemId().isEmpty()) {
321 result.append(" \"");
322 result.append(n->systemId());
325 } else if (!n->systemId().isEmpty()) {
326 result.append(" SYSTEM \"");
327 result.append(n->systemId());
330 if (!n->internalSubset().isEmpty()) {
332 result.append(n->internalSubset());
// Serializes a processing instruction with the given |target| and |data| into
// |out|. Per the FIXME below, the data is written without escaping.
// NOTE(review): the statements that write the instruction are not visible in
// this excerpt.
338 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& out, const String& target, const String& data)
340 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "?>".
// Appends the complete start tag of |element| to |out|: the open tag, every
// attribute in attributes() index order, any attributes contributed by
// appendCustomAttributes(), and finally the close of the tag.
348 void MarkupAccumulator::appendElement(StringBuilder& out, Element* element, Namespaces* namespaces)
350 appendOpenTag(out, element, namespaces);
352 NamedNodeMap* attributes = element->attributes();
353 unsigned length = attributes->length();
354 for (unsigned int i = 0; i < length; i++)
355 appendAttribute(out, element, *attributes->attributeItem(i), namespaces);
357 // Give an opportunity to subclasses to add their own attributes.
358 appendCustomAttributes(out, element, namespaces);
360 appendCloseTag(out, element);
// Appends |element|'s case-preserved node name to |out|. For documents that
// are not HTML, and only when a namespace map was supplied, the element's
// namespace declaration is added if shouldAddNamespaceElement() reports that
// the element does not declare it itself.
// NOTE(review): the append of the opening angle bracket is not visible in this
// excerpt.
363 void MarkupAccumulator::appendOpenTag(StringBuilder& out, Element* element, Namespaces* namespaces)
366 out.append(element->nodeNamePreservingCase());
367 if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
368 appendNamespace(out, element->prefix(), element->namespaceURI(), *namespaces);
// Finishes the start tag of |element|. When shouldSelfClose() is true and the
// element is an HTML element, a space is written first.
// NOTE(review): the appends of the slash and closing angle bracket are not
// visible in this excerpt.
371 void MarkupAccumulator::appendCloseTag(StringBuilder& out, Element* element)
373 if (shouldSelfClose(element)) {
374 if (element->isHTMLElement())
375 out.append(' '); // XHTML 1.0 <-> HTML compatibility.
// Appends one attribute of |element| to |out|. The name is written either as
// the local name or as the full qualified name; URL attributes have their
// value written by appendQuotedURLAttributeValue(), all others by
// appendAttributeValue(). For non-HTML documents with a namespace map, the
// attribute's namespace declaration is added when
// shouldAddNamespaceAttribute() allows it.
// NOTE(review): the condition that selects between the two name forms
// (presumably |documentIsHTML|) and the appends of the leading space, equals
// sign and quotes are not visible in this excerpt.
381 void MarkupAccumulator::appendAttribute(StringBuilder& out, Element* element, const Attribute& attribute, Namespaces* namespaces)
383 bool documentIsHTML = element->document()->isHTMLDocument();
388 out.append(attribute.name().localName());
390 out.append(attribute.name().toString());
394 if (element->isURLAttribute(const_cast<Attribute*>(&attribute)))
395 appendQuotedURLAttributeValue(out, element, attribute);
398 appendAttributeValue(out, attribute.value(), documentIsHTML);
402 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
403 appendNamespace(out, attribute.prefix(), attribute.namespaceURI(), *namespaces);
// Serializes a CDATA section with content |section| into |out|, starting with
// the CDATA opening delimiter. Per the FIXME below, the content is written
// without escaping.
// NOTE(review): the appends of the content and the closing delimiter are not
// visible in this excerpt.
406 void MarkupAccumulator::appendCDATASection(StringBuilder& out, const String& section)
408 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
409 out.append("<![CDATA[");
// Writes the start markup for |node| into |result| by dispatching on
// node->nodeType() to the matching append* helper. Document and
// document-fragment nodes share a case with no helper call; attribute,
// entity, entity-reference, notation, XPath-namespace and shadow-root nodes
// are not expected here and hit ASSERT_NOT_REACHED().
// NOTE(review): the null check guarding namespaces->checkConsistency() and
// the break statements between cases are not visible in this excerpt.
414 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces)
417 namespaces->checkConsistency();
419 switch (node->nodeType()) {
420 case Node::TEXT_NODE:
// appendText() takes a non-const Text*, so const is cast away.
421 appendText(result, static_cast<Text*>(const_cast<Node*>(node)));
423 case Node::COMMENT_NODE:
424 appendComment(result, static_cast<const Comment*>(node)->data());
426 case Node::DOCUMENT_NODE:
427 case Node::DOCUMENT_FRAGMENT_NODE:
429 case Node::DOCUMENT_TYPE_NODE:
430 appendDocumentType(result, static_cast<const DocumentType*>(node));
432 case Node::PROCESSING_INSTRUCTION_NODE:
433 appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data());
435 case Node::ELEMENT_NODE:
// appendElement() takes a non-const Element*, so const is cast away.
436 appendElement(result, static_cast<Element*>(const_cast<Node*>(node)), namespaces);
438 case Node::CDATA_SECTION_NODE:
439 appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
441 case Node::ATTRIBUTE_NODE:
442 case Node::ENTITY_NODE:
443 case Node::ENTITY_REFERENCE_NODE:
444 case Node::NOTATION_NODE:
445 case Node::XPATH_NAMESPACE_NODE:
446 case Node::SHADOW_ROOT_NODE:
447 ASSERT_NOT_REACHED();
452 // Rules of self-closure
453 // 1. No elements in HTML documents use the self-closing syntax.
454 // 2. Elements w/ children never self-close because they use a separate end tag.
455 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
456 // 4. Other elements self-close.
// The three checks below correspond to rules 1-3, tested in that order.
// NOTE(review): the return statements are not visible in this excerpt; per
// the rules above, each check presumably returns false and the fall-through
// returns true -- confirm.
457 bool MarkupAccumulator::shouldSelfClose(const Node* node)
459 if (node->document()->isHTMLDocument())
461 if (node->hasChildNodes())
463 if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
// Reports whether |node| is an HTML element that is serialized without an end
// tag. HTML elements defer to HTMLElement::ieForbidsInsertHTML().
// NOTE(review): the value returned for non-HTML elements is not visible in
// this excerpt; presumably false -- confirm.
468 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
470 if (!node->isHTMLElement())
473 // FIXME: ieForbidsInsertHTML may not be the right function to call here
474 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
475 // or createContextualFragment. It does not necessarily align with
476 // which elements should be serialized w/o end tags.
477 return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
// Writes the end tag for |node| into |result|, using the element's
// case-preserved node name. The guard below singles out nodes that are not
// elements, elements that self-close, and childless elements that cannot have
// an end tag.
// NOTE(review): the action taken when the guard matches (presumably an early
// return) and the appends of the surrounding angle brackets and slash are not
// visible in this excerpt.
480 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node)
482 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
487 result.append(static_cast<const Element*>(node)->nodeNamePreservingCase());