/*
* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
* Copyright (C) 2009, 2010 Google Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include "MarkupAccumulator.h"
#include "CDATASection.h"
#include "Comment.h"
#include "DocumentFragment.h"
#include "DocumentType.h"
#include "Editor.h"
#include "HTMLElement.h"
#include "HTMLNames.h"
#include "KURL.h"
#include "ProcessingInstruction.h"
#include "XMLNSNames.h"
#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
using namespace HTMLNames;
// Appends |length| UTF-16 code units starting at |content| to |out|, replacing
// every character that is both listed in the entity table below and selected
// by |entityMask| with its named character reference.
//
// Runs of characters that need no escaping are copied in bulk: the loop only
// remembers where the last replaced character ended and flushes the pending
// run whenever it hits the next character to replace (and once more at the end).
//
// The reference strings must be the escaped forms ("&amp;", "&lt;", ...).
// Mapping a character to itself would emit unescaped markup.
void appendCharactersReplacingEntities(Vector<UChar>& out, const UChar* content, size_t length, EntityMask entityMask)
{
    DEFINE_STATIC_LOCAL(const String, ampReference, ("&amp;"));
    DEFINE_STATIC_LOCAL(const String, ltReference, ("&lt;"));
    DEFINE_STATIC_LOCAL(const String, gtReference, ("&gt;"));
    DEFINE_STATIC_LOCAL(const String, quotReference, ("&quot;"));
    DEFINE_STATIC_LOCAL(const String, nbspReference, ("&nbsp;"));

    // One row per escapable character: the character, its replacement, and
    // the mask bit that enables the replacement.
    static const EntityDescription entityMaps[] = {
        { '&', ampReference, EntityAmp },
        { '<', ltReference, EntityLt },
        { '>', gtReference, EntityGt },
        { '"', quotReference, EntityQuot },
        { noBreakSpace, nbspReference, EntityNbsp },
    };

    size_t positionAfterLastEntity = 0;
    for (size_t i = 0; i < length; ++i) {
        for (size_t m = 0; m < WTF_ARRAY_LENGTH(entityMaps); ++m) {
            if (content[i] == entityMaps[m].entity && entityMaps[m].mask & entityMask) {
                // Flush the unescaped run that precedes this character, then
                // emit the reference in place of the character itself.
                out.append(content + positionAfterLastEntity, i - positionAfterLastEntity);
                append(out, entityMaps[m].reference);
                positionAfterLastEntity = i + 1;
                break;
            }
        }
    }

    // Flush whatever follows the last replaced character (or the whole input
    // if nothing was replaced).
    out.append(content + positionAfterLastEntity, length - positionAfterLastEntity);
}
// Stores the caller-supplied node list, range, and URL-resolution policy.
// Both |nodes| and |range| may be null: appendStartTag() only records nodes
// when m_nodes is non-null, and appendNodeValue() only clips text when it is
// handed a non-null range.
MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs shouldResolveURLs, const Range* range)
: m_nodes(nodes)
, m_range(range)
, m_shouldResolveURLs(shouldResolveURLs)
{
}
// Nothing to release explicitly here; the body is intentionally empty.
MarkupAccumulator::~MarkupAccumulator()
{
}
// Serializes |node| (or only its children when |childrenOnly| is set),
// skipping |nodeToSkip|, and returns the collected markup as one String.
String MarkupAccumulator::serializeNodes(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly)
{
    // Collect all markup pieces first so the buffer below can be sized from
    // length() before anything is copied into it.
    serializeNodesWithNamespaces(node, nodeToSkip, childrenOnly, 0);

    Vector<UChar> markup;
    markup.reserveInitialCapacity(length());
    concatenateMarkup(markup);
    return String::adopt(markup);
}
// Recursively serializes |node| and its descendants in document order.
// |namespaces| holds the declarations already in scope from ancestors (may be
// null); each level works on its own copy so that declarations added while
// serializing a subtree are not visible to that subtree's siblings.
void MarkupAccumulator::serializeNodesWithNamespaces(Node* node, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces)
{
    if (node == nodeToSkip)
        return;

    Namespaces scopedNamespaces;
    if (namespaces)
        scopedNamespaces = *namespaces;

    const bool emitOwnTags = !childrenOnly;
    if (emitOwnTags)
        appendStartTag(node, &scopedNamespaces);

    // In HTML documents, elements that cannot have an end tag do not get
    // their children serialized.
    const bool skipChildren = node->document()->isHTMLDocument() && elementCannotHaveEndTag(node);
    if (!skipChildren) {
        Node* child = node->firstChild();
        while (child) {
            serializeNodesWithNamespaces(child, nodeToSkip, IncludeNode, &scopedNamespaces);
            child = child->nextSibling();
        }
    }

    if (emitOwnTags)
        appendEndTag(node);
}
// Adds |string| as the next piece of accumulated markup. Pieces are later
// joined in insertion order by concatenateMarkup().
void MarkupAccumulator::appendString(const String& string)
{
m_succeedingMarkup.append(string);
}
// Builds the start markup for |node|, queues it as the next markup piece, and
// records |node| in the caller-supplied node list when one was provided.
void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
{
    Vector<UChar> startMarkup;
    appendStartMarkup(startMarkup, node, namespaces);
    appendString(String::adopt(startMarkup));

    if (!m_nodes)
        return;
    m_nodes->append(node);
}
// Builds the end markup for |node| (possibly empty, see appendEndMarkup) and
// queues it as the next markup piece.
void MarkupAccumulator::appendEndTag(Node* node)
{
    Vector<UChar> endMarkup;
    appendEndMarkup(endMarkup, node);
    appendString(String::adopt(endMarkup));
}
// Returns the sum of the lengths of all strings in |strings|.
size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
{
    size_t total = 0;
    const size_t count = strings.size();
    for (size_t index = 0; index != count; ++index)
        total += strings[index].length();
    return total;
}
// Appends every accumulated markup piece to |out|, in the order the pieces
// were added.
//
// FIXME: This is a very inefficient way of accumulating the markup.
// The results of appendStartMarkup and appendEndMarkup are converted from
// Vector<UChar> to String, then back to Vector<UChar> here, and finally to
// String again by the caller.
void MarkupAccumulator::concatenateMarkup(Vector<UChar>& out)
{
    const size_t pieceCount = m_succeedingMarkup.size();
    for (size_t index = 0; index < pieceCount; ++index)
        append(out, m_succeedingMarkup[index]);
}
// Appends |attribute| to |result| with entity escaping. The set of characters
// escaped depends on whether the owning document is HTML.
void MarkupAccumulator::appendAttributeValue(Vector<UChar>& result, const String& attribute, bool documentIsHTML)
{
    const EntityMask mask = documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue;
    appendCharactersReplacingEntities(result, attribute.characters(), attribute.length(), mask);
}
// Appends |urlString| to |result| as a quoted attribute value.
//
// javascript: URLs get minimal escaping so the script text stays readable:
//   - no double quote inside: wrap in double quotes, unchanged;
//   - double quotes but no single quotes inside: wrap in single quotes;
//   - both kinds inside: wrap in double quotes and replace each inner double
//     quote with the &quot; character reference.
// All other URLs are wrapped in double quotes and escaped with the non-HTML
// attribute-value entity mask.
void MarkupAccumulator::appendQuotedURLAttributeValue(Vector<UChar>& result, const String& urlString)
{
    UChar quoteChar = '\"';
    String strippedURLString = urlString.stripWhiteSpace();
    if (protocolIsJavaScript(strippedURLString)) {
        // minimal escaping for javascript urls
        if (strippedURLString.contains('"')) {
            if (strippedURLString.contains('\''))
                // Must be the escaped reference; a bare quote here would
                // terminate the attribute value early in the output.
                strippedURLString.replace('\"', "&quot;");
            else
                quoteChar = '\'';
        }
        result.append(quoteChar);
        append(result, strippedURLString);
        result.append(quoteChar);
        return;
    }

    // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
    result.append(quoteChar);
    appendAttributeValue(result, urlString, false);
    result.append(quoteChar);
}
// Appends the value of |node| to |out| with the escaping selected by
// |entityMask|. When |range| is non-null and |node| is its start and/or end
// container, only the part of the value inside the range is appended.
void MarkupAccumulator::appendNodeValue(Vector<UChar>& out, const Node* node, const Range* range, EntityMask entityMask)
{
    String value = node->nodeValue();
    const UChar* begin = value.characters();
    size_t count = value.length();

    if (range) {
        ExceptionCode ec;
        // Clip the end first: the end offset is measured from the start of
        // the value, so it must be applied before the start is advanced.
        if (range->endContainer(ec) == node)
            count = range->endOffset(ec);
        if (range->startContainer(ec) == node) {
            const size_t startOffset = range->startOffset(ec);
            begin += startOffset;
            count -= startOffset;
        }
    }

    appendCharactersReplacingEntities(out, begin, count, entityMask);
}
bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
{
// Don't add namespace attribute if it is already defined for this elem.
const AtomicString& prefix = element->prefix();
AtomicString attr = !prefix.isEmpty() ? "xmlns:" + prefix : "xmlns";
return !element->hasAttribute(attr);
}
bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
{
namespaces.checkConsistency();
// Don't add namespace attributes twice
if (attribute.name() == XMLNSNames::xmlnsAttr) {
namespaces.set(emptyAtom.impl(), attribute.value().impl());
return false;
}
QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
if (attribute.name() == xmlnsPrefixAttr) {
namespaces.set(attribute.localName().impl(), attribute.value().impl());
return false;
}
return true;
}
// Emits an xmlns / xmlns:<prefix> declaration binding |prefix| to
// |namespaceURI|, unless the URI is empty or |namespaces| already maps the
// prefix to that same URI. The new binding is recorded in |namespaces|.
void MarkupAccumulator::appendNamespace(Vector<UChar>& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
{
    namespaces.checkConsistency();
    if (namespaceURI.isEmpty())
        return;

    // Null and empty prefixes share emptyAtom's impl() as their key because
    // the HashMap can't handle 0 as a key.
    const bool hasPrefix = !prefix.isEmpty();
    AtomicStringImpl* key = hasPrefix ? prefix.impl() : emptyAtom.impl();

    AtomicStringImpl* currentBinding = namespaces.get(key);
    if (currentBinding == namespaceURI.impl())
        return;

    namespaces.set(key, namespaceURI.impl());

    result.append(' ');
    append(result, xmlnsAtom.string());
    if (hasPrefix) {
        result.append(':');
        append(result, prefix);
    }
    result.append('=');
    result.append('"');
    appendAttributeValue(result, namespaceURI, false);
    result.append('"');
}
// Picks the escaping mask for |text|: the CDATA mask for text whose parent
// element is <script>, <style> or <xmp>; otherwise the PCDATA mask matching
// the document type (HTML vs. non-HTML).
EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
{
    if (text->parentElement()) {
        const QualifiedName& parentName = static_cast<Element*>(text->parentElement())->tagQName();
        if (parentName == scriptTag || parentName == styleTag || parentName == xmpTag)
            return EntityMaskInCDATA;
    }

    if (text->document()->isHTMLDocument())
        return EntityMaskInHTMLPCDATA;
    return EntityMaskInPCDATA;
}
// Appends the (range-clipped) content of |text| to |out|, escaped according
// to the context the text node lives in.
void MarkupAccumulator::appendText(Vector<UChar>& out, Text* text)
{
    const EntityMask mask = entityMaskForText(text);
    appendNodeValue(out, text, m_range, mask);
}
// Appends |comment| to |out| wrapped in comment delimiters. The comment text
// is written verbatim, without any escaping.
void MarkupAccumulator::appendComment(Vector<UChar>& out, const String& comment)
{
// FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
append(out, "<!--");
append(out, comment);
append(out, "-->");
}
// Appends a <!DOCTYPE ...> declaration for |n| to |result|. Nothing is
// written when the doctype has no name. The external identifier is written as
// PUBLIC (optionally followed by the system id) when a public id exists,
// otherwise as SYSTEM when only a system id exists; a non-empty internal
// subset is appended in square brackets.
void MarkupAccumulator::appendDocumentType(Vector<UChar>& result, const DocumentType* n)
{
    if (n->name().isEmpty())
        return;

    append(result, "<!DOCTYPE ");
    append(result, n->name());

    const bool hasPublicId = !n->publicId().isEmpty();
    const bool hasSystemId = !n->systemId().isEmpty();

    if (hasPublicId) {
        append(result, " PUBLIC \"");
        append(result, n->publicId());
        append(result, "\"");
        if (hasSystemId) {
            append(result, " \"");
            append(result, n->systemId());
            append(result, "\"");
        }
    } else if (hasSystemId) {
        append(result, " SYSTEM \"");
        append(result, n->systemId());
        append(result, "\"");
    }

    const bool hasInternalSubset = !n->internalSubset().isEmpty();
    if (hasInternalSubset) {
        append(result, " [");
        append(result, n->internalSubset());
        append(result, "]");
    }

    append(result, ">");
}
// Appends a processing instruction to |out|: the target, a single space, and
// the data, wrapped in PI delimiters. Target and data are written verbatim.
void MarkupAccumulator::appendProcessingInstruction(Vector<UChar>& out, const String& target, const String& data)
{
// FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "?>".
append(out, "<?");
append(out, target);
append(out, " ");
append(out, data);
append(out, "?>");
}
// Appends the complete start tag of |element|: the opening "<name", every
// attribute in map order, and the closing ">" (or "/>" when self-closing).
void MarkupAccumulator::appendElement(Vector<UChar>& out, Element* element, Namespaces* namespaces)
{
    appendOpenTag(out, element, namespaces);

    NamedNodeMap* attributeMap = element->attributes();
    const unsigned attributeCount = attributeMap->length();
    for (unsigned index = 0; index != attributeCount; ++index)
        appendAttribute(out, element, *attributeMap->attributeItem(index), namespaces);

    appendCloseTag(out, element);
}
// Appends "<" and the case-preserved element name. For non-HTML documents
// with namespace tracking enabled, also emits the element's own namespace
// declaration when the element does not already carry one.
void MarkupAccumulator::appendOpenTag(Vector<UChar>& out, Element* element, Namespaces* namespaces)
{
    out.append('<');
    append(out, element->nodeNamePreservingCase());

    if (element->document()->isHTMLDocument())
        return;
    if (!namespaces || !shouldAddNamespaceElement(element))
        return;

    appendNamespace(out, element->prefix(), element->namespaceURI(), *namespaces);
}
// Terminates a start tag: ">" normally, "/>" for self-closing elements, and
// " />" for self-closing HTML elements.
void MarkupAccumulator::appendCloseTag(Vector<UChar>& out, Element* element)
{
    if (!shouldSelfClose(element)) {
        out.append('>');
        return;
    }

    // The extra space keeps the output compatible between XHTML 1.0 and HTML.
    if (element->isHTMLElement())
        out.append(' ');
    out.append('/');
    out.append('>');
}
// Appends one attribute of |element| as ` name="value"`. HTML documents use
// the attribute's local name; other documents use its full qualified name.
// URL attributes go through the URL-specific quoting path and may be resolved
// to absolute URLs. In non-HTML documents a namespace declaration for the
// attribute is emitted afterwards when needed.
void MarkupAccumulator::appendAttribute(Vector<UChar>& out, Element* element, const Attribute& attribute, Namespaces* namespaces)
{
    const bool documentIsHTML = element->document()->isHTMLDocument();

    out.append(' ');
    if (!documentIsHTML)
        append(out, attribute.name().toString());
    else
        append(out, attribute.name().localName());
    out.append('=');

    if (!element->isURLAttribute(const_cast<Attribute*>(&attribute))) {
        out.append('"');
        appendAttributeValue(out, attribute.value(), documentIsHTML);
        out.append('"');
    } else if (shouldResolveURLs() && !element->document()->url().isLocalFile()) {
        // file:/// URLs are deliberately not completed: the absolute form may
        // contain sensitive information about the user's system.
        appendQuotedURLAttributeValue(out, element->document()->completeURL(attribute.value()).string());
    } else
        appendQuotedURLAttributeValue(out, attribute.value());

    if (documentIsHTML || !namespaces)
        return;
    if (shouldAddNamespaceAttribute(attribute, *namespaces))
        appendNamespace(out, attribute.prefix(), attribute.namespaceURI(), *namespaces);
}
// Appends |section| to |out| wrapped in CDATA delimiters. The section text is
// written verbatim, without any escaping.
void MarkupAccumulator::appendCDATASection(Vector<UChar>& out, const String& section)
{
// FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
append(out, "<![CDATA[");
append(out, section);
append(out, "]]>");
}
// Appends the opening markup for |node| to |result|, dispatching on the node
// type. Document and document-fragment nodes produce no markup of their own.
// Attribute, entity, entity-reference, notation and XPath-namespace nodes are
// not expected here and trip an assertion.
void MarkupAccumulator::appendStartMarkup(Vector<UChar>& result, const Node* node, Namespaces* namespaces)
{
    if (namespaces)
        namespaces->checkConsistency();

    switch (node->nodeType()) {
    case Node::ELEMENT_NODE: {
        Element* element = static_cast<Element*>(const_cast<Node*>(node));
        appendElement(result, element, namespaces);
        break;
    }
    case Node::TEXT_NODE: {
        Text* text = static_cast<Text*>(const_cast<Node*>(node));
        appendText(result, text);
        break;
    }
    case Node::CDATA_SECTION_NODE:
        appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
        break;
    case Node::COMMENT_NODE:
        appendComment(result, static_cast<const Comment*>(node)->data());
        break;
    case Node::PROCESSING_INSTRUCTION_NODE: {
        const ProcessingInstruction* instruction = static_cast<const ProcessingInstruction*>(node);
        appendProcessingInstruction(result, instruction->target(), instruction->data());
        break;
    }
    case Node::DOCUMENT_TYPE_NODE:
        appendDocumentType(result, static_cast<const DocumentType*>(node));
        break;
    case Node::DOCUMENT_NODE:
    case Node::DOCUMENT_FRAGMENT_NODE:
        // Container-only nodes: nothing to emit.
        break;
    case Node::ATTRIBUTE_NODE:
    case Node::ENTITY_NODE:
    case Node::ENTITY_REFERENCE_NODE:
    case Node::NOTATION_NODE:
    case Node::XPATH_NAMESPACE_NODE:
        ASSERT_NOT_REACHED();
        break;
    }
}
// Rules of self-closure, checked in this order:
// 1. Nothing in an HTML document uses the self-closing syntax.
// 2. A node with children never self-closes; it gets a separate end tag.
// 3. An HTML element whose end tag is not "forbidden" gets a separate end tag.
// 4. Everything else self-closes.
bool MarkupAccumulator::shouldSelfClose(const Node* node)
{
    const bool closesWithSeparateEndTag = node->document()->isHTMLDocument()
        || node->hasChildNodes()
        || (node->isHTMLElement() && !elementCannotHaveEndTag(node));
    return !closesWithSeparateEndTag;
}
// Returns true for HTML elements that are serialized without an end tag.
// Non-HTML nodes always return false.
bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
{
    // FIXME: ieForbidsInsertHTML may not be the right function to call here.
    // It exists to disallow setting innerHTML/outerHTML or calling
    // createContextualFragment, which does not necessarily line up with the
    // set of elements that should be serialized without end tags.
    return node->isHTMLElement() && static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
}
// Appends "</name>" for |node| to |result|. Nothing is written for
// non-element nodes, for elements that self-close, or for childless elements
// that cannot have an end tag.
void MarkupAccumulator::appendEndMarkup(Vector<UChar>& result, const Node* node)
{
    if (!node->isElementNode())
        return;
    if (shouldSelfClose(node))
        return;
    if (!node->hasChildNodes() && elementCannotHaveEndTag(node))
        return;

    result.append('<');
    result.append('/');
    append(result, static_cast<const Element*>(node)->nodeNamePreservingCase());
    result.append('>');
}
}