HELLO·Android
系统源代码
IT资讯
技术文章
我的收藏
注册
登录
-
我收藏的文章
创建代码块
我的代码块
我的账号
Nougat 7.1
|
7.1.1_r28
下载
查看原文件
收藏
根目录
external
chromium-trace
catapult
third_party
html5lib-python
html5lib
tests
test_serializer.py
from __future__ import absolute_import, division, unicode_literals import json import unittest from .support import get_data_files try: unittest.TestCase.assertEqual except AttributeError: unittest.TestCase.assertEqual = unittest.TestCase.assertEquals import html5lib from html5lib import constants from html5lib.serializer import HTMLSerializer, serialize from html5lib.treewalkers._base import TreeWalker optionals_loaded = [] try: from lxml import etree optionals_loaded.append("lxml") except ImportError: pass default_namespace = constants.namespaces["html"] class JsonWalker(TreeWalker): def __iter__(self): for token in self.tree: type = token[0] if type == "StartTag": if len(token) == 4: namespace, name, attrib = token[1:4] else: namespace = default_namespace name, attrib = token[1:3] yield self.startTag(namespace, name, self._convertAttrib(attrib)) elif type == "EndTag": if len(token) == 3: namespace, name = token[1:3] else: namespace = default_namespace name = token[1] yield self.endTag(namespace, name) elif type == "EmptyTag": if len(token) == 4: namespace, name, attrib = token[1:] else: namespace = default_namespace name, attrib = token[1:] for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)): yield token elif type == "Comment": yield self.comment(token[1]) elif type in ("Characters", "SpaceCharacters"): for token in self.text(token[1]): yield token elif type == "Doctype": if len(token) == 4: yield self.doctype(token[1], token[2], token[3]) elif len(token) == 3: yield self.doctype(token[1], token[2]) else: yield self.doctype(token[1]) else: raise ValueError("Unknown token type: " + type) def _convertAttrib(self, attribs): """html5lib tree-walkers use a dict of (namespace, name): value for attributes, but JSON cannot represent this. Convert from the format in the serializer tests (a list of dicts with "namespace", "name", and "value" as keys) to html5lib's tree-walker format.""" attrs = {} for attrib in attribs: name = (attrib["namespace"], attrib["name"]) assert(name not in attrs) attrs[name] = attrib["value"] return attrs def serialize_html(input, options): options = dict([(str(k), v) for k, v in options.items()]) stream = JsonWalker(input) serializer = HTMLSerializer(alphabetical_attributes=True, **options) return serializer.render(stream, options.get("encoding", None)) def runSerializerTest(input, expected, options): encoding = options.get("encoding", None) if encoding: encode = lambda x: x.encode(encoding) expected = list(map(encode, expected)) result = serialize_html(input, options) if len(expected) == 1: assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) elif result not in expected: assert False, "Expected: %s, Received: %s" % (expected, result) class EncodingTestCase(unittest.TestCase): def throwsWithLatin1(self, input): self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"}) def testDoctypeName(self): self.throwsWithLatin1([["Doctype", "\u0101"]]) def testDoctypePublicId(self): self.throwsWithLatin1([["Doctype", "potato", "\u0101"]]) def testDoctypeSystemId(self): self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) def testCdataCharacters(self): runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], ["