added venv

c5db80e over 2 years ago

117 kB

	from __future__ import absolute_import, division, unicode_literals
	from pip._vendor.six import with_metaclass, viewkeys

	import types

	from . import _inputstream
	from . import _tokenizer

	from . import treebuilders
	from .treebuilders.base import Marker

	from . import _utils
	from .constants import (
	spaceCharacters, asciiUpper2Lower,
	specialElements, headingElements, cdataElements, rcdataElements,
	tokenTypes, tagTokenTypes,
	namespaces,
	htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
	adjustForeignAttributes as adjustForeignAttributesMap,
	adjustMathMLAttributes, adjustSVGAttributes,
	E,
	_ReparseException
	)


	def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
	    """Parse a complete HTML document and return the resulting tree

	    :arg doc: the document to parse, as a string or file-like object

	    :arg treebuilder: name of the treebuilder used to construct the tree

	    :arg namespaceHTMLElements: whether or not to namespace HTML elements

	    Any further keyword arguments are passed on to ``HTMLParser.parse``.

	    :returns: parsed tree

	    Example:

	    >>> from html5lib.html5parser import parse
	    >>> parse('<html><body><p>This is a doc</p></body></html>')
	    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

	    """
	    builder_class = treebuilders.getTreeBuilder(treebuilder)
	    parser = HTMLParser(builder_class,
	                        namespaceHTMLElements=namespaceHTMLElements)
	    return parser.parse(doc, **kwargs)


	def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
	    """Parse an HTML fragment and return the resulting tree fragment

	    :arg doc: the fragment to parse, as a string or file-like object

	    :arg container: name of the element the fragment is parsed inside of

	    :arg treebuilder: name of the treebuilder used to construct the tree

	    :arg namespaceHTMLElements: whether or not to namespace HTML elements

	    Any further keyword arguments are passed on to
	    ``HTMLParser.parseFragment``.

	    :returns: parsed tree

	    Example:

	    >>> from html5lib.html5parser import parseFragment
	    >>> parseFragment('<b>this is a fragment</b>')
	    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

	    """
	    builder_class = treebuilders.getTreeBuilder(treebuilder)
	    parser = HTMLParser(builder_class,
	                        namespaceHTMLElements=namespaceHTMLElements)
	    return parser.parseFragment(doc, container=container, **kwargs)


	def method_decorator_metaclass(function):
	    """Build a metaclass that applies *function* to every plain function
	    found in the namespace of each class it creates.

	    :arg function: decorator called with each function attribute; its
	        return value replaces the attribute in the class namespace

	    :returns: the metaclass (a subclass of ``type``)
	    """
	    class Decorated(type):
	        def __new__(meta, classname, bases, classDict):
	            # Snapshot the keys so values can be swapped while looping.
	            for key in list(classDict):
	                member = classDict[key]
	                if isinstance(member, types.FunctionType):
	                    classDict[key] = function(member)
	            return type.__new__(meta, classname, bases, classDict)

	    return Decorated


	class HTMLParser(object):
	    """HTML parser

	    Generates a tree structure from a stream of (possibly malformed) HTML.

	    """

	    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
	        """
	        :arg tree: a treebuilder class controlling the type of tree that will be
	            returned. Built in treebuilders can be accessed through
	            html5lib.treebuilders.getTreeBuilder(treeType)

	        :arg strict: raise an exception when a parse error is encountered

	        :arg namespaceHTMLElements: whether or not to namespace HTML elements

	        :arg debug: whether or not to enable debug mode which logs things

	        Example:

	        >>> from html5lib.html5parser import HTMLParser
	        >>> parser = HTMLParser()                     # generates parser with etree builder
	        >>> parser = HTMLParser('lxml', strict=True)  # generates parser with lxml builder which is strict

	        """

	        # Raise an exception on the first error encountered
	        self.strict = strict

	        if tree is None:
	            tree = treebuilders.getTreeBuilder("etree")
	        self.tree = tree(namespaceHTMLElements)
	        self.errors = []

	        # One phase object per phase name, all sharing this parser and tree.
	        self.phases = {name: cls(self, self.tree) for name, cls in
	                       getPhases(debug).items()}

	    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
	        """Create the tokenizer for *stream* and run the main loop.

	        :arg stream: input handed to ``_tokenizer.HTMLTokenizer``

	        :arg innerHTML: stored as ``innerHTMLMode``; selects fragment parsing
	            in :meth:`reset`

	        :arg container: name of the fragment's context element

	        :arg scripting: stored on the parser for the phases to consult

	        Remaining keyword arguments are forwarded to the tokenizer. If the
	        main loop raises ``_ReparseException`` the parser state is reset and
	        the loop is run once more.
	        """
	        self.innerHTMLMode = innerHTML
	        self.container = container
	        self.scripting = scripting
	        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
	        self.reset()

	        try:
	            self.mainLoop()
	        except _ReparseException:
	            self.reset()
	            self.mainLoop()

	    def reset(self):
	        """Reset the tree and all per-parse state, then pick the start phase."""
	        self.tree.reset()
	        self.firstStartTag = False
	        self.errors = []
	        self.log = []  # only used with debug mode
	        # "quirks" / "limited quirks" / "no quirks"
	        self.compatMode = "no quirks"

	        if self.innerHTMLMode:
	            self.innerHTML = self.container.lower()

	            # Choose the tokenizer state that matches the context element.
	            if self.innerHTML in cdataElements:
	                self.tokenizer.state = self.tokenizer.rcdataState
	            elif self.innerHTML in rcdataElements:
	                self.tokenizer.state = self.tokenizer.rawtextState
	            elif self.innerHTML == 'plaintext':
	                self.tokenizer.state = self.tokenizer.plaintextState
	            else:
	                # state already is data state
	                pass
	            self.phase = self.phases["beforeHtml"]
	            self.phase.insertHtmlElement()
	            self.resetInsertionMode()
	        else:
	            self.innerHTML = False  # pylint:disable=redefined-variable-type
	            self.phase = self.phases["initial"]

	        self.lastPhase = None

	        self.beforeRCDataPhase = None

	        self.framesetOK = True

	    @property
	    def documentEncoding(self):
	        """Name of the character encoding that was used to decode the input stream, or
	        :obj:`None` if that is not determined yet

	        """
	        if not hasattr(self, 'tokenizer'):
	            return None
	        return self.tokenizer.stream.charEncoding[0].name

	    def isHTMLIntegrationPoint(self, element):
	        """Return whether *element* counts as an HTML integration point.

	        A MathML ``annotation-xml`` element qualifies only when its
	        ``encoding`` attribute, ASCII-lowercased, is ``text/html`` or
	        ``application/xhtml+xml``; any other element is looked up in
	        ``htmlIntegrationPointElements``.
	        """
	        if (element.name == "annotation-xml" and
	                element.namespace == namespaces["mathml"]):
	            return ("encoding" in element.attributes and
	                    element.attributes["encoding"].translate(
	                        asciiUpper2Lower) in
	                    ("text/html", "application/xhtml+xml"))
	        else:
	            return (element.namespace, element.name) in htmlIntegrationPointElements

	    def isMathMLTextIntegrationPoint(self, element):
	        """Return whether *element* is in ``mathmlTextIntegrationPointElements``."""
	        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

	    def mainLoop(self):
	        """Feed every token from the tokenizer to the appropriate phase.

	        A phase handler may return a token, in which case that token is
	        processed again (possibly by a different phase). Once the tokenizer
	        is exhausted, ``processEOF`` is called on the current phase until it
	        stops asking for reprocessing.
	        """
	        CharactersToken = tokenTypes["Characters"]
	        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
	        StartTagToken = tokenTypes["StartTag"]
	        EndTagToken = tokenTypes["EndTag"]
	        CommentToken = tokenTypes["Comment"]
	        DoctypeToken = tokenTypes["Doctype"]
	        ParseErrorToken = tokenTypes["ParseError"]

	        # Start tags excluded from the MathML text integration point rule.
	        mathmlTextExceptions = frozenset(["mglyph", "malignmark"])

	        for token in self.tokenizer:
	            prev_token = None
	            new_token = token
	            while new_token is not None:
	                prev_token = new_token
	                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
	                currentNodeNamespace = currentNode.namespace if currentNode else None
	                currentNodeName = currentNode.name if currentNode else None

	                token_type = new_token["type"]

	                if token_type == ParseErrorToken:
	                    self.parseError(new_token["data"], new_token.get("datavars", {}))
	                    new_token = None
	                else:
	                    # Use the current phase unless the current node is
	                    # foreign content without an applicable integration point.
	                    if (len(self.tree.openElements) == 0 or
	                        currentNodeNamespace == self.tree.defaultNamespace or
	                        (self.isMathMLTextIntegrationPoint(currentNode) and
	                         ((token_type == StartTagToken and
	                           token["name"] not in mathmlTextExceptions) or
	                          token_type in (CharactersToken, SpaceCharactersToken))) or
	                        (currentNodeNamespace == namespaces["mathml"] and
	                         currentNodeName == "annotation-xml" and
	                         token_type == StartTagToken and
	                         token["name"] == "svg") or
	                        (self.isHTMLIntegrationPoint(currentNode) and
	                         token_type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
	                        phase = self.phase
	                    else:
	                        phase = self.phases["inForeignContent"]

	                    if token_type == CharactersToken:
	                        new_token = phase.processCharacters(new_token)
	                    elif token_type == SpaceCharactersToken:
	                        new_token = phase.processSpaceCharacters(new_token)
	                    elif token_type == StartTagToken:
	                        new_token = phase.processStartTag(new_token)
	                    elif token_type == EndTagToken:
	                        new_token = phase.processEndTag(new_token)
	                    elif token_type == CommentToken:
	                        new_token = phase.processComment(new_token)
	                    elif token_type == DoctypeToken:
	                        new_token = phase.processDoctype(new_token)

	                    if (token_type == StartTagToken and prev_token["selfClosing"] and
	                            not prev_token["selfClosingAcknowledged"]):
	                        self.parseError("non-void-element-with-trailing-solidus",
	                                        {"name": prev_token["name"]})

	        # When the loop finishes it's EOF
	        reprocess = True
	        phases = []
	        while reprocess:
	            phases.append(self.phase)
	            reprocess = self.phase.processEOF()
	            if reprocess:
	                # A phase asking for reprocessing must have switched phase.
	                assert self.phase not in phases

	    def parse(self, stream, *args, **kwargs):
	        """Parse a HTML document into a well-formed tree

	        :arg stream: a file-like object or string containing the HTML to be parsed

	            The optional encoding parameter must be a string that indicates
	            the encoding. If specified, that encoding will be used,
	            regardless of any BOM or later declaration (such as in a meta
	            element).

	        :arg scripting: treat noscript elements as if JavaScript was turned on

	        :returns: parsed tree

	        Example:

	        >>> from html5lib.html5parser import HTMLParser
	        >>> parser = HTMLParser()
	        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
	        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

	        """
	        # innerHTML=False, container=None; everything else is forwarded.
	        self._parse(stream, False, None, *args, **kwargs)
	        return self.tree.getDocument()

	    def parseFragment(self, stream, *args, **kwargs):
	        """Parse a HTML fragment into a well-formed tree fragment

	        :arg container: name of the element we're setting the innerHTML
	            property if set to None, default to 'div'

	        :arg stream: a file-like object or string containing the HTML to be parsed

	            The optional encoding parameter must be a string that indicates
	            the encoding. If specified, that encoding will be used,
	            regardless of any BOM or later declaration (such as in a meta
	            element)

	        :arg scripting: treat noscript elements as if JavaScript was turned on

	        :returns: parsed tree

	        Example:

	        >>> from html5lib.html5parser import HTMLParser
	        >>> parser = HTMLParser()
	        >>> parser.parseFragment('<b>this is a fragment</b>')
	        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

	        """
	        # innerHTML=True; a positional container, if any, comes from *args.
	        self._parse(stream, True, *args, **kwargs)
	        return self.tree.getFragment()

	    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
	        """Record a parse error at the current stream position.

	        :arg errorcode: key into the ``E`` message table

	        :arg datavars: values interpolated into the message in strict mode

	        :raises ParseError: when the parser was created with ``strict=True``
	        """
	        # XXX The idea is to make errorcode mandatory.
	        if datavars is None:
	            datavars = {}
	        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
	        if self.strict:
	            raise ParseError(E[errorcode] % datavars)

	    def adjustMathMLAttributes(self, token):
	        """Apply the ``adjustMathMLAttributes`` table to *token*."""
	        adjust_attributes(token, adjustMathMLAttributes)

	    def adjustSVGAttributes(self, token):
	        """Apply the ``adjustSVGAttributes`` table to *token*."""
	        adjust_attributes(token, adjustSVGAttributes)

	    def adjustForeignAttributes(self, token):
	        """Apply the foreign-attribute adjustment table to *token*."""
	        adjust_attributes(token, adjustForeignAttributesMap)

	    def reparseTokenNormal(self, token):
	        # NOTE(review): nothing visible in this class assigns ``self.parser``,
	        # so this call looks like it would raise AttributeError if it were
	        # ever reached - confirm whether the method is still used.
	        # pylint:disable=unused-argument
	        self.parser.phase()

	    def resetInsertionMode(self):
	        """Choose the current phase from the stack of open elements.

	        The stack is walked from the most recently opened element towards
	        the root; the first element whose name has an entry in the table
	        below decides the phase. The bottom-most element is treated as the
	        fragment's context element and falls back to "inBody".
	        """
	        # The name of this method is mostly historical. (It's also used in the
	        # specification.)
	        last = False
	        newModes = {
	            "select": "inSelect",
	            "td": "inCell",
	            "th": "inCell",
	            "tr": "inRow",
	            "tbody": "inTableBody",
	            "thead": "inTableBody",
	            "tfoot": "inTableBody",
	            "caption": "inCaption",
	            "colgroup": "inColumnGroup",
	            "table": "inTable",
	            "head": "inBody",
	            "body": "inBody",
	            "frameset": "inFrameset",
	            "html": "beforeHead"
	        }
	        for node in self.tree.openElements[::-1]:
	            nodeName = node.name
	            new_phase = None
	            if node == self.tree.openElements[0]:
	                assert self.innerHTML
	                last = True
	                nodeName = self.innerHTML
	            # Check for conditions that should only happen in the innerHTML
	            # case
	            if nodeName in ("select", "colgroup", "head", "html"):
	                assert self.innerHTML

	            if not last and node.namespace != self.tree.defaultNamespace:
	                continue

	            if nodeName in newModes:
	                new_phase = self.phases[newModes[nodeName]]
	                break
	            elif last:
	                new_phase = self.phases["inBody"]
	                break

	        self.phase = new_phase

	    def parseRCDataRawtext(self, token, contentType):
	        """Insert *token*'s element and switch to text-only parsing.

	        :arg token: the start tag token to insert

	        :arg contentType: ``"RAWTEXT"`` or ``"RCDATA"``; selects the
	            tokenizer state

	        The current phase is remembered in ``originalPhase`` before the
	        "text" phase takes over.
	        """
	        # Generic RCDATA/RAWTEXT Parsing algorithm
	        assert contentType in ("RAWTEXT", "RCDATA")

	        self.tree.insertElement(token)

	        if contentType == "RAWTEXT":
	            self.tokenizer.state = self.tokenizer.rawtextState
	        else:
	            self.tokenizer.state = self.tokenizer.rcdataState

	        self.originalPhase = self.phase

	        self.phase = self.phases["text"]


	@_utils.memoize
	def getPhases(debug):
	def log(function):
	    """Logger that records which phase processes each token

	    :arg function: the phase method to wrap

	    :returns: a wrapper that, for methods whose name starts with
	        "process" and that receive at least one positional argument,
	        appends an entry to ``self.parser.log`` before delegating to
	        *function*
	    """
	    type_names = {value: key for key, value in tokenTypes.items()}

	    def wrapped(self, *args, **kwargs):
	        if function.__name__.startswith("process") and len(args) > 0:
	            token = args[0]
	            info = {"type": type_names[token['type']]}
	            if token['type'] in tagTokenTypes:
	                info["name"] = token['name']

	            self.parser.log.append((self.parser.tokenizer.state.__name__,
	                                    self.parser.phase.__class__.__name__,
	                                    self.__class__.__name__,
	                                    function.__name__,
	                                    info))
	        # Logged or not, the wrapped method always runs with the same arguments.
	        return function(self, *args, **kwargs)

	    return wrapped

	def getMetaclass(use_metaclass, metaclass_func):
	    """Return the metaclass to build the phase classes with.

	    :arg use_metaclass: when falsy, plain ``type`` is returned

	    :arg metaclass_func: decorator handed to
	        ``method_decorator_metaclass`` when *use_metaclass* is truthy

	    :returns: a metaclass
	    """
	    if not use_metaclass:
	        return type
	    return method_decorator_metaclass(metaclass_func)

	# pylint:disable=unused-argument
	class Phase(with_metaclass(getMetaclass(debug, log))):
	    """Base class for helper object that implements each phase of processing

	    Subclasses supply ``startTagHandler`` / ``endTagHandler`` lookup tables
	    and override the ``process*`` methods whose default does not apply.
	    """
	    __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

	    def __init__(self, parser, tree):
	        """Remember the owning parser and tree; start with empty caches."""
	        self.parser = parser
	        self.tree = tree
	        self.__startTagCache = {}
	        self.__endTagCache = {}

	    def processEOF(self):
	        """Handle end of input; every concrete phase must override this."""
	        raise NotImplementedError

	    def processComment(self, token):
	        """Insert the comment as a child of the current open element."""
	        # This default suits most phases; the others override it.
	        current = self.tree.openElements[-1]
	        self.tree.insertComment(token, current)

	    def processDoctype(self, token):
	        """Report a doctype token as a parse error."""
	        self.parser.parseError("unexpected-doctype")

	    def processCharacters(self, token):
	        """Insert the token's character data as text."""
	        self.tree.insertText(token["data"])

	    def processSpaceCharacters(self, token):
	        """Insert the token's whitespace as text."""
	        self.tree.insertText(token["data"])

	    def processStartTag(self, token):
	        """Dispatch a start tag to its handler, caching the lookup by name."""
	        # The cache lives here, not on BoundMethodDispatcher: caching there
	        # needs a circular reference to the Phase, which showed up as a
	        # significant GC cost (CPython 2.7, 3.8) when parsing many short inputs.
	        tag = token["name"]
	        cache = self.__startTagCache
	        # A membership test is used on purpose: in Py2 `in` is generally
	        # quicker than try/except KeyError, and in Py3 it is quicker when
	        # cache hits are rare (typically short inputs).
	        if tag in cache:
	            return cache[tag](token)

	        handlers = self.startTagHandler
	        handler = cache[tag] = handlers[tag]
	        # Keep the cache bounded in case of loads of unknown tags. Eviction
	        # is random on Py < 3.7 and FIFO on >= 3.7.
	        limit = len(handlers) * 1.1
	        while len(cache) > limit:
	            cache.pop(next(iter(cache)))
	        return handler(token)

	    def startTagHtml(self, token):
	        """Copy attributes the root element lacks from a later <html> tag."""
	        parser = self.parser
	        if not parser.firstStartTag and token["name"] == "html":
	            parser.parseError("non-html-root")
	        # XXX Need a check here to see if the first start tag token emitted is
	        # this token... If it's not, invoke self.parser.parseError().
	        for attr, value in token["data"].items():
	            root = self.tree.openElements[0]
	            if attr not in root.attributes:
	                root.attributes[attr] = value
	        parser.firstStartTag = False

	    def processEndTag(self, token):
	        """Dispatch an end tag to its handler, caching the lookup by name."""
	        # Same caching rationale and trade-offs as in processStartTag.
	        tag = token["name"]
	        cache = self.__endTagCache
	        if tag in cache:
	            return cache[tag](token)

	        handlers = self.endTagHandler
	        handler = cache[tag] = handlers[tag]
	        # Keep the cache bounded in case of loads of unknown tags. Eviction
	        # is random on Py < 3.7 and FIFO on >= 3.7.
	        limit = len(handlers) * 1.1
	        while len(cache) > limit:
	            cache.pop(next(iter(cache)))
	        return handler(token)

	class InitialPhase(Phase):
	    """Phase that looks for a doctype, sets the parser's compat mode and
	    then hands over to the "beforeHtml" phase.
	    """
	    __slots__ = tuple()

	    def processSpaceCharacters(self, token):
	        """Ignore whitespace."""
	        pass

	    def processComment(self, token):
	        """Attach the comment directly to the document."""
	        self.tree.insertComment(token, self.tree.document)

	    def processDoctype(self, token):
	        """Insert the doctype and derive the compat mode from its ids."""
	        doctype_name = token["name"]
	        publicId = token["publicId"]
	        systemId = token["systemId"]
	        correct = token["correct"]

	        has_other_system_id = (systemId is not None and
	                               systemId != "about:legacy-compat")
	        if doctype_name != "html" or publicId is not None or has_other_system_id:
	            self.parser.parseError("unknown-doctype")

	        if publicId is None:
	            publicId = ""

	        self.tree.insertDoctype(token)

	        if publicId != "":
	            publicId = publicId.translate(asciiUpper2Lower)

	        # Public id prefixes that always select quirks mode.
	        quirks_prefixes = (
	            "+//silmaril//dtd html pro v0r11 19970101//",
	            "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	            "-//as//dtd html 3.0 aswedit + extensions//",
	            "-//ietf//dtd html 2.0 level 1//",
	            "-//ietf//dtd html 2.0 level 2//",
	            "-//ietf//dtd html 2.0 strict level 1//",
	            "-//ietf//dtd html 2.0 strict level 2//",
	            "-//ietf//dtd html 2.0 strict//",
	            "-//ietf//dtd html 2.0//",
	            "-//ietf//dtd html 2.1e//",
	            "-//ietf//dtd html 3.0//",
	            "-//ietf//dtd html 3.2 final//",
	            "-//ietf//dtd html 3.2//",
	            "-//ietf//dtd html 3//",
	            "-//ietf//dtd html level 0//",
	            "-//ietf//dtd html level 1//",
	            "-//ietf//dtd html level 2//",
	            "-//ietf//dtd html level 3//",
	            "-//ietf//dtd html strict level 0//",
	            "-//ietf//dtd html strict level 1//",
	            "-//ietf//dtd html strict level 2//",
	            "-//ietf//dtd html strict level 3//",
	            "-//ietf//dtd html strict//",
	            "-//ietf//dtd html//",
	            "-//metrius//dtd metrius presentational//",
	            "-//microsoft//dtd internet explorer 2.0 html strict//",
	            "-//microsoft//dtd internet explorer 2.0 html//",
	            "-//microsoft//dtd internet explorer 2.0 tables//",
	            "-//microsoft//dtd internet explorer 3.0 html strict//",
	            "-//microsoft//dtd internet explorer 3.0 html//",
	            "-//microsoft//dtd internet explorer 3.0 tables//",
	            "-//netscape comm. corp.//dtd html//",
	            "-//netscape comm. corp.//dtd strict html//",
	            "-//o'reilly and associates//dtd html 2.0//",
	            "-//o'reilly and associates//dtd html extended 1.0//",
	            "-//o'reilly and associates//dtd html extended relaxed 1.0//",
	            "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	            "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	            "-//spyglass//dtd html 2.0 extended//",
	            "-//sq//dtd html 2.0 hotmetal + extensions//",
	            "-//sun microsystems corp.//dtd hotjava html//",
	            "-//sun microsystems corp.//dtd hotjava strict html//",
	            "-//w3c//dtd html 3 1995-03-24//",
	            "-//w3c//dtd html 3.2 draft//",
	            "-//w3c//dtd html 3.2 final//",
	            "-//w3c//dtd html 3.2//",
	            "-//w3c//dtd html 3.2s draft//",
	            "-//w3c//dtd html 4.0 frameset//",
	            "-//w3c//dtd html 4.0 transitional//",
	            "-//w3c//dtd html experimental 19960712//",
	            "-//w3c//dtd html experimental 970421//",
	            "-//w3c//dtd w3 html//",
	            "-//w3o//dtd w3 html 3.0//",
	            "-//webtechs//dtd mozilla html 2.0//",
	            "-//webtechs//dtd mozilla html//",
	        )
	        # Public ids that select quirks mode only on an exact match.
	        quirks_exact = ("-//w3o//dtd w3 html strict 3.0//en//",
	                        "-/w3c/dtd html 4.0 transitional/en",
	                        "html")
	        # Quirks without a system id, limited quirks with one.
	        html401_prefixes = ("-//w3c//dtd html 4.01 frameset//",
	                            "-//w3c//dtd html 4.01 transitional//")
	        xhtml10_prefixes = ("-//w3c//dtd xhtml 1.0 frameset//",
	                            "-//w3c//dtd xhtml 1.0 transitional//")

	        is_quirks = (
	            not correct or
	            token["name"] != "html" or
	            publicId.startswith(quirks_prefixes) or
	            publicId in quirks_exact or
	            (publicId.startswith(html401_prefixes) and systemId is None) or
	            (systemId and
	             systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
	        )
	        if is_quirks:
	            self.parser.compatMode = "quirks"
	        elif (publicId.startswith(xhtml10_prefixes) or
	              (publicId.startswith(html401_prefixes) and systemId is not None)):
	            self.parser.compatMode = "limited quirks"

	        self.parser.phase = self.parser.phases["beforeHtml"]

	    def anythingElse(self):
	        """No doctype was seen: use quirks mode and move to "beforeHtml"."""
	        parser = self.parser
	        parser.compatMode = "quirks"
	        parser.phase = parser.phases["beforeHtml"]

	    def processCharacters(self, token):
	        """Report the missing doctype and return the token for reprocessing."""
	        self.parser.parseError("expected-doctype-but-got-chars")
	        self.anythingElse()
	        return token

	    def processStartTag(self, token):
	        """Report the missing doctype and return the token for reprocessing."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-doctype-but-got-start-tag", details)
	        self.anythingElse()
	        return token

	    def processEndTag(self, token):
	        """Report the missing doctype and return the token for reprocessing."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-doctype-but-got-end-tag", details)
	        self.anythingElse()
	        return token

	    def processEOF(self):
	        """Report the missing doctype and ask for EOF to be reprocessed."""
	        self.parser.parseError("expected-doctype-but-got-eof")
	        self.anythingElse()
	        return True

	class BeforeHtmlPhase(Phase):
	    """Phase that creates the root <html> element before moving on to
	    "beforeHead".
	    """
	    __slots__ = tuple()

	    # helper methods
	    def insertHtmlElement(self):
	        """Insert an implied <html> root and switch to "beforeHead"."""
	        root_token = impliedTagToken("html", "StartTag")
	        self.tree.insertRoot(root_token)
	        self.parser.phase = self.parser.phases["beforeHead"]

	    # other
	    def processEOF(self):
	        """Create the root, then ask for EOF to be reprocessed."""
	        self.insertHtmlElement()
	        return True

	    def processComment(self, token):
	        """Attach the comment directly to the document."""
	        self.tree.insertComment(token, self.tree.document)

	    def processSpaceCharacters(self, token):
	        """Ignore whitespace."""
	        pass

	    def processCharacters(self, token):
	        """Create the root and return the token for reprocessing."""
	        self.insertHtmlElement()
	        return token

	    def processStartTag(self, token):
	        """Create the root and return the token for reprocessing."""
	        if token["name"] == "html":
	            self.parser.firstStartTag = True
	        self.insertHtmlElement()
	        return token

	    def processEndTag(self, token):
	        """Reprocess head/body/html/br end tags; report any other one."""
	        tag = token["name"]
	        if tag in ("head", "body", "html", "br"):
	            self.insertHtmlElement()
	            return token
	        self.parser.parseError("unexpected-end-tag-before-html",
	                               {"name": tag})

	class BeforeHeadPhase(Phase):
	    """Phase that opens the <head> element, implying one when the input
	    does not provide it, before moving on to "inHead".
	    """
	    __slots__ = tuple()

	    def _implyHead(self):
	        # Act as though a <head> start tag had been seen.
	        self.startTagHead(impliedTagToken("head", "StartTag"))

	    def processEOF(self):
	        """Imply <head>, then ask for EOF to be reprocessed."""
	        self._implyHead()
	        return True

	    def processSpaceCharacters(self, token):
	        """Ignore whitespace."""
	        pass

	    def processCharacters(self, token):
	        """Imply <head> and return the token for reprocessing."""
	        self._implyHead()
	        return token

	    def startTagHtml(self, token):
	        """Delegate <html> start tags to the "inBody" phase."""
	        in_body = self.parser.phases["inBody"]
	        return in_body.processStartTag(token)

	    def startTagHead(self, token):
	        """Insert <head>, record it as the head pointer, enter "inHead"."""
	        tree = self.tree
	        tree.insertElement(token)
	        tree.headPointer = tree.openElements[-1]
	        self.parser.phase = self.parser.phases["inHead"]

	    def startTagOther(self, token):
	        """Imply <head> and return the token for reprocessing."""
	        self._implyHead()
	        return token

	    def endTagImplyHead(self, token):
	        """Imply <head> and return the end tag for reprocessing."""
	        self._implyHead()
	        return token

	    def endTagOther(self, token):
	        """Report any other end tag as a parse error."""
	        self.parser.parseError("end-tag-after-implied-root",
	                               {"name": token["name"]})

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	        ("head", startTagHead)
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        (("head", "body", "html", "br"), endTagImplyHead)
	    ])
	    endTagHandler.default = endTagOther

	class InHeadPhase(Phase):
	    """Phase that handles tokens while the <head> element is open."""
	    __slots__ = tuple()

	    # the real thing
	    def processEOF(self):
	        """Close <head>, then ask for EOF to be reprocessed."""
	        self.anythingElse()
	        return True

	    def processCharacters(self, token):
	        """Close <head> and return the token for reprocessing."""
	        self.anythingElse()
	        return token

	    def startTagHtml(self, token):
	        """Delegate <html> start tags to the "inBody" phase."""
	        in_body = self.parser.phases["inBody"]
	        return in_body.processStartTag(token)

	    def startTagHead(self, token):
	        """Report a second <head> start tag as a parse error."""
	        self.parser.parseError("two-heads-are-not-better-than-one")

	    def startTagBaseLinkCommand(self, token):
	        """Insert the element and pop it again straight away."""
	        tree = self.tree
	        tree.insertElement(token)
	        tree.openElements.pop()
	        token["selfClosingAcknowledged"] = True

	    def startTagMeta(self, token):
	        """Insert <meta>; while the stream encoding is still tentative,
	        ask the stream to change to the encoding the tag declares.
	        """
	        tree = self.tree
	        tree.insertElement(token)
	        tree.openElements.pop()
	        token["selfClosingAcknowledged"] = True

	        attributes = token["data"]
	        stream = self.parser.tokenizer.stream
	        if stream.charEncoding[1] != "tentative":
	            return

	        if "charset" in attributes:
	            stream.changeEncoding(attributes["charset"])
	        elif ("content" in attributes and
	              "http-equiv" in attributes and
	              attributes["http-equiv"].lower() == "content-type"):
	            # Encoding it as UTF-8 here is a hack, as really we should pass
	            # the abstract Unicode string, and just use the
	            # ContentAttrParser on that, but using UTF-8 allows all chars
	            # to be encoded and as a ASCII-superset works.
	            raw_content = attributes["content"].encode("utf-8")
	            content_parser = _inputstream.ContentAttrParser(
	                _inputstream.EncodingBytes(raw_content))
	            stream.changeEncoding(content_parser.parse())

	    def startTagTitle(self, token):
	        """Parse <title> content as RCDATA."""
	        self.parser.parseRCDataRawtext(token, "RCDATA")

	    def startTagNoFramesStyle(self, token):
	        """Parse <noframes>/<style> content as RAWTEXT."""
	        # Need to decide whether to implement the scripting-disabled case
	        self.parser.parseRCDataRawtext(token, "RAWTEXT")

	    def startTagNoscript(self, token):
	        """Treat <noscript> as RAWTEXT when scripting is on; otherwise
	        insert it and enter "inHeadNoscript".
	        """
	        parser = self.parser
	        if parser.scripting:
	            parser.parseRCDataRawtext(token, "RAWTEXT")
	            return
	        self.tree.insertElement(token)
	        parser.phase = parser.phases["inHeadNoscript"]

	    def startTagScript(self, token):
	        """Insert <script> and switch tokenizer and parser to script text."""
	        parser = self.parser
	        self.tree.insertElement(token)
	        parser.tokenizer.state = parser.tokenizer.scriptDataState
	        parser.originalPhase = parser.phase
	        parser.phase = parser.phases["text"]

	    def startTagOther(self, token):
	        """Close <head> and return the token for reprocessing."""
	        self.anythingElse()
	        return token

	    def endTagHead(self, token):
	        """Pop <head> off the open elements and enter "afterHead"."""
	        node = self.parser.tree.openElements.pop()
	        assert node.name == "head", "Expected head got %s" % node.name
	        self.parser.phase = self.parser.phases["afterHead"]

	    def endTagHtmlBodyBr(self, token):
	        """Close <head> and return the end tag for reprocessing."""
	        self.anythingElse()
	        return token

	    def endTagOther(self, token):
	        """Report any other end tag as a parse error."""
	        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

	    def anythingElse(self):
	        """Act as though a </head> end tag had been seen."""
	        self.endTagHead(impliedTagToken("head"))

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	        ("title", startTagTitle),
	        (("noframes", "style"), startTagNoFramesStyle),
	        ("noscript", startTagNoscript),
	        ("script", startTagScript),
	        (("base", "basefont", "bgsound", "command", "link"),
	         startTagBaseLinkCommand),
	        ("meta", startTagMeta),
	        ("head", startTagHead)
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("head", endTagHead),
	        (("br", "html", "body"), endTagHtmlBodyBr)
	    ])
	    endTagHandler.default = endTagOther

	class InHeadNoscriptPhase(Phase):
	    """Phase that handles tokens inside a <noscript> element opened from
	    the "inHead" phase.
	    """
	    __slots__ = tuple()

	    def processEOF(self):
	        """Report the error, close <noscript>, reprocess EOF."""
	        self.parser.parseError("eof-in-head-noscript")
	        self.anythingElse()
	        return True

	    def processComment(self, token):
	        """Delegate comments to the "inHead" phase."""
	        in_head = self.parser.phases["inHead"]
	        return in_head.processComment(token)

	    def processCharacters(self, token):
	        """Report the error, close <noscript>, reprocess the token."""
	        self.parser.parseError("char-in-head-noscript")
	        self.anythingElse()
	        return token

	    def processSpaceCharacters(self, token):
	        """Delegate whitespace to the "inHead" phase."""
	        in_head = self.parser.phases["inHead"]
	        return in_head.processSpaceCharacters(token)

	    def startTagHtml(self, token):
	        """Delegate <html> start tags to the "inBody" phase."""
	        in_body = self.parser.phases["inBody"]
	        return in_body.processStartTag(token)

	    def startTagBaseLinkCommand(self, token):
	        """Delegate the start tag to the "inHead" phase."""
	        in_head = self.parser.phases["inHead"]
	        return in_head.processStartTag(token)

	    def startTagHeadNoscript(self, token):
	        """Report nested <head>/<noscript> start tags as parse errors."""
	        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

	    def startTagOther(self, token):
	        """Report the error, close <noscript>, reprocess the token."""
	        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
	        self.anythingElse()
	        return token

	    def endTagNoscript(self, token):
	        """Pop <noscript> off the open elements and return to "inHead"."""
	        node = self.parser.tree.openElements.pop()
	        assert node.name == "noscript", "Expected noscript got %s" % node.name
	        self.parser.phase = self.parser.phases["inHead"]

	    def endTagBr(self, token):
	        """Report the error, close <noscript>, reprocess the end tag."""
	        self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]})
	        self.anythingElse()
	        return token

	    def endTagOther(self, token):
	        """Report any other end tag as a parse error."""
	        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

	    def anythingElse(self):
	        """Act as though a </noscript> end tag had been seen."""
	        # Caller must raise parse error first!
	        self.endTagNoscript(impliedTagToken("noscript"))

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	        (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
	        (("head", "noscript"), startTagHeadNoscript),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("noscript", endTagNoscript),
	        ("br", endTagBr),
	    ])
	    endTagHandler.default = endTagOther

	class AfterHeadPhase(Phase):
	    """Phase that handles tokens between </head> and the start of the
	    body or frameset.
	    """
	    __slots__ = tuple()

	    def processEOF(self):
	        """Imply <body>, then ask for EOF to be reprocessed."""
	        self.anythingElse()
	        return True

	    def processCharacters(self, token):
	        """Imply <body> and return the token for reprocessing."""
	        self.anythingElse()
	        return token

	    def startTagHtml(self, token):
	        """Delegate <html> start tags to the "inBody" phase."""
	        in_body = self.parser.phases["inBody"]
	        return in_body.processStartTag(token)

	    def startTagBody(self, token):
	        """Insert an explicit <body> and enter "inBody"."""
	        parser = self.parser
	        parser.framesetOK = False
	        self.tree.insertElement(token)
	        parser.phase = parser.phases["inBody"]

	    def startTagFrameset(self, token):
	        """Insert <frameset> and enter "inFrameset"."""
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inFrameset"]

	    def startTagFromHead(self, token):
	        """Handle a head-only element seen after </head>: reopen the head
	        element, let "inHead" process the tag, then take the head off the
	        stack again.
	        """
	        self.parser.parseError("unexpected-start-tag-out-of-my-head",
	                               {"name": token["name"]})
	        open_elements = self.tree.openElements
	        open_elements.append(self.tree.headPointer)
	        self.parser.phases["inHead"].processStartTag(token)
	        # Remove the most recently opened "head" element, if there is one.
	        head = next((node for node in open_elements[::-1]
	                     if node.name == "head"), None)
	        if head is not None:
	            open_elements.remove(head)

	    def startTagHead(self, token):
	        """Report a stray <head> start tag as a parse error."""
	        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

	    def startTagOther(self, token):
	        """Imply <body> and return the token for reprocessing."""
	        self.anythingElse()
	        return token

	    def endTagHtmlBodyBr(self, token):
	        """Imply <body> and return the end tag for reprocessing."""
	        self.anythingElse()
	        return token

	    def endTagOther(self, token):
	        """Report any other end tag as a parse error."""
	        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

	    def anythingElse(self):
	        """Insert an implied <body> and enter "inBody"."""
	        parser = self.parser
	        self.tree.insertElement(impliedTagToken("body", "StartTag"))
	        parser.phase = parser.phases["inBody"]
	        parser.framesetOK = True

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	        ("body", startTagBody),
	        ("frameset", startTagFrameset),
	        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
	          "style", "title"),
	         startTagFromHead),
	        ("head", startTagHead)
	    ])
	    startTagHandler.default = startTagOther
	    endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
	                                              endTagHtmlBodyBr)])
	    endTagHandler.default = endTagOther

	class InBodyPhase(Phase):
	# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
	# the really-really-really-very crazy mode
	__slots__ = ("processSpaceCharacters",)

	def __init__(self, *args, **kwargs):
	    """Initialise the base phase, forwarding all arguments to it, and
	    install the default whitespace handler.
	    """
	    super(InBodyPhase, self).__init__(*args, **kwargs)
	    # Set this to the default handler
	    self.processSpaceCharacters = self.processSpaceCharactersNonPre

	def isMatchingFormattingElement(self, node1, node2):
	    """Return whether two nodes share name, namespace and attributes."""
	    if node1.name != node2.name:
	        return False
	    if node1.namespace != node2.namespace:
	        return False
	    return node1.attributes == node2.attributes

	# helper
	def addFormattingElement(self, token):
	    """Insert the element for *token* and append it to the list of active
	    formatting elements, first dropping the oldest of three matching
	    entries found after the last marker.
	    """
	    tree = self.tree
	    tree.insertElement(token)
	    new_element = tree.openElements[-1]

	    formatting = tree.activeFormattingElements
	    duplicates = []
	    # Walk back from the newest entry, stopping at the first marker.
	    for candidate in reversed(formatting):
	        if candidate is Marker:
	            break
	        if self.isMatchingFormattingElement(candidate, new_element):
	            duplicates.append(candidate)

	    assert len(duplicates) <= 3
	    if len(duplicates) == 3:
	        # The last one collected is the earliest entry in the list.
	        formatting.remove(duplicates[-1])
	    formatting.append(new_element)

	# the real deal
	def processEOF(self):
	    """Report one parse error if any open element is not allowed to be
	    left open at end of input.
	    """
	    allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
	                                  "tfoot", "th", "thead", "tr", "body",
	                                  "html"))
	    still_open = self.tree.openElements[::-1]
	    if any(node.name not in allowed_elements for node in still_open):
	        self.parser.parseError("expected-closing-tag-but-got-eof")
	    # Stop parsing

	def processSpaceCharactersDropNewline(self, token):
	    """Insert whitespace, minus a leading newline when the current element
	    is an empty <pre>, <listing> or <textarea>; the whitespace handler is
	    reset to the default one first.
	    """
	    # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
	    # want to drop leading newlines
	    text = token["data"]
	    self.processSpaceCharacters = self.processSpaceCharactersNonPre
	    if text.startswith("\n"):
	        current = self.tree.openElements[-1]
	        if (current.name in ("pre", "listing", "textarea") and
	                not current.hasContent()):
	            text = text[1:]
	    if text:
	        self.tree.reconstructActiveFormattingElements()
	        self.tree.insertText(text)

	def processCharacters(self, token):
	    """Insert character data as text and clear ``framesetOK`` once any
	    non-space character has been seen.

	    A token consisting of a single NUL character is dropped.
	    """
	    data = token["data"]
	    if data == "\u0000":
	        # The tokenizer should always emit null on its own
	        return
	    self.tree.reconstructActiveFormattingElements()
	    self.tree.insertText(data)
	    # A generator lets any() stop at the first non-space character
	    # instead of building a list for the whole string first.
	    if (self.parser.framesetOK and
	            any(char not in spaceCharacters for char in data)):
	        self.parser.framesetOK = False

	def processSpaceCharactersNonPre(self, token):
	    """Default whitespace handler: reconstruct the active formatting
	    elements, then insert the whitespace as text.
	    """
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    tree.insertText(token["data"])

	def startTagProcessInHead(self, token):
	    """Delegate the start tag to the "inHead" phase and return its result."""
	    in_head = self.parser.phases["inHead"]
	    return in_head.processStartTag(token)

	def startTagBody(self, token):
	    """Report a stray <body> start tag and copy any attributes the
	    existing body element lacks from it.
	    """
	    parser = self.parser
	    parser.parseError("unexpected-start-tag", {"name": "body"})
	    open_elements = self.tree.openElements
	    if len(open_elements) == 1 or open_elements[1].name != "body":
	        # No body element on the stack: only expected for fragments.
	        assert parser.innerHTML
	        return

	    parser.framesetOK = False
	    body = open_elements[1]
	    for attr, value in token["data"].items():
	        if attr not in body.attributes:
	            body.attributes[attr] = value

	def startTagFrameset(self, token):
	    """Report a stray <frameset> start tag; when framesets are still
	    permitted, replace the body element with the frameset and enter
	    "inFrameset".
	    """
	    parser = self.parser
	    parser.parseError("unexpected-start-tag", {"name": "frameset"})
	    open_elements = self.tree.openElements
	    if len(open_elements) == 1 or open_elements[1].name != "body":
	        # No body element on the stack: only expected for fragments.
	        assert parser.innerHTML
	        return
	    if not parser.framesetOK:
	        return

	    body = open_elements[1]
	    if body.parent:
	        body.parent.removeChild(body)
	    # Pop everything above the root html element.
	    while open_elements[-1].name != "html":
	        open_elements.pop()
	    self.tree.insertElement(token)
	    parser.phase = parser.phases["inFrameset"]

	def startTagCloseP(self, token):
	    """Insert the element, first closing a <p> that is in button scope."""
	    tree = self.tree
	    pIsOpen = tree.elementInScope("p", variant="button")
	    if pIsOpen:
	        self.endTagP(impliedTagToken("p"))
	    tree.insertElement(token)

	def startTagPreListing(self, token):
	    """Insert a <pre>/<listing> element.

	    A <p> in button scope is closed first. Afterwards ``framesetOK`` is
	    cleared and the whitespace handler is switched to the variant that
	    drops a leading newline.
	    """
	    tree = self.tree
	    if tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    tree.insertElement(token)
	    self.parser.framesetOK = False
	    self.processSpaceCharacters = self.processSpaceCharactersDropNewline

	def startTagForm(self, token):
	    """Insert a <form> and record it as the tree's form pointer.

	    If a form pointer is already set, the tag is reported as
	    "unexpected-start-tag" and nothing is inserted.
	    """
	    tree = self.tree
	    if tree.formPointer:
	        self.parser.parseError("unexpected-start-tag", {"name": "form"})
	        return
	    if tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    tree.insertElement(token)
	    # The element just inserted is now the current node
	    tree.formPointer = tree.openElements[-1]

	def startTagListItem(self, token):
	    """Insert an <li>, <dd> or <dt>, implicitly closing an open sibling.

	    The stack is scanned from the current node upwards: the first
	    element whose name is a "stop name" for this tag gets an implied
	    end tag; the scan also stops at a special element other than
	    address, div or p. A <p> in button scope is then closed.
	    """
	    self.parser.framesetOK = False

	    definitionItems = ["dt", "dd"]
	    stopNames = {"li": ["li"],
	                 "dt": definitionItems,
	                 "dd": definitionItems}[token["name"]]
	    for node in reversed(self.tree.openElements):
	        if node.name in stopNames:
	            implied = impliedTagToken(node.name, "EndTag")
	            self.parser.phase.processEndTag(implied)
	            break
	        isBarrier = (node.nameTuple in specialElements and
	                     node.name not in ("address", "div", "p"))
	        if isBarrier:
	            break

	    if self.tree.elementInScope("p", variant="button"):
	        implied = impliedTagToken("p", "EndTag")
	        self.parser.phase.processEndTag(implied)

	    self.tree.insertElement(token)

	def startTagPlaintext(self, token):
	    """Insert a <plaintext> element and put the tokenizer in its
	    plaintext state; a <p> in button scope is closed first."""
	    if self.tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    self.tree.insertElement(token)
	    tokenizer = self.parser.tokenizer
	    tokenizer.state = tokenizer.plaintextState

	def startTagHeading(self, token):
	    """Insert a heading element.

	    A <p> in button scope is closed first. If the current node is
	    itself a heading, that is reported as "unexpected-start-tag" and
	    the current node is popped before the new heading is inserted.
	    """
	    tree = self.tree
	    if tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    currentIsHeading = tree.openElements[-1].name in headingElements
	    if currentIsHeading:
	        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
	        tree.openElements.pop()
	    tree.insertElement(token)

	def startTagA(self, token):
	    """Insert an <a> formatting element.

	    If an <a> is already in the list of active formatting elements, an
	    implied </a> is processed first and the old element is removed from
	    both the open-element stack and the active formatting list if it is
	    still present in either.
	    """
	    tree = self.tree
	    previousA = tree.elementInActiveFormattingElements("a")
	    if previousA:
	        self.parser.parseError("unexpected-start-tag-implies-end-tag",
	                               {"startName": "a", "endName": "a"})
	        self.endTagFormatting(impliedTagToken("a"))
	        for elements in (tree.openElements, tree.activeFormattingElements):
	            if previousA in elements:
	                elements.remove(previousA)
	    tree.reconstructActiveFormattingElements()
	    self.addFormattingElement(token)

	def startTagFormatting(self, token):
	    """Reconstruct the active formatting elements, then add the token
	    as a new formatting element."""
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    self.addFormattingElement(token)

	def startTagNobr(self, token):
	    """Add a <nobr> formatting element.

	    If a <nobr> is already in scope, an implied </nobr> is processed
	    first and the active formatting elements are reconstructed again.
	    """
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    nobrInScope = tree.elementInScope("nobr")
	    if nobrInScope:
	        self.parser.parseError("unexpected-start-tag-implies-end-tag",
	                               {"startName": "nobr", "endName": "nobr"})
	        self.processEndTag(impliedTagToken("nobr"))
	        # XXX Need tests that trigger the following
	        tree.reconstructActiveFormattingElements()
	    self.addFormattingElement(token)

	def startTagButton(self, token):
	    """Insert a <button> element.

	    :returns: the token when a <button> was already in scope (after
	        processing an implied </button>); otherwise None
	    """
	    if not self.tree.elementInScope("button"):
	        self.tree.reconstructActiveFormattingElements()
	        self.tree.insertElement(token)
	        self.parser.framesetOK = False
	        return None
	    self.parser.parseError("unexpected-start-tag-implies-end-tag",
	                           {"startName": "button", "endName": "button"})
	    self.processEndTag(impliedTagToken("button"))
	    return token

	def startTagAppletMarqueeObject(self, token):
	    """Insert an <applet>, <marquee> or <object> element, append a
	    Marker to the active formatting elements and clear ``framesetOK``."""
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    tree.insertElement(token)
	    tree.activeFormattingElements.append(Marker)
	    self.parser.framesetOK = False

	def startTagXmp(self, token):
	    """Start an <xmp> element parsed as RAWTEXT.

	    A <p> in button scope is closed first; ``framesetOK`` is cleared.
	    """
	    tree = self.tree
	    if tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    tree.reconstructActiveFormattingElements()
	    parser = self.parser
	    parser.framesetOK = False
	    parser.parseRCDataRawtext(token, "RAWTEXT")

	def startTagTable(self, token):
	    """Insert a <table> and switch to the "inTable" phase.

	    Outside quirks mode, a <p> in button scope gets an implied end tag
	    first. ``framesetOK`` is cleared.
	    """
	    parser = self.parser
	    if (parser.compatMode != "quirks" and
	            self.tree.elementInScope("p", variant="button")):
	        self.processEndTag(impliedTagToken("p"))
	    self.tree.insertElement(token)
	    parser.framesetOK = False
	    parser.phase = parser.phases["inTable"]

	def startTagVoidFormatting(self, token):
	    """Insert an element and pop it straight off the stack again.

	    The token's self-closing flag is marked as acknowledged and
	    ``framesetOK`` is cleared.
	    """
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    tree.insertElement(token)
	    tree.openElements.pop()
	    token["selfClosingAcknowledged"] = True
	    self.parser.framesetOK = False

	def startTagInput(self, token):
	    """Insert an <input> via ``startTagVoidFormatting``.

	    For ``type=hidden`` (compared ASCII case-insensitively) the value
	    ``framesetOK`` had before the call is restored.
	    """
	    savedFramesetOK = self.parser.framesetOK
	    self.startTagVoidFormatting(token)
	    attrs = token["data"]
	    if "type" not in attrs:
	        return
	    if attrs["type"].translate(asciiUpper2Lower) == "hidden":
	        # input type=hidden doesn't change framesetOK
	        self.parser.framesetOK = savedFramesetOK

	def startTagParamSource(self, token):
	    """Insert the element, pop it immediately and acknowledge the
	    token's self-closing flag."""
	    tree = self.tree
	    tree.insertElement(token)
	    tree.openElements.pop()
	    token["selfClosingAcknowledged"] = True

	def startTagHr(self, token):
	    """Insert an <hr> and pop it immediately.

	    A <p> in button scope is closed first. The self-closing flag is
	    acknowledged and ``framesetOK`` is cleared.
	    """
	    tree = self.tree
	    if tree.elementInScope("p", variant="button"):
	        self.endTagP(impliedTagToken("p"))
	    tree.insertElement(token)
	    tree.openElements.pop()
	    token["selfClosingAcknowledged"] = True
	    self.parser.framesetOK = False

	def startTagImage(self, token):
	    """Treat an <image> start tag as <img>.

	    Reports "unexpected-start-tag-treated-as" and reprocesses an implied
	    <img> start tag carrying the same attributes and self-closing flag.
	    """
	    # No really...
	    self.parser.parseError("unexpected-start-tag-treated-as",
	                           {"originalName": "image", "newName": "img"})
	    imgToken = impliedTagToken("img", "StartTag",
	                               attributes=token["data"],
	                               selfClosing=token["selfClosing"])
	    self.processStartTag(imgToken)

	def startTagIsIndex(self, token):
	    """Expand a deprecated <isindex> into form/hr/label/input markup.

	    Reports "deprecated-tag". If a form pointer is already set nothing
	    else happens. Otherwise implied tokens are processed in this order:
	    <form> (with the ``action`` attribute, if any), <hr>, <label>, the
	    prompt text, <input name="isindex"> carrying the remaining
	    attributes, </label>, <hr>, </form>.
	    """
	    self.parser.parseError("deprecated-tag", {"name": "isindex"})
	    if self.tree.formPointer:
	        return
	    attrs = token["data"]

	    formAttributes = {}
	    if "action" in attrs:
	        formAttributes["action"] = attrs["action"]
	    self.processStartTag(impliedTagToken("form", "StartTag",
	                                         attributes=formAttributes))
	    self.processStartTag(impliedTagToken("hr", "StartTag"))
	    self.processStartTag(impliedTagToken("label", "StartTag"))

	    # XXX Localization ...
	    if "prompt" in attrs:
	        prompt = attrs["prompt"]
	    else:
	        prompt = "This is a searchable index. Enter search keywords: "
	    self.processCharacters(
	        {"type": tokenTypes["Characters"], "data": prompt})

	    # Everything except action/prompt is passed on to the <input>
	    inputAttributes = attrs.copy()
	    for consumed in ("action", "prompt"):
	        if consumed in inputAttributes:
	            del inputAttributes[consumed]
	    inputAttributes["name"] = "isindex"
	    self.processStartTag(impliedTagToken("input", "StartTag",
	                                         attributes=inputAttributes,
	                                         selfClosing=token["selfClosing"]))
	    self.processEndTag(impliedTagToken("label"))
	    self.processStartTag(impliedTagToken("hr", "StartTag"))
	    self.processEndTag(impliedTagToken("form"))

	def startTagTextarea(self, token):
	    """Insert a <textarea>, switch the tokenizer to its RCDATA state,
	    arm the drop-leading-newline whitespace handler and clear
	    ``framesetOK``."""
	    self.tree.insertElement(token)
	    tokenizer = self.parser.tokenizer
	    tokenizer.state = tokenizer.rcdataState
	    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
	    self.parser.framesetOK = False

	def startTagIFrame(self, token):
	    """Clear ``framesetOK`` and handle the <iframe> as a RAWTEXT element."""
	    parser = self.parser
	    parser.framesetOK = False
	    self.startTagRawtext(token)

	def startTagNoscript(self, token):
	    """Handle <noscript> as RAWTEXT when scripting is enabled, and as
	    an ordinary element otherwise."""
	    if self.parser.scripting:
	        handler = self.startTagRawtext
	    else:
	        handler = self.startTagOther
	    handler(token)

	def startTagRawtext(self, token):
	    """iframe, noembed noframes, noscript(if scripting enabled)"""
	    parser = self.parser
	    parser.parseRCDataRawtext(token, "RAWTEXT")

	def startTagOpt(self, token):
	    """Insert an <option>/<optgroup>; if the current node is an
	    <option>, an implied </option> is processed first."""
	    currentName = self.tree.openElements[-1].name
	    if currentName == "option":
	        self.parser.phase.processEndTag(impliedTagToken("option"))
	    self.tree.reconstructActiveFormattingElements()
	    # Insertion goes through the parser's tree reference, as before
	    self.parser.tree.insertElement(token)

	def startTagSelect(self, token):
	    """Insert a <select> and pick the follow-up phase.

	    When the parser's current phase is one of the table-related phases
	    the next phase is "inSelectInTable", otherwise "inSelect".
	    ``framesetOK`` is cleared.
	    """
	    self.tree.reconstructActiveFormattingElements()
	    self.tree.insertElement(token)
	    parser = self.parser
	    parser.framesetOK = False
	    phases = parser.phases
	    tablePhases = (phases["inTable"],
	                   phases["inCaption"],
	                   phases["inColumnGroup"],
	                   phases["inTableBody"],
	                   phases["inRow"],
	                   phases["inCell"])
	    if parser.phase in tablePhases:
	        nextPhase = "inSelectInTable"
	    else:
	        nextPhase = "inSelect"
	    parser.phase = phases[nextPhase]

	def startTagRpRt(self, token):
	    """Insert an <rp>/<rt>.

	    With a <ruby> in scope, implied end tags are generated first and a
	    parse error (without a specific code) is reported if the current
	    node is then not the <ruby>.
	    """
	    tree = self.tree
	    if tree.elementInScope("ruby"):
	        tree.generateImpliedEndTags()
	        currentName = tree.openElements[-1].name
	        if currentName != "ruby":
	            self.parser.parseError()
	    tree.insertElement(token)

	def startTagMath(self, token):
	    """Insert a <math> element in the MathML namespace.

	    MathML and foreign attribute adjustments are applied to the token
	    first. A self-closing token is popped again and acknowledged.
	    """
	    self.tree.reconstructActiveFormattingElements()
	    parser = self.parser
	    parser.adjustMathMLAttributes(token)
	    parser.adjustForeignAttributes(token)
	    token["namespace"] = namespaces["mathml"]
	    self.tree.insertElement(token)
	    # Need to get the parse error right for the case where the token
	    # has a namespace not equal to the xmlns attribute
	    if not token["selfClosing"]:
	        return
	    self.tree.openElements.pop()
	    token["selfClosingAcknowledged"] = True

	def startTagSvg(self, token):
	    """Insert an <svg> element in the SVG namespace.

	    SVG and foreign attribute adjustments are applied to the token
	    first. A self-closing token is popped again and acknowledged.
	    """
	    self.tree.reconstructActiveFormattingElements()
	    parser = self.parser
	    parser.adjustSVGAttributes(token)
	    parser.adjustForeignAttributes(token)
	    token["namespace"] = namespaces["svg"]
	    self.tree.insertElement(token)
	    # Need to get the parse error right for the case where the token
	    # has a namespace not equal to the xmlns attribute
	    if not token["selfClosing"]:
	        return
	    self.tree.openElements.pop()
	    token["selfClosingAcknowledged"] = True

	def startTagMisplaced(self, token):
	    """Ignore a start tag that belongs to a different insertion mode.

	    Registered in this phase's start-tag dispatcher for
	    "caption", "col", "colgroup", "frame", "head", "tbody", "td",
	    "tfoot", "th", "thead" and "tr". The tag is only reported as
	    "unexpected-start-tag-ignored"; nothing is inserted.
	    """
	    details = {"name": token["name"]}
	    self.parser.parseError("unexpected-start-tag-ignored", details)

	def startTagOther(self, token):
	    """Default start-tag handler: reconstruct the active formatting
	    elements and insert the element."""
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    tree.insertElement(token)

	def endTagP(self, token):
	    """Close the nearest <p> in button scope.

	    With no <p> in button scope, an implied <p> start tag is processed,
	    "unexpected-end-tag" is reported, and the end tag is then handled
	    again. Otherwise implied end tags (excluding p) are generated and
	    the stack is popped up to and including the <p>.
	    """
	    tree = self.tree
	    if not tree.elementInScope("p", variant="button"):
	        self.startTagCloseP(impliedTagToken("p", "StartTag"))
	        self.parser.parseError("unexpected-end-tag", {"name": "p"})
	        self.endTagP(impliedTagToken("p", "EndTag"))
	        return
	    tree.generateImpliedEndTags("p")
	    if tree.openElements[-1].name != "p":
	        self.parser.parseError("unexpected-end-tag", {"name": "p"})
	    # Pop until the <p> itself has been removed
	    while tree.openElements.pop().name != "p":
	        pass

	def endTagBody(self, token):
	    """Handle </body>: switch the parser to the "afterBody" phase.

	    With no <body> in scope, a parse error (without a specific code) is
	    reported and the token is ignored. If the current node is not the
	    body, the open elements after the first two are scanned and the
	    first one that may not be left open is reported as
	    "expected-one-end-tag-but-got-another".

	    :arg token: the end tag token (not inspected)
	    """
	    if not self.tree.elementInScope("body"):
	        self.parser.parseError()
	        return
	    if self.tree.openElements[-1].name != "body":
	        # Built once up front instead of once per scanned node
	        mayStayOpen = frozenset(("dd", "dt", "li", "optgroup",
	                                 "option", "p", "rp", "rt",
	                                 "tbody", "td", "tfoot",
	                                 "th", "thead", "tr", "body",
	                                 "html"))
	        for node in self.tree.openElements[2:]:
	            if node.name not in mayStayOpen:
	                # Not sure this is the correct name for the parse error
	                self.parser.parseError(
	                    "expected-one-end-tag-but-got-another",
	                    {"gotName": "body", "expectedName": node.name})
	                break
	    self.parser.phase = self.parser.phases["afterBody"]

	def endTagHtml(self, token):
	    """Handle </html> by acting as if </body> had been seen.

	    :returns: the token when a <body> is in scope (after processing an
	        implied </body>); otherwise None
	    """
	    # We repeat the test for the body end tag token being ignored here
	    if not self.tree.elementInScope("body"):
	        return None
	    self.endTagBody(impliedTagToken("body"))
	    return token

	def endTagBlock(self, token):
	    """Close a block-level element named by the end tag.

	    When the element is in scope, implied end tags are generated and
	    the stack is popped up to and including it. "end-tag-too-early" is
	    reported whenever the current node does not match the tag name.
	    """
	    tagName = token["name"]
	    # Put us back in the right whitespace handling mode
	    if tagName == "pre":
	        self.processSpaceCharacters = self.processSpaceCharactersNonPre
	    tree = self.tree
	    inScope = tree.elementInScope(tagName)
	    if inScope:
	        tree.generateImpliedEndTags()
	    if tree.openElements[-1].name != tagName:
	        self.parser.parseError("end-tag-too-early", {"name": tagName})
	    if not inScope:
	        return
	    while tree.openElements.pop().name != tagName:
	        pass

	def endTagForm(self, token):
	    """Handle </form>: clear the form pointer and drop that form
	    element from the stack of open elements.

	    If there was no form pointer, or that element is not in scope,
	    "unexpected-end-tag" is reported instead.
	    """
	    tree = self.tree
	    formNode = tree.formPointer
	    tree.formPointer = None
	    if formNode is None or not tree.elementInScope(formNode):
	        self.parser.parseError("unexpected-end-tag",
	                               {"name": "form"})
	        return
	    tree.generateImpliedEndTags()
	    if tree.openElements[-1] != formNode:
	        self.parser.parseError("end-tag-too-early-ignored",
	                               {"name": "form"})
	    # Only the form itself is removed; elements above it stay open
	    tree.openElements.remove(formNode)

	def endTagListItem(self, token):
	    """Close an <li>, <dd> or <dt>.

	    <li> is looked up in "list" scope, the others in the default scope.
	    If the element is not in scope "unexpected-end-tag" is reported;
	    otherwise the stack is popped up to and including it.
	    """
	    tagName = token["name"]
	    scopeVariant = "list" if tagName == "li" else None
	    tree = self.tree
	    if not tree.elementInScope(tagName, variant=scopeVariant):
	        self.parser.parseError("unexpected-end-tag", {"name": tagName})
	        return
	    tree.generateImpliedEndTags(exclude=tagName)
	    if tree.openElements[-1].name != tagName:
	        self.parser.parseError(
	            "end-tag-too-early",
	            {"name": tagName})
	    while tree.openElements.pop().name != tagName:
	        pass

	def endTagHeading(self, token):
	    """Close the nearest open heading for an </h1>..</h6> end tag.

	    If any heading element is in scope, implied end tags are generated.
	    "end-tag-too-early" is reported when the current node does not
	    match the tag name. If any heading is (still) in scope, the stack
	    is popped up to and including the first heading element found.
	    """
	    tree = self.tree
	    if any(tree.elementInScope(name) for name in headingElements):
	        tree.generateImpliedEndTags()
	    if tree.openElements[-1].name != token["name"]:
	        self.parser.parseError("end-tag-too-early", {"name": token["name"]})

	    # Scope is checked again after the implied end tags were generated
	    if any(tree.elementInScope(name) for name in headingElements):
	        while tree.openElements.pop().name not in headingElements:
	            pass

	def endTagFormatting(self, token):
	"""The much-feared adoption agency algorithm

	Handles an end tag for a formatting element. Depending on the state
	of the stack of open elements and the list of active formatting
	elements, the token is handed to ``endTagOther``, ignored with a
	parse error, or resolved by cloning formatting elements and
	re-parenting nodes under the "furthest block".

	:arg token: the end tag token; ``token["name"]`` selects the
	formatting element

	:returns: None
	"""
	# http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
	# XXX Better parseError messages appreciated.

	# Step 1
	outerLoopCounter = 0

	# Step 2
	# The outer loop runs at most eight times
	while outerLoopCounter < 8:

	# Step 3
	outerLoopCounter += 1

	# Step 4:

	# Let the formatting element be the last element in
	# the list of active formatting elements that:
	# - is between the end of the list and the last scope
	# marker in the list, if any, or the start of the list
	# otherwise, and
	# - has the same tag name as the token.
	formattingElement = self.tree.elementInActiveFormattingElements(
	token["name"])
	if (not formattingElement or
	(formattingElement in self.tree.openElements and
	not self.tree.elementInScope(formattingElement.name))):
	# If there is no such node, then abort these steps
	# and instead act as described in the "any other
	# end tag" entry below.
	self.endTagOther(token)
	return

	# Otherwise, if there is such a node, but that node is
	# not in the stack of open elements, then this is a
	# parse error; remove the element from the list, and
	# abort these steps.
	elif formattingElement not in self.tree.openElements:
	self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
	self.tree.activeFormattingElements.remove(formattingElement)
	return

	# Otherwise, if there is such a node, and that node is
	# also in the stack of open elements, but the element
	# is not in scope, then this is a parse error; ignore
	# the token, and abort these steps.
	# NOTE(review): the first branch above already catches "in the stack
	# but not in scope", so this branch looks unreachable - confirm.
	elif not self.tree.elementInScope(formattingElement.name):
	self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
	return

	# Otherwise, there is a formatting element and that
	# element is in the stack and is in scope. If the
	# element is not the current node, this is a parse
	# error. In any case, proceed with the algorithm as
	# written in the following steps.
	else:
	if formattingElement != self.tree.openElements[-1]:
	self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})

	# Step 5:

	# Let the furthest block be the topmost node in the
	# stack of open elements that is lower in the stack
	# than the formatting element, and is an element in
	# the special category. There might not be one.
	afeIndex = self.tree.openElements.index(formattingElement)
	furthestBlock = None
	# The slice starts at the formatting element itself
	for element in self.tree.openElements[afeIndex:]:
	if element.nameTuple in specialElements:
	furthestBlock = element
	break

	# Step 6:

	# If there is no furthest block, then the UA must
	# first pop all the nodes from the bottom of the stack
	# of open elements, from the current node up to and
	# including the formatting element, then remove the
	# formatting element from the list of active
	# formatting elements, and finally abort these steps.
	if furthestBlock is None:
	element = self.tree.openElements.pop()
	while element != formattingElement:
	element = self.tree.openElements.pop()
	self.tree.activeFormattingElements.remove(element)
	return

	# Step 7
	# The element immediately before the formatting element in the stack
	commonAncestor = self.tree.openElements[afeIndex - 1]

	# Step 8:
	# The bookmark is supposed to help us identify where to reinsert
	# nodes in step 15. We have to ensure that we reinsert nodes after
	# the node before the active formatting element. Note the bookmark
	# can move in step 9.7
	bookmark = self.tree.activeFormattingElements.index(formattingElement)

	# Step 9
	lastNode = node = furthestBlock
	innerLoopCounter = 0

	index = self.tree.openElements.index(node)
	# The inner loop runs at most three times, moving one stack slot
	# towards the formatting element on each pass
	while innerLoopCounter < 3:
	innerLoopCounter += 1
	# Node is element before node in open elements
	index -= 1
	node = self.tree.openElements[index]
	if node not in self.tree.activeFormattingElements:
	# Removing node shifts the later entries down one slot, so the
	# decrement at the top of the next pass lands on the element
	# that preceded the removed node
	self.tree.openElements.remove(node)
	continue
	# Step 9.6
	if node == formattingElement:
	break
	# Step 9.7
	if lastNode == furthestBlock:
	bookmark = self.tree.activeFormattingElements.index(node) + 1
	# Step 9.8
	clone = node.cloneNode()
	# Replace node with clone
	self.tree.activeFormattingElements[
	self.tree.activeFormattingElements.index(node)] = clone
	self.tree.openElements[
	self.tree.openElements.index(node)] = clone
	node = clone
	# Step 9.9
	# Remove lastNode from its parents, if any
	if lastNode.parent:
	lastNode.parent.removeChild(lastNode)
	node.appendChild(lastNode)
	# Step 9.10
	lastNode = node

	# Step 10
	# Foster parent lastNode if commonAncestor is a
	# table, tbody, tfoot, thead, or tr we need to foster
	# parent the lastNode
	if lastNode.parent:
	lastNode.parent.removeChild(lastNode)

	if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
	parent, insertBefore = self.tree.getTableMisnestedNodePosition()
	parent.insertBefore(lastNode, insertBefore)
	else:
	commonAncestor.appendChild(lastNode)

	# Step 11
	clone = formattingElement.cloneNode()

	# Step 12
	furthestBlock.reparentChildren(clone)

	# Step 13
	furthestBlock.appendChild(clone)

	# Step 14
	# The clone takes the bookmarked slot in the active formatting list
	self.tree.activeFormattingElements.remove(formattingElement)
	self.tree.activeFormattingElements.insert(bookmark, clone)

	# Step 15
	# In the stack, the clone goes directly after the furthest block
	self.tree.openElements.remove(formattingElement)
	self.tree.openElements.insert(
	self.tree.openElements.index(furthestBlock) + 1, clone)

	def endTagAppletMarqueeObject(self, token):
	    """Close an <applet>, <marquee> or <object>.

	    When the element is in scope, implied end tags are generated, the
	    stack is popped up to and including it, and the active formatting
	    elements are cleared. "end-tag-too-early" is reported whenever the
	    current node does not match the tag name.
	    """
	    tagName = token["name"]
	    tree = self.tree
	    if tree.elementInScope(tagName):
	        tree.generateImpliedEndTags()
	    if tree.openElements[-1].name != tagName:
	        self.parser.parseError("end-tag-too-early", {"name": tagName})

	    # Scope is queried a second time, exactly as before
	    if not tree.elementInScope(tagName):
	        return
	    while tree.openElements.pop().name != tagName:
	        pass
	    tree.clearActiveFormattingElements()

	def endTagBr(self, token):
	    """Treat </br> as a <br> element: report
	    "unexpected-end-tag-treated-as", then insert an implied <br> start
	    tag and pop it again."""
	    self.parser.parseError("unexpected-end-tag-treated-as",
	                           {"originalName": "br", "newName": "br element"})
	    tree = self.tree
	    tree.reconstructActiveFormattingElements()
	    brToken = impliedTagToken("br", "StartTag")
	    tree.insertElement(brToken)
	    tree.openElements.pop()

	def endTagOther(self, token):
	    """Default end-tag handler.

	    Scans a reversed copy of the open-element stack. At the first node
	    whose name matches the tag, implied end tags are generated and the
	    stack is popped up to and including that node. If a special element
	    is met first, "unexpected-end-tag" is reported and the token is
	    ignored.
	    """
	    tagName = token["name"]
	    for node in reversed(list(self.tree.openElements)):
	        if node.name != tagName:
	            if node.nameTuple in specialElements:
	                self.parser.parseError("unexpected-end-tag", {"name": tagName})
	                break
	            continue
	        self.tree.generateImpliedEndTags(exclude=tagName)
	        if self.tree.openElements[-1].name != tagName:
	            self.parser.parseError("unexpected-end-tag", {"name": tagName})
	        while self.tree.openElements.pop() != node:
	            pass
	        break

	# Start-tag dispatch table for the handlers defined above: each entry
	# pairs a tag name (or a tuple of names) with its handler, and
	# startTagOther is installed as the dispatcher's default.
	startTagHandler = _utils.MethodDispatcher([
	    ("html", Phase.startTagHtml),
	    (("base", "basefont", "bgsound", "command", "link", "meta", "script",
	      "style", "title"), startTagProcessInHead),
	    ("body", startTagBody),
	    ("frameset", startTagFrameset),
	    (("address", "article", "aside", "blockquote", "center", "details",
	      "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer",
	      "header", "hgroup", "main", "menu", "nav", "ol", "p", "section",
	      "summary", "ul"), startTagCloseP),
	    (headingElements, startTagHeading),
	    (("pre", "listing"), startTagPreListing),
	    ("form", startTagForm),
	    (("li", "dd", "dt"), startTagListItem),
	    ("plaintext", startTagPlaintext),
	    ("a", startTagA),
	    (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
	      "strong", "tt", "u"), startTagFormatting),
	    ("nobr", startTagNobr),
	    ("button", startTagButton),
	    (("applet", "marquee", "object"), startTagAppletMarqueeObject),
	    ("xmp", startTagXmp),
	    ("table", startTagTable),
	    (("area", "br", "embed", "img", "keygen", "wbr"),
	     startTagVoidFormatting),
	    (("param", "source", "track"), startTagParamSource),
	    ("input", startTagInput),
	    ("hr", startTagHr),
	    ("image", startTagImage),
	    ("isindex", startTagIsIndex),
	    ("textarea", startTagTextarea),
	    ("iframe", startTagIFrame),
	    ("noscript", startTagNoscript),
	    (("noembed", "noframes"), startTagRawtext),
	    ("select", startTagSelect),
	    (("rp", "rt"), startTagRpRt),
	    (("option", "optgroup"), startTagOpt),
	    ("math", startTagMath),
	    ("svg", startTagSvg),
	    (("caption", "col", "colgroup", "frame", "head", "tbody", "td",
	      "tfoot", "th", "thead", "tr"), startTagMisplaced),
	])
	startTagHandler.default = startTagOther

	# End-tag dispatch table for the handlers defined above; endTagOther
	# is installed as the dispatcher's default.
	endTagHandler = _utils.MethodDispatcher([
	    ("body", endTagBody),
	    ("html", endTagHtml),
	    (("address", "article", "aside", "blockquote", "button", "center",
	      "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption",
	      "figure", "footer", "header", "hgroup", "listing", "main", "menu",
	      "nav", "ol", "pre", "section", "summary", "ul"), endTagBlock),
	    ("form", endTagForm),
	    ("p", endTagP),
	    (("dd", "dt", "li"), endTagListItem),
	    (headingElements, endTagHeading),
	    (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
	      "strike", "strong", "tt", "u"), endTagFormatting),
	    (("applet", "marquee", "object"), endTagAppletMarqueeObject),
	    ("br", endTagBr),
	])
	endTagHandler.default = endTagOther

	class TextPhase(Phase):
	    # Phase used while the content of an RCDATA/RAWTEXT element (see the
	    # startTagOther message below) is being read; every exit path hands
	    # control back to parser.originalPhase.
	    __slots__ = ()

	    def processCharacters(self, token):
	        """Append the character data to the current node."""
	        text = token["data"]
	        self.tree.insertText(text)

	    def processEOF(self):
	        """Report the unterminated element, pop it and return to the
	        original phase.

	        :returns: True
	        """
	        unclosed = self.tree.openElements[-1].name
	        self.parser.parseError("expected-named-closing-tag-but-got-eof",
	                               {"name": unclosed})
	        self.tree.openElements.pop()
	        self.parser.phase = self.parser.originalPhase
	        return True

	    def startTagOther(self, token):
	        """Start tags are never expected in this phase."""
	        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

	    def endTagScript(self, token):
	        """Pop the <script> element and return to the original phase."""
	        popped = self.tree.openElements.pop()
	        assert popped.name == "script"
	        self.parser.phase = self.parser.originalPhase
	        # The rest of this method is all stuff that only happens if
	        # document.write works

	    def endTagOther(self, token):
	        """Pop the current node and return to the original phase."""
	        self.tree.openElements.pop()
	        self.parser.phase = self.parser.originalPhase

	    startTagHandler = _utils.MethodDispatcher([])
	    startTagHandler.default = startTagOther
	    endTagHandler = _utils.MethodDispatcher([("script", endTagScript)])
	    endTagHandler.default = endTagOther

	class InTablePhase(Phase):
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-table
	    __slots__ = tuple()

	    # helper methods
	    def clearStackToTableContext(self):
	        """Pop open elements until the current node is <table> or <html>."""
	        # "clear the stack back to a table context"
	        while self.tree.openElements[-1].name not in ("table", "html"):
	            # self.parser.parseError("unexpected-implied-end-tag-in-table",
	            #  {"name":  self.tree.openElements[-1].name})
	            self.tree.openElements.pop()
	        # When the current node is <html> it's an innerHTML case

	    # processing methods
	    def processEOF(self):
	        """Report "eof-in-table" unless the current node is <html>."""
	        if self.tree.openElements[-1].name != "html":
	            self.parser.parseError("eof-in-table")
	        else:
	            assert self.parser.innerHTML
	        # Stop parsing

	    def processSpaceCharacters(self, token):
	        """Switch to the "inTableText" phase, remembering the current
	        phase on it, and let it handle the whitespace token."""
	        originalPhase = self.parser.phase
	        self.parser.phase = self.parser.phases["inTableText"]
	        self.parser.phase.originalPhase = originalPhase
	        self.parser.phase.processSpaceCharacters(token)

	    def processCharacters(self, token):
	        """Switch to the "inTableText" phase, remembering the current
	        phase on it, and let it handle the character token."""
	        originalPhase = self.parser.phase
	        self.parser.phase = self.parser.phases["inTableText"]
	        self.parser.phase.originalPhase = originalPhase
	        self.parser.phase.processCharacters(token)

	    def insertText(self, token):
	        """Process the characters with the "inBody" rules while
	        ``tree.insertFromTable`` is set."""
	        # If we get here there must be at least one non-whitespace character
	        # Do the table magic!
	        self.tree.insertFromTable = True
	        self.parser.phases["inBody"].processCharacters(token)
	        self.tree.insertFromTable = False

	    def startTagCaption(self, token):
	        """Insert a <caption>, push a Marker onto the active formatting
	        elements and switch to the "inCaption" phase."""
	        self.clearStackToTableContext()
	        self.tree.activeFormattingElements.append(Marker)
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inCaption"]

	    def startTagColgroup(self, token):
	        """Insert a <colgroup> and switch to the "inColumnGroup" phase."""
	        self.clearStackToTableContext()
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inColumnGroup"]

	    def startTagCol(self, token):
	        """Imply a <colgroup>; returns the token for reprocessing."""
	        self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
	        return token

	    def startTagRowGroup(self, token):
	        """Insert a <tbody>/<tfoot>/<thead> and switch to the
	        "inTableBody" phase."""
	        self.clearStackToTableContext()
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inTableBody"]

	    def startTagImplyTbody(self, token):
	        """Imply a <tbody>; returns the token for reprocessing."""
	        self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
	        return token

	    def startTagTable(self, token):
	        """Process an implied </table> for a nested <table> start tag.

	        :returns: the token unless ``parser.innerHTML`` is set
	        """
	        self.parser.parseError("unexpected-start-tag-implies-end-tag",
	                               {"startName": "table", "endName": "table"})
	        self.parser.phase.processEndTag(impliedTagToken("table"))
	        if not self.parser.innerHTML:
	            return token

	    def startTagStyleScript(self, token):
	        """Delegate <style>/<script> to the "inHead" phase."""
	        return self.parser.phases["inHead"].processStartTag(token)

	    def startTagInput(self, token):
	        """Handle <input> inside a table.

	        ``type=hidden`` (compared ASCII case-insensitively) is inserted
	        in place, popped again and reported as
	        "unexpected-hidden-input-in-table"; any other input goes through
	        ``startTagOther``.
	        """
	        if ("type" in token["data"] and
	                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
	            self.parser.parseError("unexpected-hidden-input-in-table")
	            self.tree.insertElement(token)
	            # XXX associate with form
	            self.tree.openElements.pop()
	            # Acknowledge the self-closing flag, as the other handlers
	            # that insert-and-pop an element do; the HTML Standard's
	            # "in table" rules for a hidden input require it.
	            token["selfClosingAcknowledged"] = True
	        else:
	            self.startTagOther(token)

	    def startTagForm(self, token):
	        """Report "unexpected-form-in-table"; if no form pointer is set,
	        insert the <form>, record it as the form pointer and pop it."""
	        self.parser.parseError("unexpected-form-in-table")
	        if self.tree.formPointer is None:
	            self.tree.insertElement(token)
	            self.tree.formPointer = self.tree.openElements[-1]
	            self.tree.openElements.pop()

	    def startTagOther(self, token):
	        """Process any other start tag with the "inBody" rules while
	        ``tree.insertFromTable`` is set."""
	        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
	        # Do the table magic!
	        self.tree.insertFromTable = True
	        self.parser.phases["inBody"].processStartTag(token)
	        self.tree.insertFromTable = False

	    def endTagTable(self, token):
	        """Close the <table> in table scope and reset the insertion mode."""
	        if self.tree.elementInScope("table", variant="table"):
	            self.tree.generateImpliedEndTags()
	            if self.tree.openElements[-1].name != "table":
	                self.parser.parseError("end-tag-too-early-named",
	                                       {"gotName": "table",
	                                        "expectedName": self.tree.openElements[-1].name})
	            while self.tree.openElements[-1].name != "table":
	                self.tree.openElements.pop()
	            self.tree.openElements.pop()
	            self.parser.resetInsertionMode()
	        else:
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()

	    def endTagIgnore(self, token):
	        """Report "unexpected-end-tag" and ignore the token."""
	        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

	    def endTagOther(self, token):
	        """Process any other end tag with the "inBody" rules while
	        ``tree.insertFromTable`` is set."""
	        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
	        # Do the table magic!
	        self.tree.insertFromTable = True
	        self.parser.phases["inBody"].processEndTag(token)
	        self.tree.insertFromTable = False

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("caption", startTagCaption),
	        ("colgroup", startTagColgroup),
	        ("col", startTagCol),
	        (("tbody", "tfoot", "thead"), startTagRowGroup),
	        (("td", "th", "tr"), startTagImplyTbody),
	        ("table", startTagTable),
	        (("style", "script"), startTagStyleScript),
	        ("input", startTagInput),
	        ("form", startTagForm)
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("table", endTagTable),
	        (("body", "caption", "col", "colgroup", "html", "tbody", "td",
	          "tfoot", "th", "thead", "tr"), endTagIgnore)
	    ])
	    endTagHandler.default = endTagOther

	class InTableTextPhase(Phase):
	    # Buffers character tokens and flushes them as one run when any
	    # other kind of token (or EOF) arrives, then hands control back to
	    # ``originalPhase``.
	    __slots__ = ("originalPhase", "characterTokens")

	    def __init__(self, *args, **kwargs):
	        """Forward all positional and keyword arguments to the base
	        class and start with an empty buffer."""
	        super(InTableTextPhase, self).__init__(*args, **kwargs)
	        # Phase to return to once the buffered text has been flushed
	        self.originalPhase = None
	        # Character tokens collected since the last flush
	        self.characterTokens = []

	    def flushCharacters(self):
	        """Emit the buffered text and empty the buffer.

	        Text containing any non-space character goes through the
	        "inTable" phase's ``insertText``; whitespace-only text is
	        inserted into the tree directly.
	        """
	        data = "".join([item["data"] for item in self.characterTokens])
	        # Generator: any() stops at the first non-space character
	        if any(item not in spaceCharacters for item in data):
	            token = {"type": tokenTypes["Characters"], "data": data}
	            self.parser.phases["inTable"].insertText(token)
	        elif data:
	            self.tree.insertText(data)
	        self.characterTokens = []

	    def processComment(self, token):
	        """Flush, restore the original phase, return the token."""
	        self.flushCharacters()
	        self.parser.phase = self.originalPhase
	        return token

	    def processEOF(self):
	        """Flush, restore the original phase, return True."""
	        self.flushCharacters()
	        self.parser.phase = self.originalPhase
	        return True

	    def processCharacters(self, token):
	        """Buffer the token; a lone NUL character is dropped."""
	        if token["data"] == "\u0000":
	            return
	        self.characterTokens.append(token)

	    def processSpaceCharacters(self, token):
	        """Buffer the whitespace token."""
	        # pretty sure we should never reach here
	        self.characterTokens.append(token)
	        # assert False

	    def processStartTag(self, token):
	        """Flush, restore the original phase, return the token."""
	        self.flushCharacters()
	        self.parser.phase = self.originalPhase
	        return token

	    def processEndTag(self, token):
	        """Flush, restore the original phase, return the token."""
	        self.flushCharacters()
	        self.parser.phase = self.originalPhase
	        return token

	class InCaptionPhase(Phase):
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
	    __slots__ = ()

	    def ignoreEndTagCaption(self):
	        """Return True when no <caption> is in table scope."""
	        captionOpen = self.tree.elementInScope("caption", variant="table")
	        return not captionOpen

	    def processEOF(self):
	        """Delegate EOF handling to the "inBody" phase."""
	        inBody = self.parser.phases["inBody"]
	        inBody.processEOF()

	    def processCharacters(self, token):
	        """Delegate character handling to the "inBody" phase."""
	        inBody = self.parser.phases["inBody"]
	        return inBody.processCharacters(token)

	    def startTagTableElement(self, token):
	        """Process an implied </caption> for a table-structure start tag.

	        :returns: the token unless the implied end tag was ignored
	        """
	        self.parser.parseError()
	        # XXX Have to duplicate logic here to find out if the tag is ignored
	        wasIgnored = self.ignoreEndTagCaption()
	        self.parser.phase.processEndTag(impliedTagToken("caption"))
	        return None if wasIgnored else token

	    def startTagOther(self, token):
	        """Delegate any other start tag to the "inBody" phase."""
	        inBody = self.parser.phases["inBody"]
	        return inBody.processStartTag(token)

	    def endTagCaption(self, token):
	        """Close the <caption>, clear the active formatting elements and
	        switch back to the "inTable" phase."""
	        if self.ignoreEndTagCaption():
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return
	        # AT this code is quite similar to endTagTable in "InTable"
	        tree = self.tree
	        tree.generateImpliedEndTags()
	        currentName = tree.openElements[-1].name
	        if currentName != "caption":
	            self.parser.parseError("expected-one-end-tag-but-got-another",
	                                   {"gotName": "caption",
	                                    "expectedName": currentName})
	        while tree.openElements[-1].name != "caption":
	            tree.openElements.pop()
	        tree.openElements.pop()
	        tree.clearActiveFormattingElements()
	        self.parser.phase = self.parser.phases["inTable"]

	    def endTagTable(self, token):
	        """Process an implied </caption> for a </table> end tag.

	        :returns: the token unless the implied end tag was ignored
	        """
	        self.parser.parseError()
	        wasIgnored = self.ignoreEndTagCaption()
	        self.parser.phase.processEndTag(impliedTagToken("caption"))
	        return None if wasIgnored else token

	    def endTagIgnore(self, token):
	        """Report "unexpected-end-tag" and ignore the token."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag", details)

	    def endTagOther(self, token):
	        """Delegate any other end tag to the "inBody" phase."""
	        inBody = self.parser.phases["inBody"]
	        return inBody.processEndTag(token)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
	          "thead", "tr"), startTagTableElement),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("caption", endTagCaption),
	        ("table", endTagTable),
	        (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
	          "thead", "tr"), endTagIgnore),
	    ])
	    endTagHandler.default = endTagOther

	class InColumnGroupPhase(Phase):
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-column
	    __slots__ = ()

	    def ignoreEndTagColgroup(self):
	        """Return True when the current node is <html>."""
	        current = self.tree.openElements[-1]
	        return current.name == "html"

	    def processEOF(self):
	        """Close the column group at EOF.

	        :returns: True after an implied </colgroup> was processed; None
	            when the current node is <html>
	        """
	        if self.tree.openElements[-1].name == "html":
	            assert self.parser.innerHTML
	            return None
	        wasIgnored = self.ignoreEndTagColgroup()
	        self.endTagColgroup(impliedTagToken("colgroup"))
	        return None if wasIgnored else True

	    def processCharacters(self, token):
	        """Imply </colgroup>; returns the token unless that was ignored."""
	        wasIgnored = self.ignoreEndTagColgroup()
	        self.endTagColgroup(impliedTagToken("colgroup"))
	        return None if wasIgnored else token

	    def startTagCol(self, token):
	        """Insert a <col>, pop it and acknowledge the self-closing flag."""
	        tree = self.tree
	        tree.insertElement(token)
	        tree.openElements.pop()
	        token["selfClosingAcknowledged"] = True

	    def startTagOther(self, token):
	        """Imply </colgroup>; returns the token unless that was ignored."""
	        wasIgnored = self.ignoreEndTagColgroup()
	        self.endTagColgroup(impliedTagToken("colgroup"))
	        return None if wasIgnored else token

	    def endTagColgroup(self, token):
	        """Pop the current node and switch to the "inTable" phase, unless
	        the end tag is to be ignored."""
	        if not self.ignoreEndTagColgroup():
	            self.tree.openElements.pop()
	            self.parser.phase = self.parser.phases["inTable"]
	            return
	        # innerHTML case
	        assert self.parser.innerHTML
	        self.parser.parseError()

	    def endTagCol(self, token):
	        """Report "no-end-tag" for </col>."""
	        self.parser.parseError("no-end-tag", {"name": "col"})

	    def endTagOther(self, token):
	        """Imply </colgroup>; returns the token unless that was ignored."""
	        wasIgnored = self.ignoreEndTagColgroup()
	        self.endTagColgroup(impliedTagToken("colgroup"))
	        return None if wasIgnored else token

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("col", startTagCol),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("colgroup", endTagColgroup),
	        ("col", endTagCol),
	    ])
	    endTagHandler.default = endTagOther

	class InTableBodyPhase(Phase):
	    """Token handlers registered under the "inTableBody" phase key."""
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
	    __slots__ = tuple()

	    # helper methods
	    def clearStackToTableBodyContext(self):
	        """Pop open elements until a row group or ``html`` is current.

	        Elements are popped silently; no parse error is recorded for them.
	        """
	        stop_names = ("tbody", "tfoot", "thead", "html")
	        while self.tree.openElements[-1].name not in stop_names:
	            self.tree.openElements.pop()
	        if self.tree.openElements[-1].name == "html":
	            assert self.parser.innerHTML

	    # the rest
	    def processEOF(self):
	        """Run the "inTable" phase's EOF handling; its result is discarded."""
	        table_phase = self.parser.phases["inTable"]
	        table_phase.processEOF()

	    def processSpaceCharacters(self, token):
	        """Delegate whitespace tokens to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processSpaceCharacters(token)

	    def processCharacters(self, token):
	        """Delegate character tokens to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processCharacters(token)

	    def startTagTr(self, token):
	        """Insert the row element and switch to the "inRow" phase."""
	        self.clearStackToTableBodyContext()
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inRow"]

	    def startTagTableCell(self, token):
	        """Record a parse error, imply ``<tr>`` and return ``token``."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-cell-in-table-body", details)
	        self.startTagTr(impliedTagToken("tr", "StartTag"))
	        return token

	    def startTagTableOther(self, token):
	        """Close the open row group, then return ``token``.

	        If no ``tbody``, ``thead`` or ``tfoot`` is in table scope, a parse
	        error is recorded and ``None`` is returned.
	        """
	        # XXX AT Any ideas on how to share this with endTagTable?
	        has_row_group = any(
	            self.tree.elementInScope(group, variant="table")
	            for group in ("tbody", "thead", "tfoot"))
	        if not has_row_group:
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return None
	        self.clearStackToTableBodyContext()
	        current_name = self.tree.openElements[-1].name
	        self.endTagTableRowGroup(impliedTagToken(current_name))
	        return token

	    def startTagOther(self, token):
	        """Delegate every other start tag to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processStartTag(token)

	    def endTagTableRowGroup(self, token):
	        """Close the named row group and switch to the "inTable" phase.

	        If the named element is not in table scope, only a parse error is
	        recorded.
	        """
	        if not self.tree.elementInScope(token["name"], variant="table"):
	            self.parser.parseError("unexpected-end-tag-in-table-body",
	                                   {"name": token["name"]})
	            return
	        self.clearStackToTableBodyContext()
	        self.tree.openElements.pop()
	        self.parser.phase = self.parser.phases["inTable"]

	    def endTagTable(self, token):
	        """Close the open row group, then return ``token``.

	        If no ``tbody``, ``thead`` or ``tfoot`` is in table scope, a parse
	        error is recorded and ``None`` is returned.
	        """
	        has_row_group = any(
	            self.tree.elementInScope(group, variant="table")
	            for group in ("tbody", "thead", "tfoot"))
	        if not has_row_group:
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return None
	        self.clearStackToTableBodyContext()
	        current_name = self.tree.openElements[-1].name
	        self.endTagTableRowGroup(impliedTagToken(current_name))
	        return token

	    def endTagIgnore(self, token):
	        """Record a parse error for an end tag that is ignored here."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-in-table-body", details)

	    def endTagOther(self, token):
	        """Delegate every other end tag to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processEndTag(token)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("tr", startTagTr),
	        (("td", "th"), startTagTableCell),
	        (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
	         startTagTableOther),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
	        ("table", endTagTable),
	        (("body", "caption", "col", "colgroup", "html", "td", "th",
	          "tr"), endTagIgnore),
	    ])
	    endTagHandler.default = endTagOther

	class InRowPhase(Phase):
	    """Token handlers registered under the "inRow" phase key."""
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-row
	    __slots__ = tuple()

	    # helper methods (XXX unify this with other table helper methods)
	    def clearStackToTableRowContext(self):
	        """Pop open elements until ``tr`` or ``html`` is the current node.

	        A parse error is recorded for every element that gets popped.
	        """
	        while True:
	            current_name = self.tree.openElements[-1].name
	            if current_name in ("tr", "html"):
	                break
	            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
	                                   {"name": current_name})
	            self.tree.openElements.pop()

	    def ignoreEndTagTr(self):
	        """Return True when no ``tr`` element is in table scope."""
	        row_in_scope = self.tree.elementInScope("tr", variant="table")
	        return not row_in_scope

	    # the rest
	    def processEOF(self):
	        """Run the "inTable" phase's EOF handling; its result is discarded."""
	        table_phase = self.parser.phases["inTable"]
	        table_phase.processEOF()

	    def processSpaceCharacters(self, token):
	        """Delegate whitespace tokens to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processSpaceCharacters(token)

	    def processCharacters(self, token):
	        """Delegate character tokens to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processCharacters(token)

	    def startTagTableCell(self, token):
	        """Insert the cell, switch to "inCell" and push a formatting marker."""
	        self.clearStackToTableRowContext()
	        self.tree.insertElement(token)
	        self.parser.phase = self.parser.phases["inCell"]
	        self.tree.activeFormattingElements.append(Marker)

	    def startTagTableOther(self, token):
	        """Imply ``</tr>``; return ``token`` unless that end tag was ignored."""
	        skipped = self.ignoreEndTagTr()
	        self.endTagTr(impliedTagToken("tr"))
	        # XXX how are we sure it's always ignored in the innerHTML case?
	        return None if skipped else token

	    def startTagOther(self, token):
	        """Delegate every other start tag to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processStartTag(token)

	    def endTagTr(self, token):
	        """Close the current row and switch to the "inTableBody" phase.

	        If no ``tr`` is in table scope, a parse error is recorded instead.
	        """
	        if self.ignoreEndTagTr():
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return
	        self.clearStackToTableRowContext()
	        self.tree.openElements.pop()
	        self.parser.phase = self.parser.phases["inTableBody"]

	    def endTagTable(self, token):
	        """Imply ``</tr>``; return ``token`` unless that end tag was ignored."""
	        skipped = self.ignoreEndTagTr()
	        self.endTagTr(impliedTagToken("tr"))
	        # Reprocess the current tag if the tr end tag was not ignored
	        # XXX how are we sure it's always ignored in the innerHTML case?
	        return None if skipped else token

	    def endTagTableRowGroup(self, token):
	        """Imply ``</tr>`` and return ``token`` if the row group is in scope.

	        Otherwise a parse error is recorded and ``None`` is returned.
	        """
	        if not self.tree.elementInScope(token["name"], variant="table"):
	            self.parser.parseError()
	            return None
	        self.endTagTr(impliedTagToken("tr"))
	        return token

	    def endTagIgnore(self, token):
	        """Record a parse error for an end tag that is ignored here."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-in-table-row", details)

	    def endTagOther(self, token):
	        """Delegate every other end tag to the "inTable" phase."""
	        table_phase = self.parser.phases["inTable"]
	        return table_phase.processEndTag(token)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        (("td", "th"), startTagTableCell),
	        (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
	          "tr"), startTagTableOther),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("tr", endTagTr),
	        ("table", endTagTable),
	        (("tbody", "tfoot", "thead"), endTagTableRowGroup),
	        (("body", "caption", "col", "colgroup", "html", "td", "th"),
	         endTagIgnore),
	    ])
	    endTagHandler.default = endTagOther

	class InCellPhase(Phase):
	    """Token handlers registered under the "inCell" phase key."""
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
	    __slots__ = tuple()

	    # helper
	    def closeCell(self):
	        """Imply the end tag of whichever cell is in table scope.

	        ``td`` is checked before ``th``; at most one end tag is implied.
	        """
	        for cell_name in ("td", "th"):
	            if self.tree.elementInScope(cell_name, variant="table"):
	                self.endTagTableCell(impliedTagToken(cell_name))
	                break

	    # the rest
	    def processEOF(self):
	        """Run the "inBody" phase's EOF handling; its result is discarded."""
	        body_phase = self.parser.phases["inBody"]
	        body_phase.processEOF()

	    def processCharacters(self, token):
	        """Delegate character tokens to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processCharacters(token)

	    def startTagTableOther(self, token):
	        """Close the open cell and return ``token``.

	        If neither ``td`` nor ``th`` is in table scope, a parse error is
	        recorded and ``None`` is returned.
	        """
	        cell_open = (self.tree.elementInScope("td", variant="table") or
	                     self.tree.elementInScope("th", variant="table"))
	        if not cell_open:
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return None
	        self.closeCell()
	        return token

	    def startTagOther(self, token):
	        """Delegate every other start tag to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processStartTag(token)

	    def endTagTableCell(self, token):
	        """Close the named cell and switch to the "inRow" phase.

	        If the named cell is not in table scope, only a parse error is
	        recorded.
	        """
	        cell_name = token["name"]
	        if not self.tree.elementInScope(cell_name, variant="table"):
	            self.parser.parseError("unexpected-end-tag", {"name": cell_name})
	            return

	        self.tree.generateImpliedEndTags(cell_name)
	        if self.tree.openElements[-1].name == cell_name:
	            self.tree.openElements.pop()
	        else:
	            self.parser.parseError("unexpected-cell-end-tag",
	                                   {"name": cell_name})
	            # Pop up to and including the cell element itself.
	            while self.tree.openElements.pop().name != cell_name:
	                pass
	        self.tree.clearActiveFormattingElements()
	        self.parser.phase = self.parser.phases["inRow"]

	    def endTagIgnore(self, token):
	        """Record a parse error for an end tag that is ignored here."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag", details)

	    def endTagImply(self, token):
	        """Close the open cell and return ``token`` if the tag is in scope.

	        Otherwise a parse error is recorded and ``None`` is returned.
	        """
	        if not self.tree.elementInScope(token["name"], variant="table"):
	            # sometimes innerHTML case
	            self.parser.parseError()
	            return None
	        self.closeCell()
	        return token

	    def endTagOther(self, token):
	        """Delegate every other end tag to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processEndTag(token)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
	          "thead", "tr"), startTagTableOther),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        (("td", "th"), endTagTableCell),
	        (("body", "caption", "col", "colgroup", "html"), endTagIgnore),
	        (("table", "tbody", "tfoot", "thead", "tr"), endTagImply),
	    ])
	    endTagHandler.default = endTagOther

	class InSelectPhase(Phase):
	    """Token handlers registered under the "inSelect" phase key."""
	    __slots__ = tuple()

	    # http://www.whatwg.org/specs/web-apps/current-work/#in-select
	    def processEOF(self):
	        """Record "eof-in-select" unless the current node is ``html``."""
	        if self.tree.openElements[-1].name == "html":
	            assert self.parser.innerHTML
	        else:
	            self.parser.parseError("eof-in-select")

	    def processCharacters(self, token):
	        """Insert the token's text, dropping a lone U+0000 token."""
	        if token["data"] != "\u0000":
	            self.tree.insertText(token["data"])

	    def startTagOption(self, token):
	        """Insert an ``option``, first closing an ``option`` that is current."""
	        # We need to imply </option> if <option> is the current node.
	        if self.tree.openElements[-1].name == "option":
	            self.tree.openElements.pop()
	        self.tree.insertElement(token)

	    def startTagOptgroup(self, token):
	        """Insert an ``optgroup`` after closing a current option/optgroup."""
	        # First an open <option>, then an open <optgroup>, each only when
	        # it is the current node at that point.
	        for implied_name in ("option", "optgroup"):
	            if self.tree.openElements[-1].name == implied_name:
	                self.tree.openElements.pop()
	        self.tree.insertElement(token)

	    def startTagSelect(self, token):
	        """Record a parse error and treat the tag as ``</select>``."""
	        self.parser.parseError("unexpected-select-in-select")
	        self.endTagSelect(impliedTagToken("select"))

	    def startTagInput(self, token):
	        """Record a parse error, imply ``</select>`` and return ``token``.

	        Returns ``None`` when no ``select`` is in select scope.
	        """
	        self.parser.parseError("unexpected-input-in-select")
	        if not self.tree.elementInScope("select", variant="select"):
	            assert self.parser.innerHTML
	            return None
	        self.endTagSelect(impliedTagToken("select"))
	        return token

	    def startTagScript(self, token):
	        """Delegate ``script`` start tags to the "inHead" phase."""
	        head_phase = self.parser.phases["inHead"]
	        return head_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error for any other start tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-start-tag-in-select", details)

	    def endTagOption(self, token):
	        """Pop the current ``option``, or record a parse error."""
	        if self.tree.openElements[-1].name != "option":
	            self.parser.parseError("unexpected-end-tag-in-select",
	                                   {"name": "option"})
	            return
	        self.tree.openElements.pop()

	    def endTagOptgroup(self, token):
	        """Pop the current ``optgroup``, or record a parse error."""
	        open_elements = self.tree.openElements
	        # </optgroup> implicitly closes <option>
	        option_inside_optgroup = (open_elements[-1].name == "option" and
	                                  open_elements[-2].name == "optgroup")
	        if option_inside_optgroup:
	            open_elements.pop()
	        # It also closes </optgroup>, but nothing else
	        if open_elements[-1].name != "optgroup":
	            self.parser.parseError("unexpected-end-tag-in-select",
	                                   {"name": "optgroup"})
	        else:
	            open_elements.pop()

	    def endTagSelect(self, token):
	        """Pop through the ``select`` element and reset the insertion mode.

	        If no ``select`` is in select scope, a parse error is recorded.
	        """
	        if not self.tree.elementInScope("select", variant="select"):
	            # innerHTML case
	            assert self.parser.innerHTML
	            self.parser.parseError()
	            return
	        # Pop up to and including the select element itself.
	        while self.tree.openElements.pop().name != "select":
	            pass
	        self.parser.resetInsertionMode()

	    def endTagOther(self, token):
	        """Record a parse error for any other end tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-in-select", details)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("option", startTagOption),
	        ("optgroup", startTagOptgroup),
	        ("select", startTagSelect),
	        (("input", "keygen", "textarea"), startTagInput),
	        ("script", startTagScript),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("option", endTagOption),
	        ("optgroup", endTagOptgroup),
	        ("select", endTagSelect),
	    ])
	    endTagHandler.default = endTagOther

	class InSelectInTablePhase(Phase):
	    """Token handlers registered under the "inSelectInTable" phase key.

	    Table-related tags get special treatment; everything else is passed on
	    to the "inSelect" phase.
	    """
	    __slots__ = tuple()

	    def processEOF(self):
	        """Run the "inSelect" phase's EOF handling; its result is discarded."""
	        select_phase = self.parser.phases["inSelect"]
	        select_phase.processEOF()

	    def processCharacters(self, token):
	        """Delegate character tokens to the "inSelect" phase."""
	        select_phase = self.parser.phases["inSelect"]
	        return select_phase.processCharacters(token)

	    def startTagTable(self, token):
	        """Record a parse error, imply ``</select>`` and return ``token``."""
	        self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
	        implied_end = impliedTagToken("select")
	        self.endTagOther(implied_end)
	        return token

	    def startTagOther(self, token):
	        """Delegate every other start tag to the "inSelect" phase."""
	        select_phase = self.parser.phases["inSelect"]
	        return select_phase.processStartTag(token)

	    def endTagTable(self, token):
	        """Record a parse error and, if the tag is in scope, close the select.

	        :returns: ``token`` when the named element is in table scope (after
	            implying ``</select>``), otherwise ``None``
	        """
	        self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
	        if not self.tree.elementInScope(token["name"], variant="table"):
	            return None
	        self.endTagOther(impliedTagToken("select"))
	        return token

	    def endTagOther(self, token):
	        """Delegate every other end tag to the "inSelect" phase."""
	        select_phase = self.parser.phases["inSelect"]
	        return select_phase.processEndTag(token)

	    startTagHandler = _utils.MethodDispatcher([
	        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
	         startTagTable),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
	         endTagTable),
	    ])
	    endTagHandler.default = endTagOther

	class InForeignContentPhase(Phase):
	    """Token handlers registered under the "inForeignContent" phase key."""
	    __slots__ = tuple()

	    # Start tags that make processStartTag pop foreign elements and hand the
	    # token back to the caller.
	    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
	                                  "center", "code", "dd", "div", "dl", "dt",
	                                  "em", "embed", "h1", "h2", "h3",
	                                  "h4", "h5", "h6", "head", "hr", "i", "img",
	                                  "li", "listing", "menu", "meta", "nobr",
	                                  "ol", "p", "pre", "ruby", "s", "small",
	                                  "span", "strong", "strike", "sub", "sup",
	                                  "table", "tt", "u", "ul", "var"])

	    # Lower-case SVG tag names mapped to their mixed-case spelling. Kept at
	    # class level so the table is built once rather than on every call to
	    # adjustSVGTagNames. It is only ever read.
	    svgTagNameReplacements = {
	        "altglyph": "altGlyph",
	        "altglyphdef": "altGlyphDef",
	        "altglyphitem": "altGlyphItem",
	        "animatecolor": "animateColor",
	        "animatemotion": "animateMotion",
	        "animatetransform": "animateTransform",
	        "clippath": "clipPath",
	        "feblend": "feBlend",
	        "fecolormatrix": "feColorMatrix",
	        "fecomponenttransfer": "feComponentTransfer",
	        "fecomposite": "feComposite",
	        "feconvolvematrix": "feConvolveMatrix",
	        "fediffuselighting": "feDiffuseLighting",
	        "fedisplacementmap": "feDisplacementMap",
	        "fedistantlight": "feDistantLight",
	        "feflood": "feFlood",
	        "fefunca": "feFuncA",
	        "fefuncb": "feFuncB",
	        "fefuncg": "feFuncG",
	        "fefuncr": "feFuncR",
	        "fegaussianblur": "feGaussianBlur",
	        "feimage": "feImage",
	        "femerge": "feMerge",
	        "femergenode": "feMergeNode",
	        "femorphology": "feMorphology",
	        "feoffset": "feOffset",
	        "fepointlight": "fePointLight",
	        "fespecularlighting": "feSpecularLighting",
	        "fespotlight": "feSpotLight",
	        "fetile": "feTile",
	        "feturbulence": "feTurbulence",
	        "foreignobject": "foreignObject",
	        "glyphref": "glyphRef",
	        "lineargradient": "linearGradient",
	        "radialgradient": "radialGradient",
	        "textpath": "textPath",
	    }

	    def adjustSVGTagNames(self, token):
	        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

	        Names without an entry in ``svgTagNameReplacements`` are left
	        unchanged.

	        :arg token: the tag token whose ``"name"`` may be replaced
	        """
	        replacements = self.svgTagNameReplacements
	        if token["name"] in replacements:
	            token["name"] = replacements[token["name"]]

	    def processCharacters(self, token):
	        """Insert character data through ``Phase.processCharacters``.

	        A token that is exactly U+0000 is replaced by U+FFFD first. Any
	        other token containing a non-space character clears
	        ``parser.framesetOK``.
	        """
	        if token["data"] == "\u0000":
	            token["data"] = "\uFFFD"
	        elif (self.parser.framesetOK and
	              any(char not in spaceCharacters for char in token["data"])):
	            self.parser.framesetOK = False
	        Phase.processCharacters(self, token)

	    def processStartTag(self, token):
	        """Handle a start tag seen in foreign content.

	        :returns: ``token`` when it is a breakout element (or a ``font`` tag
	            with a ``color``, ``face`` or ``size`` attribute), after popping
	            foreign elements off the stack; otherwise ``None`` once the
	            element has been inserted in the current node's namespace
	        """
	        currentNode = self.tree.openElements[-1]
	        if (token["name"] in self.breakoutElements or
	            (token["name"] == "font" and
	             set(token["data"].keys()) & {"color", "face", "size"})):
	            self.parser.parseError("unexpected-html-element-in-foreign-content",
	                                   {"name": token["name"]})
	            # Pop until the current node is in the default namespace or is
	            # an HTML / MathML text integration point.
	            while (self.tree.openElements[-1].namespace !=
	                   self.tree.defaultNamespace and
	                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
	                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
	                self.tree.openElements.pop()
	            return token

	        else:
	            if currentNode.namespace == namespaces["mathml"]:
	                self.parser.adjustMathMLAttributes(token)
	            elif currentNode.namespace == namespaces["svg"]:
	                self.adjustSVGTagNames(token)
	                self.parser.adjustSVGAttributes(token)
	            self.parser.adjustForeignAttributes(token)
	            token["namespace"] = currentNode.namespace
	            self.tree.insertElement(token)
	            if token["selfClosing"]:
	                self.tree.openElements.pop()
	                token["selfClosingAcknowledged"] = True

	    def processEndTag(self, token):
	        """Handle an end tag seen in foreign content.

	        Walks down the stack of open elements from the current node. If a
	        node whose lower-cased name equals the token name is found, the
	        stack is popped through that node. If a node in the default
	        namespace is reached first, the token is handed to the parser's
	        current phase.

	        :returns: ``None`` when a matching element was closed here,
	            otherwise the result of the current phase's ``processEndTag``
	        """
	        nodeIndex = len(self.tree.openElements) - 1
	        node = self.tree.openElements[-1]
	        if node.name.translate(asciiUpper2Lower) != token["name"]:
	            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

	        while True:
	            if node.name.translate(asciiUpper2Lower) == token["name"]:
	                # XXX this isn't in the spec but it seems necessary
	                if self.parser.phase == self.parser.phases["inTableText"]:
	                    self.parser.phase.flushCharacters()
	                    self.parser.phase = self.parser.phase.originalPhase
	                # Pop up to and including the matched node.
	                while self.tree.openElements.pop() != node:
	                    assert self.tree.openElements
	                new_token = None
	                break
	            nodeIndex -= 1

	            node = self.tree.openElements[nodeIndex]
	            if node.namespace != self.tree.defaultNamespace:
	                continue
	            else:
	                new_token = self.parser.phase.processEndTag(token)
	                break
	        return new_token

	class AfterBodyPhase(Phase):
	    """Token handlers registered under the "afterBody" phase key."""
	    __slots__ = tuple()

	    def processEOF(self):
	        """Do nothing at end of input (stop parsing)."""
	        return None

	    def processComment(self, token):
	        """Insert the comment under the first element on the open stack."""
	        # This is needed because data is to be appended to the <html> element
	        # here and not to whatever is currently open.
	        root_element = self.tree.openElements[0]
	        self.tree.insertComment(token, root_element)

	    def processCharacters(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        self.parser.parseError("unexpected-char-after-body")
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    def startTagHtml(self, token):
	        """Delegate ``html`` start tags to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-start-tag-after-body", details)
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    def endTagHtml(self, name):
	        """Switch to "afterAfterBody", or record an error when in innerHTML.

	        :arg name: the end tag token (unused)
	        """
	        if not self.parser.innerHTML:
	            self.parser.phase = self.parser.phases["afterAfterBody"]
	        else:
	            self.parser.parseError("unexpected-end-tag-after-body-innerhtml")

	    def endTagOther(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-after-body", details)
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("html", endTagHtml),
	    ])
	    endTagHandler.default = endTagOther

	class InFramesetPhase(Phase):
	    """Token handlers registered under the "inFrameset" phase key."""
	    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
	    __slots__ = tuple()

	    def processEOF(self):
	        """Record "eof-in-frameset" unless the current node is ``html``."""
	        if self.tree.openElements[-1].name == "html":
	            assert self.parser.innerHTML
	        else:
	            self.parser.parseError("eof-in-frameset")

	    def processCharacters(self, token):
	        """Record a parse error; the characters are dropped."""
	        self.parser.parseError("unexpected-char-in-frameset")

	    def startTagFrameset(self, token):
	        """Insert a nested ``frameset`` element."""
	        self.tree.insertElement(token)

	    def startTagFrame(self, token):
	        """Insert a ``frame`` element and pop it again straight away."""
	        self.tree.insertElement(token)
	        self.tree.openElements.pop()

	    def startTagNoframes(self, token):
	        """Delegate ``noframes`` start tags to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error for any other start tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-start-tag-in-frameset", details)

	    def endTagFrameset(self, token):
	        """Pop the current ``frameset`` and possibly leave this phase.

	        When the current node is ``html``, a parse error is recorded and
	        nothing is popped.
	        """
	        if self.tree.openElements[-1].name != "html":
	            self.tree.openElements.pop()
	        else:
	            # innerHTML case
	            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")

	        if self.parser.innerHTML:
	            return
	        if self.tree.openElements[-1].name != "frameset":
	            # If we're not in innerHTML mode and the current node is not a
	            # "frameset" element (anymore) then switch.
	            self.parser.phase = self.parser.phases["afterFrameset"]

	    def endTagOther(self, token):
	        """Record a parse error for any other end tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-in-frameset", details)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("frameset", startTagFrameset),
	        ("frame", startTagFrame),
	        ("noframes", startTagNoframes),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("frameset", endTagFrameset),
	    ])
	    endTagHandler.default = endTagOther

	class AfterFramesetPhase(Phase):
	    """Token handlers registered under the "afterFrameset" phase key."""
	    # http://www.whatwg.org/specs/web-apps/current-work/#after3
	    __slots__ = tuple()

	    def processEOF(self):
	        """Do nothing at end of input (stop parsing)."""
	        return None

	    def processCharacters(self, token):
	        """Record a parse error; the characters are dropped."""
	        self.parser.parseError("unexpected-char-after-frameset")

	    def startTagNoframes(self, token):
	        """Delegate ``noframes`` start tags to the "inHead" phase."""
	        head_phase = self.parser.phases["inHead"]
	        return head_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error for any other start tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-start-tag-after-frameset", details)

	    def endTagHtml(self, token):
	        """Switch the parser to the "afterAfterFrameset" phase."""
	        next_phase = self.parser.phases["afterAfterFrameset"]
	        self.parser.phase = next_phase

	    def endTagOther(self, token):
	        """Record a parse error for any other end tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("unexpected-end-tag-after-frameset", details)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", Phase.startTagHtml),
	        ("noframes", startTagNoframes),
	    ])
	    startTagHandler.default = startTagOther

	    endTagHandler = _utils.MethodDispatcher([
	        ("html", endTagHtml),
	    ])
	    endTagHandler.default = endTagOther

	class AfterAfterBodyPhase(Phase):
	    """Token handlers registered under the "afterAfterBody" phase key.

	    Apart from comments, whitespace and ``<html>``, every token records a
	    parse error, switches the parser back to "inBody" and is returned.
	    """
	    __slots__ = tuple()

	    def processEOF(self):
	        """Do nothing at end of input."""
	        return None

	    def processComment(self, token):
	        """Insert the comment directly under the document."""
	        document = self.tree.document
	        self.tree.insertComment(token, document)

	    def processSpaceCharacters(self, token):
	        """Delegate whitespace tokens to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processSpaceCharacters(token)

	    def processCharacters(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        self.parser.parseError("expected-eof-but-got-char")
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    def startTagHtml(self, token):
	        """Delegate ``html`` start tags to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-eof-but-got-start-tag", details)
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    def processEndTag(self, token):
	        """Record a parse error, switch to "inBody" and return ``token``."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-eof-but-got-end-tag", details)
	        self.parser.phase = self.parser.phases["inBody"]
	        return token

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	    ])
	    startTagHandler.default = startTagOther

	class AfterAfterFramesetPhase(Phase):
	    """Token handlers registered under the "afterAfterFrameset" phase key.

	    Unexpected tokens only record a parse error; the parser stays in this
	    phase.
	    """
	    __slots__ = tuple()

	    def processEOF(self):
	        """Do nothing at end of input."""
	        return None

	    def processComment(self, token):
	        """Insert the comment directly under the document."""
	        document = self.tree.document
	        self.tree.insertComment(token, document)

	    def processSpaceCharacters(self, token):
	        """Delegate whitespace tokens to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processSpaceCharacters(token)

	    def processCharacters(self, token):
	        """Record a parse error; the characters are dropped."""
	        self.parser.parseError("expected-eof-but-got-char")

	    def startTagHtml(self, token):
	        """Delegate ``html`` start tags to the "inBody" phase."""
	        body_phase = self.parser.phases["inBody"]
	        return body_phase.processStartTag(token)

	    def startTagNoFrames(self, token):
	        """Delegate ``noframes`` start tags to the "inHead" phase."""
	        head_phase = self.parser.phases["inHead"]
	        return head_phase.processStartTag(token)

	    def startTagOther(self, token):
	        """Record a parse error for any other start tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-eof-but-got-start-tag", details)

	    def processEndTag(self, token):
	        """Record a parse error for any end tag; it is ignored."""
	        details = {"name": token["name"]}
	        self.parser.parseError("expected-eof-but-got-end-tag", details)

	    startTagHandler = _utils.MethodDispatcher([
	        ("html", startTagHtml),
	        ("noframes", startTagNoFrames),
	    ])
	    startTagHandler.default = startTagOther

	# pylint:enable=unused-argument

	# Map each phase key to the phase class defined above. These keys are
	# the ones the handlers use when indexing ``self.parser.phases[...]``.
	return {
	"initial": InitialPhase,
	"beforeHtml": BeforeHtmlPhase,
	"beforeHead": BeforeHeadPhase,
	"inHead": InHeadPhase,
	"inHeadNoscript": InHeadNoscriptPhase,
	"afterHead": AfterHeadPhase,
	"inBody": InBodyPhase,
	"text": TextPhase,
	"inTable": InTablePhase,
	"inTableText": InTableTextPhase,
	"inCaption": InCaptionPhase,
	"inColumnGroup": InColumnGroupPhase,
	"inTableBody": InTableBodyPhase,
	"inRow": InRowPhase,
	"inCell": InCellPhase,
	"inSelect": InSelectPhase,
	"inSelectInTable": InSelectInTablePhase,
	"inForeignContent": InForeignContentPhase,
	"afterBody": AfterBodyPhase,
	"inFrameset": InFramesetPhase,
	"afterFrameset": AfterFramesetPhase,
	"afterAfterBody": AfterAfterBodyPhase,
	"afterAfterFrameset": AfterAfterFramesetPhase,
	# XXX after after frameset
	}


	def adjust_attributes(token, replacements):
	    """Rename attributes of ``token`` in place according to ``replacements``.

	    ``token['data']`` is rebuilt only when at least one of its keys appears
	    in ``replacements``. The rebuilt mapping has the same type as the
	    original, and keys without a replacement are kept as they are.

	    :arg token: a token whose ``'data'`` entry maps attribute names to values

	    :arg replacements: mapping of attribute name to its replacement name
	    """
	    attributes = token['data']
	    if not (viewkeys(attributes) & viewkeys(replacements)):
	        return
	    renamed_items = ((replacements.get(name, name), value)
	                     for name, value in attributes.items())
	    token['data'] = type(attributes)(renamed_items)


	def impliedTagToken(name, type="EndTag", attributes=None,
	                    selfClosing=False):
	    """Build a tag token dictionary for a tag the parser implies itself.

	    :arg name: the tag name

	    :arg type: key into ``tokenTypes`` selecting the token type

	    :arg attributes: attribute mapping stored under ``"data"``; a new empty
	        dict is used when ``None``

	    :arg selfClosing: value stored under ``"selfClosing"``

	    :returns: dict with the keys ``"type"``, ``"name"``, ``"data"`` and
	        ``"selfClosing"``
	    """
	    token = {
	        "type": tokenTypes[type],
	        "name": name,
	        "data": {} if attributes is None else attributes,
	        "selfClosing": selfClosing,
	    }
	    return token


	class ParseError(Exception):
	    """Error in parsed document."""