Upload 527 files
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete change set.
- MLPY/Lib/site-packages/tensorboard/__init__.py +113 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/assets.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/auth.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/context.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/data_compat.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/dataclass_compat.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/default.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/errors.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/lazy.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/main.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/main_lib.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/manager.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/notebook.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/plugin_util.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/program.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/__pycache__/version.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/__init__.py +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__init__.py +124 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/callbacks.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/encoding.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/linkifier.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/sanitizer.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/version.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/callbacks.py +25 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/encoding.py +62 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/linkifier.py +526 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/sanitizer.py +368 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/utils.py +23 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/bleach/version.py +6 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__init__.py +35 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_ihatexml.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_inputstream.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_tokenizer.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_utils.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/constants.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/html5parser.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/serializer.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_ihatexml.py +289 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_inputstream.py +918 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_tokenizer.py +1735 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__init__.py +5 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/__init__.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/_base.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/py.cpython-39.pyc +0 -0
- MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/_base.py +40 -0
MLPY/Lib/site-packages/tensorboard/__init__.py
ADDED
@@ -0,0 +1,113 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TensorBoard is a webapp for understanding TensorFlow runs and graphs."""


from tensorboard import lazy as _lazy
from tensorboard import version as _version

# TensorBoard public API.
__all__ = [
    "__version__",
    "errors",
    "notebook",
    "program",
    "summary",
]


# Please be careful when changing the structure of this file.
#
# The lazy imports in this file must use `importlib.import_module`, not
# `import tensorboard.foo` or `from tensorboard import foo`, or it will
# be impossible to reload the TensorBoard module without breaking these
# top-level public APIs. This has to do with the gory details of
# Python's module system. Take `tensorboard.notebook` as an example:
#
#   - When the `tensorboard` module (that's us!) is initialized, its
#     `notebook` attribute is initialized to a new LazyModule. The
#     actual `tensorboard.notebook` submodule is not loaded.
#
#   - When the `tensorboard.notebook` submodule is first loaded, Python
#     _reassigns_ the `notebook` attribute on the `tensorboard` module
#     object to point to the underlying `tensorboard.notebook` module
#     object, rather than its former LazyModule value. This occurs
#     whether the module is loaded via the lazy module or directly as an
#     import:
#
#       - import tensorboard; tensorboard.notebook.start(...)  # one way
#       - from tensorboard import notebook  # other way; same effect
#
#   - When the `tensorboard` module is reloaded, its `notebook`
#     attribute is once again bound to a (new) LazyModule, while the
#     `tensorboard.notebook` module object is unaffected and still
#     exists in `sys.modules`. But then...
#
#   - When the new LazyModule is forced, it must resolve to the existing
#     `tensorboard.notebook` module object rather than itself (which
#     just creates a stack overflow). If the LazyModule load function
#     uses `import tensorboard.notebook; return tensorboard.notebook`,
#     then the first statement will do _nothing_ because the
#     `tensorboard.notebook` module is already loaded, and the second
#     statement will return the LazyModule itself. The same goes for the
#     `from tensorboard import notebook` form. We need to ensure that
#     the submodule is loaded and then pull the actual module object out
#     of `sys.modules`... which is exactly what `importlib` handles for
#     us.
#
# See <https://github.com/tensorflow/tensorboard/issues/1989> for
# additional discussion.


@_lazy.lazy_load("tensorboard.errors")
def errors():
    import importlib

    return importlib.import_module("tensorboard.errors")


@_lazy.lazy_load("tensorboard.notebook")
def notebook():
    import importlib

    return importlib.import_module("tensorboard.notebook")


@_lazy.lazy_load("tensorboard.program")
def program():
    import importlib

    return importlib.import_module("tensorboard.program")


@_lazy.lazy_load("tensorboard.summary")
def summary():
    import importlib

    return importlib.import_module("tensorboard.summary")


def load_ipython_extension(ipython):
    """IPython API entry point.

    Only intended to be called by the IPython runtime.

    See:
      https://ipython.readthedocs.io/en/stable/config/extensions/index.html
    """
    notebook._load_ipython_extension(ipython)


__version__ = _version.VERSION
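
A minimal, self-contained sketch (not part of the upload) of the resolution rule the comment block above insists on: pull the real submodule out of sys.modules via importlib instead of re-importing it by name. The helper name is illustrative.

    import importlib
    import sys

    def resolve(name):
        """Return the real module object for `name`, loading it if needed.

        importlib.import_module loads the submodule on first use and returns
        the object registered in sys.modules on later calls, which is the
        behaviour the lazy loaders above rely on.
        """
        module = importlib.import_module(name)
        assert sys.modules[name] is module
        return module
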
MLPY/Lib/site-packages/tensorboard/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.41 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/assets.cpython-39.pyc
ADDED
Binary file (1.01 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/auth.cpython-39.pyc
ADDED
Binary file (3.45 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/context.cpython-39.pyc
ADDED
Binary file (4.19 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/data_compat.cpython-39.pyc
ADDED
Binary file (4.99 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/dataclass_compat.cpython-39.pyc
ADDED
Binary file (6.47 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/default.cpython-39.pyc
ADDED
Binary file (4.02 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/errors.cpython-39.pyc
ADDED
Binary file (4.57 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/lazy.cpython-39.pyc
ADDED
Binary file (2.84 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/main.cpython-39.pyc
ADDED
Binary file (1.26 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/main_lib.cpython-39.pyc
ADDED
Binary file (1.33 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/manager.cpython-39.pyc
ADDED
Binary file (15 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/notebook.cpython-39.pyc
ADDED
Binary file (11.7 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/plugin_util.cpython-39.pyc
ADDED
Binary file (6.67 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/program.cpython-39.pyc
ADDED
Binary file (26.7 kB).

MLPY/Lib/site-packages/tensorboard/__pycache__/version.cpython-39.pyc
ADDED
Binary file (257 Bytes).

MLPY/Lib/site-packages/tensorboard/_vendor/__init__.py
ADDED
File without changes

MLPY/Lib/site-packages/tensorboard/_vendor/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (156 Bytes).
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__init__.py
ADDED
@@ -0,0 +1,124 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

from tensorboard._vendor.bleach.linkifier import (
    DEFAULT_CALLBACKS,
    Linker,
    LinkifyFilter,
)
from tensorboard._vendor.bleach.sanitizer import (
    ALLOWED_ATTRIBUTES,
    ALLOWED_PROTOCOLS,
    ALLOWED_STYLES,
    ALLOWED_TAGS,
    BleachSanitizerFilter,
    Cleaner,
)
from tensorboard._vendor.bleach.version import __version__, VERSION  # flake8: noqa

__all__ = ['clean', 'linkify']


def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
          strip_comments=True):
    """Clean an HTML fragment of malicious content and return it

    This function is a security-focused function whose sole purpose is to
    remove malicious content from a string such that it can be displayed as
    content in a web page.

    This function is not designed to use to transform content to be used in
    non-web-page contexts.

    Example::

        import bleach

        better_text = bleach.clean(yucky_text)


    .. Note::

       If you're cleaning a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.sanitizer.Cleaner` instance.

    :arg str text: the text to clean

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list styles: allowed list of css styles; defaults to
        ``bleach.sanitizer.ALLOWED_STYLES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :returns: cleaned text as unicode

    """
    cleaner = Cleaner(
        tags=tags,
        attributes=attributes,
        styles=styles,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
    )
    return cleaner.clean(text)


def linkify(text, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False):
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    .. Note::

       If you're linking a lot of text and passing the same argument values or
       you want more configurability, consider using a
       :py:class:`bleach.linkifier.Linker` instance.

    .. Note::

       If you have text that you want to clean and then linkify, consider using
       the :py:class:`bleach.linkifier.LinkifyFilter` as a filter in the clean
       pass. That way you're not parsing the HTML twice.

    :arg str text: the text to linkify

    :arg list callbacks: list of callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: list of tags that you don't want to linkify the
        contents of; for example, you could set this to ``['pre']`` to skip
        linkifying contents of ``pre`` tags

    :arg bool parse_email: whether or not to linkify email addresses

    :returns: linkified text as unicode

    """
    linker = Linker(
        callbacks=callbacks,
        skip_tags=skip_tags,
        parse_email=parse_email
    )
    return linker.linkify(text)
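
An illustrative sketch (not part of the upload) of the two helpers defined above, imported through the vendored path used in this tree; the sample strings are made up.

    from tensorboard._vendor import bleach

    # Disallowed tags are escaped by default; strip=True removes them instead.
    safe = bleach.clean('<b>hi</b> <script>alert(1)</script>')

    # Bare domains are wrapped in <a> tags; the default nofollow callback
    # adds rel="nofollow" to links created by linkify.
    linked = bleach.linkify('see example.com for details')
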
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (3.79 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/callbacks.cpython-39.pyc
ADDED
Binary file (1.06 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/encoding.cpython-39.pyc
ADDED
Binary file (1.6 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/linkifier.cpython-39.pyc
ADDED
Binary file (11.2 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/sanitizer.cpython-39.pyc
ADDED
Binary file (8.63 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.03 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/bleach/__pycache__/version.cpython-39.pyc
ADDED
Binary file (401 Bytes).
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/callbacks.py
ADDED
@@ -0,0 +1,25 @@
"""A set of basic callbacks for bleach.linkify."""
from __future__ import unicode_literals


def nofollow(attrs, new=False):
    href_key = (None, u'href')
    if href_key not in attrs or attrs[href_key].startswith(u'mailto:'):
        return attrs

    rel_key = (None, u'rel')
    rel_values = [val for val in attrs.get(rel_key, u'').split(u' ') if val]
    if u'nofollow' not in [rel_val.lower() for rel_val in rel_values]:
        rel_values.append(u'nofollow')
    attrs[rel_key] = u' '.join(rel_values)

    return attrs


def target_blank(attrs, new=False):
    href_key = (None, u'href')
    if attrs[href_key].startswith(u'mailto:'):
        return attrs

    attrs[(None, u'target')] = u'_blank'
    return attrs
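
A short sketch (not part of the upload) of how callbacks like the two above plug into linkify; the force_https callback is hypothetical.

    from tensorboard._vendor import bleach
    from tensorboard._vendor.bleach import callbacks

    def force_https(attrs, new=False):
        # Hypothetical callback: rewrite http:// hrefs to https://.
        href_key = (None, u'href')
        href = attrs.get(href_key, u'')
        if href.startswith(u'http://'):
            attrs[href_key] = u'https://' + href[len(u'http://'):]
        return attrs

    html = bleach.linkify(
        'docs at example.com',
        callbacks=[callbacks.nofollow, callbacks.target_blank, force_https],
    )
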
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/encoding.py
ADDED
@@ -0,0 +1,62 @@
import datetime
from decimal import Decimal
import types
import six


def is_protected_type(obj):
    """Determine if the object instance is of a protected type.

    Objects of protected types are preserved as-is when passed to
    force_unicode(strings_only=True).
    """
    return isinstance(obj, (
        six.integer_types +
        (types.NoneType,
         datetime.datetime, datetime.date, datetime.time,
         float, Decimal))
    )


def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Similar to smart_text, except that lazy instances are resolved to
    strings, rather than kept as lazy objects.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    # Handle the common case first, saves 30-40% when s is an instance of
    # six.text_type. This function gets called often in that setting.
    if isinstance(s, six.text_type):
        return s
    if strings_only and is_protected_type(s):
        return s
    try:
        if not isinstance(s, six.string_types):
            if hasattr(s, '__unicode__'):
                s = s.__unicode__()
            else:
                if six.PY3:
                    if isinstance(s, bytes):
                        s = six.text_type(s, encoding, errors)
                    else:
                        s = six.text_type(s)
                else:
                    s = six.text_type(bytes(s), encoding, errors)
        else:
            # Note: We use .decode() here, instead of six.text_type(s,
            # encoding, errors), so that if s is a SafeBytes, it ends up being
            # a SafeText at the end.
            s = s.decode(encoding, errors)
    except UnicodeDecodeError as e:
        if not isinstance(s, Exception):
            raise UnicodeDecodeError(*e.args)
        else:
            # If we get to here, the caller has passed in an Exception
            # subclass populated with non-ASCII bytestring data without a
            # working unicode method. Try to handle this without raising a
            # further exception by individually forcing the exception args
            # to unicode.
            s = ' '.join([force_unicode(arg, encoding, strings_only,
                         errors) for arg in s])
    return s
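
A tiny usage sketch (not part of the upload) of force_unicode; the byte string is assumed to be UTF-8.

    from tensorboard._vendor.bleach.encoding import force_unicode

    force_unicode(u'café')         # text passes through unchanged
    force_unicode(b'caf\xc3\xa9')  # bytes are decoded with the given encoding (utf-8 by default)
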
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/linkifier.py
ADDED
@@ -0,0 +1,526 @@
from __future__ import unicode_literals
import re

from tensorboard._vendor import html5lib
from tensorboard._vendor.html5lib.filters.base import Filter
from tensorboard._vendor.html5lib.filters.sanitizer import allowed_protocols
from tensorboard._vendor.html5lib.serializer import HTMLSerializer

from tensorboard._vendor.bleach import callbacks as linkify_callbacks
from tensorboard._vendor.bleach.encoding import force_unicode
from tensorboard._vendor.bleach.utils import alphabetize_attributes


#: List of default callbacks
DEFAULT_CALLBACKS = [linkify_callbacks.nofollow]


TLDS = """ac ad ae aero af ag ai al am an ao aq ar arpa as asia at au aw ax az
       ba bb bd be bf bg bh bi biz bj bm bn bo br bs bt bv bw by bz ca cat
       cc cd cf cg ch ci ck cl cm cn co com coop cr cu cv cx cy cz de dj dk
       dm do dz ec edu ee eg er es et eu fi fj fk fm fo fr ga gb gd ge gf gg
       gh gi gl gm gn gov gp gq gr gs gt gu gw gy hk hm hn hr ht hu id ie il
       im in info int io iq ir is it je jm jo jobs jp ke kg kh ki km kn kp
       kr kw ky kz la lb lc li lk lr ls lt lu lv ly ma mc md me mg mh mil mk
       ml mm mn mo mobi mp mq mr ms mt mu museum mv mw mx my mz na name nc ne
       net nf ng ni nl no np nr nu nz om org pa pe pf pg ph pk pl pm pn post
       pr pro ps pt pw py qa re ro rs ru rw sa sb sc sd se sg sh si sj sk sl
       sm sn so sr ss st su sv sx sy sz tc td tel tf tg th tj tk tl tm tn to
       tp tr travel tt tv tw tz ua ug uk us uy uz va vc ve vg vi vn vu wf ws
       xn xxx ye yt yu za zm zw""".split()

# Make sure that .com doesn't get matched by .co first
TLDS.reverse()


def build_url_re(tlds=TLDS, protocols=allowed_protocols):
    """Builds the url regex used by linkifier

    If you want a different set of tlds or allowed protocols, pass those in
    and stomp on the existing ``url_re``::

        from bleach import linkifier

        my_url_re = linkifier.build_url_re(my_tlds_list, my_protocols)

        linker = LinkifyFilter(url_re=my_url_re)

    """
    return re.compile(
        r"""\(*  # Match any opening parentheses.
        \b(?<![@.])(?:(?:{0}):/{{0,3}}(?:(?:\w+:)?\w+@)?)?  # http://
        ([\w-]+\.)+(?:{1})(?:\:[0-9]+)?(?!\.\w)\b   # xx.yy.tld(:##)?
        (?:[/?][^\s\{{\}}\|\\\^\[\]`<>"]*)?
            # /path/zz (excluding "unsafe" chars from RFC 1738,
            # except for # and ~, which happen in practice)
        """.format('|'.join(protocols), '|'.join(tlds)),
        re.IGNORECASE | re.VERBOSE | re.UNICODE)


URL_RE = build_url_re()


PROTO_RE = re.compile(r'^[\w-]+:/{0,3}', re.IGNORECASE)


EMAIL_RE = re.compile(
    r"""(?<!//)
    (([-!#$%&'*+/=?^_`{}|~0-9A-Z]+
        (\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*  # dot-atom
    |^"([\001-\010\013\014\016-\037!#-\[\]-\177]
        |\\[\001-\011\013\014\016-\177])*"  # quoted-string
    )@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6})  # domain
    """,
    re.IGNORECASE | re.MULTILINE | re.VERBOSE)


class Linker(object):
    """Convert URL-like strings in an HTML fragment to links

    This function converts strings that look like URLs, domain names and email
    addresses in text that may be an HTML fragment to links, while preserving:

    1. links already in the string
    2. urls found in attributes
    3. email addresses

    linkify does a best-effort approach and tries to recover from bad
    situations due to crazy text.

    """
    def __init__(self, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False,
                 url_re=URL_RE, email_re=EMAIL_RE):
        """Creates a Linker instance

        :arg list callbacks: list of callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg list skip_tags: list of tags that you don't want to linkify the
            contents of; for example, you could set this to ``['pre']`` to skip
            linkifying contents of ``pre`` tags

        :arg bool parse_email: whether or not to linkify email addresses

        :arg re url_re: url matching regex

        :arg re email_re: email matching regex

        :returns: linkified text as unicode

        """
        self.callbacks = callbacks
        self.skip_tags = skip_tags
        self.parse_email = parse_email
        self.url_re = url_re
        self.email_re = email_re

        self.parser = html5lib.HTMLParser(namespaceHTMLElements=False)
        self.walker = html5lib.getTreeWalker('etree')
        self.serializer = HTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,

            # linkify does not sanitize
            sanitize=False,

            # linkify alphabetizes
            alphabetical_attributes=False,
        )

    def linkify(self, text):
        """Linkify specified text

        :arg str text: the text to add links to

        :returns: linkified text as unicode

        """
        text = force_unicode(text)

        if not text:
            return u''

        dom = self.parser.parseFragment(text)
        filtered = LinkifyFilter(
            source=self.walker(dom),
            callbacks=self.callbacks,
            skip_tags=self.skip_tags,
            parse_email=self.parse_email,
            url_re=self.url_re,
            email_re=self.email_re,
        )
        return self.serializer.render(filtered)


class LinkifyFilter(Filter):
    """html5lib filter that linkifies text

    This will do the following:

    * convert email addresses into links
    * convert urls into links
    * edit existing links by running them through callbacks--the default is to
      add a ``rel="nofollow"``

    This filter can be used anywhere html5lib filters can be used.

    """
    def __init__(self, source, callbacks=None, skip_tags=None, parse_email=False,
                 url_re=URL_RE, email_re=EMAIL_RE):
        """Creates a LinkifyFilter instance

        :arg TreeWalker source: stream

        :arg list callbacks: list of callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg list skip_tags: list of tags that you don't want to linkify the
            contents of; for example, you could set this to ``['pre']`` to skip
            linkifying contents of ``pre`` tags

        :arg bool parse_email: whether or not to linkify email addresses

        :arg re url_re: url matching regex

        :arg re email_re: email matching regex

        """
        super(LinkifyFilter, self).__init__(source)

        self.callbacks = callbacks or []
        self.skip_tags = skip_tags or []
        self.parse_email = parse_email

        self.url_re = url_re
        self.email_re = email_re

    def apply_callbacks(self, attrs, is_new):
        """Given an attrs dict and an is_new bool, runs through callbacks

        Callbacks can return an adjusted attrs dict or ``None``. In the case of
        ``None``, we stop going through callbacks and return that and the link
        gets dropped.

        :arg dict attrs: map of ``(namespace, name)`` -> ``value``

        :arg bool is_new: whether or not this link was added by linkify

        :returns: adjusted attrs dict or ``None``

        """
        for cb in self.callbacks:
            attrs = cb(attrs, is_new)
            if attrs is None:
                return None
        return attrs

    def extract_character_data(self, token_list):
        """Extracts and squashes character sequences in a token stream"""
        # FIXME(willkg): This is a terrible idea. What it does is drop all the
        # tags from the token list and merge the Characters and SpaceCharacters
        # tokens into a single text.
        #
        # So something like this::
        #
        #     "<span>" "<b>" "some text" "</b>" "</span>"
        #
        # gets converted to "some text".
        #
        # This gets used to figure out the ``_text`` fauxttribute value for
        # linkify callables.
        #
        # I'm not really sure how else to support that ``_text`` fauxttribute and
        # maintain some modicum of backwards compatability with previous versions
        # of Bleach.

        out = []
        for token in token_list:
            token_type = token['type']
            if token_type in ['Characters', 'SpaceCharacters']:
                out.append(token['data'])

        return u''.join(out)

    def handle_email_addresses(self, src_iter):
        """Handle email addresses in character tokens"""
        for token in src_iter:
            if token['type'] == 'Characters':
                text = token['data']
                new_tokens = []
                end = 0

                # For each email address we find in the text
                for match in self.email_re.finditer(text):
                    if match.start() > end:
                        new_tokens.append(
                            {u'type': u'Characters', u'data': text[end:match.start()]}
                        )

                    # Run attributes through the callbacks to see what we
                    # should do with this match
                    attrs = {
                        (None, u'href'): u'mailto:%s' % match.group(0),
                        u'_text': match.group(0)
                    }
                    attrs = self.apply_callbacks(attrs, True)

                    if attrs is None:
                        # Just add the text--but not as a link
                        new_tokens.append(
                            {u'type': u'Characters', u'data': match.group(0)}
                        )

                    else:
                        # Add an "a" tag for the new link
                        _text = attrs.pop(u'_text', '')
                        attrs = alphabetize_attributes(attrs)
                        new_tokens.extend([
                            {u'type': u'StartTag', u'name': u'a', u'data': attrs},
                            {u'type': u'Characters', u'data': force_unicode(_text)},
                            {u'type': u'EndTag', u'name': 'a'}
                        ])
                    end = match.end()

                if new_tokens:
                    # Yield the adjusted set of tokens and then continue
                    # through the loop
                    if end < len(text):
                        new_tokens.append({u'type': u'Characters', u'data': text[end:]})

                    for new_token in new_tokens:
                        yield new_token

                    continue

            yield token

    def strip_non_url_bits(self, fragment):
        """Strips non-url bits from the url

        This accounts for over-eager matching by the regex.

        """
        prefix = suffix = ''

        while fragment:
            # Try removing ( from the beginning and, if it's balanced, from the
            # end, too
            if fragment.startswith(u'('):
                prefix = prefix + u'('
                fragment = fragment[1:]

                if fragment.endswith(u')'):
                    suffix = u')' + suffix
                    fragment = fragment[:-1]
                continue

            # Now try extraneous things from the end. For example, sometimes we
            # pick up ) at the end of a url, but the url is in a parenthesized
            # phrase like:
            #
            #     "i looked at the site (at http://example.com)"

            if fragment.endswith(u')') and u'(' not in fragment:
                fragment = fragment[:-1]
                suffix = u')' + suffix
                continue

            # Handle commas
            if fragment.endswith(u','):
                fragment = fragment[:-1]
                suffix = u',' + suffix
                continue

            # Handle periods
            if fragment.endswith(u'.'):
                fragment = fragment[:-1]
                suffix = u'.' + suffix
                continue

            # Nothing matched, so we're done
            break

        return fragment, prefix, suffix

    def handle_links(self, src_iter):
        """Handle links in character tokens"""
        for token in src_iter:
            if token['type'] == 'Characters':
                text = token['data']
                new_tokens = []
                end = 0

                for match in self.url_re.finditer(text):
                    if match.start() > end:
                        new_tokens.append(
                            {u'type': u'Characters', u'data': text[end:match.start()]}
                        )

                    url = match.group(0)
                    prefix = suffix = ''

                    # Sometimes we pick up too much in the url match, so look for
                    # bits we should drop and remove them from the match
                    url, prefix, suffix = self.strip_non_url_bits(url)

                    # If there's no protocol, add one
                    if PROTO_RE.search(url):
                        href = url
                    else:
                        href = u'http://%s' % url

                    attrs = {
                        (None, u'href'): href,
                        u'_text': url
                    }
                    attrs = self.apply_callbacks(attrs, True)

                    if attrs is None:
                        # Just add the text
                        new_tokens.append(
                            {u'type': u'Characters', u'data': prefix + url + suffix}
                        )

                    else:
                        # Add the "a" tag!
                        if prefix:
                            new_tokens.append(
                                {u'type': u'Characters', u'data': prefix}
                            )

                        _text = attrs.pop(u'_text', '')
                        attrs = alphabetize_attributes(attrs)

                        new_tokens.extend([
                            {u'type': u'StartTag', u'name': u'a', u'data': attrs},
                            {u'type': u'Characters', u'data': force_unicode(_text)},
                            {u'type': u'EndTag', u'name': 'a'},
                        ])

                        if suffix:
                            new_tokens.append(
                                {u'type': u'Characters', u'data': suffix}
                            )

                    end = match.end()

                if new_tokens:
                    # Yield the adjusted set of tokens and then continue
                    # through the loop
                    if end < len(text):
                        new_tokens.append({u'type': u'Characters', u'data': text[end:]})

                    for new_token in new_tokens:
                        yield new_token

                    continue

            yield token

    def handle_a_tag(self, token_buffer):
        """Handle the "a" tag

        This could adjust the link or drop it altogether depending on what the
        callbacks return.

        This yields the new set of tokens.

        """
        a_token = token_buffer[0]
        if a_token['data']:
            attrs = a_token['data']
        else:
            attrs = {}
        text = self.extract_character_data(token_buffer)
        attrs['_text'] = text

        attrs = self.apply_callbacks(attrs, False)

        if attrs is None:
            # We're dropping the "a" tag and everything else and replacing
            # it with character data. So emit that token.
            yield {'type': 'Characters', 'data': text}

        else:
            new_text = attrs.pop('_text', '')
            a_token['data'] = alphabetize_attributes(attrs)

            if text == new_text:
                # The callbacks didn't change the text, so we yield the new "a"
                # token, then whatever else was there, then the end "a" token
                yield a_token
                for mem in token_buffer[1:]:
                    yield mem

            else:
                # If the callbacks changed the text, then we're going to drop
                # all the tokens between the start and end "a" tags and replace
                # it with the new text
                yield a_token
                yield {'type': 'Characters', 'data': force_unicode(new_text)}
                yield token_buffer[-1]

    def __iter__(self):
        in_a = False
        in_skip_tag = None

        token_buffer = []

        for token in super(LinkifyFilter, self).__iter__():
            if in_a:
                # Handle the case where we're in an "a" tag--we want to buffer tokens
                # until we hit an end "a" tag.
                if token['type'] == 'EndTag' and token['name'] == 'a':
                    # Add the end tag to the token buffer and then handle them
                    # and yield anything returned
                    token_buffer.append(token)
                    for new_token in self.handle_a_tag(token_buffer):
                        yield new_token

                    # Clear "a" related state and continue since we've yielded all
                    # the tokens we're going to yield
                    in_a = False
                    token_buffer = []
                    continue

                else:
                    token_buffer.append(token)
                    continue

            elif token['type'] in ['StartTag', 'EmptyTag']:
                if token['name'] in self.skip_tags:
                    # Skip tags start a "special mode" where we don't linkify
                    # anything until the end tag.
                    in_skip_tag = token['name']

                elif token['name'] == 'a':
                    # The "a" tag is special--we switch to a slurp mode and
                    # slurp all the tokens until the end "a" tag and then
                    # figure out what to do with them there.
                    in_a = True
                    token_buffer.append(token)

                    # We buffer the start tag, so we don't want to yield it,
                    # yet
                    continue

            elif in_skip_tag and self.skip_tags:
                # NOTE(willkg): We put this clause here since in_a and
                # switching in and out of in_a takes precedence.
                if token['type'] == 'EndTag' and token['name'] == in_skip_tag:
                    in_skip_tag = None

            elif not in_a and not in_skip_tag and token['type'] == 'Characters':
                new_stream = iter([token])
                if self.parse_email:
                    new_stream = self.handle_email_addresses(new_stream)

                new_stream = self.handle_links(new_stream)

                for token in new_stream:
                    yield token

                # We've already yielded this token, so continue
                continue

            yield token
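
A sketch (not part of the upload) of the customization path the build_url_re docstring describes: build a regex over a restricted, hypothetical TLD and protocol set and hand it to a Linker.

    from tensorboard._vendor.bleach import linkifier

    # Hypothetical restricted sets, for illustration only.
    my_tlds = ['com', 'org']
    my_protocols = ['https']

    my_url_re = linkifier.build_url_re(tlds=my_tlds, protocols=my_protocols)
    linker = linkifier.Linker(url_re=my_url_re, parse_email=True)

    html = linker.linkify('reach us at https://example.org or support@example.com')
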
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/sanitizer.py
ADDED
@@ -0,0 +1,368 @@
from __future__ import unicode_literals
import re
from xml.sax.saxutils import unescape

from tensorboard._vendor import html5lib
from tensorboard._vendor.html5lib.constants import namespaces
from tensorboard._vendor.html5lib.filters import sanitizer
from tensorboard._vendor.html5lib.serializer import HTMLSerializer

from tensorboard._vendor.bleach.encoding import force_unicode
from tensorboard._vendor.bleach.utils import alphabetize_attributes


#: List of allowed tags
ALLOWED_TAGS = [
    'a',
    'abbr',
    'acronym',
    'b',
    'blockquote',
    'code',
    'em',
    'i',
    'li',
    'ol',
    'strong',
    'ul',
]


#: Map of allowed attributes by tag
ALLOWED_ATTRIBUTES = {
    'a': ['href', 'title'],
    'abbr': ['title'],
    'acronym': ['title'],
}


#: List of allowed styles
ALLOWED_STYLES = []


#: List of allowed protocols
ALLOWED_PROTOCOLS = ['http', 'https', 'mailto']


class Cleaner(object):
    """Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    This cleaner is not designed to use to transform content to be used in
    non-web-page contexts.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    """

    def __init__(self, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
                 styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
                 strip_comments=True, filters=None):
        """Initializes a Cleaner

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        """
        self.tags = tags
        self.attributes = attributes
        self.styles = styles
        self.protocols = protocols
        self.strip = strip
        self.strip_comments = strip_comments
        self.filters = filters or []

        self.parser = html5lib.HTMLParser(namespaceHTMLElements=False)
        self.walker = html5lib.getTreeWalker('etree')
        self.serializer = HTMLSerializer(
            quote_attr_values='always',
            omit_optional_tags=False,

            # Bleach has its own sanitizer, so don't use the html5lib one
            sanitize=False,

            # Bleach sanitizer alphabetizes already, so don't use the html5lib one
            alphabetical_attributes=False,
        )

    def clean(self, text):
        """Cleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        """
        if not text:
            return u''

        text = force_unicode(text)

        dom = self.parser.parseFragment(text)
        filtered = BleachSanitizerFilter(
            source=self.walker(dom),

            # Bleach-sanitizer-specific things
            attributes=self.attributes,
            strip_disallowed_elements=self.strip,
            strip_html_comments=self.strip_comments,

            # html5lib-sanitizer things
            allowed_elements=self.tags,
            allowed_css_properties=self.styles,
            allowed_protocols=self.protocols,
            allowed_svg_properties=[],
        )

        # Apply any filters after the BleachSanitizerFilter
        for filter_class in self.filters:
            filtered = filter_class(source=filtered)

        return self.serializer.render(filtered)


def attribute_filter_factory(attributes):
    """Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    """
    if callable(attributes):
        return attributes

    if isinstance(attributes, dict):
        def _attr_filter(tag, attr, value):
            if tag in attributes:
                attr_val = attributes[tag]
                if callable(attr_val):
                    return attr_val(tag, attr, value)

                if attr in attr_val:
                    return True

            if '*' in attributes:
                attr_val = attributes['*']
                if callable(attr_val):
                    return attr_val(tag, attr, value)

                return attr in attr_val

            return False

        return _attr_filter

    if isinstance(attributes, list):
        def _attr_filter(tag, attr, value):
            return attr in attributes

        return _attr_filter

    raise ValueError('attributes needs to be a callable, a list or a dict')


class BleachSanitizerFilter(sanitizer.Filter):
    """html5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    """
    def __init__(self, source, attributes=ALLOWED_ATTRIBUTES,
                 strip_disallowed_elements=False, strip_html_comments=True,
                 **kwargs):
        """Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        """
        self.attr_filter = attribute_filter_factory(attributes)

        self.strip_disallowed_elements = strip_disallowed_elements
        self.strip_html_comments = strip_html_comments

        return super(BleachSanitizerFilter, self).__init__(source, **kwargs)

    def sanitize_token(self, token):
        """Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        """
        token_type = token['type']
        if token_type in ['StartTag', 'EndTag', 'EmptyTag']:
            if token['name'] in self.allowed_elements:
                return self.allow_token(token)

            elif self.strip_disallowed_elements:
                pass

            else:
                if 'data' in token:
                    # Alphabetize the attributes before calling .disallowed_token()
                    # so that the resulting string is stable
                    token['data'] = alphabetize_attributes(token['data'])
                return self.disallowed_token(token)

        elif token_type == 'Comment':
            if not self.strip_html_comments:
                return token

        else:
            return token

    def allow_token(self, token):
        """Handles the case where we're allowing the tag"""
        if 'data' in token:
            # Loop through all the attributes and drop the ones that are not
            # allowed, are unsafe or break other rules. Additionally, fix
            # attribute values that need fixing.
            #
            # At the end of this loop, we have the final set of attributes
            # we're keeping.
            attrs = {}
            for namespaced_name, val in token['data'].items():
                namespace, name = namespaced_name

                # Drop attributes that are not explicitly allowed
                #
                # NOTE(willkg): We pass in the attribute name--not a namespaced
                # name.
                if not self.attr_filter(token['name'], name, val):
                    continue

                # Look at attributes that have uri values
                if namespaced_name in self.attr_val_is_uri:
                    val_unescaped = re.sub(
                        "[`\000-\040\177-\240\s]+",
                        '',
                        unescape(val)).lower()

                    # Remove replacement characters from unescaped characters.
                    val_unescaped = val_unescaped.replace("\ufffd", "")

                    # Drop attributes with uri values that have protocols that
                    # aren't allowed
                    if (re.match(r'^[a-z0-9][-+.a-z0-9]*:', val_unescaped) and
                            (val_unescaped.split(':')[0] not in self.allowed_protocols)):
                        continue

                # Drop values in svg attrs with non-local IRIs
                if namespaced_name in self.svg_attr_val_allows_ref:
                    new_val = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                     ' ',
                                     unescape(val))
                    new_val = new_val.strip()
                    if not new_val:
                        continue

                    else:
                        # Replace the val with the unescaped version because
                        # it's a iri
                        val = new_val

                # Drop href and xlink:href attr for svg elements with non-local IRIs
                if (None, token['name']) in self.svg_allow_local_href:
                    if namespaced_name in [(None, 'href'), (namespaces['xlink'], 'href')]:
                        if re.search(r'^\s*[^#\s]', val):
                            continue

                # If it's a style attribute, sanitize it
                if namespaced_name == (None, u'style'):
                    val = self.sanitize_css(val)

                # At this point, we want to keep the attribute, so add it in
                attrs[namespaced_name] = val

            token['data'] = alphabetize_attributes(attrs)

        return token

    def sanitize_css(self, style):
        """Sanitizes css in style tags"""
        # disallow urls
        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet

        # Validate the css in the style tag and if it's not valid, then drop
        # the whole thing.
        parts = style.split(';')
        gauntlet = re.compile(
            r"""^([-/:,#%.'"\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$"""
        )

        for part in parts:
            if not gauntlet.match(part):
                return ''

        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall('([-\w]+)\s*:\s*([^:;]*)', style):
            if not value:
                continue

            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')

            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
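
A sketch (not part of the upload) of Cleaner reuse and of the dict-with-callable attributes form that attribute_filter_factory accepts; the allow-list values are illustrative.

    from tensorboard._vendor.bleach.sanitizer import Cleaner

    def allow_safe_img_src(tag, attr, value):
        # Illustrative per-tag callable filter: keep only https image sources.
        return attr == 'src' and value.startswith('https://')

    cleaner = Cleaner(
        tags=['a', 'img', 'p'],
        attributes={'a': ['href', 'title'], 'img': allow_safe_img_src},
        strip=True,
    )

    for fragment in ['<p onclick="x()">hi</p>', '<img src="http://evil/x.png">']:
        print(cleaner.clean(fragment))
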
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/utils.py
ADDED
@@ -0,0 +1,23 @@
from collections import OrderedDict


def _attr_key(attr):
    """Returns appropriate key for sorting attribute names

    Attribute names are a tuple of ``(namespace, name)`` where namespace can be
    ``None`` or a string. These can't be compared in Python 3, so we convert the
    ``None`` to an empty string.

    """
    key = (attr[0][0] or ''), attr[0][1]
    return key


def alphabetize_attributes(attrs):
    """Takes a dict of attributes (or None) and returns them alphabetized"""
    if not attrs:
        return attrs

    return OrderedDict(
        [(k, v) for k, v in sorted(attrs.items(), key=_attr_key)]
    )
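
A quick illustration of the helper above (the import path mirrors the vendored location in this diff): attribute keys are `(namespace, name)` tuples, exactly as in the sanitizer code, and `None` namespaces sort as empty strings.

from collections import OrderedDict  # the return type used by the module above

from tensorboard._vendor.bleach.utils import alphabetize_attributes

attrs = {(None, 'style'): 'color: red;', (None, 'alt'): 'logo'}
ordered = alphabetize_attributes(attrs)
print(isinstance(ordered, OrderedDict), list(ordered))
# True [(None, 'alt'), (None, 'style')]
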
MLPY/Lib/site-packages/tensorboard/_vendor/bleach/version.py
ADDED
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-

from __future__ import unicode_literals

VERSION = (2, 0, 0)
__version__ = '.'.join([str(n) for n in VERSION])
MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__init__.py
ADDED
@@ -0,0 +1,35 @@
"""
HTML parsing library based on the `WHATWG HTML specification
<https://whatwg.org/html>`_. The parser is designed to be compatible with
existing HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.

Example usage::

    import html5lib
    with open("my_document.html", "rb") as f:
        tree = html5lib.parse(f)

For convenience, this module re-exports the following names:

* :func:`~.html5parser.parse`
* :func:`~.html5parser.parseFragment`
* :class:`~.html5parser.HTMLParser`
* :func:`~.treebuilders.getTreeBuilder`
* :func:`~.treewalkers.getTreeWalker`
* :func:`~.serializer.serialize`
"""

from __future__ import absolute_import, division, unicode_literals

from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize

__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
           "getTreeWalker", "serialize"]

# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.1"
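
A hedged usage sketch of the re-exported names above, going one step beyond the docstring's parse example. The `tensorboard._vendor.html5lib` alias follows this diff's layout; `tree="etree"` is assumed to name the default etree-based tree family used by `parse()`.

import tensorboard._vendor.html5lib as html5lib

# parse() recovers from the unclosed tags instead of raising.
tree = html5lib.parse("<p>Hello <b>world")

# serialize() looks up the matching tree walker for the "etree" family and
# emits HTML text again.
print(html5lib.serialize(tree, tree="etree"))
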
MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.26 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_ihatexml.cpython-39.pyc
ADDED
Binary file (13.7 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_inputstream.cpython-39.pyc
ADDED
Binary file (21.6 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_tokenizer.cpython-39.pyc
ADDED
Binary file (39.7 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/_utils.cpython-39.pyc
ADDED
Binary file (4.76 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/constants.cpython-39.pyc
ADDED
Binary file (66.3 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/html5parser.cpython-39.pyc
ADDED
Binary file (91 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/__pycache__/serializer.cpython-39.pyc
ADDED
Binary file (10.8 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_ihatexml.py
ADDED
@@ -0,0 +1,289 @@
from __future__ import absolute_import, division, unicode_literals

import re
import warnings

from .constants import DataLossWarning

baseChar = """
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""

ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""

combiningCharacter = """
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
#x3099 | #x309A"""

digit = """
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""

extender = """
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""

letter = " | ".join([baseChar, ideographic])

# Without the
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
                   extender])
nameFirst = " | ".join([letter, "_"])

reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")


def charStringToList(chars):
    charRanges = [item.strip() for item in chars.split(" | ")]
    rv = []
    for item in charRanges:
        foundMatch = False
        for regexp in (reChar, reCharRange):
            match = regexp.match(item)
            if match is not None:
                rv.append([hexToInt(item) for item in match.groups()])
                if len(rv[-1]) == 1:
                    rv[-1] = rv[-1] * 2
                foundMatch = True
                break
        if not foundMatch:
            assert len(item) == 1

            rv.append([ord(item)] * 2)
    rv = normaliseCharList(rv)
    return rv


def normaliseCharList(charList):
    charList = sorted(charList)
    for item in charList:
        assert item[1] >= item[0]
    rv = []
    i = 0
    while i < len(charList):
        j = 1
        rv.append(charList[i])
        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
            rv[-1][1] = charList[i + j][1]
            j += 1
        i += j
    return rv


# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)


def missingRanges(charList):
    rv = []
    if charList[0] != 0:
        rv.append([0, charList[0][0] - 1])
    for i, item in enumerate(charList[:-1]):
        rv.append([item[1] + 1, charList[i + 1][0] - 1])
    if charList[-1][1] != max_unicode:
        rv.append([charList[-1][1] + 1, max_unicode])
    return rv


def listToRegexpStr(charList):
    rv = []
    for item in charList:
        if item[0] == item[1]:
            rv.append(escapeRegexp(chr(item[0])))
        else:
            rv.append(escapeRegexp(chr(item[0])) + "-" +
                      escapeRegexp(chr(item[1])))
    return "[%s]" % "".join(rv)


def hexToInt(hex_str):
    return int(hex_str, 16)


def escapeRegexp(string):
    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
                         "[", "]", "|", "(", ")", "-")
    for char in specialCharacters:
        string = string.replace(char, "\\" + char)

    return string

# output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa

# Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")


class InfosetFilter(object):
    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")

    def __init__(self,
                 dropXmlnsLocalName=False,
                 dropXmlnsAttrNs=False,
                 preventDoubleDashComments=False,
                 preventDashAtCommentEnd=False,
                 replaceFormFeedCharacters=True,
                 preventSingleQuotePubid=False):

        self.dropXmlnsLocalName = dropXmlnsLocalName
        self.dropXmlnsAttrNs = dropXmlnsAttrNs

        self.preventDoubleDashComments = preventDoubleDashComments
        self.preventDashAtCommentEnd = preventDashAtCommentEnd

        self.replaceFormFeedCharacters = replaceFormFeedCharacters

        self.preventSingleQuotePubid = preventSingleQuotePubid

        self.replaceCache = {}

    def coerceAttribute(self, name, namespace=None):
        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
            warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
            return None
        elif (self.dropXmlnsAttrNs and
              namespace == "http://www.w3.org/2000/xmlns/"):
            warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
            return None
        else:
            return self.toXmlName(name)

    def coerceElement(self, name):
        return self.toXmlName(name)

    def coerceComment(self, data):
        if self.preventDoubleDashComments:
            while "--" in data:
                warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                data = data.replace("--", "- -")
            if data.endswith("-"):
                warnings.warn("Comments cannot end in a dash", DataLossWarning)
                data += " "
        return data

    def coerceCharacters(self, data):
        if self.replaceFormFeedCharacters:
            for _ in range(data.count("\x0C")):
                warnings.warn("Text cannot contain U+000C", DataLossWarning)
            data = data.replace("\x0C", " ")
        # Other non-xml characters
        return data

    def coercePubid(self, data):
        dataOutput = data
        for char in nonPubidCharRegexp.findall(data):
            warnings.warn("Coercing non-XML pubid", DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            dataOutput = dataOutput.replace(char, replacement)
        if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
            warnings.warn("Pubid cannot contain single quote", DataLossWarning)
            dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
        return dataOutput

    def toXmlName(self, name):
        nameFirst = name[0]
        nameRest = name[1:]
        m = nonXmlNameFirstBMPRegexp.match(nameFirst)
        if m:
            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            nameFirstOutput = self.getReplacementCharacter(nameFirst)
        else:
            nameFirstOutput = nameFirst

        nameRestOutput = nameRest
        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
        for char in replaceChars:
            warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
            replacement = self.getReplacementCharacter(char)
            nameRestOutput = nameRestOutput.replace(char, replacement)
        return nameFirstOutput + nameRestOutput

    def getReplacementCharacter(self, char):
        if char in self.replaceCache:
            replacement = self.replaceCache[char]
        else:
            replacement = self.escapeChar(char)
        return replacement

    def fromXmlName(self, name):
        for item in set(self.replacementRegexp.findall(name)):
            name = name.replace(item, self.unescapeChar(item))
        return name

    def escapeChar(self, char):
        replacement = "U%05X" % ord(char)
        self.replaceCache[char] = replacement
        return replacement

    def unescapeChar(self, charcode):
        return chr(int(charcode[1:], 16))
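
A brief sketch of the coercion behaviour defined above (the import path mirrors the vendored location in this diff): characters that are not legal in XML names are replaced with reversible `UXXXXX` escapes, with a DataLossWarning emitted along the way.

import warnings

from tensorboard._vendor.html5lib._ihatexml import InfosetFilter

f = InfosetFilter()
with warnings.catch_warnings():
    warnings.simplefilter("ignore")      # silence the DataLossWarning for this demo
    coerced = f.toXmlName("1bad-name")   # a leading digit cannot start an XML name
print(coerced)                 # U00031bad-name
print(f.fromXmlName(coerced))  # 1bad-name  (the escape round-trips back)
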
MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_inputstream.py
ADDED
@@ -0,0 +1,918 @@
1 |
+
from __future__ import absolute_import, division, unicode_literals
|
2 |
+
|
3 |
+
from six import text_type
|
4 |
+
from six.moves import http_client, urllib
|
5 |
+
|
6 |
+
import codecs
|
7 |
+
import re
|
8 |
+
from io import BytesIO, StringIO
|
9 |
+
|
10 |
+
from tensorboard._vendor import webencodings
|
11 |
+
|
12 |
+
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
13 |
+
from .constants import _ReparseException
|
14 |
+
from . import _utils
|
15 |
+
|
16 |
+
# Non-unicode versions of constants for use in the pre-parser
|
17 |
+
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
18 |
+
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
19 |
+
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
20 |
+
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
21 |
+
|
22 |
+
|
23 |
+
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
|
24 |
+
|
25 |
+
if _utils.supports_lone_surrogates:
|
26 |
+
# Use one extra step of indirection and create surrogates with
|
27 |
+
# eval. Not using this indirection would introduce an illegal
|
28 |
+
# unicode literal on platforms not supporting such lone
|
29 |
+
# surrogates.
|
30 |
+
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
|
31 |
+
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
|
32 |
+
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
|
33 |
+
"]")
|
34 |
+
else:
|
35 |
+
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
36 |
+
|
37 |
+
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
38 |
+
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
39 |
+
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
40 |
+
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
41 |
+
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
42 |
+
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
43 |
+
0x10FFFE, 0x10FFFF}
|
44 |
+
|
45 |
+
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
46 |
+
|
47 |
+
# Cache for charsUntil()
|
48 |
+
charsUntilRegEx = {}
|
49 |
+
|
50 |
+
|
51 |
+
class BufferedStream(object):
|
52 |
+
"""Buffering for streams that do not have buffering of their own
|
53 |
+
|
54 |
+
The buffer is implemented as a list of chunks on the assumption that
|
55 |
+
joining many strings will be slow since it is O(n**2)
|
56 |
+
"""
|
57 |
+
|
58 |
+
def __init__(self, stream):
|
59 |
+
self.stream = stream
|
60 |
+
self.buffer = []
|
61 |
+
self.position = [-1, 0] # chunk number, offset
|
62 |
+
|
63 |
+
def tell(self):
|
64 |
+
pos = 0
|
65 |
+
for chunk in self.buffer[:self.position[0]]:
|
66 |
+
pos += len(chunk)
|
67 |
+
pos += self.position[1]
|
68 |
+
return pos
|
69 |
+
|
70 |
+
def seek(self, pos):
|
71 |
+
assert pos <= self._bufferedBytes()
|
72 |
+
offset = pos
|
73 |
+
i = 0
|
74 |
+
while len(self.buffer[i]) < offset:
|
75 |
+
offset -= len(self.buffer[i])
|
76 |
+
i += 1
|
77 |
+
self.position = [i, offset]
|
78 |
+
|
79 |
+
def read(self, bytes):
|
80 |
+
if not self.buffer:
|
81 |
+
return self._readStream(bytes)
|
82 |
+
elif (self.position[0] == len(self.buffer) and
|
83 |
+
self.position[1] == len(self.buffer[-1])):
|
84 |
+
return self._readStream(bytes)
|
85 |
+
else:
|
86 |
+
return self._readFromBuffer(bytes)
|
87 |
+
|
88 |
+
def _bufferedBytes(self):
|
89 |
+
return sum([len(item) for item in self.buffer])
|
90 |
+
|
91 |
+
def _readStream(self, bytes):
|
92 |
+
data = self.stream.read(bytes)
|
93 |
+
self.buffer.append(data)
|
94 |
+
self.position[0] += 1
|
95 |
+
self.position[1] = len(data)
|
96 |
+
return data
|
97 |
+
|
98 |
+
def _readFromBuffer(self, bytes):
|
99 |
+
remainingBytes = bytes
|
100 |
+
rv = []
|
101 |
+
bufferIndex = self.position[0]
|
102 |
+
bufferOffset = self.position[1]
|
103 |
+
while bufferIndex < len(self.buffer) and remainingBytes != 0:
|
104 |
+
assert remainingBytes > 0
|
105 |
+
bufferedData = self.buffer[bufferIndex]
|
106 |
+
|
107 |
+
if remainingBytes <= len(bufferedData) - bufferOffset:
|
108 |
+
bytesToRead = remainingBytes
|
109 |
+
self.position = [bufferIndex, bufferOffset + bytesToRead]
|
110 |
+
else:
|
111 |
+
bytesToRead = len(bufferedData) - bufferOffset
|
112 |
+
self.position = [bufferIndex, len(bufferedData)]
|
113 |
+
bufferIndex += 1
|
114 |
+
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
|
115 |
+
remainingBytes -= bytesToRead
|
116 |
+
|
117 |
+
bufferOffset = 0
|
118 |
+
|
119 |
+
if remainingBytes:
|
120 |
+
rv.append(self._readStream(remainingBytes))
|
121 |
+
|
122 |
+
return b"".join(rv)
|
123 |
+
|
124 |
+
|
125 |
+
def HTMLInputStream(source, **kwargs):
|
126 |
+
# Work around Python bug #20007: read(0) closes the connection.
|
127 |
+
# http://bugs.python.org/issue20007
|
128 |
+
if (isinstance(source, http_client.HTTPResponse) or
|
129 |
+
# Also check for addinfourl wrapping HTTPResponse
|
130 |
+
(isinstance(source, urllib.response.addbase) and
|
131 |
+
isinstance(source.fp, http_client.HTTPResponse))):
|
132 |
+
isUnicode = False
|
133 |
+
elif hasattr(source, "read"):
|
134 |
+
isUnicode = isinstance(source.read(0), text_type)
|
135 |
+
else:
|
136 |
+
isUnicode = isinstance(source, text_type)
|
137 |
+
|
138 |
+
if isUnicode:
|
139 |
+
encodings = [x for x in kwargs if x.endswith("_encoding")]
|
140 |
+
if encodings:
|
141 |
+
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
|
142 |
+
|
143 |
+
return HTMLUnicodeInputStream(source, **kwargs)
|
144 |
+
else:
|
145 |
+
return HTMLBinaryInputStream(source, **kwargs)
|
146 |
+
|
147 |
+
|
148 |
+
class HTMLUnicodeInputStream(object):
|
149 |
+
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
150 |
+
|
151 |
+
This class takes care of character encoding and removing or replacing
|
152 |
+
incorrect byte-sequences and also provides column and line tracking.
|
153 |
+
|
154 |
+
"""
|
155 |
+
|
156 |
+
_defaultChunkSize = 10240
|
157 |
+
|
158 |
+
def __init__(self, source):
|
159 |
+
"""Initialises the HTMLInputStream.
|
160 |
+
|
161 |
+
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
162 |
+
for use by html5lib.
|
163 |
+
|
164 |
+
source can be either a file-object, local filename or a string.
|
165 |
+
|
166 |
+
The optional encoding parameter must be a string that indicates
|
167 |
+
the encoding. If specified, that encoding will be used,
|
168 |
+
regardless of any BOM or later declaration (such as in a meta
|
169 |
+
element)
|
170 |
+
|
171 |
+
"""
|
172 |
+
|
173 |
+
if not _utils.supports_lone_surrogates:
|
174 |
+
# Such platforms will have already checked for such
|
175 |
+
# surrogate errors, so no need to do this checking.
|
176 |
+
self.reportCharacterErrors = None
|
177 |
+
elif len("\U0010FFFF") == 1:
|
178 |
+
self.reportCharacterErrors = self.characterErrorsUCS4
|
179 |
+
else:
|
180 |
+
self.reportCharacterErrors = self.characterErrorsUCS2
|
181 |
+
|
182 |
+
# List of where new lines occur
|
183 |
+
self.newLines = [0]
|
184 |
+
|
185 |
+
self.charEncoding = (lookupEncoding("utf-8"), "certain")
|
186 |
+
self.dataStream = self.openStream(source)
|
187 |
+
|
188 |
+
self.reset()
|
189 |
+
|
190 |
+
def reset(self):
|
191 |
+
self.chunk = ""
|
192 |
+
self.chunkSize = 0
|
193 |
+
self.chunkOffset = 0
|
194 |
+
self.errors = []
|
195 |
+
|
196 |
+
# number of (complete) lines in previous chunks
|
197 |
+
self.prevNumLines = 0
|
198 |
+
# number of columns in the last line of the previous chunk
|
199 |
+
self.prevNumCols = 0
|
200 |
+
|
201 |
+
# Deal with CR LF and surrogates split over chunk boundaries
|
202 |
+
self._bufferedCharacter = None
|
203 |
+
|
204 |
+
def openStream(self, source):
|
205 |
+
"""Produces a file object from source.
|
206 |
+
|
207 |
+
source can be either a file object, local filename or a string.
|
208 |
+
|
209 |
+
"""
|
210 |
+
# Already a file object
|
211 |
+
if hasattr(source, 'read'):
|
212 |
+
stream = source
|
213 |
+
else:
|
214 |
+
stream = StringIO(source)
|
215 |
+
|
216 |
+
return stream
|
217 |
+
|
218 |
+
def _position(self, offset):
|
219 |
+
chunk = self.chunk
|
220 |
+
nLines = chunk.count('\n', 0, offset)
|
221 |
+
positionLine = self.prevNumLines + nLines
|
222 |
+
lastLinePos = chunk.rfind('\n', 0, offset)
|
223 |
+
if lastLinePos == -1:
|
224 |
+
positionColumn = self.prevNumCols + offset
|
225 |
+
else:
|
226 |
+
positionColumn = offset - (lastLinePos + 1)
|
227 |
+
return (positionLine, positionColumn)
|
228 |
+
|
229 |
+
def position(self):
|
230 |
+
"""Returns (line, col) of the current position in the stream."""
|
231 |
+
line, col = self._position(self.chunkOffset)
|
232 |
+
return (line + 1, col)
|
233 |
+
|
234 |
+
def char(self):
|
235 |
+
""" Read one character from the stream or queue if available. Return
|
236 |
+
EOF when EOF is reached.
|
237 |
+
"""
|
238 |
+
# Read a new chunk from the input stream if necessary
|
239 |
+
if self.chunkOffset >= self.chunkSize:
|
240 |
+
if not self.readChunk():
|
241 |
+
return EOF
|
242 |
+
|
243 |
+
chunkOffset = self.chunkOffset
|
244 |
+
char = self.chunk[chunkOffset]
|
245 |
+
self.chunkOffset = chunkOffset + 1
|
246 |
+
|
247 |
+
return char
|
248 |
+
|
249 |
+
def readChunk(self, chunkSize=None):
|
250 |
+
if chunkSize is None:
|
251 |
+
chunkSize = self._defaultChunkSize
|
252 |
+
|
253 |
+
self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
|
254 |
+
|
255 |
+
self.chunk = ""
|
256 |
+
self.chunkSize = 0
|
257 |
+
self.chunkOffset = 0
|
258 |
+
|
259 |
+
data = self.dataStream.read(chunkSize)
|
260 |
+
|
261 |
+
# Deal with CR LF and surrogates broken across chunks
|
262 |
+
if self._bufferedCharacter:
|
263 |
+
data = self._bufferedCharacter + data
|
264 |
+
self._bufferedCharacter = None
|
265 |
+
elif not data:
|
266 |
+
# We have no more data, bye-bye stream
|
267 |
+
return False
|
268 |
+
|
269 |
+
if len(data) > 1:
|
270 |
+
lastv = ord(data[-1])
|
271 |
+
if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
|
272 |
+
self._bufferedCharacter = data[-1]
|
273 |
+
data = data[:-1]
|
274 |
+
|
275 |
+
if self.reportCharacterErrors:
|
276 |
+
self.reportCharacterErrors(data)
|
277 |
+
|
278 |
+
# Replace invalid characters
|
279 |
+
data = data.replace("\r\n", "\n")
|
280 |
+
data = data.replace("\r", "\n")
|
281 |
+
|
282 |
+
self.chunk = data
|
283 |
+
self.chunkSize = len(data)
|
284 |
+
|
285 |
+
return True
|
286 |
+
|
287 |
+
def characterErrorsUCS4(self, data):
|
288 |
+
for _ in range(len(invalid_unicode_re.findall(data))):
|
289 |
+
self.errors.append("invalid-codepoint")
|
290 |
+
|
291 |
+
def characterErrorsUCS2(self, data):
|
292 |
+
# Someone picked the wrong compile option
|
293 |
+
# You lose
|
294 |
+
skip = False
|
295 |
+
for match in invalid_unicode_re.finditer(data):
|
296 |
+
if skip:
|
297 |
+
continue
|
298 |
+
codepoint = ord(match.group())
|
299 |
+
pos = match.start()
|
300 |
+
# Pretty sure there should be endianness issues here
|
301 |
+
if _utils.isSurrogatePair(data[pos:pos + 2]):
|
302 |
+
# We have a surrogate pair!
|
303 |
+
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
304 |
+
if char_val in non_bmp_invalid_codepoints:
|
305 |
+
self.errors.append("invalid-codepoint")
|
306 |
+
skip = True
|
307 |
+
elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
|
308 |
+
pos == len(data) - 1):
|
309 |
+
self.errors.append("invalid-codepoint")
|
310 |
+
else:
|
311 |
+
skip = False
|
312 |
+
self.errors.append("invalid-codepoint")
|
313 |
+
|
314 |
+
def charsUntil(self, characters, opposite=False):
|
315 |
+
""" Returns a string of characters from the stream up to but not
|
316 |
+
including any character in 'characters' or EOF. 'characters' must be
|
317 |
+
a container that supports the 'in' method and iteration over its
|
318 |
+
characters.
|
319 |
+
"""
|
320 |
+
|
321 |
+
# Use a cache of regexps to find the required characters
|
322 |
+
try:
|
323 |
+
chars = charsUntilRegEx[(characters, opposite)]
|
324 |
+
except KeyError:
|
325 |
+
if __debug__:
|
326 |
+
for c in characters:
|
327 |
+
assert(ord(c) < 128)
|
328 |
+
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
329 |
+
if not opposite:
|
330 |
+
regex = "^%s" % regex
|
331 |
+
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
|
332 |
+
|
333 |
+
rv = []
|
334 |
+
|
335 |
+
while True:
|
336 |
+
# Find the longest matching prefix
|
337 |
+
m = chars.match(self.chunk, self.chunkOffset)
|
338 |
+
if m is None:
|
339 |
+
# If nothing matched, and it wasn't because we ran out of chunk,
|
340 |
+
# then stop
|
341 |
+
if self.chunkOffset != self.chunkSize:
|
342 |
+
break
|
343 |
+
else:
|
344 |
+
end = m.end()
|
345 |
+
# If not the whole chunk matched, return everything
|
346 |
+
# up to the part that didn't match
|
347 |
+
if end != self.chunkSize:
|
348 |
+
rv.append(self.chunk[self.chunkOffset:end])
|
349 |
+
self.chunkOffset = end
|
350 |
+
break
|
351 |
+
# If the whole remainder of the chunk matched,
|
352 |
+
# use it all and read the next chunk
|
353 |
+
rv.append(self.chunk[self.chunkOffset:])
|
354 |
+
if not self.readChunk():
|
355 |
+
# Reached EOF
|
356 |
+
break
|
357 |
+
|
358 |
+
r = "".join(rv)
|
359 |
+
return r
|
360 |
+
|
361 |
+
def unget(self, char):
|
362 |
+
# Only one character is allowed to be ungotten at once - it must
|
363 |
+
# be consumed again before any further call to unget
|
364 |
+
if char is not EOF:
|
365 |
+
if self.chunkOffset == 0:
|
366 |
+
# unget is called quite rarely, so it's a good idea to do
|
367 |
+
# more work here if it saves a bit of work in the frequently
|
368 |
+
# called char and charsUntil.
|
369 |
+
# So, just prepend the ungotten character onto the current
|
370 |
+
# chunk:
|
371 |
+
self.chunk = char + self.chunk
|
372 |
+
self.chunkSize += 1
|
373 |
+
else:
|
374 |
+
self.chunkOffset -= 1
|
375 |
+
assert self.chunk[self.chunkOffset] == char
|
376 |
+
|
377 |
+
|
378 |
+
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
379 |
+
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
380 |
+
|
381 |
+
This class takes care of character encoding and removing or replacing
|
382 |
+
incorrect byte-sequences and also provides column and line tracking.
|
383 |
+
|
384 |
+
"""
|
385 |
+
|
386 |
+
def __init__(self, source, override_encoding=None, transport_encoding=None,
|
387 |
+
same_origin_parent_encoding=None, likely_encoding=None,
|
388 |
+
default_encoding="windows-1252", useChardet=True):
|
389 |
+
"""Initialises the HTMLInputStream.
|
390 |
+
|
391 |
+
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
392 |
+
for use by html5lib.
|
393 |
+
|
394 |
+
source can be either a file-object, local filename or a string.
|
395 |
+
|
396 |
+
The optional encoding parameter must be a string that indicates
|
397 |
+
the encoding. If specified, that encoding will be used,
|
398 |
+
regardless of any BOM or later declaration (such as in a meta
|
399 |
+
element)
|
400 |
+
|
401 |
+
"""
|
402 |
+
# Raw Stream - for unicode objects this will encode to utf-8 and set
|
403 |
+
# self.charEncoding as appropriate
|
404 |
+
self.rawStream = self.openStream(source)
|
405 |
+
|
406 |
+
HTMLUnicodeInputStream.__init__(self, self.rawStream)
|
407 |
+
|
408 |
+
# Encoding Information
|
409 |
+
# Number of bytes to use when looking for a meta element with
|
410 |
+
# encoding information
|
411 |
+
self.numBytesMeta = 1024
|
412 |
+
# Number of bytes to use when using detecting encoding using chardet
|
413 |
+
self.numBytesChardet = 100
|
414 |
+
# Things from args
|
415 |
+
self.override_encoding = override_encoding
|
416 |
+
self.transport_encoding = transport_encoding
|
417 |
+
self.same_origin_parent_encoding = same_origin_parent_encoding
|
418 |
+
self.likely_encoding = likely_encoding
|
419 |
+
self.default_encoding = default_encoding
|
420 |
+
|
421 |
+
# Determine encoding
|
422 |
+
self.charEncoding = self.determineEncoding(useChardet)
|
423 |
+
assert self.charEncoding[0] is not None
|
424 |
+
|
425 |
+
# Call superclass
|
426 |
+
self.reset()
|
427 |
+
|
428 |
+
def reset(self):
|
429 |
+
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
|
430 |
+
HTMLUnicodeInputStream.reset(self)
|
431 |
+
|
432 |
+
def openStream(self, source):
|
433 |
+
"""Produces a file object from source.
|
434 |
+
|
435 |
+
source can be either a file object, local filename or a string.
|
436 |
+
|
437 |
+
"""
|
438 |
+
# Already a file object
|
439 |
+
if hasattr(source, 'read'):
|
440 |
+
stream = source
|
441 |
+
else:
|
442 |
+
stream = BytesIO(source)
|
443 |
+
|
444 |
+
try:
|
445 |
+
stream.seek(stream.tell())
|
446 |
+
except Exception:
|
447 |
+
stream = BufferedStream(stream)
|
448 |
+
|
449 |
+
return stream
|
450 |
+
|
451 |
+
def determineEncoding(self, chardet=True):
|
452 |
+
# BOMs take precedence over everything
|
453 |
+
# This will also read past the BOM if present
|
454 |
+
charEncoding = self.detectBOM(), "certain"
|
455 |
+
if charEncoding[0] is not None:
|
456 |
+
return charEncoding
|
457 |
+
|
458 |
+
# If we've been overridden, we've been overridden
|
459 |
+
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
460 |
+
if charEncoding[0] is not None:
|
461 |
+
return charEncoding
|
462 |
+
|
463 |
+
# Now check the transport layer
|
464 |
+
charEncoding = lookupEncoding(self.transport_encoding), "certain"
|
465 |
+
if charEncoding[0] is not None:
|
466 |
+
return charEncoding
|
467 |
+
|
468 |
+
# Look for meta elements with encoding information
|
469 |
+
charEncoding = self.detectEncodingMeta(), "tentative"
|
470 |
+
if charEncoding[0] is not None:
|
471 |
+
return charEncoding
|
472 |
+
|
473 |
+
# Parent document encoding
|
474 |
+
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
|
475 |
+
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
|
476 |
+
return charEncoding
|
477 |
+
|
478 |
+
# "likely" encoding
|
479 |
+
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
|
480 |
+
if charEncoding[0] is not None:
|
481 |
+
return charEncoding
|
482 |
+
|
483 |
+
# Guess with chardet, if available
|
484 |
+
if chardet:
|
485 |
+
try:
|
486 |
+
from chardet.universaldetector import UniversalDetector
|
487 |
+
except ImportError:
|
488 |
+
pass
|
489 |
+
else:
|
490 |
+
buffers = []
|
491 |
+
detector = UniversalDetector()
|
492 |
+
while not detector.done:
|
493 |
+
buffer = self.rawStream.read(self.numBytesChardet)
|
494 |
+
assert isinstance(buffer, bytes)
|
495 |
+
if not buffer:
|
496 |
+
break
|
497 |
+
buffers.append(buffer)
|
498 |
+
detector.feed(buffer)
|
499 |
+
detector.close()
|
500 |
+
encoding = lookupEncoding(detector.result['encoding'])
|
501 |
+
self.rawStream.seek(0)
|
502 |
+
if encoding is not None:
|
503 |
+
return encoding, "tentative"
|
504 |
+
|
505 |
+
# Try the default encoding
|
506 |
+
charEncoding = lookupEncoding(self.default_encoding), "tentative"
|
507 |
+
if charEncoding[0] is not None:
|
508 |
+
return charEncoding
|
509 |
+
|
510 |
+
# Fallback to html5lib's default if even that hasn't worked
|
511 |
+
return lookupEncoding("windows-1252"), "tentative"
|
512 |
+
|
513 |
+
def changeEncoding(self, newEncoding):
|
514 |
+
assert self.charEncoding[1] != "certain"
|
515 |
+
newEncoding = lookupEncoding(newEncoding)
|
516 |
+
if newEncoding is None:
|
517 |
+
return
|
518 |
+
if newEncoding.name in ("utf-16be", "utf-16le"):
|
519 |
+
newEncoding = lookupEncoding("utf-8")
|
520 |
+
assert newEncoding is not None
|
521 |
+
elif newEncoding == self.charEncoding[0]:
|
522 |
+
self.charEncoding = (self.charEncoding[0], "certain")
|
523 |
+
else:
|
524 |
+
self.rawStream.seek(0)
|
525 |
+
self.charEncoding = (newEncoding, "certain")
|
526 |
+
self.reset()
|
527 |
+
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
528 |
+
|
529 |
+
def detectBOM(self):
|
530 |
+
"""Attempts to detect at BOM at the start of the stream. If
|
531 |
+
an encoding can be determined from the BOM return the name of the
|
532 |
+
encoding otherwise return None"""
|
533 |
+
bomDict = {
|
534 |
+
codecs.BOM_UTF8: 'utf-8',
|
535 |
+
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
|
536 |
+
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
|
537 |
+
}
|
538 |
+
|
539 |
+
# Go to beginning of file and read in 4 bytes
|
540 |
+
string = self.rawStream.read(4)
|
541 |
+
assert isinstance(string, bytes)
|
542 |
+
|
543 |
+
# Try detecting the BOM using bytes from the string
|
544 |
+
encoding = bomDict.get(string[:3]) # UTF-8
|
545 |
+
seek = 3
|
546 |
+
if not encoding:
|
547 |
+
# Need to detect UTF-32 before UTF-16
|
548 |
+
encoding = bomDict.get(string) # UTF-32
|
549 |
+
seek = 4
|
550 |
+
if not encoding:
|
551 |
+
encoding = bomDict.get(string[:2]) # UTF-16
|
552 |
+
seek = 2
|
553 |
+
|
554 |
+
# Set the read position past the BOM if one was found, otherwise
|
555 |
+
# set it to the start of the stream
|
556 |
+
if encoding:
|
557 |
+
self.rawStream.seek(seek)
|
558 |
+
return lookupEncoding(encoding)
|
559 |
+
else:
|
560 |
+
self.rawStream.seek(0)
|
561 |
+
return None
|
562 |
+
|
563 |
+
def detectEncodingMeta(self):
|
564 |
+
"""Report the encoding declared by the meta element
|
565 |
+
"""
|
566 |
+
buffer = self.rawStream.read(self.numBytesMeta)
|
567 |
+
assert isinstance(buffer, bytes)
|
568 |
+
parser = EncodingParser(buffer)
|
569 |
+
self.rawStream.seek(0)
|
570 |
+
encoding = parser.getEncoding()
|
571 |
+
|
572 |
+
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
|
573 |
+
encoding = lookupEncoding("utf-8")
|
574 |
+
|
575 |
+
return encoding
|
576 |
+
|
577 |
+
|
578 |
+
class EncodingBytes(bytes):
|
579 |
+
"""String-like object with an associated position and various extra methods
|
580 |
+
If the position is ever greater than the string length then an exception is
|
581 |
+
raised"""
|
582 |
+
def __new__(self, value):
|
583 |
+
assert isinstance(value, bytes)
|
584 |
+
return bytes.__new__(self, value.lower())
|
585 |
+
|
586 |
+
def __init__(self, value):
|
587 |
+
# pylint:disable=unused-argument
|
588 |
+
self._position = -1
|
589 |
+
|
590 |
+
def __iter__(self):
|
591 |
+
return self
|
592 |
+
|
593 |
+
def __next__(self):
|
594 |
+
p = self._position = self._position + 1
|
595 |
+
if p >= len(self):
|
596 |
+
raise StopIteration
|
597 |
+
elif p < 0:
|
598 |
+
raise TypeError
|
599 |
+
return self[p:p + 1]
|
600 |
+
|
601 |
+
def next(self):
|
602 |
+
# Py2 compat
|
603 |
+
return self.__next__()
|
604 |
+
|
605 |
+
def previous(self):
|
606 |
+
p = self._position
|
607 |
+
if p >= len(self):
|
608 |
+
raise StopIteration
|
609 |
+
elif p < 0:
|
610 |
+
raise TypeError
|
611 |
+
self._position = p = p - 1
|
612 |
+
return self[p:p + 1]
|
613 |
+
|
614 |
+
def setPosition(self, position):
|
615 |
+
if self._position >= len(self):
|
616 |
+
raise StopIteration
|
617 |
+
self._position = position
|
618 |
+
|
619 |
+
def getPosition(self):
|
620 |
+
if self._position >= len(self):
|
621 |
+
raise StopIteration
|
622 |
+
if self._position >= 0:
|
623 |
+
return self._position
|
624 |
+
else:
|
625 |
+
return None
|
626 |
+
|
627 |
+
position = property(getPosition, setPosition)
|
628 |
+
|
629 |
+
def getCurrentByte(self):
|
630 |
+
return self[self.position:self.position + 1]
|
631 |
+
|
632 |
+
currentByte = property(getCurrentByte)
|
633 |
+
|
634 |
+
def skip(self, chars=spaceCharactersBytes):
|
635 |
+
"""Skip past a list of characters"""
|
636 |
+
p = self.position # use property for the error-checking
|
637 |
+
while p < len(self):
|
638 |
+
c = self[p:p + 1]
|
639 |
+
if c not in chars:
|
640 |
+
self._position = p
|
641 |
+
return c
|
642 |
+
p += 1
|
643 |
+
self._position = p
|
644 |
+
return None
|
645 |
+
|
646 |
+
def skipUntil(self, chars):
|
647 |
+
p = self.position
|
648 |
+
while p < len(self):
|
649 |
+
c = self[p:p + 1]
|
650 |
+
if c in chars:
|
651 |
+
self._position = p
|
652 |
+
return c
|
653 |
+
p += 1
|
654 |
+
self._position = p
|
655 |
+
return None
|
656 |
+
|
657 |
+
def matchBytes(self, bytes):
|
658 |
+
"""Look for a sequence of bytes at the start of a string. If the bytes
|
659 |
+
are found return True and advance the position to the byte after the
|
660 |
+
match. Otherwise return False and leave the position alone"""
|
661 |
+
rv = self.startswith(bytes, self.position)
|
662 |
+
if rv:
|
663 |
+
self.position += len(bytes)
|
664 |
+
return rv
|
665 |
+
|
666 |
+
def jumpTo(self, bytes):
|
667 |
+
"""Look for the next sequence of bytes matching a given sequence. If
|
668 |
+
a match is found advance the position to the last byte of the match"""
|
669 |
+
try:
|
670 |
+
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
671 |
+
except ValueError:
|
672 |
+
raise StopIteration
|
673 |
+
return True
|
674 |
+
|
675 |
+
|
676 |
+
class EncodingParser(object):
|
677 |
+
"""Mini parser for detecting character encoding from meta elements"""
|
678 |
+
|
679 |
+
def __init__(self, data):
|
680 |
+
"""string - the data to work on for encoding detection"""
|
681 |
+
self.data = EncodingBytes(data)
|
682 |
+
self.encoding = None
|
683 |
+
|
684 |
+
def getEncoding(self):
|
685 |
+
if b"<meta" not in self.data:
|
686 |
+
return None
|
687 |
+
|
688 |
+
methodDispatch = (
|
689 |
+
(b"<!--", self.handleComment),
|
690 |
+
(b"<meta", self.handleMeta),
|
691 |
+
(b"</", self.handlePossibleEndTag),
|
692 |
+
(b"<!", self.handleOther),
|
693 |
+
(b"<?", self.handleOther),
|
694 |
+
(b"<", self.handlePossibleStartTag))
|
695 |
+
for _ in self.data:
|
696 |
+
keepParsing = True
|
697 |
+
try:
|
698 |
+
self.data.jumpTo(b"<")
|
699 |
+
except StopIteration:
|
700 |
+
break
|
701 |
+
for key, method in methodDispatch:
|
702 |
+
                if self.data.matchBytes(key):
                    try:
                        keepParsing = method()
                        break
                    except StopIteration:
                        keepParsing = False
                        break
            if not keepParsing:
                break

        return self.encoding

    def handleComment(self):
        """Skip over comments"""
        return self.data.jumpTo(b"-->")

    def handleMeta(self):
        if self.data.currentByte not in spaceCharactersBytes:
            # if we have <meta not followed by a space so just keep going
            return True
        # We have a valid meta element we want to search for attributes
        hasPragma = False
        pendingEncoding = None
        while True:
            # Try to find the next attribute after the current position
            attr = self.getAttribute()
            if attr is None:
                return True
            else:
                if attr[0] == b"http-equiv":
                    hasPragma = attr[1] == b"content-type"
                    if hasPragma and pendingEncoding is not None:
                        self.encoding = pendingEncoding
                        return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
                elif attr[0] == b"content":
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = lookupEncoding(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
                                return False
                            else:
                                pendingEncoding = codec

    def handlePossibleStartTag(self):
        return self.handlePossibleTag(False)

    def handlePossibleEndTag(self):
        next(self.data)
        return self.handlePossibleTag(True)

    def handlePossibleTag(self, endTag):
        data = self.data
        if data.currentByte not in asciiLettersBytes:
            # If the next byte is not an ascii letter either ignore this
            # fragment (possible start tag case) or treat it according to
            # handleOther
            if endTag:
                data.previous()
                self.handleOther()
            return True

        c = data.skipUntil(spacesAngleBrackets)
        if c == b"<":
            # return to the first step in the overall "two step" algorithm
            # reprocessing the < byte
            data.previous()
        else:
            # Read all attributes
            attr = self.getAttribute()
            while attr is not None:
                attr = self.getAttribute()
        return True

    def handleOther(self):
        return self.data.jumpTo(b">")

    def getAttribute(self):
        """Return a name,value pair for the next attribute in the stream,
        if one is found, or None"""
        data = self.data
        # Step 1 (skip chars)
        c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
        assert c is None or len(c) == 1
        # Step 2
        if c in (b">", None):
            return None
        # Step 3
        attrName = []
        attrValue = []
        # Step 4 attribute name
        while True:
            if c == b"=" and attrName:
                break
            elif c in spaceCharactersBytes:
                # Step 6!
                c = data.skip()
                break
            elif c in (b"/", b">"):
                return b"".join(attrName), b""
            elif c in asciiUppercaseBytes:
                attrName.append(c.lower())
            elif c is None:
                return None
            else:
                attrName.append(c)
            # Step 5
            c = next(data)
        # Step 7
        if c != b"=":
            data.previous()
            return b"".join(attrName), b""
        # Step 8
        next(data)
        # Step 9
        c = data.skip()
        # Step 10
        if c in (b"'", b'"'):
            # 10.1
            quoteChar = c
            while True:
                # 10.2
                c = next(data)
                # 10.3
                if c == quoteChar:
                    next(data)
                    return b"".join(attrName), b"".join(attrValue)
                # 10.4
                elif c in asciiUppercaseBytes:
                    attrValue.append(c.lower())
                # 10.5
                else:
                    attrValue.append(c)
        elif c == b">":
            return b"".join(attrName), b""
        elif c in asciiUppercaseBytes:
            attrValue.append(c.lower())
        elif c is None:
            return None
        else:
            attrValue.append(c)
        # Step 11
        while True:
            c = next(data)
            if c in spacesAngleBrackets:
                return b"".join(attrName), b"".join(attrValue)
            elif c in asciiUppercaseBytes:
                attrValue.append(c.lower())
            elif c is None:
                return None
            else:
                attrValue.append(c)


class ContentAttrParser(object):
    def __init__(self, data):
        assert isinstance(data, bytes)
        self.data = data

    def parse(self):
        try:
            # Check if the attr name is charset
            # otherwise return
            self.data.jumpTo(b"charset")
            self.data.position += 1
            self.data.skip()
            if not self.data.currentByte == b"=":
                # If there is no = sign keep looking for attrs
                return None
            self.data.position += 1
            self.data.skip()
            # Look for an encoding between matching quote marks
            if self.data.currentByte in (b'"', b"'"):
                quoteMark = self.data.currentByte
                self.data.position += 1
                oldPosition = self.data.position
                if self.data.jumpTo(quoteMark):
                    return self.data[oldPosition:self.data.position]
                else:
                    return None
            else:
                # Unquoted value
                oldPosition = self.data.position
                try:
                    self.data.skipUntil(spaceCharactersBytes)
                    return self.data[oldPosition:self.data.position]
                except StopIteration:
                    # Return the whole remaining value
                    return self.data[oldPosition:]
        except StopIteration:
            return None


def lookupEncoding(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None

    if encoding is not None:
        try:
            return webencodings.lookup(encoding)
        except AttributeError:
            return None
    else:
        return None
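The charset-sniffing helpers above (ContentAttrParser together with lookupEncoding) reduce to: find "charset=" inside a meta content attribute, take the quoted or space-delimited value that follows, and resolve that label through webencodings. Below is a minimal standalone sketch of the same idea using only the standard library; the function name sniff_meta_content is hypothetical and is not part of the vendored module.

import re

def sniff_meta_content(content):
    # Roughly mimic ContentAttrParser.parse() on a <meta content="..."> value.
    # Returns the declared label (e.g. "utf-8") or None. Illustrative sketch,
    # not the vendored implementation.
    m = re.search(r"""charset\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s;]+))""",
                  content, re.IGNORECASE)
    if not m:
        return None
    return next(group for group in m.groups() if group is not None)

print(sniff_meta_content("text/html; charset=UTF-8"))    # UTF-8
print(sniff_meta_content('text/html; charset="utf-8"'))  # utf-8
print(sniff_meta_content("text/html"))                   # None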
MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_tokenizer.py
ADDED
@@ -0,0 +1,1735 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

from collections import deque, OrderedDict
from sys import version_info

from .constants import spaceCharacters
from .constants import entities
from .constants import asciiLetters, asciiUpper2Lower
from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from ._inputstream import HTMLInputStream

from ._trie import Trie

entitiesTrie = Trie(entities)

if version_info >= (3, 7):
    attributeMap = dict
else:
    attributeMap = OrderedDict


class HTMLTokenizer(object):
    """ This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    """

    def __init__(self, stream, parser=None, **kwargs):

        self.stream = HTMLInputStream(stream, **kwargs)
        self.parser = parser

        # Setup the initial tokenizer state
        self.escapeFlag = False
        self.lastFourChars = []
        self.state = self.dataState
        self.escape = False

        # The current token being created
        self.currentToken = None
        super(HTMLTokenizer, self).__init__()

    def __iter__(self):
        """ This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        """
        self.tokenQueue = deque([])
        # Start processing. When EOF is reached self.state will return False
        # instead of True and the loop will terminate.
        while self.state():
            while self.stream.errors:
                yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
            while self.tokenQueue:
                yield self.tokenQueue.popleft()

    def consumeNumberEntity(self, isHex):
        """This function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        """

        allowed = digits
        radix = 10
        if isHex:
            allowed = hexDigits
            radix = 16

        charStack = []

        # Consume all the characters that are in range while making sure we
        # don't hit an EOF.
        c = self.stream.char()
        while c in allowed and c is not EOF:
            charStack.append(c)
            c = self.stream.char()

        # Convert the set of characters consumed to an int.
        charAsInt = int("".join(charStack), radix)

        # Certain characters get replaced with others
        if charAsInt in replacementCharacters:
            char = replacementCharacters[charAsInt]
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "illegal-codepoint-for-numeric-entity",
                                    "datavars": {"charAsInt": charAsInt}})
        elif ((0xD800 <= charAsInt <= 0xDFFF) or
              (charAsInt > 0x10FFFF)):
            char = "\uFFFD"
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "illegal-codepoint-for-numeric-entity",
                                    "datavars": {"charAsInt": charAsInt}})
        else:
            # Should speed up this check somehow (e.g. move the set to a constant)
            if ((0x0001 <= charAsInt <= 0x0008) or
                (0x000E <= charAsInt <= 0x001F) or
                (0x007F <= charAsInt <= 0x009F) or
                (0xFDD0 <= charAsInt <= 0xFDEF) or
                charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                        0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                        0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
                                        0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
                                        0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
                                        0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE,
                                        0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
                                        0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE,
                                        0xFFFFF, 0x10FFFE, 0x10FFFF])):
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data":
                                        "illegal-codepoint-for-numeric-entity",
                                        "datavars": {"charAsInt": charAsInt}})
            try:
                # Try/except needed as UCS-2 Python builds' unichar only works
                # within the BMP.
                char = chr(charAsInt)
            except ValueError:
                v = charAsInt - 0x10000
                char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF))

        # Discard the ; if present. Otherwise, put it back on the queue and
        # invoke parseError on parser.
        if c != ";":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "numeric-entity-without-semicolon"})
            self.stream.unget(c)

        return char

    def consumeEntity(self, allowedChar=None, fromAttribute=False):
        # Initialise to the default output for when no entity is matched
        output = "&"

        charStack = [self.stream.char()]
        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
                (allowedChar is not None and allowedChar == charStack[0])):
            self.stream.unget(charStack[0])

        elif charStack[0] == "#":
            # Read the next character to see if it's hex or decimal
            hex = False
            charStack.append(self.stream.char())
            if charStack[-1] in ("x", "X"):
                hex = True
                charStack.append(self.stream.char())

            # charStack[-1] should be the first digit
            if (hex and charStack[-1] in hexDigits) \
                    or (not hex and charStack[-1] in digits):
                # At least one digit found, so consume the whole number
                self.stream.unget(charStack[-1])
                output = self.consumeNumberEntity(hex)
            else:
                # No digits found
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data": "expected-numeric-entity"})
                self.stream.unget(charStack.pop())
                output = "&" + "".join(charStack)

        else:
            # At this point in the process might have named entity. Entities
            # are stored in the global variable "entities".
            #
            # Consume characters and compare to these to a substring of the
            # entity names in the list until the substring no longer matches.
            while (charStack[-1] is not EOF):
                if not entitiesTrie.has_keys_with_prefix("".join(charStack)):
                    break
                charStack.append(self.stream.char())

            # At this point we have a string that starts with some characters
            # that may match an entity
            # Try to find the longest entity the string will match to take care
            # of &noti for instance.
            try:
                entityName = entitiesTrie.longest_prefix("".join(charStack[:-1]))
                entityLength = len(entityName)
            except KeyError:
                entityName = None

            if entityName is not None:
                if entityName[-1] != ";":
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                            "named-entity-without-semicolon"})
                if (entityName[-1] != ";" and fromAttribute and
                    (charStack[entityLength] in asciiLetters or
                     charStack[entityLength] in digits or
                     charStack[entityLength] == "=")):
                    self.stream.unget(charStack.pop())
                    output = "&" + "".join(charStack)
                else:
                    output = entities[entityName]
                    self.stream.unget(charStack.pop())
                    output += "".join(charStack[entityLength:])
            else:
                self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                        "expected-named-entity"})
                self.stream.unget(charStack.pop())
                output = "&" + "".join(charStack)

        if fromAttribute:
            self.currentToken["data"][-1][1] += output
        else:
            if output in spaceCharacters:
                tokenType = "SpaceCharacters"
            else:
                tokenType = "Characters"
            self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})

    def processEntityInAttribute(self, allowedChar):
        """This method replaces the need for "entityInAttributeValueState".
        """
        self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)

    def emitCurrentToken(self):
        """This method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        """
        token = self.currentToken
        # Add token to the queue to be yielded
        if (token["type"] in tagTokenTypes):
            token["name"] = token["name"].translate(asciiUpper2Lower)
            if token["type"] == tokenTypes["StartTag"]:
                raw = token["data"]
                data = attributeMap(raw)
                if len(raw) > len(data):
                    # we had some duplicated attribute, fix so first wins
                    data.update(raw[::-1])
                token["data"] = data

            if token["type"] == tokenTypes["EndTag"]:
                if token["data"]:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                            "data": "attributes-in-end-tag"})
                if token["selfClosing"]:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                            "data": "self-closing-flag-on-end-tag"})
        self.tokenQueue.append(token)
        self.state = self.dataState

    # Below are the various tokenizer states worked out.
    def dataState(self):
        data = self.stream.char()
        if data == "&":
            self.state = self.entityDataState
        elif data == "<":
            self.state = self.tagOpenState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\u0000"})
        elif data is EOF:
            # Tokenization ends.
            return False
        elif data in spaceCharacters:
            # Directly after emitting a token you switch back to the "data
            # state". At that point spaceCharacters are important so they are
            # emitted separately.
            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
                                    data + self.stream.charsUntil(spaceCharacters, True)})
            # No need to update lastFourChars here, since the first space will
            # have already been appended to lastFourChars and will have broken
            # any <!-- or --> sequences
        else:
            chars = self.stream.charsUntil(("&", "<", "\u0000"))
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + chars})
        return True

    def entityDataState(self):
        self.consumeEntity()
        self.state = self.dataState
        return True

    def rcdataState(self):
        data = self.stream.char()
        if data == "&":
            self.state = self.characterReferenceInRcdata
        elif data == "<":
            self.state = self.rcdataLessThanSignState
        elif data == EOF:
            # Tokenization ends.
            return False
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        elif data in spaceCharacters:
            # Directly after emitting a token you switch back to the "data
            # state". At that point spaceCharacters are important so they are
            # emitted separately.
            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
                                    data + self.stream.charsUntil(spaceCharacters, True)})
            # No need to update lastFourChars here, since the first space will
            # have already been appended to lastFourChars and will have broken
            # any <!-- or --> sequences
        else:
            chars = self.stream.charsUntil(("&", "<", "\u0000"))
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + chars})
        return True

    def characterReferenceInRcdata(self):
        self.consumeEntity()
        self.state = self.rcdataState
        return True

    def rawtextState(self):
        data = self.stream.char()
        if data == "<":
            self.state = self.rawtextLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        elif data == EOF:
            # Tokenization ends.
            return False
        else:
            chars = self.stream.charsUntil(("<", "\u0000"))
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + chars})
        return True

    def scriptDataState(self):
        data = self.stream.char()
        if data == "<":
            self.state = self.scriptDataLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        elif data == EOF:
            # Tokenization ends.
            return False
        else:
            chars = self.stream.charsUntil(("<", "\u0000"))
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + chars})
        return True

    def plaintextState(self):
        data = self.stream.char()
        if data == EOF:
            # Tokenization ends.
            return False
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + self.stream.charsUntil("\u0000")})
        return True

    def tagOpenState(self):
        data = self.stream.char()
        if data == "!":
            self.state = self.markupDeclarationOpenState
        elif data == "/":
            self.state = self.closeTagOpenState
        elif data in asciiLetters:
            self.currentToken = {"type": tokenTypes["StartTag"],
                                 "name": data, "data": [],
                                 "selfClosing": False,
                                 "selfClosingAcknowledged": False}
            self.state = self.tagNameState
        elif data == ">":
            # XXX In theory it could be something besides a tag name. But
            # do we really care?
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-tag-name-but-got-right-bracket"})
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
            self.state = self.dataState
        elif data == "?":
            # XXX In theory it could be something besides a tag name. But
            # do we really care?
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-tag-name-but-got-question-mark"})
            self.stream.unget(data)
            self.state = self.bogusCommentState
        else:
            # XXX
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-tag-name"})
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.stream.unget(data)
            self.state = self.dataState
        return True

    def closeTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
                                 "data": [], "selfClosing": False}
            self.state = self.tagNameState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-closing-tag-but-got-right-bracket"})
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-closing-tag-but-got-eof"})
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
            self.state = self.dataState
        else:
            # XXX data can be _'_...
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-closing-tag-but-got-char",
                                    "datavars": {"data": data}})
            self.stream.unget(data)
            self.state = self.bogusCommentState
        return True

    def tagNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == ">":
            self.emitCurrentToken()
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-tag-name"})
            self.state = self.dataState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["name"] += "\uFFFD"
        else:
            self.currentToken["name"] += data
            # (Don't use charsUntil here, because tag names are
            # very short and it's faster to not do anything fancy)
        return True

    def rcdataLessThanSignState(self):
        data = self.stream.char()
        if data == "/":
            self.temporaryBuffer = ""
            self.state = self.rcdataEndTagOpenState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rcdataEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.rcdataEndTagNameState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rcdataEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.beforeAttributeNameState
        elif data == "/" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.selfClosingStartTagState
        elif data == ">" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "</" + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.rcdataState
        return True

    def rawtextLessThanSignState(self):
        data = self.stream.char()
        if data == "/":
            self.temporaryBuffer = ""
            self.state = self.rawtextEndTagOpenState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def rawtextEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.rawtextEndTagNameState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def rawtextEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.beforeAttributeNameState
        elif data == "/" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.selfClosingStartTagState
        elif data == ">" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "</" + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.rawtextState
        return True

    def scriptDataLessThanSignState(self):
        data = self.stream.char()
        if data == "/":
            self.temporaryBuffer = ""
            self.state = self.scriptDataEndTagOpenState
        elif data == "!":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"})
            self.state = self.scriptDataEscapeStartState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer += data
            self.state = self.scriptDataEndTagNameState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.beforeAttributeNameState
        elif data == "/" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.selfClosingStartTagState
        elif data == ">" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "</" + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapeStartState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataEscapeStartDashState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapeStartDashState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataEscapedDashDashState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataState
        return True

    def scriptDataEscapedState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataEscapedDashState
        elif data == "<":
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        elif data == EOF:
            self.state = self.dataState
        else:
            chars = self.stream.charsUntil(("<", "-", "\u0000"))
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
                                    data + chars})
        return True

    def scriptDataEscapedDashState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataEscapedDashDashState
        elif data == "<":
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
            self.state = self.scriptDataEscapedState
        elif data == EOF:
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedDashDashState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
        elif data == "<":
            self.state = self.scriptDataEscapedLessThanSignState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
            self.state = self.scriptDataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
            self.state = self.scriptDataEscapedState
        elif data == EOF:
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedLessThanSignState(self):
        data = self.stream.char()
        if data == "/":
            self.temporaryBuffer = ""
            self.state = self.scriptDataEscapedEndTagOpenState
        elif data in asciiLetters:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data})
            self.temporaryBuffer = data
            self.state = self.scriptDataDoubleEscapeStartState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedEndTagOpenState(self):
        data = self.stream.char()
        if data in asciiLetters:
            self.temporaryBuffer = data
            self.state = self.scriptDataEscapedEndTagNameState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataEscapedEndTagNameState(self):
        appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
        data = self.stream.char()
        if data in spaceCharacters and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.beforeAttributeNameState
        elif data == "/" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.state = self.selfClosingStartTagState
        elif data == ">" and appropriate:
            self.currentToken = {"type": tokenTypes["EndTag"],
                                 "name": self.temporaryBuffer,
                                 "data": [], "selfClosing": False}
            self.emitCurrentToken()
            self.state = self.dataState
        elif data in asciiLetters:
            self.temporaryBuffer += data
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "</" + self.temporaryBuffer})
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataDoubleEscapeStartState(self):
        data = self.stream.char()
        if data in (spaceCharacters | frozenset(("/", ">"))):
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            if self.temporaryBuffer.lower() == "script":
                self.state = self.scriptDataDoubleEscapedState
            else:
                self.state = self.scriptDataEscapedState
        elif data in asciiLetters:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.temporaryBuffer += data
        else:
            self.stream.unget(data)
            self.state = self.scriptDataEscapedState
        return True

    def scriptDataDoubleEscapedState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataDoubleEscapedDashState
        elif data == "<":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
        elif data == EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-script-in-script"})
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
        return True

    def scriptDataDoubleEscapedDashState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
            self.state = self.scriptDataDoubleEscapedDashDashState
        elif data == "<":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
            self.state = self.scriptDataDoubleEscapedState
        elif data == EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-script-in-script"})
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapedDashDashState(self):
        data = self.stream.char()
        if data == "-":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
        elif data == "<":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
            self.state = self.scriptDataDoubleEscapedLessThanSignState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"})
            self.state = self.scriptDataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": "\uFFFD"})
            self.state = self.scriptDataDoubleEscapedState
        elif data == EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-script-in-script"})
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapedLessThanSignState(self):
        data = self.stream.char()
        if data == "/":
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"})
            self.temporaryBuffer = ""
            self.state = self.scriptDataDoubleEscapeEndState
        else:
            self.stream.unget(data)
            self.state = self.scriptDataDoubleEscapedState
        return True

    def scriptDataDoubleEscapeEndState(self):
        data = self.stream.char()
        if data in (spaceCharacters | frozenset(("/", ">"))):
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            if self.temporaryBuffer.lower() == "script":
                self.state = self.scriptDataEscapedState
            else:
                self.state = self.scriptDataDoubleEscapedState
        elif data in asciiLetters:
            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
            self.temporaryBuffer += data
        else:
            self.stream.unget(data)
            self.state = self.scriptDataDoubleEscapedState
        return True

    def beforeAttributeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data in asciiLetters:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == ">":
            self.emitCurrentToken()
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data in ("'", '"', "=", "<"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "invalid-character-in-attribute-name"})
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"].append(["\uFFFD", ""])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-name-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        return True

    def attributeNameState(self):
        data = self.stream.char()
        leavingThisState = True
        emitToken = False
        if data == "=":
            self.state = self.beforeAttributeValueState
        elif data in asciiLetters:
            self.currentToken["data"][-1][0] += data +\
                self.stream.charsUntil(asciiLetters, True)
            leavingThisState = False
        elif data == ">":
            # XXX If we emit here the attributes are converted to a dict
            # without being checked and when the code below runs we error
            # because data is a dict not a list
            emitToken = True
        elif data in spaceCharacters:
            self.state = self.afterAttributeNameState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][0] += "\uFFFD"
            leavingThisState = False
        elif data in ("'", '"', "<"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data":
                                    "invalid-character-in-attribute-name"})
            self.currentToken["data"][-1][0] += data
            leavingThisState = False
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "eof-in-attribute-name"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][0] += data
            leavingThisState = False

        if leavingThisState:
            # Attributes are not dropped at this stage. That happens when the
            # start tag token is emitted so values can still be safely appended
            # to attributes, but we do want to report the parse error in time.
            self.currentToken["data"][-1][0] = (
                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
            for name, _ in self.currentToken["data"][:-1]:
                if self.currentToken["data"][-1][0] == name:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                            "duplicate-attribute"})
                    break
            # XXX Fix for above XXX
            if emitToken:
                self.emitCurrentToken()
        return True

    def afterAttributeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "=":
            self.state = self.beforeAttributeValueState
        elif data == ">":
            self.emitCurrentToken()
        elif data in asciiLetters:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"].append(["\uFFFD", ""])
            self.state = self.attributeNameState
        elif data in ("'", '"', "<"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "invalid-character-after-attribute-name"})
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-end-of-tag-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"].append([data, ""])
            self.state = self.attributeNameState
        return True

    def beforeAttributeValueState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.stream.charsUntil(spaceCharacters, True)
        elif data == "\"":
            self.state = self.attributeValueDoubleQuotedState
        elif data == "&":
            self.state = self.attributeValueUnQuotedState
            self.stream.unget(data)
        elif data == "'":
            self.state = self.attributeValueSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-right-bracket"})
            self.emitCurrentToken()
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
            self.state = self.attributeValueUnQuotedState
        elif data in ("=", "<", "`"):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "equals-in-unquoted-attribute-value"})
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-attribute-value-but-got-eof"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][1] += data
            self.state = self.attributeValueUnQuotedState
        return True

    def attributeValueDoubleQuotedState(self):
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterAttributeValueState
        elif data == "&":
            self.processEntityInAttribute('"')
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-double-quote"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("\"", "&", "\u0000"))
        return True

    def attributeValueSingleQuotedState(self):
        data = self.stream.char()
        if data == "'":
            self.state = self.afterAttributeValueState
        elif data == "&":
            self.processEntityInAttribute("'")
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"][-1][1] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-attribute-value-single-quote"})
            self.state = self.dataState
        else:
            self.currentToken["data"][-1][1] += data +\
                self.stream.charsUntil(("'", "&", "\u0000"))
        return True

def attributeValueUnQuotedState(self):
|
1046 |
+
data = self.stream.char()
|
1047 |
+
if data in spaceCharacters:
|
1048 |
+
self.state = self.beforeAttributeNameState
|
1049 |
+
elif data == "&":
|
1050 |
+
self.processEntityInAttribute(">")
|
1051 |
+
elif data == ">":
|
1052 |
+
self.emitCurrentToken()
|
1053 |
+
elif data in ('"', "'", "=", "<", "`"):
|
1054 |
+
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
|
1055 |
+
"unexpected-character-in-unquoted-attribute-value"})
|
1056 |
+
self.currentToken["data"][-1][1] += data
|
1057 |
+
elif data == "\u0000":
|
1058 |
+
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
1059 |
+
"data": "invalid-codepoint"})
|
1060 |
+
self.currentToken["data"][-1][1] += "\uFFFD"
|
1061 |
+
elif data is EOF:
|
1062 |
+
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
|
1063 |
+
"eof-in-attribute-value-no-quotes"})
|
1064 |
+
self.state = self.dataState
|
1065 |
+
else:
|
1066 |
+
self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
|
1067 |
+
frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters)
|
1068 |
+
return True
|
1069 |
+
|
1070 |
+
    def afterAttributeValueState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeAttributeNameState
        elif data == ">":
            self.emitCurrentToken()
        elif data == "/":
            self.state = self.selfClosingStartTagState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-EOF-after-attribute-value"})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-after-attribute-value"})
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True

    def selfClosingStartTagState(self):
        data = self.stream.char()
        if data == ">":
            self.currentToken["selfClosing"] = True
            self.emitCurrentToken()
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data":
                                    "unexpected-EOF-after-solidus-in-tag"})
            self.stream.unget(data)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-character-after-solidus-in-tag"})
            self.stream.unget(data)
            self.state = self.beforeAttributeNameState
        return True

    def bogusCommentState(self):
        # Make a new comment token and give it as value all the characters
        # until the first > or EOF (charsUntil checks for EOF automatically)
        # and emit it.
        data = self.stream.charsUntil(">")
        data = data.replace("\u0000", "\uFFFD")
        self.tokenQueue.append(
            {"type": tokenTypes["Comment"], "data": data})

        # Eat the character directly after the bogus comment which is either a
        # ">" or an EOF.
        self.stream.char()
        self.state = self.dataState
        return True

    def markupDeclarationOpenState(self):
        charStack = [self.stream.char()]
        if charStack[-1] == "-":
            charStack.append(self.stream.char())
            if charStack[-1] == "-":
                self.currentToken = {"type": tokenTypes["Comment"], "data": ""}
                self.state = self.commentStartState
                return True
        elif charStack[-1] in ('d', 'D'):
            matched = True
            for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'),
                             ('y', 'Y'), ('p', 'P'), ('e', 'E')):
                charStack.append(self.stream.char())
                if charStack[-1] not in expected:
                    matched = False
                    break
            if matched:
                self.currentToken = {"type": tokenTypes["Doctype"],
                                     "name": "",
                                     "publicId": None, "systemId": None,
                                     "correct": True}
                self.state = self.doctypeState
                return True
        elif (charStack[-1] == "[" and
              self.parser is not None and
              self.parser.tree.openElements and
              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
            matched = True
            for expected in ["C", "D", "A", "T", "A", "["]:
                charStack.append(self.stream.char())
                if charStack[-1] != expected:
                    matched = False
                    break
            if matched:
                self.state = self.cdataSectionState
                return True

        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                "expected-dashes-or-doctype"})

        while charStack:
            self.stream.unget(charStack.pop())
        self.state = self.bogusCommentState
        return True

    def commentStartState(self):
        data = self.stream.char()
        if data == "-":
            self.state = self.commentStartDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += data
            self.state = self.commentState
        return True

    def commentStartDashState(self):
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "-\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "incorrect-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True

    def commentState(self):
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "\uFFFD"
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "eof-in-comment"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += data + \
                self.stream.charsUntil(("-", "\u0000"))
        return True

    def commentEndDashState(self):
        data = self.stream.char()
        if data == "-":
            self.state = self.commentEndState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "-\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "-" + data
            self.state = self.commentState
        return True

    def commentEndState(self):
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "--\uFFFD"
            self.state = self.commentState
        elif data == "!":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-bang-after-double-dash-in-comment"})
            self.state = self.commentEndBangState
        elif data == "-":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-dash-after-double-dash-in-comment"})
            self.currentToken["data"] += data
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-double-dash"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            # XXX
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-comment"})
            self.currentToken["data"] += "--" + data
            self.state = self.commentState
        return True

    def commentEndBangState(self):
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "-":
            self.currentToken["data"] += "--!"
            self.state = self.commentEndDashState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["data"] += "--!\uFFFD"
            self.state = self.commentState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-comment-end-bang-state"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["data"] += "--!" + data
            self.state = self.commentState
        return True

    def doctypeState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-eof"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "need-space-after-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypeNameState
        return True

    def beforeDoctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-right-bracket"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["name"] = "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-doctype-name-but-got-eof"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] = data
            self.state = self.doctypeNameState
        return True

    def doctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.state = self.afterDoctypeNameState
        elif data == ">":
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["name"] += "\uFFFD"
            self.state = self.doctypeNameState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype-name"})
            self.currentToken["correct"] = False
            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["name"] += data
        return True

    def afterDoctypeNameState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.currentToken["correct"] = False
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            if data in ("p", "P"):
                matched = True
                for expected in (("u", "U"), ("b", "B"), ("l", "L"),
                                 ("i", "I"), ("c", "C")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypePublicKeywordState
                    return True
            elif data in ("s", "S"):
                matched = True
                for expected in (("y", "Y"), ("s", "S"), ("t", "T"),
                                 ("e", "E"), ("m", "M")):
                    data = self.stream.char()
                    if data not in expected:
                        matched = False
                        break
                if matched:
                    self.state = self.afterDoctypeSystemKeywordState
                    return True

            # All the characters read before the current 'data' will be
            # [a-zA-Z], so they're garbage in the bogus doctype and can be
            # discarded; only the latest character might be '>' or EOF
            # and needs to be ungetted
            self.stream.unget(data)
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "expected-space-or-right-bracket-in-doctype", "datavars":
                                    {"data": data}})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState

        return True

    def afterDoctypePublicKeywordState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypePublicIdentifierState
        elif data in ("'", '"'):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypePublicIdentifierState
        return True

    def beforeDoctypePublicIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == "\"":
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["publicId"] = ""
            self.state = self.doctypePublicIdentifierSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True

    def doctypePublicIdentifierDoubleQuotedState(self):
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True

    def doctypePublicIdentifierSingleQuotedState(self):
        data = self.stream.char()
        if data == "'":
            self.state = self.afterDoctypePublicIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["publicId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["publicId"] += data
        return True

    def afterDoctypePublicIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == '"':
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True

    def betweenDoctypePublicAndSystemIdentifiersState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data == '"':
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data == EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True

    def afterDoctypeSystemKeywordState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data in ("'", '"'):
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.stream.unget(data)
            self.state = self.beforeDoctypeSystemIdentifierState
        return True

    def beforeDoctypeSystemIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == "\"":
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierDoubleQuotedState
        elif data == "'":
            self.currentToken["systemId"] = ""
            self.state = self.doctypeSystemIdentifierSingleQuotedState
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.currentToken["correct"] = False
            self.state = self.bogusDoctypeState
        return True

    def doctypeSystemIdentifierDoubleQuotedState(self):
        data = self.stream.char()
        if data == "\"":
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["systemId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["systemId"] += data
        return True

    def doctypeSystemIdentifierSingleQuotedState(self):
        data = self.stream.char()
        if data == "'":
            self.state = self.afterDoctypeSystemIdentifierState
        elif data == "\u0000":
            self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                    "data": "invalid-codepoint"})
            self.currentToken["systemId"] += "\uFFFD"
        elif data == ">":
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-end-of-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.currentToken["systemId"] += data
        return True

    def afterDoctypeSystemIdentifierState(self):
        data = self.stream.char()
        if data in spaceCharacters:
            pass
        elif data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "eof-in-doctype"})
            self.currentToken["correct"] = False
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                    "unexpected-char-in-doctype"})
            self.state = self.bogusDoctypeState
        return True

    def bogusDoctypeState(self):
        data = self.stream.char()
        if data == ">":
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        elif data is EOF:
            # XXX EMIT
            self.stream.unget(data)
            self.tokenQueue.append(self.currentToken)
            self.state = self.dataState
        else:
            pass
        return True

    def cdataSectionState(self):
        data = []
        while True:
            data.append(self.stream.charsUntil("]"))
            data.append(self.stream.charsUntil(">"))
            char = self.stream.char()
            if char == EOF:
                break
            else:
                assert char == ">"
                if data[-1][-2:] == "]]":
                    data[-1] = data[-1][:-2]
                    break
                else:
                    data.append(char)

        data = "".join(data)  # pylint:disable=redefined-variable-type
        # Deal with null here rather than in the parser
        nullCount = data.count("\u0000")
        if nullCount > 0:
            for _ in range(nullCount):
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data": "invalid-codepoint"})
            data = data.replace("\u0000", "\uFFFD")
        if data:
            self.tokenQueue.append({"type": tokenTypes["Characters"],
                                    "data": data})
        self.state = self.dataState
        return True

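Note on how the state methods above fit together (an editorial sketch, not part of the uploaded file): each handler reads from self.stream, pushes tokens onto self.tokenQueue, reassigns self.state, and returns True while there is more input to process. Assuming a driver loop roughly like the one html5lib's tokenizer uses internally (the run_tokenizer name below is illustrative, not taken from this diff):

    # Hypothetical driver loop: repeatedly invoke the current state handler
    # and drain whatever tokens it queued; stop once a handler reports EOF.
    def run_tokenizer(tokenizer):
        tokenizer.state = tokenizer.dataState
        while tokenizer.state():              # handlers return True until EOF
            while tokenizer.tokenQueue:
                yield tokenizer.tokenQueue.pop(0)
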
MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__init__.py
ADDED
@@ -0,0 +1,5 @@
from __future__ import absolute_import, division, unicode_literals

from .py import Trie

__all__ = ["Trie"]

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (325 Bytes).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/_base.cpython-39.pyc
ADDED
Binary file (1.57 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/__pycache__/py.cpython-39.pyc
ADDED
Binary file (2.22 kB).

MLPY/Lib/site-packages/tensorboard/_vendor/html5lib/_trie/_base.py
ADDED
@@ -0,0 +1,40 @@
from __future__ import absolute_import, division, unicode_literals

try:
    from collections.abc import Mapping
except ImportError:  # Python 2.7
    from collections import Mapping


class Trie(Mapping):
    """Abstract base class for tries"""

    def keys(self, prefix=None):
        # pylint:disable=arguments-differ
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        return {x for x in keys if x.startswith(prefix)}

    def has_keys_with_prefix(self, prefix):
        for key in self.keys():
            if key.startswith(prefix):
                return True

        return False

    def longest_prefix(self, prefix):
        if prefix in self:
            return prefix

        for i in range(1, len(prefix) + 1):
            if prefix[:-i] in self:
                return prefix[:-i]

        raise KeyError(prefix)

    def longest_prefix_item(self, prefix):
        lprefix = self.longest_prefix(prefix)
        return (lprefix, self[lprefix])
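
A concrete subclass only has to supply the Mapping protocol (__getitem__, __iter__, __len__); the prefix helpers above then work unchanged. A simplified, dict-backed illustration follows; it is not the vendored py.Trie (whose implementation lives in _trie/py.py and is not shown in this diff), and it assumes the vendored package is importable as tensorboard._vendor.html5lib:

    from tensorboard._vendor.html5lib._trie._base import Trie as ABCTrie

    class DictTrie(ABCTrie):
        """Toy trie that stores all entries in a plain dict."""

        def __init__(self, data):
            self._data = dict(data)

        def __getitem__(self, key):
            return self._data[key]

        def __iter__(self):
            return iter(self._data)

        def __len__(self):
            return len(self._data)

    t = DictTrie({"amp": "&", "ampere": "A"})
    t.longest_prefix("ampersand")   # -> "amp"
    t.keys("amp")                   # -> {"amp", "ampere"}
    t.has_keys_with_prefix("amq")   # -> False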