File size: 19,070 Bytes

d1ceb73

from itertools import chain

from .ast import AtRule, Declaration, ParseError, QualifiedRule
from .tokenizer import parse_component_value_list


def _to_token_iterator(input, skip_comments=False):
    """Iterate component values out of string or component values iterable.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments: If the input is a string, ignore all CSS comments.
    :returns: An iterator yielding :term:`component values`.

    """
    if isinstance(input, str):
        input = parse_component_value_list(input, skip_comments)
    return iter(input)


def _next_significant(tokens):
    """Return the next significant (neither whitespace or comment) token.

    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :returns: A :term:`component value`, or :obj:`None`.

    """
    for token in tokens:
        if token.type not in ('whitespace', 'comment'):
            return token


def parse_one_component_value(input, skip_comments=False):
    """Parse a single :diagram:`component value`.

    This is used e.g. for an attribute value
    referred to by ``attr(foo length)``.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments: If the input is a string, ignore all CSS comments.
    :returns:
        A :term:`component value` (that is neither whitespace or comment),
        or a :class:`~tinycss2.ast.ParseError`.

    """
    tokens = _to_token_iterator(input, skip_comments)
    first = _next_significant(tokens)
    second = _next_significant(tokens)
    if first is None:
        return ParseError(1, 1, 'empty', 'Input is empty')
    if second is not None:
        return ParseError(
            second.source_line, second.source_column, 'extra-input',
            'Got more than one token')
    else:
        return first


def parse_one_declaration(input, skip_comments=False):
    """Parse a single :diagram:`declaration`.

    This is used e.g. for a declaration in an `@supports
    <https://drafts.csswg.org/css-conditional/#at-supports>`_ test.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments: If the input is a string, ignore all CSS comments.
    :returns:
        A :class:`~tinycss2.ast.Declaration`
        or :class:`~tinycss2.ast.ParseError`.

    Any whitespace or comment before the ``:`` colon is dropped.

    """
    tokens = _to_token_iterator(input, skip_comments)
    first_token = _next_significant(tokens)
    if first_token is None:
        return ParseError(1, 1, 'empty', 'Input is empty')
    return _parse_declaration(first_token, tokens)


def _consume_remnants(input, nested):
    for token in input:
        if token == ';':
            return
        elif nested and token == '}':
            return


def _parse_declaration(first_token, tokens, nested=True):
    """Parse a declaration.

    Consume :obj:`tokens` until the end of the declaration or the first error.

    :type first_token: :term:`component value`
    :param first_token: The first component value of the rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :type nested: :obj:`bool`
    :param nested: Whether the declaration is nested or top-level.
    :returns:
        A :class:`~tinycss2.ast.Declaration`
        or :class:`~tinycss2.ast.ParseError`.

    """
    name = first_token
    if name.type != 'ident':
        _consume_remnants(tokens, nested)
        return ParseError(
            name.source_line, name.source_column, 'invalid',
            f'Expected <ident> for declaration name, got {name.type}.')

    colon = _next_significant(tokens)
    if colon is None:
        _consume_remnants(tokens, nested)
        return ParseError(
            name.source_line, name.source_column, 'invalid',
            "Expected ':' after declaration name, got EOF")
    elif colon != ':':
        _consume_remnants(tokens, nested)
        return ParseError(
            colon.source_line, colon.source_column, 'invalid',
            "Expected ':' after declaration name, got {colon.type}.")

    value = []
    state = 'value'
    contains_non_whitespace = False
    contains_simple_block = False
    for i, token in enumerate(tokens):
        if state == 'value' and token == '!':
            state = 'bang'
            bang_position = i
        elif (state == 'bang' and token.type == 'ident'
                and token.lower_value == 'important'):
            state = 'important'
        elif token.type not in ('whitespace', 'comment'):
            state = 'value'
            if token.type == '{} block':
                if contains_non_whitespace:
                    contains_simple_block = True
                else:
                    contains_non_whitespace = True
            else:
                contains_non_whitespace = True
        value.append(token)

    if state == 'important':
        del value[bang_position:]

    # TODO: Handle custom property names

    if contains_simple_block and contains_non_whitespace:
        return ParseError(
            colon.source_line, colon.source_column, 'invalid',
            'Declaration contains {} block')

    # TODO: Handle unicode-range

    return Declaration(
        name.source_line, name.source_column, name.value, name.lower_value,
        value, state == 'important')


def _consume_blocks_content(first_token, tokens):
    """Consume declaration or nested rule."""
    declaration_tokens = []
    semicolon_token = []
    if first_token != ';' and first_token.type != '{} block':
        for token in tokens:
            if token == ';':
                semicolon_token.append(token)
                break
            declaration_tokens.append(token)
            if token.type == '{} block':
                break
    declaration = _parse_declaration(
        first_token, iter(declaration_tokens), nested=True)
    if declaration.type == 'declaration':
        return declaration
    else:
        tokens = chain(declaration_tokens, semicolon_token, tokens)
        return _consume_qualified_rule(first_token, tokens, stop_token=';', nested=True)


def _consume_declaration_in_list(first_token, tokens):
    """Like :func:`_parse_declaration`, but stop at the first ``;``.

    Deprecated, use :func:`_consume_blocks_content` instead.

    """
    other_declaration_tokens = []
    for token in tokens:
        if token == ';':
            break
        other_declaration_tokens.append(token)
    return _parse_declaration(first_token, iter(other_declaration_tokens))


def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False):
    """Parse a block’s contents.

    This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
    of a style rule or ``@page`` rule, or for the ``style`` attribute of an
    HTML element.

    In contexts that don’t expect any at-rule and/or qualified rule,
    all :class:`~tinycss2.ast.AtRule` and/or
    :class:`~tinycss2.ast.QualifiedRule` objects should simply be rejected as
    invalid.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the list.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the list.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.Declaration.value` of declarations
        and the :attr:`~tinycss2.ast.AtRule.prelude`
        and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.Declaration`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects

    """
    tokens = _to_token_iterator(input, skip_comments)
    result = []
    for token in tokens:
        if token.type == 'whitespace':
            if not skip_whitespace:
                result.append(token)
        elif token.type == 'comment':
            if not skip_comments:
                result.append(token)
        elif token.type == 'at-keyword':
            result.append(_consume_at_rule(token, tokens))
        elif token != ';':
            result.append(_consume_blocks_content(token, tokens))
    return result


def parse_declaration_list(input, skip_comments=False, skip_whitespace=False):
    """Parse a :diagram:`declaration list` (which may also contain at-rules).

    Deprecated and removed from CSS Syntax Level 3. Use
    :func:`parse_blocks_contents` instead.

    This is used e.g. for the :attr:`~tinycss2.ast.QualifiedRule.content`
    of a style rule or ``@page`` rule, or for the ``style`` attribute of an
    HTML element.

    In contexts that don’t expect any at-rule, all
    :class:`~tinycss2.ast.AtRule` objects should simply be rejected as invalid.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the list.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the list.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.Declaration.value` of declarations
        and the :attr:`~tinycss2.ast.AtRule.prelude`
        and :attr:`~tinycss2.ast.AtRule.content` of at-rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.Declaration`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects

    """
    tokens = _to_token_iterator(input, skip_comments)
    result = []
    for token in tokens:
        if token.type == 'whitespace':
            if not skip_whitespace:
                result.append(token)
        elif token.type == 'comment':
            if not skip_comments:
                result.append(token)
        elif token.type == 'at-keyword':
            result.append(_consume_at_rule(token, tokens))
        elif token != ';':
            result.append(_consume_declaration_in_list(token, tokens))
    return result


def parse_one_rule(input, skip_comments=False):
    """Parse a single :diagram:`qualified rule` or :diagram:`at-rule`.

    This would be used e.g. by `insertRule()
    <https://drafts.csswg.org/cssom/#dom-cssstylesheet-insertrule>`_
    in an implementation of CSSOM.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        If the input is a string, ignore all CSS comments.
    :returns:
        A :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        or :class:`~tinycss2.ast.ParseError` objects.

    Any whitespace or comment before or after the rule is dropped.

    """
    tokens = _to_token_iterator(input, skip_comments)
    first = _next_significant(tokens)
    if first is None:
        return ParseError(1, 1, 'empty', 'Input is empty')

    rule = _consume_rule(first, tokens)
    next = _next_significant(tokens)
    if next is not None:
        return ParseError(
            next.source_line, next.source_column, 'extra-input',
            'Expected a single rule, got %s after the first rule.' % next.type)
    return rule


def parse_rule_list(input, skip_comments=False, skip_whitespace=False):
    """Parse a non-top-level :diagram:`rule list`.

    Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_content`
    instead.

    This is used for parsing the :attr:`~tinycss2.ast.AtRule.content`
    of nested rules like ``@media``.
    This differs from :func:`parse_stylesheet` in that
    top-level ``<!--`` and ``-->`` tokens are not ignored.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the list.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the list.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
        and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects.

    """
    tokens = _to_token_iterator(input, skip_comments)
    result = []
    for token in tokens:
        if token.type == 'whitespace':
            if not skip_whitespace:
                result.append(token)
        elif token.type == 'comment':
            if not skip_comments:
                result.append(token)
        else:
            result.append(_consume_rule(token, tokens))
    return result


def parse_stylesheet(input, skip_comments=False, skip_whitespace=False):
    """Parse :diagram:`stylesheet` from text.

    This is used e.g. for a ``<style>`` HTML element.

    This differs from :func:`parse_rule_list` in that
    top-level ``<!--`` and ``-->`` tokens are ignored.
    This is a legacy quirk for the ``<style>`` HTML element.

    :type input: :obj:`str` or :term:`iterable`
    :param input: A string or an iterable of :term:`component values`.
    :type skip_comments: :obj:`bool`
    :param skip_comments:
        Ignore CSS comments at the top-level of the stylesheet.
        If the input is a string, ignore all comments.
    :type skip_whitespace: :obj:`bool`
    :param skip_whitespace:
        Ignore whitespace at the top-level of the stylesheet.
        Whitespace is still preserved
        in the :attr:`~tinycss2.ast.QualifiedRule.prelude`
        and the :attr:`~tinycss2.ast.QualifiedRule.content` of rules.
    :returns:
        A list of
        :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        :class:`~tinycss2.ast.Comment` (if ``skip_comments`` is false),
        :class:`~tinycss2.ast.WhitespaceToken`
        (if ``skip_whitespace`` is false),
        and :class:`~tinycss2.ast.ParseError` objects.

    """
    tokens = _to_token_iterator(input, skip_comments)
    result = []
    for token in tokens:
        if token.type == 'whitespace':
            if not skip_whitespace:
                result.append(token)
        elif token.type == 'comment':
            if not skip_comments:
                result.append(token)
        elif token not in ('<!--', '-->'):
            result.append(_consume_rule(token, tokens))
    return result


def _consume_rule(first_token, tokens):
    """Parse a qualified rule or at-rule.

    Consume just enough of :obj:`tokens` for this rule.

    :type first_token: :term:`component value`
    :param first_token: The first component value of the rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :returns:
        A :class:`~tinycss2.ast.QualifiedRule`,
        :class:`~tinycss2.ast.AtRule`,
        or :class:`~tinycss2.ast.ParseError`.

    """
    if first_token.type == 'at-keyword':
        return _consume_at_rule(first_token, tokens)
    return _consume_qualified_rule(first_token, tokens)


def _consume_at_rule(at_keyword, tokens):
    """Parse an at-rule.

    Consume just enough of :obj:`tokens` for this rule.

    :type at_keyword: :class:`AtKeywordToken`
    :param at_keyword: The at-rule keyword token starting this rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :type nested: :obj:`bool`
    :param nested: Whether the at-rule is nested or top-level.
    :returns:
        A :class:`~tinycss2.ast.QualifiedRule`,
        or :class:`~tinycss2.ast.ParseError`.

    """
    prelude = []
    content = None
    for token in tokens:
        if token.type == '{} block':
            # TODO: handle nested at-rules
            # https://drafts.csswg.org/css-syntax-3/#consume-at-rule
            content = token.content
            break
        elif token == ';':
            break
        prelude.append(token)
    return AtRule(
        at_keyword.source_line, at_keyword.source_column, at_keyword.value,
        at_keyword.lower_value, prelude, content)


def _rule_error(token, name):
    """Create rule parse error raised because of given token."""
    return ParseError(
        token.source_line, token.source_column, 'invalid',
        f'{name} reached before {{}} block for a qualified rule.')


def _consume_qualified_rule(first_token, tokens, nested=False,
                            stop_token=None):
    """Consume a qualified rule.

    Consume just enough of :obj:`tokens` for this rule.

    :type first_token: :term:`component value`
    :param first_token: The first component value of the rule.
    :type tokens: :term:`iterator`
    :param tokens: An iterator yielding :term:`component values`.
    :type nested: :obj:`bool`
    :param nested: Whether the rule is nested or top-level.
    :type stop_token: :class:`~tinycss2.ast.Node`
    :param stop_token: A token that ends rule parsing when met.

    """
    if first_token == stop_token:
        return _rule_error(first_token, 'Stop token')
    if first_token.type == '{} block':
        prelude = []
        block = first_token
    else:
        prelude = [first_token]
        for token in tokens:
            if token == stop_token:
                return _rule_error(token, 'Stop token')
            if token.type == '{} block':
                block = token
                # TODO: handle special case for CSS variables (using "nested")
                # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule
                break
            prelude.append(token)
        else:
            return _rule_error(prelude[-1], 'EOF')
    return QualifiedRule(
        first_token.source_line, first_token.source_column, prelude, block.content)