|
"""Tests of the builder registry.""" |
|
|
|
import pytest |
|
import warnings |
|
|
|
from bs4 import BeautifulSoup |
|
from bs4.builder import ( |
|
builder_registry as registry, |
|
HTMLParserTreeBuilder, |
|
TreeBuilderRegistry, |
|
) |
|
|
|
from . import ( |
|
HTML5LIB_PRESENT, |
|
LXML_PRESENT, |
|
) |
|
|
|
if HTML5LIB_PRESENT: |
|
from bs4.builder import HTML5TreeBuilder |
|
|
|
if LXML_PRESENT: |
|
from bs4.builder import ( |
|
LXMLTreeBuilderForXML, |
|
LXMLTreeBuilder, |
|
) |
|
|
|
|
|
|
|
|
|
class TestBuiltInRegistry(object): |
|
"""Test the built-in registry with the default builders registered.""" |
|
|
|
def test_combination(self): |
|
assert registry.lookup('strict', 'html') == HTMLParserTreeBuilder |
|
if LXML_PRESENT: |
|
assert registry.lookup('fast', 'html') == LXMLTreeBuilder |
|
assert registry.lookup('permissive', 'xml') == LXMLTreeBuilderForXML |
|
if HTML5LIB_PRESENT: |
|
assert registry.lookup('html5lib', 'html') == HTML5TreeBuilder |
|
|
|
def test_lookup_by_markup_type(self): |
|
if LXML_PRESENT: |
|
assert registry.lookup('html') == LXMLTreeBuilder |
|
assert registry.lookup('xml') == LXMLTreeBuilderForXML |
|
else: |
|
assert registry.lookup('xml') == None |
|
if HTML5LIB_PRESENT: |
|
assert registry.lookup('html') == HTML5TreeBuilder |
|
else: |
|
assert registry.lookup('html') == HTMLParserTreeBuilder |
|
|
|
def test_named_library(self): |
|
if LXML_PRESENT: |
|
assert registry.lookup('lxml', 'xml') == LXMLTreeBuilderForXML |
|
assert registry.lookup('lxml', 'html') == LXMLTreeBuilder |
|
if HTML5LIB_PRESENT: |
|
assert registry.lookup('html5lib') == HTML5TreeBuilder |
|
|
|
assert registry.lookup('html.parser') == HTMLParserTreeBuilder |
|
|
|
def test_beautifulsoup_constructor_does_lookup(self): |
|
|
|
with warnings.catch_warnings(record=True) as w: |
|
|
|
|
|
|
|
|
|
BeautifulSoup("", features="html") |
|
|
|
BeautifulSoup("", features=["html", "fast"]) |
|
pass |
|
|
|
|
|
|
|
with pytest.raises(ValueError): |
|
BeautifulSoup("", features="no-such-feature") |
|
|
|
class TestRegistry(object): |
|
"""Test the TreeBuilderRegistry class in general.""" |
|
|
|
def setup_method(self): |
|
self.registry = TreeBuilderRegistry() |
|
|
|
def builder_for_features(self, *feature_list): |
|
cls = type('Builder_' + '_'.join(feature_list), |
|
(object,), {'features' : feature_list}) |
|
|
|
self.registry.register(cls) |
|
return cls |
|
|
|
def test_register_with_no_features(self): |
|
builder = self.builder_for_features() |
|
|
|
|
|
|
|
assert self.registry.lookup('foo') is None |
|
|
|
|
|
|
|
assert self.registry.lookup() == builder |
|
|
|
def test_register_with_features_makes_lookup_succeed(self): |
|
builder = self.builder_for_features('foo', 'bar') |
|
assert self.registry.lookup('foo') is builder |
|
assert self.registry.lookup('bar') is builder |
|
|
|
def test_lookup_fails_when_no_builder_implements_feature(self): |
|
builder = self.builder_for_features('foo', 'bar') |
|
assert self.registry.lookup('baz') is None |
|
|
|
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): |
|
builder1 = self.builder_for_features('foo') |
|
builder2 = self.builder_for_features('bar') |
|
assert self.registry.lookup() == builder2 |
|
|
|
def test_lookup_fails_when_no_tree_builders_registered(self): |
|
assert self.registry.lookup() is None |
|
|
|
def test_lookup_gets_most_recent_builder_supporting_all_features(self): |
|
has_one = self.builder_for_features('foo') |
|
has_the_other = self.builder_for_features('bar') |
|
has_both_early = self.builder_for_features('foo', 'bar', 'baz') |
|
has_both_late = self.builder_for_features('foo', 'bar', 'quux') |
|
lacks_one = self.builder_for_features('bar') |
|
has_the_other = self.builder_for_features('foo') |
|
|
|
|
|
|
|
assert self.registry.lookup('foo', 'bar') == has_both_late |
|
|
|
|
|
assert self.registry.lookup('foo', 'bar', 'baz') == has_both_early |
|
|
|
def test_lookup_fails_when_cannot_reconcile_requested_features(self): |
|
builder1 = self.builder_for_features('foo', 'bar') |
|
builder2 = self.builder_for_features('foo', 'baz') |
|
assert self.registry.lookup('bar', 'baz') is None |
|
|