Spaces:
Sleeping
Sleeping
File size: 1,337 Bytes
d916065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
"""
Unit tests for nltk.classify. See also: nltk/test/classify.doctest
"""
import pytest
from nltk import classify
# Labelled training data: (featureset, label) pairs over three binary
# features "a", "b", "c" and two labels "x" and "y".
TRAIN = [
    ({"a": 1, "b": 1, "c": 1}, "y"),
    ({"a": 1, "b": 1, "c": 1}, "x"),
    ({"a": 1, "b": 1, "c": 0}, "y"),
    ({"a": 0, "b": 1, "c": 1}, "x"),
    ({"a": 0, "b": 1, "c": 1}, "y"),
    ({"a": 0, "b": 0, "c": 1}, "y"),
    ({"a": 0, "b": 1, "c": 0}, "x"),
    ({"a": 0, "b": 0, "c": 0}, "x"),
    ({"a": 0, "b": 1, "c": 1}, "y"),
]

# Unlabelled featuresets to classify; each is a plain dict (not a tuple).
TEST = [
    {"a": 1, "b": 0, "c": 1},  # unseen
    {"a": 1, "b": 0, "c": 0},  # unseen
    {"a": 0, "b": 1, "c": 1},  # seen 3 times, labels=y,y,x
    {"a": 0, "b": 1, "c": 0},  # seen 1 time, label=x
]

# Expected (P("x"), P("y")) for each entry of TEST, in the same order.
RESULTS = [
    (0.16, 0.84),
    (0.46, 0.54),
    (0.41, 0.59),
    (0.76, 0.24),
]
def assert_classifier_correct(algorithm):
    """Train a maxent classifier with *algorithm* and verify its predictions.

    A ``MaxentClassifier`` is trained on ``TRAIN`` (``trace=0``,
    ``max_iter=1000``).  Each featureset in ``TEST`` is then classified and
    the probabilities of labels ``"x"`` and ``"y"`` must lie within ``1e-2``
    of the matching pair in ``RESULTS``.

    If training raises ``LookupError`` or ``AttributeError`` the calling test
    is skipped with the exception text as the reason (presumably the backend
    for *algorithm* is not available -- confirm against nltk).
    """
    try:
        model = classify.MaxentClassifier.train(
            TRAIN, algorithm, trace=0, max_iter=1000
        )
    except (LookupError, AttributeError) as err:
        pytest.skip(str(err))

    for expected, features in zip(RESULTS, TEST):
        want_x, want_y = expected
        dist = model.prob_classify(features)
        # Check "x" before "y", one label at a time.
        for label, want in (("x", want_x), ("y", want_y)):
            got = dist.prob(label)
            assert abs(got - want) < 1e-2, (got, want)
def test_megam():
    """Run the shared classifier check with the "MEGAM" training algorithm."""
    assert_classifier_correct(algorithm="MEGAM")
def test_tadm():
    """Run the shared classifier check with the "TADM" training algorithm."""
    assert_classifier_correct(algorithm="TADM")
|