sunnychenxiwang's picture
update nltk
d916065
raw
history blame
3.71 kB
"""
Unit tests for Senna
"""
import unittest
from os import environ, path, sep
from nltk.classify import Senna
from nltk.tag import SennaChunkTagger, SennaNERTagger, SennaTagger
# Set Senna executable path for tests if it is not specified as an environment variable
DEFAULT_SENNA_PATH = "/usr/share/senna-v3.0"


def senna_path_from_env(env=environ):
    """Return the Senna location the tests should use.

    :param env: mapping to read the ``SENNA`` setting from; defaults to the
        process environment.
    :return: the normalised ``SENNA`` value with a trailing path separator
        when the variable is set to a non-empty string, otherwise
        ``DEFAULT_SENNA_PATH``.
    """
    configured = env.get("SENNA")
    # An empty value would normalise to "." + sep, which always exists and
    # would defeat the skip guard below, so treat it the same as "unset".
    if configured:
        return path.normpath(configured) + sep
    return DEFAULT_SENNA_PATH


SENNA_EXECUTABLE_PATH = senna_path_from_env()

# Used by the skipUnless decorators: the Senna tests only run when this path exists.
senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaPipeline(unittest.TestCase):
    """Unittest for nltk.classify.senna"""

    def test_senna_pipeline(self):
        """Senna pipeline interface"""
        # Request the three annotations whose keys are read back below.
        pipeline = Senna(SENNA_EXECUTABLE_PATH, ["pos", "chk", "ner"])
        sent = "Dusseldorf is an international business center".split()

        # Flatten every per-token mapping into a (word, chk, ner, pos) tuple
        # so the whole sentence can be compared with a single assertEqual.
        result = [
            (token["word"], token["chk"], token["ner"], token["pos"])
            for token in pipeline.tag(sent)
        ]
        expected = [
            ("Dusseldorf", "B-NP", "B-LOC", "NNP"),
            ("is", "B-VP", "O", "VBZ"),
            ("an", "B-NP", "O", "DT"),
            ("international", "I-NP", "O", "JJ"),
            ("business", "I-NP", "O", "NN"),
            ("center", "I-NP", "O", "NN"),
        ]
        self.assertEqual(result, expected)
@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
class TestSennaTagger(unittest.TestCase):
    """Unittest for nltk.tag.senna"""

    def test_senna_tagger(self):
        """SennaTagger.tag returns the expected (word, tag) pairs."""
        tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
        result = tagger.tag("What is the airspeed of an unladen swallow ?".split())
        expected = [
            ("What", "WP"),
            ("is", "VBZ"),
            ("the", "DT"),
            ("airspeed", "NN"),
            ("of", "IN"),
            ("an", "DT"),
            ("unladen", "NN"),
            ("swallow", "NN"),
            ("?", "."),
        ]
        self.assertEqual(result, expected)

    def test_senna_chunk_tagger(self):
        """SennaChunkTagger.tag and bio_to_chunks return the expected output."""
        chktagger = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
        result_1 = chktagger.tag("What is the airspeed of an unladen swallow ?".split())
        expected_1 = [
            ("What", "B-NP"),
            ("is", "B-VP"),
            ("the", "B-NP"),
            ("airspeed", "I-NP"),
            ("of", "B-PP"),
            ("an", "B-NP"),
            ("unladen", "I-NP"),
            ("swallow", "I-NP"),
            ("?", "O"),
        ]
        # Verify the tags before feeding them to bio_to_chunks, so a tagging
        # regression is reported here rather than as a confusing chunk mismatch.
        self.assertEqual(result_1, expected_1)

        # With chunk_type="NP" only the NP spans are expected back, each paired
        # with the hyphen-joined positions of its tokens in the sentence.
        result_2 = list(chktagger.bio_to_chunks(result_1, chunk_type="NP"))
        expected_2 = [
            ("What", "0"),
            ("the airspeed", "2-3"),
            ("an unladen swallow", "5-6-7"),
        ]
        self.assertEqual(result_2, expected_2)

    def test_senna_ner_tagger(self):
        """SennaNERTagger.tag returns the expected tags for two sentences."""
        nertagger = SennaNERTagger(SENNA_EXECUTABLE_PATH)

        result_1 = nertagger.tag("Shakespeare theatre was in London .".split())
        expected_1 = [
            ("Shakespeare", "B-PER"),
            ("theatre", "O"),
            ("was", "O"),
            ("in", "O"),
            ("London", "B-LOC"),
            (".", "O"),
        ]
        # Check each sentence as soon as it is tagged so a failure points at
        # the sentence that actually produced it.
        self.assertEqual(result_1, expected_1)

        result_2 = nertagger.tag("UN headquarters are in NY , USA .".split())
        expected_2 = [
            ("UN", "B-ORG"),
            ("headquarters", "O"),
            ("are", "O"),
            ("in", "O"),
            ("NY", "B-LOC"),
            (",", "O"),
            ("USA", "B-LOC"),
            (".", "O"),
        ]
        self.assertEqual(result_2, expected_2)