# Spaces: Sleeping  (appears to be page-header residue from extraction, not part of the module)
# Natural Language Toolkit: WordNet stemmer interface
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
#         Edward Loper <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
from nltk.corpus import wordnet as wn
class WordNetLemmatizer:
    """
    WordNet Lemmatizer

    Lemmatize using WordNet's built-in morphy function.
    Returns the input word unchanged if it cannot be found in WordNet.

        >>> from nltk.stem import WordNetLemmatizer
        >>> wnl = WordNetLemmatizer()
        >>> print(wnl.lemmatize('dogs'))
        dog
        >>> print(wnl.lemmatize('churches'))
        church
        >>> print(wnl.lemmatize('aardwolves'))
        aardwolf
        >>> print(wnl.lemmatize('abaci'))
        abacus
        >>> print(wnl.lemmatize('hardrock'))
        hardrock
    """

    def lemmatize(self, word: str, pos: str = "n") -> str:
        """Lemmatize `word` using WordNet's built-in morphy function.

        Returns the input word unchanged if it cannot be found in WordNet.

        :param word: The input word to lemmatize.
        :type word: str
        :param pos: The Part Of Speech tag. Valid options are `"n"` for nouns,
            `"v"` for verbs, `"a"` for adjectives, `"r"` for adverbs and `"s"`
            for satellite adjectives.
        :type pos: str
        :return: The lemma of `word`, for the given `pos`.
        :rtype: str
        """
        # Candidate lemmas for `word` under the requested part of speech.
        lemmas = wn._morphy(word, pos)
        # With several candidates the shortest one wins (the first such
        # candidate on a tie, per `min`); with none the input is returned
        # as-is, which is the "unchanged if not found" contract above.
        return min(lemmas, key=len) if lemmas else word

    def __repr__(self) -> str:
        """Return a fixed tag identifying this lemmatizer."""
        return "<WordNetLemmatizer>"