Spaces:
Sleeping
Sleeping
# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
A utility for displaying lexical dispersion.
"""
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Each target word gets its own horizontal row, and a marker is drawn at
    every offset in ``text`` where that word occurs.  The first entry of
    ``words`` is placed on the top row, the last entry on the bottom row.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: The title of the plot
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raise ImportError: if matplotlib is not installed
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Row 0 is the bottom of the y-axis, so reverse the words to put the first
    # target word on top.  The same reversed list must be used for both the
    # row lookup and the tick labels, otherwise labels end up on the wrong
    # rows.  A copy is made so the caller's list is never modified.
    rows = list(reversed(words))
    word2y = {
        word.casefold() if ignore_case else word: y for y, word in enumerate(rows)
    }

    # Collect one (offset, row) point per occurrence of a target word.
    xs, ys = [], []
    for x, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        if y is not None:
            xs.append(x)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    ax.set_yticks(list(range(len(rows))), rows, color="C0")
    # Leave one row of padding below the first row and above the last one.
    ax.set_ylim(-1, len(rows))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax
if __name__ == "__main__":
    # Demo: plot where four names occur in the Gutenberg sample
    # "austen-sense.txt", then display the resulting figure.
    import matplotlib.pyplot as plt

    from nltk.corpus import gutenberg

    target_names = ["Elinor", "Marianne", "Edward", "Willoughby"]
    austen_tokens = gutenberg.words("austen-sense.txt")
    dispersion_plot(austen_tokens, target_names)
    plt.show()