# Spaces:
#
# sunnychenxiwang
# /
#
# EasyDetect
#
# Sleeping
#
# File size: 1,854 Bytes
#
# d916065

# Natural Language Toolkit: Dispersion Plots
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""

A utility for displaying lexical dispersion.

"""


def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Every token of ``text`` that matches one of the target words is drawn as
    a vertical tick at that token's offset. Each target word gets its own
    row; the first word in ``words`` is on the top row and the last word is
    on the bottom row, and each row's tick label is the word plotted on it.

    :param text: The source text
    :type text: list(str) or iter(str)
    :param words: The target words
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title set on the returned Axes
    :type title: str
    :return: a matplotlib Axes object that may still be modified before plotting
    :rtype: Axes
    :raises ImportError: if matplotlib cannot be imported
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError(
            "The plot function requires matplotlib to be installed. "
            "See https://matplotlib.org/"
        ) from e

    # Row 0 is the bottom of the plot, so reverse the words once and use this
    # single ordering for BOTH the row lookup and the tick labels. Using the
    # unreversed list for the labels would attach each label to the wrong row.
    rows = list(words)[::-1]

    word2y = {
        word.casefold() if ignore_case else word: y
        for y, word in enumerate(rows)
    }
    xs, ys = [], []
    for x, token in enumerate(text):
        token = token.casefold() if ignore_case else token
        y = word2y.get(token)
        if y is not None:
            xs.append(x)
            ys.append(y)

    _, ax = plt.subplots()
    ax.plot(xs, ys, "|")
    # Labels keep the caller's original spelling even when ignore_case is set.
    ax.set_yticks(list(range(len(rows))), rows, color="C0")
    ax.set_ylim(-1, len(rows))
    ax.set_title(title)
    ax.set_xlabel("Word Offset")
    return ax


if __name__ == "__main__":
    # Demo: show where four target words occur in the Gutenberg corpus
    # file "austen-sense.txt".
    import matplotlib.pyplot as plt

    from nltk.corpus import gutenberg

    words = ["Elinor", "Marianne", "Edward", "Willoughby"]
    tokens = gutenberg.words("austen-sense.txt")
    dispersion_plot(tokens, words)

    # dispersion_plot only builds the Axes; displaying it is done here.
    plt.show()