Spaces:
Sleeping
Sleeping
# Natural Language Toolkit: Generating from a CFG
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
#         Peter Ljunglöf <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#
import itertools | |
import sys | |
from nltk.grammar import Nonterminal | |
def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generating sentences.
        When omitted (or falsy), ``grammar.start()`` is used.
    :param depth: The maximal depth of the generated tree. ``None`` means
        the depth is only bounded by ``sys.maxsize``.
    :param n: The maximum number of sentences to return. ``None`` means no
        limit; ``0`` produces an empty iterator.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    sentences = _generate_all(grammar, [start], depth)

    # Compare against None rather than relying on truthiness: ``n=0`` is a
    # legitimate request for zero sentences, not a request for all of them.
    if n is not None:
        sentences = itertools.islice(sentences, n)

    return sentences
def _generate_all(grammar, items, depth): | |
if items: | |
try: | |
for frag1 in _generate_one(grammar, items[0], depth): | |
for frag2 in _generate_all(grammar, items[1:], depth): | |
yield frag1 + frag2 | |
except RecursionError as error: | |
# Helpful error message while still showing the recursion stack. | |
raise RuntimeError( | |
"The grammar has rule(s) that yield infinite recursion!" | |
) from error | |
else: | |
yield [] | |
def _generate_one(grammar, item, depth): | |
if depth > 0: | |
if isinstance(item, Nonterminal): | |
for prod in grammar.productions(lhs=item): | |
yield from _generate_all(grammar, prod.rhs(), depth - 1) | |
else: | |
yield [item] | |
# Toy grammar printed and parsed by ``demo()`` via ``CFG.fromstring``.
# One rule per line; ``|`` separates alternatives of the same left-hand side,
# so no rule line may end in a dangling ``|``.
demo_grammar = """
S -> NP VP
NP -> Det N
PP -> P NP
VP -> 'slept' | 'saw' NP | 'walked' PP
Det -> 'the' | 'a'
N -> 'man' | 'park' | 'dog'
P -> 'in' | 'with'
"""
def demo(N=23):
    """
    Print the demo grammar followed by a numbered list of generated sentences.

    :param N: Shown in the heading and passed to ``generate`` as the
        sentence limit ``n``.
    """
    from nltk.grammar import CFG

    print("Generating the first %d sentences for demo grammar:" % (N,))
    print(demo_grammar)

    sentences = generate(CFG.fromstring(demo_grammar), n=N)
    for position, tokens in enumerate(sentences, start=1):
        numbered = "%3d. %s" % (position, " ".join(tokens))
        print(numbered)
# Run the demo only when this module is executed as a script.
if __name__ == "__main__":
    demo()