Spaces:
Sleeping
Sleeping
File size: 2,381 Bytes
d916065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# Natural Language Toolkit: Generating from a CFG
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
# Peter Ljunglöf <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#
import itertools
import sys
from nltk.grammar import Nonterminal
def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generating sentences.
        When omitted (or falsy), ``grammar.start()`` is used.
    :param depth: The maximal depth of the generated tree. When None,
        ``sys.maxsize`` is used as the limit.
    :param n: The maximum number of sentences to return. When None, no
        limit is applied; ``n=0`` yields no sentences at all.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    sentences = _generate_all(grammar, [start], depth)

    # Test against None rather than truthiness: a plain ``if n:`` would
    # treat an explicit limit of 0 as "no limit" and return every sentence.
    if n is not None:
        sentences = itertools.islice(sentences, n)

    return sentences
def _generate_all(grammar, items, depth):
if items:
try:
for frag1 in _generate_one(grammar, items[0], depth):
for frag2 in _generate_all(grammar, items[1:], depth):
yield frag1 + frag2
except RecursionError as error:
# Helpful error message while still showing the recursion stack.
raise RuntimeError(
"The grammar has rule(s) that yield infinite recursion!"
) from error
else:
yield []
def _generate_one(grammar, item, depth):
if depth > 0:
if isinstance(item, Nonterminal):
for prod in grammar.productions(lhs=item):
yield from _generate_all(grammar, prod.rhs(), depth - 1)
else:
yield [item]
# Grammar source text used by demo(): printed as-is and then parsed with
# CFG.fromstring. One rule per line; alternatives are separated by "|" and
# quoted symbols are the terminal words.
demo_grammar = """
S -> NP VP
NP -> Det N
PP -> P NP
VP -> 'slept' | 'saw' NP | 'walked' PP
Det -> 'the' | 'a'
N -> 'man' | 'park' | 'dog'
P -> 'in' | 'with'
"""
def demo(N=23):
    """
    Print the demo grammar followed by its first ``N`` generated sentences.

    :param N: The maximum number of sentences to print.
    """
    from nltk.grammar import CFG

    banner = "Generating the first %d sentences for demo grammar:" % (N,)
    print(banner)
    print(demo_grammar)

    cfg = CFG.fromstring(demo_grammar)
    numbered_sentences = enumerate(generate(cfg, n=N), 1)
    for index, tokens in numbered_sentences:
        # Sentences are numbered from 1, right-aligned in a 3-wide column.
        line = "%3d. %s" % (index, " ".join(tokens))
        print(line)
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    demo()
|