File size: 2,381 Bytes
d916065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Natural Language Toolkit: Generating from a CFG
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <[email protected]>
#         Peter Ljunglöf <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
#

import itertools
import sys

from nltk.grammar import Nonterminal


def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generate sentences.
        When omitted (or falsy), ``grammar.start()`` is used.
    :param depth: The maximal depth of the generated tree.  ``None`` is
        replaced by ``sys.maxsize``, i.e. effectively no depth limit.
    :param n: The maximum number of sentences to return.  ``None`` means
        no limit; ``0`` produces an empty iterator.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    sentences = _generate_all(grammar, [start], depth)

    # Test against None rather than truthiness: with a plain ``if n:`` a
    # request for at most zero sentences (n=0) was silently treated as
    # "no limit" and returned every sentence.
    if n is not None:
        sentences = itertools.islice(sentences, n)

    return sentences


def _generate_all(grammar, items, depth):
    if items:
        try:
            for frag1 in _generate_one(grammar, items[0], depth):
                for frag2 in _generate_all(grammar, items[1:], depth):
                    yield frag1 + frag2
        except RecursionError as error:
            # Helpful error message while still showing the recursion stack.
            raise RuntimeError(
                "The grammar has rule(s) that yield infinite recursion!"
            ) from error
    else:
        yield []


def _generate_one(grammar, item, depth):
    if depth > 0:
        if isinstance(item, Nonterminal):
            for prod in grammar.productions(lhs=item):
                yield from _generate_all(grammar, prod.rhs(), depth - 1)
        else:
            yield [item]


# Grammar source text used by demo(): it is printed verbatim and then
# parsed with CFG.fromstring before sentences are generated from it.
demo_grammar = """

  S -> NP VP

  NP -> Det N

  PP -> P NP

  VP -> 'slept' | 'saw' NP | 'walked' PP

  Det -> 'the' | 'a'

  N -> 'man' | 'park' | 'dog'

  P -> 'in' | 'with'

"""


def demo(N=23):
    """
    Print the demo grammar followed by its first ``N`` generated sentences.

    :param N: The maximum number of sentences to print.
    """
    from nltk.grammar import CFG

    banner = "Generating the first %d sentences for demo grammar:" % (N,)
    print(banner)
    print(demo_grammar)

    cfg = CFG.fromstring(demo_grammar)
    sentences = generate(cfg, n=N)
    # Number the sentences starting from 1.
    for index, tokens in enumerate(sentences, 1):
        line = "%3d. %s" % (index, " ".join(tokens))
        print(line)


# Run the demonstration when this file is executed as a script.
if __name__ == "__main__":
    demo()