|
|
|
|
|
""" |
|
Markov chain simulation of words or characters. |
|
""" |
|
|
|
class Markov: |
|
def __init__(self, histsize, choice): |
|
self.histsize = histsize |
|
self.choice = choice |
|
self.trans = {} |
|
|
|
def add(self, state, next): |
|
self.trans.setdefault(state, []).append(next) |
|
|
|
def put(self, seq): |
|
n = self.histsize |
|
add = self.add |
|
add(None, seq[:0]) |
|
for i in range(len(seq)): |
|
add(seq[max(0, i-n):i], seq[i:i+1]) |
|
add(seq[len(seq)-n:], None) |
|
|
|
def get(self): |
|
choice = self.choice |
|
trans = self.trans |
|
n = self.histsize |
|
seq = choice(trans[None]) |
|
while True: |
|
subseq = seq[max(0, len(seq)-n):] |
|
options = trans[subseq] |
|
next = choice(options) |
|
if not next: |
|
break |
|
seq += next |
|
return seq |
|
|
|
|
|
def test(): |
|
import sys, random, getopt |
|
args = sys.argv[1:] |
|
try: |
|
opts, args = getopt.getopt(args, '0123456789cdwq') |
|
except getopt.error: |
|
print('Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0]) |
|
print('Options:') |
|
print('-#: 1-digit history size (default 2)') |
|
print('-c: characters (default)') |
|
print('-w: words') |
|
print('-d: more debugging output') |
|
print('-q: no debugging output') |
|
print('Input files (default stdin) are split in paragraphs') |
|
print('separated blank lines and each paragraph is split') |
|
print('in words by whitespace, then reconcatenated with') |
|
print('exactly one space separating words.') |
|
print('Output consists of paragraphs separated by blank') |
|
print('lines, where lines are no longer than 72 characters.') |
|
sys.exit(2) |
|
histsize = 2 |
|
do_words = False |
|
debug = 1 |
|
for o, a in opts: |
|
if '-0' <= o <= '-9': histsize = int(o[1:]) |
|
if o == '-c': do_words = False |
|
if o == '-d': debug += 1 |
|
if o == '-q': debug = 0 |
|
if o == '-w': do_words = True |
|
if not args: |
|
args = ['-'] |
|
|
|
m = Markov(histsize, random.choice) |
|
try: |
|
for filename in args: |
|
if filename == '-': |
|
f = sys.stdin |
|
if f.isatty(): |
|
print('Sorry, need stdin from file') |
|
continue |
|
else: |
|
f = open(filename, 'r') |
|
with f: |
|
if debug: print('processing', filename, '...') |
|
text = f.read() |
|
paralist = text.split('\n\n') |
|
for para in paralist: |
|
if debug > 1: print('feeding ...') |
|
words = para.split() |
|
if words: |
|
if do_words: |
|
data = tuple(words) |
|
else: |
|
data = ' '.join(words) |
|
m.put(data) |
|
except KeyboardInterrupt: |
|
print('Interrupted -- continue with data read so far') |
|
if not m.trans: |
|
print('No valid input files') |
|
return |
|
if debug: print('done.') |
|
|
|
if debug > 1: |
|
for key in m.trans.keys(): |
|
if key is None or len(key) < histsize: |
|
print(repr(key), m.trans[key]) |
|
if histsize == 0: print(repr(''), m.trans['']) |
|
print() |
|
while True: |
|
data = m.get() |
|
if do_words: |
|
words = data |
|
else: |
|
words = data.split() |
|
n = 0 |
|
limit = 72 |
|
for w in words: |
|
if n + len(w) > limit: |
|
print() |
|
n = 0 |
|
print(w, end=' ') |
|
n += len(w) + 1 |
|
print() |
|
print() |
|
|
|
if __name__ == "__main__": |
|
test() |
|
|