File size: 2,073 Bytes
c45d283 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import json;
import operator;
import os;
import sys;
from graph import Graph
def read(fp, text=None, robust=False):
    """Lazily decode one graph per line of *fp*.

    Each line is stripped of trailing whitespace, parsed with ``json.loads``
    and handed to ``Graph.decode``.  When *text* is given, the graph is
    additionally reconciled with it: if the graph's input string is a key of
    *text*, the mapped value becomes the graph id; otherwise
    ``graph.add_input(text)`` is called and every node anchor is re-mapped
    from the old input string onto the new one.

    Args:
        fp: iterable of lines, each holding one JSON-serialised graph.
        text: optional container consulted via ``in`` / ``[]`` and passed to
            ``Graph.add_input`` (presumably a mapping -- confirm against
            ``Graph.add_input``).
        robust: forwarded unchanged to ``Graph.decode``.

    Yields:
        ``(graph, None)`` for every line that was processed successfully.

    Any ``Exception`` raised while processing a line (bad JSON, decoding
    errors, anchors that cannot be re-located) is reported on ``sys.stderr``
    and that line is skipped; it never propagates to the caller.
    """
    # State shared by the two helpers: the string anchors are being located
    # in, and the offset up to which it has already been consumed.
    target, cursor = None, 0

    # Substitutions tried, one at a time, when a surface form does not occur
    # verbatim in the target.  Written as escapes so the table survives
    # re-encoding of this file.  Kept as an ordered tuple (not a set) so the
    # outcome does not depend on hash ordering.
    variants = (
        ("\u2018", "`"),      # left single quotation mark
        ("\u2018", "'"),
        ("\u2019", "'"),      # right single quotation mark
        ("`", "'"),
        ("\u201c", "\""),     # left double quotation mark
        ("\u201d", "\""),     # right double quotation mark
        ("\u2013", "--"),     # en dash
        ("\u2013", "---"),
        ("\u2014", "---"),    # em dash
        ("\u2026", "..."),    # horizontal ellipsis
        ("\u2026", ". . ."),
    )

    def compute(form):
        """Locate *form* in the target at or after the cursor.

        Returns a ``{"from": ..., "to": ...}`` dict for the match and moves
        the cursor past it; raises ``Exception`` when nothing matches.
        """
        nonlocal cursor
        length = None
        start = target.find(form, cursor)
        if start >= cursor:
            cursor, length = start, len(form)
        else:
            # Fall back to the punctuation variants: take the earliest
            # match, and among matches at the same offset the longest one
            # (so "---" is not anchored as "--" plus a stray hyphen).
            best, best_length = len(target), 0
            for old, new in variants:
                if old not in form:
                    # Replacement would be a no-op, and the unmodified form
                    # is already known not to occur.
                    continue
                candidate = form.replace(old, new)
                start = target.find(candidate, cursor)
                if start < cursor:
                    continue
                if start < best or (start == best
                                    and len(candidate) > best_length):
                    best, best_length = start, len(candidate)
            if best < len(target):
                cursor, length = best, best_length
        # NOTE(review): a zero-length form yields length == 0, which is
        # falsy and therefore reported as a failure -- confirm this is
        # intended before changing it to an ``is not None`` test.
        if length:
            match = {"from": cursor, "to": cursor + length}
            cursor += length
            return match
        raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                        "".format(form, target[:cursor],
                                  target[cursor:], cursor))

    def anchor(graph, old, new):
        """Re-map all node anchors of *graph* from string *old* onto *new*."""
        nonlocal target, cursor
        # Collect the surface string of every distinct (from, to) span.
        strings = {}
        for node in graph.nodes:
            for span in node.anchors or ():
                start, end = span["from"], span["to"]
                strings[(start, end)] = old[start:end]
        # Locate the spans left to right in the new string; compute()
        # advances the shared cursor after each match.
        target, cursor = new, 0
        for key in sorted(strings):
            strings[key] = compute(strings[key])
        # Swap each old anchor for the match computed for its span.
        for node in graph.nodes:
            for index, span in enumerate(node.anchors or ()):
                node.anchors[index] = strings[(span["from"], span["to"])]

    for j, line in enumerate(fp):
        try:
            graph = Graph.decode(json.loads(line.rstrip()), robust=robust)
            if text is not None:
                if graph.input in text:
                    graph.id = text[graph.input]
                else:
                    old = graph.input
                    graph.add_input(text)
                    anchor(graph, old, graph.input)
        except Exception as error:
            print("codec.mrp.read(): ignoring line {}: {}"
                  "".format(j, error), file=sys.stderr)
        else:
            # Yield outside the ``try`` so that an exception thrown into the
            # generator by the consumer is not mistaken for a bad line.
            yield graph, None
|