ltg
/

File size: 2,073 Bytes
c45d283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import json;
import operator;
import os;
import sys;

from graph import Graph

def read(fp, text = None, robust = False):
  """Decode one graph per line of JSON from `fp` and yield it.

  Each line of `fp` is right-stripped, parsed with `json.loads()`, and handed
  to `Graph.decode()`.  When `text` is given and contains `graph.input` as a
  key, the associated value becomes `graph.id`.  Otherwise `text` is passed
  to `graph.add_input()` and every node anchor is recomputed by locating the
  substring it covered in the previous input string inside the new one.
  NOTE(review): presumably `add_input()` replaces `graph.input` with the raw
  string looked up in `text` -- confirm against `Graph.add_input()`.

  Parameters:
    fp: iterable of strings, one JSON-serialized graph each.
    text: optional container supporting `in` and indexing by `graph.input`;
      also forwarded to `graph.add_input()`.
    robust: forwarded unchanged to `Graph.decode()`.

  Yields:
    `(graph, None)` for every line that was processed without error.

  This is best-effort: any `Exception` raised while decoding or re-anchoring
  a line is reported on stderr (with the zero-based line index) and that
  line is skipped.
  """
  # Typographic characters and the ASCII renderings tried in their place when
  # a surface string is not found verbatim.  Written as escapes so that the
  # table survives a mis-decoded checkout:
  #   U+2018/U+2019 single quotes, U+201C/U+201D double quotes,
  #   U+2013 en dash, U+2014 em dash, U+2026 horizontal ellipsis.
  # A tuple (not a set) keeps the trial order fixed across runs; str hashing
  # is randomized per process, so a set would make ties unpredictable.
  variants = (("\u2018", "`"), ("\u2018", "'"), ("\u2019", "'"), ("`", "'"),
              ("\u201c", "\""), ("\u201d", "\""),
              ("\u2013", "--"), ("\u2013", "---"), ("\u2014", "---"),
              ("\u2026", "..."), ("\u2026", ". . ."))

  # String currently being searched and the cursor into it; both are shared
  # by compute() and anchor() below.
  source, i = None, 0

  def compute(form):
    """Find `form` in `source` at or after the cursor; return its span.

    Advances the cursor past the match.  Raises `Exception` when neither
    `form` nor any single-substitution variant of it can be found (or when
    the match is empty).
    """
    nonlocal i
    length = None
    j = source.find(form, i)
    if j >= i:
      i, length = j, len(form)
    else:
      # Pick the variant that matches earliest; on equal offsets prefer the
      # longer match, so that an en dash facing "---" consumes all three
      # hyphens rather than leaving one behind.
      best, width = len(source), 0
      for old, new in variants:
        if old not in form:
          # substitution would leave `form` unchanged, which already failed
          continue
        candidate = form.replace(old, new)
        j = source.find(candidate, i)
        if j < i:
          continue
        if j < best or (j == best and len(candidate) > width):
          best, width = j, len(candidate)
      if best < len(source):
        i, length = best, width
    if length:
      match = {"from": i, "to": i + length}
      i += length
      return match
    raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                    "".format(form, source[:i], source[i:], i))

  def anchor(graph, old, new):
    """Rewrite all anchors in `graph` from offsets in `old` to offsets in `new`."""
    nonlocal source, i
    # Collect the surface string of every distinct (from, to) span.
    strings = dict()
    for node in graph.nodes:
      for span in node.anchors or ():
        start, end = span["from"], span["to"]
        strings[(start, end)] = old[start:end]
    # Re-locate the spans left to right, so the cursor only moves forward.
    source, i = new, 0
    for key in sorted(strings):
      strings[key] = compute(strings[key])
    # Anchors that shared a span before share the same new span dict.
    for node in graph.nodes:
      for j, span in enumerate(node.anchors or ()):
        node.anchors[j] = strings[(span["from"], span["to"])]

  for j, line in enumerate(fp):
    try:
      graph = Graph.decode(json.loads(line.rstrip()), robust = robust)
      if text is not None:
        if graph.input in text:
          graph.id = text[graph.input]
        else:
          old = graph.input
          graph.add_input(text)
          anchor(graph, old, graph.input)
    except Exception as error:
      # deliberate best-effort: report the bad line and keep going
      print("codec.mrp.read(): ignoring line {}: {}"
            "".format(j, error), file = sys.stderr)
    else:
      yield graph, None