Spaces:
Runtime error
Runtime error
import re; | |
import sys; | |
import codec.mrp; | |
from graph import Edge, Graph; | |
from smatch.amr import AMR; | |
# Placeholder that amr_lines() substitutes for a stashed constant, e.g.
# ``__3__``; the digits are the position of the constant in the stash.
STASH = re.compile(r'__[0-9]+__')

# Node identifier of the form ``x<N>`` optionally followed by ``_<M>``
# suffixes (e.g. ``x12_3_4``).  Group 1 is <N>; group 2 is the complete,
# possibly empty, suffix string.  The inner group is non-capturing: the
# previous ``(:?...)`` spelling was a typo that additionally accepted a
# stray ``:`` inside the suffix.
INDEX = re.compile(r'x([0-9]+)((?:_[0-9]+)*)')
def amr_lines(fp, camr, alignment):
    """Split a stream of AMR text into one record per graph.

    Graphs are separated by blank lines.  Lines starting with ``#`` are
    treated as metadata: ``# ::id`` supplies the graph identifier and
    ``# ::snt`` the sentence.  All other lines are collected and joined with
    single spaces.

    Args:
        fp: iterable of text lines.
        camr: when true, each graph line is rewritten before collection:
            a parenthesised group that directly follows a ``:role`` token is
            dropped, and every ``x<N>/<value>`` token is replaced by a
            ``__<k>__`` placeholder whose value is recorded in the stash.
        alignment: passed to read_alignment(); one alignment record is
            consumed per graph.

    Yields:
        Tuples ``(id, snt, amr, stash, mapping)`` where ``stash`` is a list
        of ``(index, name, value)`` triples for the placeholders in ``amr``
        and ``mapping`` is the alignment record whose id equals ``id`` (or
        None when there is none or the ids differ).

    Raises:
        Exception: when one ``x<N>`` name is stashed with two different
            values inside the same graph.
    """
    id, snt, lines = None, None, []
    stash = dict()

    def _stash_(match):
        # Replace an ``x<N>/<value>`` token by a numbered placeholder,
        # remembering the value under its ``x<N>`` name.
        prefix, constant, suffix = match.groups()
        fields = constant.split("/")
        name, value = fields[0], fields[1]
        if name in stash:
            if stash[name][2] != value:
                # The quotation marks were mis-decoded in the previous
                # revision; they are spelled as escapes here so they survive
                # re-encoding.  NOTE(review): assumed to be U+2018/U+2019.
                raise Exception("amr_lines(): "
                                "ambiguously defined constant in graph #{}, "
                                "\u2018{}\u2019: \u2018{}\u2019 "
                                "vs. \u2018{}\u2019; exit."
                                "".format(id, name, stash[name][2], value))
        else:
            stash[name] = (len(stash), name, value)
        return "{}__{}__{}".format(prefix, stash[name][0], suffix)

    def _flush_():
        # Build the record for the graph collected so far, pulling the next
        # alignment record along with it.
        i = mapping = None
        try:
            i, mapping = next(alignment)
        except Exception:
            # Best effort: a missing alignment is reported, not fatal.
            print("amr_lines(): missing alignment for graph #{}."
                  "".format(id), file=sys.stderr)
        if mapping is None or i != id:
            mapping = None
        # Hand out a snapshot: the stash itself is cleared for the next
        # graph, which would otherwise empty a previously yielded view.
        return id, snt, " ".join(lines), list(stash.values()), mapping

    alignment = read_alignment(alignment)
    for line in fp:
        line = line.strip()
        if len(line) == 0:
            if len(lines) > 0:
                yield _flush_()
            # NOTE(review): the original indentation was lost; the reset is
            # assumed to run on every blank line.  The sentence is reset
            # together with the id so that a graph without ``# ::snt`` does
            # not inherit the previous graph's text.
            id, snt, lines = None, None, []
            stash.clear()
        elif line.startswith("#"):
            if line.startswith("# ::id"):
                id = line.split()[2]
            if line.startswith("# ::snt"):
                snt = line[8:].strip()
        else:
            if camr:
                line = re.sub(r'((?:^|[ \t]):[^( ]+)\([^ \t]*\)([ \t]|$)',
                              "\\1\\2", line, count=0)
                line = re.sub(r'(^|[ \t])(x[0-9]+/[^ \t]+)([ \t]|$)',
                              _stash_, line, count=0)
            lines.append(line)
    # The last graph need not be followed by a blank line.
    if len(lines) > 0:
        yield _flush_()
def read_alignment(stream):
    """Iterate over per-graph alignment records.

    Records are separated by blank lines.  A ``# ::id`` comment supplies the
    record id; other ``#`` lines are ignored.  Every remaining line that
    consists of exactly two tab-separated fields is read as
    ``<node> [<role>]<TAB><start>-<end>``; other lines are skipped.  A
    leading ``:`` is stripped from the role, and ``:wiki`` roles are dropped.

    Args:
        stream: iterable of text lines, or None.

    Yields:
        ``(id, alignment)`` pairs, where ``alignment`` maps a node name to a
        dict from path tuples (``()`` for the node itself, ``(role,)``
        otherwise) to the set of integers ``start..end`` inclusive.  Each
        record is a fresh dict, so callers may keep it.  One final pair is
        always yielded after the stream is exhausted.  When ``stream`` is
        None, ``(None, None)`` is yielded without end.
    """
    if stream is None:
        while True:
            yield None, None
    else:
        id = None
        alignment = dict()
        for line in stream:
            line = line.strip()
            if len(line) == 0:
                yield id, alignment
                id = None
                # Start a new dict rather than clearing the one just
                # yielded, which the consumer may still hold on to.
                alignment = dict()
            elif line.startswith("#"):
                if line.startswith("# ::id"):
                    id = line.split()[2]
            else:
                fields = line.split("\t")
                if len(fields) == 2:
                    start, end = fields[1].split("-")
                    span = set(range(int(start), int(end) + 1))
                    fields = fields[0].split()
                    if len(fields) > 1 and fields[1].startswith(":"):
                        fields[1] = fields[1][1:]
                        if fields[1] == "wiki":
                            continue
                    bucket = alignment.setdefault(fields[0], dict())
                    path = tuple(fields[1:])
                    # Repeated lines for one path accumulate their spans.
                    bucket.setdefault(path, set()).update(span)
        yield id, alignment
def amr2graph(id, amr, text, stash, camr = False,
              full = False, reify = False, quiet = False, alignment = None):
    """Convert one parsed AMR into a Graph, plus an optional anchoring overlay.

    Args:
        id: identifier given to the resulting graph(s).
        amr: parsed AMR; its ``nodes``, ``node_values``, ``attributes`` and
            ``relations`` sequences are read in parallel.
        text: input string for the graph; skipped when empty or None.
        stash: iterable of ``(index, name, value)`` triples used to resolve
            ``__<index>__`` placeholders in attribute values.
        camr: when true, the input is split on single spaces, every token is
            anchored to character offsets, and nodes receive anchors through
            find_anchors().
        full: when true, ``wiki`` attributes are kept instead of dropped.
        reify: when true, each attribute becomes a node of its own attached
            by an edge; otherwise it becomes a (lower-cased) node property.
        quiet: forwarded to ``Graph.add_input()``.
        alignment: mapping as produced by read_alignment(), or None.

    Returns:
        ``(graph, overlay)``; ``overlay`` is None when ``alignment`` is None.

    Raises:
        Exception: when a token cannot be located in the input text.
        StopIteration: escapes from the ``next()`` lookups when a placeholder
            index is not in ``stash`` or an aligned role matches neither a
            property nor an outgoing edge.
    """
    graph = Graph(id, flavor = 2, framework = "amr")
    node2id = dict()
    anchoring = list()

    # Alternative spellings to try when a token is not found verbatim.
    # The literals were mis-decoded in the previous revision and are
    # written as escapes so they survive re-encoding.  A tuple (rather than
    # a set) keeps the search order, and hence tie-breaking, deterministic.
    # NOTE(review): U+2026 is certain from the surviving bytes; the exact
    # quote and dash code points are reconstructed - confirm upstream.
    variants = (
        ("\u2018", "`"), ("\u2018", "'"), ("\u2019", "'"), ("`", "'"),
        ("\u201c", "\""), ("\u201d", "\""),
        ("\u2013", "--"), ("\u2013", "---"), ("\u2014", "---"),
        ("\u2026", "..."), ("\u2026", ". . ."),
    )

    i = 0

    def _anchor_(form):
        # Locate ``form`` at or after the cursor ``i`` and advance the
        # cursor past it; returns the character span as a dict.
        nonlocal i
        m = None
        j = graph.input.find(form, i)
        if j >= i:
            i, m = j, len(form)
        else:
            # Take the left-most match among the alternative spellings.
            k, l = len(graph.input), 0
            for old, new in variants:
                candidate = form.replace(old, new)
                j = graph.input.find(candidate, i)
                if j >= i and j < k:
                    k, l = j, len(candidate)
            if k < len(graph.input):
                i, m = k, l
        if m:
            match = {"from": i, "to": i + m}
            i += m
            return match
        # Report the token as given, not whichever variant was tried last.
        raise Exception("failed to anchor |{}| in |{}|{}| ({})"
                        "".format(form, graph.input[:i],
                                  graph.input[i:], i))

    if text:
        graph.add_input(text, quiet = quiet)
        if camr:
            for token in graph.input.split(" "):
                anchoring.append(_anchor_(token))

    # Nodes are numbered consecutively; reified attribute nodes take the
    # identifiers immediately following their owner.
    next_id = 0
    for n, v, a in zip(amr.nodes, amr.node_values, amr.attributes):
        j = next_id
        node2id[n] = j
        top = any(key == "TOP" for key, _ in a)
        anchors = find_anchors(n, anchoring) if camr else None
        node = graph.add_node(j, label = v, top = top, anchors = anchors)
        next_id += 1
        for key, val in a:
            if STASH.match(val) is not None:
                # Swap the ``__<index>__`` placeholder for its stashed value.
                index = int(val[2:-2])
                val = next(value for position, _, value in stash
                           if position == index)
            if key != "TOP" and (key not in {"wiki"} or full):
                # Drop a trailing broken-bar marker (U+00A6).
                if val.endswith("\u00a6"):
                    val = val[:-1]
                if reify:
                    graph.add_node(next_id, label = val)
                    graph.add_edge(j, next_id, key)
                    next_id += 1
                else:
                    #
                    # _fix_me_
                    # this assumes that properties are unique. (1-apr-20; oe)
                    #
                    node.set_property(key.lower(), str(val).lower())

    for src, r in zip(amr.nodes, amr.relations):
        for label, tgt in r:
            normal = None
            if label == "mod":
                normal = "domain"
            # The two exempt labels used to be written without a comma,
            # which concatenated them into a single string and so exempted
            # neither.
            elif label.endswith("-of-of") \
                 or (label.endswith("-of")
                     and label not in {"consist-of", "subset-of"}
                     and not label.startswith("prep-")):
                normal = label[:-3]
            graph.add_edge(node2id[src], node2id[tgt], label, normal)

    overlay = None
    if alignment is not None:
        overlay = Graph(id, flavor = -1, framework = "anchoring")
        # First pass: node-level alignments (empty path).
        for name in alignment:
            for path, span in alignment[name].items():
                if len(path) == 0:
                    anchors = [{"#": token} for token in span]
                    overlay.add_node(node2id[name], anchors = anchors)
        # Second pass: single-role alignments, attached either to the
        # property of that name or to the matching outgoing edge.
        for name in alignment:
            nid = node2id[name]
            for path, span in alignment[name].items():
                if len(path) == 1:
                    key = path[0].lower()
                    node = overlay.find_node(nid)
                    if node is None:
                        node = overlay.add_node(nid)
                    reference = graph.find_node(nid)
                    anchors = [{"#": token} for token in span]
                    if reference.properties is not None \
                       and key in reference.properties:
                        node.set_anchoring(key, anchors)
                    else:
                        edge = next(edge for edge in graph.edges
                                    if edge.lab.lower() == key
                                    and edge.src == nid)
                        overlay.edges.add(Edge(edge.id, None, None, None,
                                               anchors = anchors))
                elif len(path) > 1:
                    print("amr2graph(): "
                          "ignoring alignment path {} on node #{} ({})"
                          "".format(path, nid, name))

    return graph, overlay
def find_anchors(index, anchors):
    """Collect the anchors referred to by a node identifier.

    Every ``x<N>`` occurrence in ``index`` (as matched by INDEX) selects
    entry ``N - 1`` of ``anchors``.  Without a suffix that entry is used as
    is; with ``_<M>`` suffixes, one single-character span is produced per
    ``<M>``, counted from the entry's ``"from"`` offset (1-based).

    Args:
        index: node identifier string.
        anchors: list of ``{"from": ..., "to": ...}`` dicts.

    Returns:
        A list of anchor dicts, or None when nothing was selected.
    """
    result = list()
    for match in INDEX.finditer(index):
        i, suffix = int(match.group(1)) - 1, match.group(2)
        # Skip references outside the list.  The lower bound matters too:
        # ``x0`` would otherwise index -1 and silently pick the last entry.
        if not 0 <= i < len(anchors):
            continue
        anchor = anchors[i]
        if suffix != "":
            start = anchor["from"]
            for field in suffix[1:].split("_"):
                j = int(field)
                result.append({"from": start + j - 1, "to": start + j})
        else:
            result.append(anchor)
    return result if len(result) > 0 else None
def convert_amr_id(id):
    """Derive a numeric identifier string from a recognised graph id.

    Two id shapes are recognised anywhere inside ``id``:

    * ``wsj_<a>.<b>`` gives ``2`` + ``<a>`` padded to four digits + ``<b>``
      padded to three digits;
    * ``lpp_1943.<a>`` gives ``1`` + ``<a>`` padded to four digits + ``0``.

    Raises:
        Exception: when ``id`` matches neither shape.
    """
    wsj = re.search(r'wsj_([0-9]+)\.([0-9]+)', id)
    if wsj:
        major = int(wsj.group(1))
        minor = int(wsj.group(2))
        return "2%04d%03d" % (major, minor)

    lpp = re.search(r'lpp_1943\.([0-9]+)', id)
    if not lpp:
        raise Exception('Could not convert id: %s' % id)
    return "1%04d0" % (int(lpp.group(1)))
def read(fp, full = False, reify = False, camr = False,
         text = None, alignment = None,
         quiet = False, trace = 0):
    """Read AMR graphs from ``fp`` and convert each one.

    Args:
        fp: iterable of text lines, handed to amr_lines().
        full, reify, camr, quiet: forwarded to amr2graph(); ``camr`` is also
            forwarded to amr_lines().
        text: input string used for every graph; when empty or None, the
            sentence recorded with each graph is used instead.
        alignment: alignment stream handed to amr_lines(), or None.
        trace: when true, each graph id and its AMR line are echoed to
            standard error before parsing.

    Yields:
        ``(graph, overlay)`` pairs as returned by amr2graph().

    Raises:
        Exception: when the AMR parser returns nothing for a graph.
    """
    n = 0
    for id, snt, amr_line, stash, mapping in amr_lines(fp, camr, alignment):
        if trace:
            print("{}: {}".format(id, amr_line), file=sys.stderr)
        amr = AMR.parse_AMR_line(amr_line)
        if not amr:
            # The quotation marks were mis-decoded in the previous revision.
            # NOTE(review): assumed to be U+2018/U+2019.
            raise Exception("failed to parse #{} \u2018{}\u2019; exit."
                            "".format(id, amr_line))
        if id is not None:
            try:
                id = convert_amr_id(id)
            except Exception:
                # Deliberate best effort: ids of an unrecognised shape are
                # kept unchanged.  Only ordinary exceptions are suppressed,
                # so interrupts still propagate.
                pass
        else:
            # Graphs without an id are numbered consecutively.
            # NOTE(review): the original indentation was lost; the counter
            # is assumed to advance only for graphs that lack an id.
            id = n
            n += 1
        graph, overlay = amr2graph(id, amr, text or snt, stash,
                                   camr, full, reify, quiet, mapping)
        yield graph, overlay