Spaces:
Runtime error
Runtime error
import os.path; | |
import re; | |
from graph import Graph; | |
# One node line of an EDS block: ``<id>:<label>[<arguments>]``.  The
# ``(?<!\\)`` look-behinds make sure the ``:``, ``[`` and ``]`` delimiters
# are not preceded by a backslash (i.e. are not escaped).
EDS_MATCHER = re.compile(r'(.+?)(?<!\\):(.+)(?<!\\)\[(.*)(?<!\\)\]')
# Trailing ``{...}`` block at the end of a label; group 1 is its content.
PROPERTIES_MATCHER = re.compile(r"{(.+)}$")
# Trailing ``("...")`` constant argument at the end of a label; group 1 is
# the text between the quotes (closing quote must not be backslash-escaped).
CARG_MATCHER = re.compile(r'\(\"(.+)(?<!\\)"\)$')
# Trailing ``<from:to>`` pair of integers at the end of a label.
LNK_MATCHER = re.compile(r"<([0-9]+):([0-9]+)>$")
def read_instances(fp):
    """Yield ``(sentence_id, top_handle, predicates)`` for each EDS block in *fp*.

    *fp* is iterated line by line.  Lines are stripped and dispatched as:

    * empty line: ignored;
    * ``#<id>``: sets the sentence id (kept as a string) for the next block;
    * line starting with ``{``: opens a block; the text between ``{`` and the
      first ``:`` is the top handle;
    * ``}``: closes the block and yields the collected instance;
    * anything else: a node line matched against ``EDS_MATCHER``.

    ``predicates`` is a list of ``(node_id, label, arguments)`` where
    ``arguments`` is a list of tuples obtained by whitespace-splitting each
    comma-separated argument.

    If ``fp.name`` has a base name (minus extension) that parses as an
    integer, that integer is the sentence id until a ``#`` line overrides it
    or the first instance has been yielded.

    Raises ``AssertionError`` on malformed blocks and ``ValueError`` when an
    opening ``{`` line contains no ``:``.
    """
    top_handle, predicates = None, []
    sentence_id = None
    # Best effort only: objects without a usable ``name`` (missing attribute,
    # non-path value, non-numeric stem) simply provide no default id.
    try:
        sentence_id = int(os.path.splitext(os.path.basename(fp.name))[0])
    except (AttributeError, TypeError, ValueError):
        pass
    first_curly = True
    for line in fp:
        line = line.strip()
        if not line:
            continue
        if line.startswith("#"):
            sentence_id = line[1:]
            first_curly = True
        elif line.startswith("{"):
            # str.index raises ValueError itself when ':' is absent, so no
            # separate check on the returned position is needed.
            top_handle = line[1:line.index(":")].strip()
        elif line.endswith("}"):
            assert len(line) == 1
            # Only the first closing brace after a '#' line (or after the
            # start of input) emits an instance.
            if first_curly:
                assert sentence_id is not None
                assert top_handle is not None
                assert len(predicates) > 0
                yield (sentence_id, top_handle, predicates)
                sentence_id, top_handle, predicates = None, None, []
                first_curly = False
        else:
            match = EDS_MATCHER.match(line)
            assert match is not None
            node_id, label, arguments = match.groups()
            arguments = [
                tuple(arg.split()) for arg in arguments.split(',') if len(arg) > 0
            ]
            predicates.append((node_id, label.strip(), arguments))
def instance2graph(instance, reify=False, text=None):
    """Build a ``Graph`` from one ``(sentence_id, top, predicates)`` instance.

    Each predicate ``(handle, label, arguments)`` becomes one node.  Suffixes
    are peeled off the end of the label in this order:

    1. ``{...}`` block: commas are removed, the content is whitespace-split,
       the first field is skipped and the remaining fields are taken
       alternately as property names and property values;
    2. ``("...")`` constant argument: with ``reify`` false it is prepended to
       the node's properties/values under the name ``"CARG"``; with ``reify``
       true it becomes a separate node (same anchors) linked from the
       predicate node by a ``"CARG"`` edge;
    3. ``<from:to>``: converted to a single anchor dict with integer
       ``"from"`` and ``"to"``.

    After all nodes exist, the node for *top* is flagged ``is_top`` and one
    edge is added per ``(relation, target_handle)`` argument.

    *text*, when truthy, is passed to ``graph.add_input``.  Returns the graph.

    Raises ``AssertionError`` on a duplicate handle and ``KeyError`` when
    *top* or an argument target does not name a predicate handle.
    """
    sentence_id, top, predicates = instance
    graph = Graph(sentence_id, flavor=1, framework="eds")
    if text:
        graph.add_input(text)

    handle2node = {}
    for handle, label, _ in predicates:
        assert handle not in handle2node

        properties = values = None
        match = PROPERTIES_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            fields = match.group(1).replace(",", "").split()
            # fields[0] is skipped; names and values then alternate.
            properties = fields[1::2]
            values = fields[2::2]

        carg = None
        match = CARG_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            if not reify:
                # A label may carry a constant argument without any {...}
                # block, in which case properties/values are still None.
                properties = ["CARG"] + (properties or [])
                values = [match.group(1)] + (values or [])
            else:
                carg = match.group(1)

        anchors = None
        match = LNK_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            anchors = [{"from": int(match.group(1)), "to": int(match.group(2))}]

        node = graph.add_node(
            label=label, properties=properties, values=values, anchors=anchors
        )
        handle2node[handle] = node
        if carg and reify:
            carg_node = graph.add_node(label=carg, anchors=anchors)
            graph.add_edge(node.id, carg_node.id, "CARG")

    handle2node[top].is_top = True

    # Edges are added in a second pass so that targets defined later in the
    # block are already present in handle2node.
    for src_handle, _, arguments in predicates:
        src = handle2node[src_handle].id
        for relation, tgt_handle in arguments:
            tgt = handle2node[tgt_handle].id
            graph.add_edge(src, tgt, relation)
    return graph
def read(fp, reify=False, text=None):
    """Lazily yield ``(graph, None)`` for every EDS instance found in *fp*.

    Each instance produced by ``read_instances(fp)`` is converted with
    ``instance2graph(instance, reify, text)``; the second tuple element is
    always ``None``.
    """
    graphs = (
        instance2graph(parsed, reify, text) for parsed in read_instances(fp)
    )
    for graph in graphs:
        yield graph, None