Spaces:
Runtime error
Runtime error
File size: 3,585 Bytes
8044721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import os.path;
import re;
from graph import Graph;
# One predication line: ``<id>:<label>[<arguments>]``.  The negative
# look-behinds keep a backslash-escaped ':', '[' or ']' from being taken
# as a delimiter.
EDS_MATCHER = re.compile(r'(.+?)(?<!\\):(.+)(?<!\\)\[(.*)(?<!\\)\]')

# Trailing ``{...}`` block at the end of a label.
PROPERTIES_MATCHER = re.compile(r"{(.+)}$")

# Trailing ``("...")`` constant argument at the end of a label; the closing
# quote must not be backslash-escaped.
CARG_MATCHER = re.compile(r'\(\"(.+)(?<!\\)"\)$')

# Trailing ``<from:to>`` pair of non-negative integers at the end of a label.
LNK_MATCHER = re.compile(r"<([0-9]+):([0-9]+)>$")
def read_instances(fp):
    """Parse EDS text from *fp* and yield one tuple per completed graph.

    Args:
        fp: An iterable of text lines.  If it has a ``name`` attribute whose
            base name (minus extension) is an integer, that integer is used
            as the initial sentence identifier.

    Yields:
        ``(sentence_id, top_handle, predicates)`` where ``sentence_id`` is
        either the integer taken from the file name or the text following
        ``#`` on a header line, ``top_handle`` is the text between ``{`` and
        the first ``:`` on the opening line, and ``predicates`` is a list of
        ``(node_id, label, arguments)`` with ``arguments`` a list of tuples
        obtained by whitespace-splitting each comma-separated argument.

    Raises:
        AssertionError: If a predication line does not match ``EDS_MATCHER``,
            a closing line is not exactly ``}``, or a graph is closed without
            an identifier, a top handle, or at least one predication.
        ValueError: If a line starting with ``{`` contains no ``:``.
    """
    top_handle, predicates = None, []

    # Best-effort default identifier taken from the file name.  Only the
    # failures this lookup can legitimately produce are ignored (no ``name``
    # attribute, a non-path name, a non-numeric stem), so that
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    sentence_id = None
    try:
        sentence_id = int(os.path.splitext(os.path.basename(fp.name))[0])
    except (AttributeError, TypeError, ValueError):
        pass

    # Only the first '}' after a '#' header closes a graph; any further '}'
    # lines are ignored until the next header re-arms this flag.
    first_curly = True
    for line in fp:
        line = line.strip()
        if not line:
            continue
        if line.startswith("#"):
            sentence_id = line[1:]
            first_curly = True
        elif line.startswith("{"):
            # str.index raises ValueError when ':' is absent, so no further
            # check on the returned position is needed.
            colon = line.index(":")
            top_handle = line[1:colon].strip()
        elif line.endswith("}"):
            assert len(line) == 1
            if first_curly:
                assert sentence_id is not None
                assert top_handle is not None
                assert len(predicates) > 0
                yield (sentence_id, top_handle, predicates)
                sentence_id, top_handle, predicates = None, None, []
                first_curly = False
        else:
            match = EDS_MATCHER.match(line)
            assert match is not None
            node_id, label, arguments = match.groups()
            # Drop blank chunks (e.g. after a trailing comma): they would
            # otherwise become empty tuples that cannot be unpacked into a
            # (relation, target) pair downstream.
            arguments = [
                tuple(arg.split()) for arg in arguments.split(",") if arg.strip()
            ]
            predicates.append((node_id, label.strip(), arguments))
def instance2graph(instance, reify=False, text=None):
    """Build a ``Graph`` from one parsed EDS instance.

    Args:
        instance: A ``(sentence_id, top, predicates)`` tuple, where each
            predicate is ``(handle, label, arguments)`` and each argument is
            a ``(relation, target_handle)`` pair.
        reify: When true, a constant argument found in a label becomes its
            own node linked from the owning node by a ``"CARG"`` edge;
            otherwise it is stored as a ``"CARG"`` property on that node.
        text: Optional input string; when truthy it is passed to
            ``graph.add_input``.

    Returns:
        The populated ``Graph``, with the node for ``top`` flagged
        ``is_top``.

    Raises:
        AssertionError: If a handle occurs more than once in ``predicates``.
        KeyError: If ``top`` or an argument target names an unknown handle.
    """
    sentence_id, top, predicates = instance
    graph = Graph(sentence_id, flavor=1, framework="eds")
    if text:
        graph.add_input(text)

    handle2node = {}
    for handle, label, _ in predicates:
        assert handle not in handle2node

        # Each matcher is anchored at the end of the label, so the suffixes
        # are peeled off right to left: properties, then CARG, then the
        # character span.
        properties = None
        values = None
        match = PROPERTIES_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            fields = match.group(1).replace(",", "").split()
            # The first field is skipped (presumably the variable type --
            # TODO confirm); the rest alternate property name, value.
            properties = fields[1::2]
            values = fields[2::2]

        carg = None
        match = CARG_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            if reify:
                carg = match.group(1)
            else:
                # A node may carry a CARG without any property block, in
                # which case properties/values are still None here.
                properties = ["CARG"] + (properties or [])
                values = [match.group(1)] + (values or [])

        anchors = None
        match = LNK_MATCHER.search(label)
        if match:
            label = label[:match.start()]
            anchors = [{"from": int(match.group(1)), "to": int(match.group(2))}]

        node = graph.add_node(
            label=label, properties=properties, values=values, anchors=anchors
        )
        handle2node[handle] = node

        # carg is only ever set in reify mode.
        if carg and reify:
            carg_node = graph.add_node(label=carg, anchors=anchors)
            graph.add_edge(node.id, carg_node.id, "CARG")

    handle2node[top].is_top = True

    # Edges are added in a second pass so that arguments may refer to nodes
    # declared later in the predicate list.
    for src_handle, _, arguments in predicates:
        src = handle2node[src_handle].id
        for relation, tgt_handle in arguments:
            tgt = handle2node[tgt_handle].id
            graph.add_edge(src, tgt, relation)
    return graph
def read(fp, reify = False, text = None):
    """Lazily convert every EDS instance in *fp* into a graph.

    Each instance produced by ``read_instances(fp)`` is handed to
    ``instance2graph`` together with *reify* and *text*.

    Yields:
        ``(graph, None)`` pairs, one per instance; the second element is
        always ``None``.
    """
    for parsed in read_instances(fp):
        converted = instance2graph(parsed, reify, text)
        yield converted, None
|