Spaces:
Runtime error
Runtime error
File size: 10,271 Bytes
8044721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
from operator import itemgetter;
import os.path;
import re;
import sys;
from graph import Graph;
#
# display symbols for the three-letter operator codes that can appear in a
# condition clause; read() looks up the code and uses the symbol as the label
# of the resulting edge (or of the reified node).
#
# NOTE(review): several values below are visibly identical (e.g. for APX, SXY,
# STI, STO, and TAB), which looks like mis-decoded UTF-8; confirm the intended
# symbols against the upstream source.
#
conditions = {
    "APX": "โ",
    "EQU": "=",
    "LEQ": "โค",
    "LES": "<",
    "NEQ": "โ ",
    "SXN": "ยซ",
    "SXP": "ยป",
    "SXY": "โ",
    "SZN": "\\",
    "SZP": "/",
    "STI": "โ",
    "STO": "โ",
    "SY1": "โฅ",
    "SY2": "โฎ",
    "TAB": "โ",
    "TPR": "โบ",
}

#
# clause patterns; when parsing, they are tried from most to least specific.
# most clauses may end in a comment that carries a character span of the form
# [from...to], which the referent, condition, role, and concept patterns
# capture as their final two groups.
#

# second header line of a block: captures the part and document numbers.
id_matcher = re.compile(
    r'^%%% bin/boxer --input (?:[^/]+/)?p([0-9]+)/d([0-9]+)/')

# box, REF, referent.
referent_matcher = re.compile(
    r'^(b[0-9]+) REF ([enpstx][0-9]+)'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# box, operator code, and two arguments (referents or quoted constants).
condition_matcher = re.compile(
    r'^(b[0-9]+)'
    r' (EQU|NEQ|APX|LE[SQ]|TPR|TAB|S[ZX][PN]|ST[IO]|SY[12]|SXY)'
    r' ([enpstx][0-9]+|"[^"]+") ([enpstx][0-9]+|"[^"]+")'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# box, role name, source referent, target (referent or quoted constant).
role_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) ([enpstx][0-9]+) ([enpstx][0-9]+|"[^"]+")'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# box, lemma, quoted sense, referent.
concept_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) ("[^ ]+") ([enpstx][0-9]+)'
    r' +%(?: .* \[([0-9]+)\.\.\.([0-9]+)\])?$')

# box, relation name, one or two further boxes; the span is not captured.
discourse_matcher = re.compile(
    r'^(b[0-9]+) ([^ ]+) (b[0-9]+)(?: (b[0-9]+))?'
    r' +%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')

# a line that holds nothing but a comment.
empty_matcher = re.compile(r'^ *%(?: .* \[[0-9]+\.\.\.[0-9]+\])?$')
def read(fp, text = None, full = False, reify = False, trace = 0, strict = 0):
    """Parse blocks of PMB-style clauses and yield one graph per block.

    Each block opens with three header lines: the first is skipped, the
    second must match ``id_matcher`` and supplies the graph identifier, and
    the third supplies the input string (the right-stripped line minus its
    first five characters and its last character).  Every following line, up
    to the next empty line or the end of input, is a clause that is matched
    in turn against the referent, condition, role, concept, and discourse
    patterns; comment-only lines are ignored.

    Empty lines that do not terminate a graph (leading or repeated blank
    lines, or a blank line inside the header) are skipped and merely reset
    the per-block state.

    Args:
        fp: iterable of text lines, e.g. an open file.
        text: optional mapping from graph identifier to input string; when
            the identifier is present, it takes precedence over the header.
        full: in reified mode, attach every reified node to its box.
        reify: represent conditions (node type 3) and roles (node type 2) as
            nodes of their own rather than as labelled edges.
        trace: when true, echo each input line; above 1, also report
            ternary discourse relations.
        strict: when true, raise if a concept clause targets a node that
            already carries a label and, in reified mode, if a referent is
            mentioned in a box other than the one that first claimed it.

    Yields:
        ``(graph, None)`` pairs, one per block of clauses.

    Raises:
        Exception: on a header without identifier, on a line that matches no
            clause pattern, and on the ``strict`` violations above.
    """

    #
    # the helpers below (other than finish()) read graph, mapping, scopes, and
    # finis from the enclosing scope at call time, i.e. they always operate on
    # the block that is currently being parsed.
    #

    def finish(graph, mapping, finis, scopes):
        #
        # complete one graph: settle box membership for reified nodes (or
        # report stray referents), and flag the top-level boxes.
        #
        if reify:
            for box, referent, node in finis:
                #
                # in full reification mode, or when the corresponding box
                # cannot be easily inferred for a reified role (including when
                # the source node is a constant, as e.g. in a 'future' temporal
                # discourse conditions), add an explicit box membership edge.
                #
                # NOTE(review): scopes[referent] assumes every non-constant
                # source has been scoped by a REF or concept clause; confirm.
                #
                if full \
                   or referent[0] == referent[-1] == "\"" \
                   or box not in scopes[referent]:
                    graph.add_edge(mapping[box].id, node.id, "โ")
        else:
            for referent in scopes:
                if len(scopes[referent]) > 1:
                    print("pbm.read(): [graph #{}] stray referent โ{}โ in boxes {}."
                          "".format(graph.id, referent, scopes[referent]),
                          file=sys.stderr)
        #
        # after the fact, mark all boxes that structurally are roots as top
        # nodes.
        #
        for node in graph.nodes:
            if node.type == 0 and node.is_root(): node.is_top = True

    def span(start, end):
        #
        # turn the optional character offsets of a clause into an anchor.
        #
        if start is None or end is None: return None
        return {"from": int(start), "to": int(end)}

    def claim(referent, box, n):
        #
        # record the box of a referent on its first mention; a later mention
        # from a different box is an error in strict reified mode only.
        #
        if referent not in scopes:
            scopes[referent] = {box}
        elif strict and box not in scopes[referent] and reify:
            raise Exception("pbm.read(): "
                            "[line {}] stray referent โ{}โ in box โ{}โ "
                            "(instead of โ{}โ); exit."
                            "".format(n, referent, box, scopes[referent]))

    def ensure(name, anchor):
        #
        # create the node for an argument on its first mention; quoted
        # constants are labelled with their own text and anchored.
        #
        if name in mapping: return
        if name[0] == "\"" and name[-1] == "\"":
            mapping[name] \
                = graph.add_node(label = name,
                                 anchors = [anchor] if anchor else None)
        else:
            mapping[name] = graph.add_node()

    def relate(box, label, source, target, anchor, kind):
        #
        # connect source to target, either through a reified node of type
        # kind (whose box membership is settled later, in finish()) or
        # through a labelled edge.
        #
        ensure(source, anchor)
        ensure(target, anchor)
        if reify:
            if box not in mapping: mapping[box] = graph.add_node(type = 0)
            node = graph.add_node(label = label, type = kind)
            finis.append((box, source, node))
            graph.add_edge(mapping[source].id, node.id, None)
            graph.add_edge(node.id, mapping[target].id, None)
        else:
            if source in scopes: scopes[source].add(box)
            else: scopes[source] = {box}
            graph.add_edge(mapping[source].id, mapping[target].id, label)

    graph = None
    graph_id = None
    sentence = None
    mapping = dict()
    scopes = dict()
    finis = list()
    header = 3
    for i, line in enumerate(fp, 1):
        line = line.rstrip()
        if trace: print("{}: {}".format(i, line))
        #
        # to support newline-separated concatenations of clause files (a
        # format not used in the native PMB 3.0 release), an empty line ends
        # the current block.  there is nothing to finish or yield unless a
        # graph is actually in progress.
        #
        if len(line) == 0:
            if graph is not None:
                finish(graph, mapping, finis, scopes)
                yield graph, None
            graph = None
            graph_id = None
            mapping = dict()
            scopes = dict()
            finis = list()
            header = 3
            continue
        #
        # each block of clauses is preceded by three comment lines, which we
        # use to extract the sentence identifier and underlying string.
        #
        if header:
            if header == 2:
                match = id_matcher.match(line)
                if match is None:
                    raise Exception("pbm.read(): "
                                    "[line {}] missing identifier in โ{}โ; exit."
                                    "".format(i, line))
                part, document = match.groups()
                graph_id = "{:02d}{:04d}".format(int(part), int(document))
            elif header == 1:
                if text is not None and graph_id in text:
                    sentence = text[graph_id]
                else:
                    sentence = line[5:-1]
                graph = Graph(graph_id, flavor = 2, framework = "drg")
                graph.add_input(sentence)
            header -= 1
            continue
        #
        # from here onwards, we are looking at genuine, contentful clauses.
        # from inspecting some of the files, it appears they are organized
        # according to surface (reading) order, and we cannot assume that
        # discourse referents are 'introduced' (in some box) prior to their
        # first occurrence in e.g. a role or concept clause.
        #
        match = referent_matcher.match(line)
        if match is not None:
            box, referent, start, end = match.groups()
            claim(referent, box, i)
            if box not in mapping: mapping[box] = graph.add_node(type = 0)
            anchor = span(start, end)
            if referent not in mapping:
                mapping[referent] \
                    = graph.add_node(anchors = [anchor] if anchor else None)
            else:
                mapping[referent].add_anchor(anchor)
            graph.add_edge(mapping[box].id, mapping[referent].id, "โ")
            continue

        match = condition_matcher.match(line)
        if match is not None:
            box, condition, source, target, start, end = match.groups()
            relate(box, conditions[condition], source, target,
                   span(start, end), 3)
            continue

        match = role_matcher.match(line)
        if match is not None:
            box, role, source, target, start, end = match.groups()
            relate(box, role, source, target, span(start, end), 2)
            continue

        match = concept_matcher.match(line)
        if match is not None:
            box, lemma, sense, referent, start, end = match.groups()
            claim(referent, box, i)
            anchor = span(start, end)
            if referent not in mapping:
                mapping[referent] = node \
                    = graph.add_node(anchors = [anchor] if anchor else None)
            else:
                node = mapping[referent]
                node.add_anchor(anchor)
            if strict and node.label is not None:
                raise Exception("pbm.read(): "
                                "[line {}] duplicate label โ{}โ on referent โ{}โ "
                                "(instead of โ{}โ); exit."
                                "".format(i, lemma, referent, node.label))
            node.label = lemma
            if sense[0] == sense[-1] == "\"": sense = sense[1:-1]
            node.set_property("sense", sense)
            continue

        match = discourse_matcher.match(line)
        if match is not None:
            top, relation, one, two = match.groups()
            if one not in mapping: mapping[one] = graph.add_node(type = 0)
            if two is not None:
                #
                # with two argument boxes, the relation holds between them and
                # the leading box is not connected.
                #
                if trace > 1: print("ternary discourse relation")
                if two not in mapping: mapping[two] = graph.add_node(type = 0)
                graph.add_edge(mapping[one].id, mapping[two].id, relation)
            else:
                if top not in mapping: mapping[top] = graph.add_node(type = 0)
                graph.add_edge(mapping[top].id, mapping[one].id, relation)
            continue

        if empty_matcher.search(line) is None:
            raise Exception("pmb.read(): [line {}] invalid clause โ{}โ."
                            "".format(i, line))
    #
    # finally, as we reach an end of file (without an empty line terminating
    # the preceding block of clauses, as is the standard format in PMB),
    # finalize the graph and return it.
    #
    if graph is not None:
        finish(graph, mapping, finis, scopes)
        yield graph, None
|