larkkin's picture
Add application code and models, update README
8044721
raw
history blame
2.6 kB
import sys;
#
# _fix_me_
# maybe use Unicode character classes instead, even if it likely would mean
# many calls to match one-character regular expressions?
#
# Characters that explode() strips from both ends of an anchored span by
# default: ASCII sentence punctuation, straight and curly quotation marks,
# brackets, and whitespace.  The curly quotes (U+201C, U+201D, U+2018,
# U+2019) are written as escapes so that the set cannot be corrupted when
# this file is re-encoded or copied through a tool with the wrong charset.
PUNCTUATION = frozenset(".?!;,:\u201c\"\u201d\u2018'\u2019()[]{} \t\n\f")
# Whitespace characters; explode() never reports these positions.
SPACE = frozenset(" \t\n\f")
def intersect(golds, systems, quiet = False):
    """Pair gold graphs with system graphs, yielding (gold, system) tuples.

    Graphs are matched on the triple (language(), framework, id).  A system
    graph whose language() is None and whose key is not among the gold keys
    is looked up again with the language "eng".

    - A system graph whose key was already seen is ignored.
    - A system graph without a gold counterpart is ignored.
    - For every gold graph that no system graph claimed, an empty Graph
      (same id, flavor, and framework as the gold graph) is yielded as the
      system side; these trailing pairs come in set-iteration order.

    Unless quiet is true, each of the three situations above is reported
    on standard error.  This is a generator: nothing happens until it is
    iterated.
    """
    gold_by_key = {(reference.language(), reference.framework, reference.id):
                   reference
                   for reference in golds}
    claimed = set()

    for candidate in systems:
        language = candidate.language()
        key = (language, candidate.framework, candidate.id)
        # fall back to English only when the unlabelled key does not match
        if language is None and key not in gold_by_key:
            language = "eng"
            key = (language, candidate.framework, candidate.id)

        if key in claimed:
            if not quiet:
                print("score.intersect(): ignoring duplicate {} {} graph #{}"
                      .format(language, candidate.framework, candidate.id),
                      file=sys.stderr)
            continue

        claimed.add(key)
        reference = gold_by_key.get(key)
        if reference is None:
            if not quiet:
                print("score.intersect(): ignoring {} {} graph #{} with no gold graph"
                      .format(candidate.language(), candidate.framework,
                              candidate.id),
                      file=sys.stderr)
            continue

        yield reference, candidate

    for key in gold_by_key.keys() - claimed:
        reference = gold_by_key[key]
        if not quiet:
            print("score.intersect(): missing system {} {} graph #{}"
                  .format(reference.language(), reference.framework,
                          reference.id),
                  file=sys.stderr)
        #
        # manufacture an empty graph as the system graph; the import is
        # deferred so that it only happens when a gold graph is unmatched
        #
        from graph import Graph
        placeholder = Graph(reference.id, flavor = reference.flavor,
                            framework = reference.framework)
        yield reference, placeholder
def anchor(node):
    """Return the anchors of node as a list of (from, to) tuples.

    Anchor entries lacking either the "from" or the "to" key are skipped.
    A node whose anchors attribute is None yields an empty list.
    """
    spans = node.anchors
    if spans is None:
        return []
    return [(span["from"], span["to"])
            for span in spans
            if "from" in span and "to" in span]
def explode(string, anchors, trim = PUNCTUATION):
    """Expand anchors into the frozenset of character positions they cover.

    Each anchor is either a (start, end) tuple or a mapping with "from" and
    "to" keys; anything else, or an anchor with a None bound, is skipped.
    Before expansion, characters contained in trim are stripped from both
    ends of the span.  Positions inside the remaining span whose character
    is in SPACE are left out of the result.
    """
    positions = set()
    for span in anchors:
        if isinstance(span, tuple):
            left, right = span
        elif "from" in span and "to" in span:
            left, right = span["from"], span["to"]
        else:
            continue
        if left is None or right is None:
            continue

        # shrink the span from both sides past trimmable characters
        while left < right and string[left] in trim:
            left += 1
        while right > left and string[right - 1] in trim:
            right -= 1

        positions.update(index
                         for index in range(left, right)
                         if string[index] not in SPACE)
    return frozenset(positions)
def fscore(gold, system, correct):
    """Return the tuple (precision, recall, f-score).

    Precision is correct / system and recall is correct / gold; either is
    0.0 when its denominator is falsy.  The f-score is
    2 * precision * recall / (precision + recall), or 0.0 when precision
    and recall sum to zero.
    """
    precision = 0.0
    if system:
        precision = correct / system

    recall = 0.0
    if gold:
        recall = correct / gold

    combined = precision + recall
    if combined != 0:
        harmonic = 2 * precision * recall / combined
    else:
        harmonic = 0.0
    return precision, recall, harmonic