Spaces:
Build error
Build error
jiangjiechen
commited on
Commit
•
20e9c0d
1
Parent(s):
808cf1c
fix dependencies
Browse files- app.py +0 -1
- src/check_client/fact_checker.py +4 -4
- src/check_client/train.py +4 -14
- src/check_client/utils.py +16 -7
- src/eval_client/cjjpy.py +249 -0
- src/eval_client/culpa.py +61 -0
- src/eval_client/culprit/eval.human.ref.json +100 -0
- src/eval_client/fever_scorer.py +84 -0
- src/eval_client/scorer.py +153 -0
app.py
CHANGED
@@ -96,7 +96,6 @@ iface = gr.Interface(
|
|
96 |
description="LOREN is an interpretable Fact Verification model against Wikipedia. "
|
97 |
"This is a demo system for \"LOREN: Logic-Regularized Reasoning for Interpretable Fact Verification\". "
|
98 |
"See the paper for technical details. You can add FLAG on the bottom to record interesting or bad cases!",
|
99 |
-
flagging_dir='results/flagged/',
|
100 |
allow_flagging=True,
|
101 |
flagging_options=['Good Case!', 'Error: MRC', 'Error: Parsing',
|
102 |
'Error: Commonsense', 'Error: Evidence', 'Error: Other'],
|
|
|
96 |
description="LOREN is an interpretable Fact Verification model against Wikipedia. "
|
97 |
"This is a demo system for \"LOREN: Logic-Regularized Reasoning for Interpretable Fact Verification\". "
|
98 |
"See the paper for technical details. You can add FLAG on the bottom to record interesting or bad cases!",
|
|
|
99 |
allow_flagging=True,
|
100 |
flagging_options=['Good Case!', 'Error: MRC', 'Error: Parsing',
|
101 |
'Error: Commonsense', 'Error: Evidence', 'Error: Other'],
|
src/check_client/fact_checker.py
CHANGED
@@ -28,8 +28,8 @@ from transformers import (
|
|
28 |
try:
|
29 |
from .modules.data_processor import DataProcessor
|
30 |
from .plm_checkers import BertChecker, RobertaChecker
|
31 |
-
from .utils import read_json_lines, compute_metrics
|
32 |
-
from .train import do_evaluate
|
33 |
from ..eval_client.fever_scorer import FeverScorer
|
34 |
except:
|
35 |
sys.path.append(cjj.AbsParentDir(__file__, '.'))
|
@@ -37,8 +37,8 @@ except:
|
|
37 |
from eval_client.fever_scorer import FeverScorer
|
38 |
from modules.data_processor import DataProcessor
|
39 |
from plm_checkers import BertChecker, RobertaChecker
|
40 |
-
from utils import read_json_lines, compute_metrics
|
41 |
-
from train import do_evaluate
|
42 |
|
43 |
MODEL_MAPPING = {
|
44 |
'bert': (BertConfig, BertTokenizer, BertChecker),
|
|
|
28 |
try:
|
29 |
from .modules.data_processor import DataProcessor
|
30 |
from .plm_checkers import BertChecker, RobertaChecker
|
31 |
+
from .utils import read_json_lines, compute_metrics, set_seed
|
32 |
+
from .train import do_evaluate
|
33 |
from ..eval_client.fever_scorer import FeverScorer
|
34 |
except:
|
35 |
sys.path.append(cjj.AbsParentDir(__file__, '.'))
|
|
|
37 |
from eval_client.fever_scorer import FeverScorer
|
38 |
from modules.data_processor import DataProcessor
|
39 |
from plm_checkers import BertChecker, RobertaChecker
|
40 |
+
from utils import read_json_lines, compute_metrics, set_seed
|
41 |
+
from train import do_evaluate
|
42 |
|
43 |
MODEL_MAPPING = {
|
44 |
'bert': (BertConfig, BertTokenizer, BertChecker),
|
src/check_client/train.py
CHANGED
@@ -34,12 +34,12 @@ from pytorch_lightning.loggers import WandbLogger
|
|
34 |
|
35 |
try:
|
36 |
from .modules.data_processor import DataProcessor
|
37 |
-
from .plm_checkers import BertChecker, RobertaChecker
|
38 |
-
from .utils import init_logger, compute_metrics
|
39 |
except:
|
40 |
from modules.data_processor import DataProcessor
|
41 |
-
from plm_checkers import BertChecker, RobertaChecker
|
42 |
-
from utils import init_logger, compute_metrics
|
43 |
|
44 |
try:
|
45 |
from torch.utils.tensorboard import SummaryWriter
|
@@ -49,21 +49,11 @@ except ImportError:
|
|
49 |
mAutoModel = {
|
50 |
'bert': BertChecker,
|
51 |
'roberta': RobertaChecker,
|
52 |
-
'xlnet': XLNetChecker,
|
53 |
-
'deberta': DebertaChecker,
|
54 |
}
|
55 |
|
56 |
logger = logging.getLogger(__name__)
|
57 |
|
58 |
|
59 |
-
def set_seed(args):
|
60 |
-
random.seed(args.seed)
|
61 |
-
np.random.seed(args.seed)
|
62 |
-
torch.manual_seed(args.seed)
|
63 |
-
if args.n_gpu > 0:
|
64 |
-
torch.cuda.manual_seed_all(args.seed)
|
65 |
-
|
66 |
-
|
67 |
def train(args, data_processor, model, tokenizer):
|
68 |
""" Train the model """
|
69 |
global wdblogger
|
|
|
34 |
|
35 |
try:
|
36 |
from .modules.data_processor import DataProcessor
|
37 |
+
from .plm_checkers import BertChecker, RobertaChecker
|
38 |
+
from .utils import init_logger, compute_metrics, set_seed
|
39 |
except:
|
40 |
from modules.data_processor import DataProcessor
|
41 |
+
from plm_checkers import BertChecker, RobertaChecker
|
42 |
+
from utils import init_logger, compute_metrics, set_seed
|
43 |
|
44 |
try:
|
45 |
from torch.utils.tensorboard import SummaryWriter
|
|
|
49 |
mAutoModel = {
|
50 |
'bert': BertChecker,
|
51 |
'roberta': RobertaChecker,
|
|
|
|
|
52 |
}
|
53 |
|
54 |
logger = logging.getLogger(__name__)
|
55 |
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def train(args, data_processor, model, tokenizer):
|
58 |
""" Train the model """
|
59 |
global wdblogger
|
src/check_client/utils.py
CHANGED
@@ -9,10 +9,19 @@
|
|
9 |
"""
|
10 |
|
11 |
import logging
|
12 |
-
|
|
|
13 |
import ujson as json
|
14 |
import torch
|
15 |
-
from plm_checkers.checker_utils import soft_logic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
def init_logger(level, filename=None, mode='a', encoding='utf-8'):
|
@@ -100,7 +109,7 @@ def compute_metrics(truth, predicted, z_predicted, mask):
|
|
100 |
res = {'label': x, 'prediction': y}
|
101 |
if x == y:
|
102 |
cnt += 1
|
103 |
-
|
104 |
res['pred_z'] = z
|
105 |
|
106 |
y_ = soft_logic(torch.tensor([z]), torch.tensor([m]))[0]
|
@@ -108,13 +117,13 @@ def compute_metrics(truth, predicted, z_predicted, mask):
|
|
108 |
z_cnt_s += 1
|
109 |
if y_.argmax(-1).item() == y:
|
110 |
agree_s += 1
|
111 |
-
|
112 |
-
z_h = torch.tensor(z[:torch.tensor(m).sum()]).argmax(-1).tolist()
|
113 |
-
if 0 in z_h:
|
114 |
y__ = 0
|
115 |
elif 1 in z_h: # NEI
|
116 |
y__ = 1
|
117 |
-
else:
|
118 |
y__ = 2
|
119 |
if y__ == x:
|
120 |
z_cnt_h += 1
|
|
|
9 |
"""
|
10 |
|
11 |
import logging
|
12 |
+
import random
|
13 |
+
import numpy as np
|
14 |
import ujson as json
|
15 |
import torch
|
16 |
+
from .plm_checkers.checker_utils import soft_logic
|
17 |
+
|
18 |
+
|
19 |
+
def set_seed(args):
|
20 |
+
random.seed(args.seed)
|
21 |
+
np.random.seed(args.seed)
|
22 |
+
torch.manual_seed(args.seed)
|
23 |
+
if args.n_gpu > 0:
|
24 |
+
torch.cuda.manual_seed_all(args.seed)
|
25 |
|
26 |
|
27 |
def init_logger(level, filename=None, mode='a', encoding='utf-8'):
|
|
|
109 |
res = {'label': x, 'prediction': y}
|
110 |
if x == y:
|
111 |
cnt += 1
|
112 |
+
|
113 |
res['pred_z'] = z
|
114 |
|
115 |
y_ = soft_logic(torch.tensor([z]), torch.tensor([m]))[0]
|
|
|
117 |
z_cnt_s += 1
|
118 |
if y_.argmax(-1).item() == y:
|
119 |
agree_s += 1
|
120 |
+
|
121 |
+
z_h = torch.tensor(z[:torch.tensor(m).sum()]).argmax(-1).tolist() # m' x 3
|
122 |
+
if 0 in z_h: # REFUTES
|
123 |
y__ = 0
|
124 |
elif 1 in z_h: # NEI
|
125 |
y__ = 1
|
126 |
+
else: # SUPPPORTS
|
127 |
y__ = 2
|
128 |
if y__ == x:
|
129 |
z_cnt_h += 1
|
src/eval_client/cjjpy.py
ADDED
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
'''
|
4 |
+
@Author : Jiangjie Chen
|
5 |
+
@Time : 2018/11/15 17:08
|
6 |
+
@Contact: [email protected]
|
7 |
+
'''
|
8 |
+
|
9 |
+
import re
|
10 |
+
import datetime
|
11 |
+
import os
|
12 |
+
import argparse
|
13 |
+
import logging
|
14 |
+
import traceback
|
15 |
+
|
16 |
+
try:
|
17 |
+
import ujson as json
|
18 |
+
except:
|
19 |
+
import json
|
20 |
+
|
21 |
+
HADOOP_BIN = 'PATH=/usr/bin/:$PATH hdfs'
|
22 |
+
FOR_PUBLIC = True
|
23 |
+
|
24 |
+
|
25 |
+
def LengthStats(filename):
|
26 |
+
len_list = []
|
27 |
+
thresholds = [0.8, 0.9, 0.95, 0.99, 0.999]
|
28 |
+
with open(filename) as f:
|
29 |
+
for line in f:
|
30 |
+
len_list.append(len(line.strip().split()))
|
31 |
+
stats = {
|
32 |
+
'Max': max(len_list),
|
33 |
+
'Min': min(len_list),
|
34 |
+
'Avg': round(sum(len_list) / len(len_list), 4),
|
35 |
+
}
|
36 |
+
len_list.sort()
|
37 |
+
for t in thresholds:
|
38 |
+
stats[f"Top-{t}"] = len_list[int(len(len_list) * t)]
|
39 |
+
|
40 |
+
for k in stats:
|
41 |
+
print(f"- {k}: {stats[k]}")
|
42 |
+
return stats
|
43 |
+
|
44 |
+
|
45 |
+
class AttrDict(dict):
|
46 |
+
def __init__(self, *args, **kwargs):
|
47 |
+
super(AttrDict, self).__init__(*args, **kwargs)
|
48 |
+
self.__dict__ = self
|
49 |
+
|
50 |
+
|
51 |
+
def TraceBack(error_msg):
|
52 |
+
exc = traceback.format_exc()
|
53 |
+
msg = f'[Error]: {error_msg}.\n[Traceback]: {exc}'
|
54 |
+
return msg
|
55 |
+
|
56 |
+
|
57 |
+
def Now():
|
58 |
+
return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
59 |
+
|
60 |
+
|
61 |
+
def AbsParentDir(file, parent='..', postfix=None):
|
62 |
+
ppath = os.path.abspath(file)
|
63 |
+
parent_level = parent.count('.')
|
64 |
+
while parent_level > 0:
|
65 |
+
ppath = os.path.dirname(ppath)
|
66 |
+
parent_level -= 1
|
67 |
+
if postfix is not None:
|
68 |
+
return os.path.join(ppath, postfix)
|
69 |
+
else:
|
70 |
+
return ppath
|
71 |
+
|
72 |
+
|
73 |
+
def init_logger(log_file=None, log_file_level=logging.NOTSET, from_scratch=False):
|
74 |
+
from coloredlogs import ColoredFormatter
|
75 |
+
import tensorflow as tf
|
76 |
+
|
77 |
+
fmt = "[%(asctime)s %(levelname)s] %(message)s"
|
78 |
+
log_format = ColoredFormatter(fmt=fmt)
|
79 |
+
# log_format = logging.Formatter()
|
80 |
+
logger = logging.getLogger()
|
81 |
+
logger.setLevel(log_file_level)
|
82 |
+
|
83 |
+
console_handler = logging.StreamHandler()
|
84 |
+
console_handler.setFormatter(log_format)
|
85 |
+
logger.handlers = [console_handler]
|
86 |
+
|
87 |
+
if log_file and log_file != '':
|
88 |
+
if from_scratch and tf.io.gfile.exists(log_file):
|
89 |
+
logger.warning('Removing previous log file: %s' % log_file)
|
90 |
+
tf.io.gfile.remove(log_file)
|
91 |
+
path = os.path.dirname(log_file)
|
92 |
+
os.makedirs(path, exist_ok=True)
|
93 |
+
file_handler = logging.FileHandler(log_file)
|
94 |
+
file_handler.setLevel(log_file_level)
|
95 |
+
file_handler.setFormatter(log_format)
|
96 |
+
logger.addHandler(file_handler)
|
97 |
+
|
98 |
+
return logger
|
99 |
+
|
100 |
+
|
101 |
+
def OverWriteCjjPy(root='.'):
|
102 |
+
# import difflib
|
103 |
+
# diff = difflib.HtmlDiff()
|
104 |
+
cnt = 0
|
105 |
+
golden_cjjpy = os.path.join(root, 'cjjpy.py')
|
106 |
+
# golden_content = open(golden_cjjpy).readlines()
|
107 |
+
for dir, folder, file in os.walk(root):
|
108 |
+
for f in file:
|
109 |
+
if f == 'cjjpy.py':
|
110 |
+
cjjpy = '%s/%s' % (dir, f)
|
111 |
+
# content = open(cjjpy).readlines()
|
112 |
+
# d = diff.make_file(golden_content, content)
|
113 |
+
cnt += 1
|
114 |
+
print('[%d]: %s' % (cnt, cjjpy))
|
115 |
+
os.system('cp %s %s' % (golden_cjjpy, cjjpy))
|
116 |
+
|
117 |
+
|
118 |
+
def ChangeFileFormat(filename, new_fmt):
|
119 |
+
assert type(filename) is str and type(new_fmt) is str
|
120 |
+
spt = filename.split('.')
|
121 |
+
if len(spt) == 0:
|
122 |
+
return filename
|
123 |
+
else:
|
124 |
+
return filename.replace('.' + spt[-1], new_fmt)
|
125 |
+
|
126 |
+
|
127 |
+
def CountLines(fname):
|
128 |
+
with open(fname, 'rb') as f:
|
129 |
+
count = 0
|
130 |
+
last_data = '\n'
|
131 |
+
while True:
|
132 |
+
data = f.read(0x400000)
|
133 |
+
if not data:
|
134 |
+
break
|
135 |
+
count += data.count(b'\n')
|
136 |
+
last_data = data
|
137 |
+
if last_data[-1:] != b'\n':
|
138 |
+
count += 1 # Remove this if a wc-like count is needed
|
139 |
+
return count
|
140 |
+
|
141 |
+
|
142 |
+
def GetDate():
|
143 |
+
return str(datetime.datetime.now())[5:10].replace('-', '')
|
144 |
+
|
145 |
+
|
146 |
+
def TimeClock(seconds):
|
147 |
+
sec = int(seconds)
|
148 |
+
hour = int(sec / 3600)
|
149 |
+
min = int((sec - hour * 3600) / 60)
|
150 |
+
ssec = float(seconds) - hour * 3600 - min * 60
|
151 |
+
# return '%dh %dm %.2fs' % (hour, min, ssec)
|
152 |
+
return '{0:>2d}h{1:>3d}m{2:>6.2f}s'.format(hour, min, ssec)
|
153 |
+
|
154 |
+
|
155 |
+
def StripAll(text):
|
156 |
+
return text.strip().replace('\t', '').replace('\n', '').replace(' ', '')
|
157 |
+
|
158 |
+
|
159 |
+
def GetBracket(text, bracket, en_br=False):
|
160 |
+
# input should be aa(bb)cc, True for bracket, False for text
|
161 |
+
if bracket:
|
162 |
+
try:
|
163 |
+
return re.findall('\((.*?)\)', text.strip())[-1]
|
164 |
+
except:
|
165 |
+
return ''
|
166 |
+
else:
|
167 |
+
if en_br:
|
168 |
+
text = re.sub('\(.*?\)', '', text.strip())
|
169 |
+
return re.sub('(.*?)', '', text.strip())
|
170 |
+
|
171 |
+
|
172 |
+
def CharLang(uchar, lang):
|
173 |
+
assert lang.lower() in ['en', 'cn', 'zh']
|
174 |
+
if lang.lower() in ['cn', 'zh']:
|
175 |
+
if uchar >= '\u4e00' and uchar <= '\u9fa5':
|
176 |
+
return True
|
177 |
+
else:
|
178 |
+
return False
|
179 |
+
elif lang.lower() == 'en':
|
180 |
+
if (uchar <= 'Z' and uchar >= 'A') or (uchar <= 'z' and uchar >= 'a'):
|
181 |
+
return True
|
182 |
+
else:
|
183 |
+
return False
|
184 |
+
else:
|
185 |
+
raise NotImplementedError
|
186 |
+
|
187 |
+
|
188 |
+
def WordLang(word, lang):
|
189 |
+
for i in word.strip():
|
190 |
+
if i.isspace(): continue
|
191 |
+
if not CharLang(i, lang):
|
192 |
+
return False
|
193 |
+
return True
|
194 |
+
|
195 |
+
|
196 |
+
def SortDict(_dict, reverse=True):
|
197 |
+
assert type(_dict) is dict
|
198 |
+
return sorted(_dict.items(), key=lambda d: d[1], reverse=reverse)
|
199 |
+
|
200 |
+
|
201 |
+
def lark(content='test'):
|
202 |
+
print(content)
|
203 |
+
|
204 |
+
|
205 |
+
if __name__ == '__main__':
|
206 |
+
parser = argparse.ArgumentParser()
|
207 |
+
|
208 |
+
parser.add_argument('--diff', nargs=2,
|
209 |
+
help='show difference between two files, shown in downloads/diff.html')
|
210 |
+
parser.add_argument('--de_unicode', action='store_true', default=False,
|
211 |
+
help='remove unicode characters')
|
212 |
+
parser.add_argument('--link_entity', action='store_true', default=False,
|
213 |
+
help='')
|
214 |
+
parser.add_argument('--max_comm_len', action='store_true', default=False,
|
215 |
+
help='')
|
216 |
+
parser.add_argument('--search', nargs=2,
|
217 |
+
help='search key from file, 2 args: file name & key')
|
218 |
+
parser.add_argument('--email', nargs=2,
|
219 |
+
help='sending emails, 2 args: subject & content')
|
220 |
+
parser.add_argument('--overwrite', action='store_true', default=None,
|
221 |
+
help='overwrite all cjjpy under given *dir* based on *dir*/cjjpy.py')
|
222 |
+
parser.add_argument('--replace', nargs=3,
|
223 |
+
help='replace char, 3 args: file name & replaced char & replacer char')
|
224 |
+
parser.add_argument('--lark', nargs=1)
|
225 |
+
parser.add_argument('--get_hdfs', nargs=2,
|
226 |
+
help='easy copy from hdfs to local fs, 2 args: remote_file/dir & local_dir')
|
227 |
+
parser.add_argument('--put_hdfs', nargs=2,
|
228 |
+
help='easy put from local fs to hdfs, 2 args: local_file/dir & remote_dir')
|
229 |
+
parser.add_argument('--length_stats', nargs=1,
|
230 |
+
help='simple token lengths distribution of a line-by-line file')
|
231 |
+
|
232 |
+
args = parser.parse_args()
|
233 |
+
|
234 |
+
if args.overwrite:
|
235 |
+
print('* Overwriting cjjpy...')
|
236 |
+
OverWriteCjjPy()
|
237 |
+
|
238 |
+
if args.lark:
|
239 |
+
try:
|
240 |
+
content = args.lark[0]
|
241 |
+
except:
|
242 |
+
content = 'running complete'
|
243 |
+
print(f'* Larking "{content}"...')
|
244 |
+
lark(content)
|
245 |
+
|
246 |
+
if args.length_stats:
|
247 |
+
file = args.length_stats[0]
|
248 |
+
print(f'* Working on {file} lengths statistics...')
|
249 |
+
LengthStats(file)
|
src/eval_client/culpa.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding:utf-8 -*-
|
2 |
+
|
3 |
+
"""
|
4 |
+
@Author : Bao
|
5 |
+
@Date : 2021/9/7
|
6 |
+
@Desc :
|
7 |
+
@Last modified by : Bao
|
8 |
+
@Last modified date : 2021/9/7
|
9 |
+
"""
|
10 |
+
|
11 |
+
import json
|
12 |
+
import numpy as np
|
13 |
+
import argparse
|
14 |
+
from collections import defaultdict
|
15 |
+
from sklearn.metrics import precision_recall_fscore_support
|
16 |
+
|
17 |
+
# ref --> label 1, nei & sup --> label 0
|
18 |
+
idx2label = {0: 1, 1: 0, 2: 0}
|
19 |
+
|
20 |
+
|
21 |
+
def read_json_lines(filename, mode='r', encoding='utf-8', skip=0):
|
22 |
+
with open(filename, mode, encoding=encoding) as fin:
|
23 |
+
for line in fin:
|
24 |
+
if skip > 0:
|
25 |
+
skip -= 1
|
26 |
+
continue
|
27 |
+
yield json.loads(line)
|
28 |
+
|
29 |
+
|
30 |
+
def process(filein):
|
31 |
+
id2info = defaultdict(dict)
|
32 |
+
for line in read_json_lines('eval.human.ref.merged.json'):
|
33 |
+
labels = [0] * len(line['questions'])
|
34 |
+
for cul in line['culprit']:
|
35 |
+
labels[cul] = 1
|
36 |
+
id2info[line['id']].update({'id': line['id'], 'labels': labels})
|
37 |
+
|
38 |
+
for line in read_json_lines(filein):
|
39 |
+
if line['id'] not in id2info: continue
|
40 |
+
predicted = [idx2label[_] for _ in np.argmax(line['z_prob'], axis=-1)]
|
41 |
+
id2info[line['id']]['predicted'] = predicted
|
42 |
+
|
43 |
+
ps, rs, fs = [], [], []
|
44 |
+
for info in id2info.values():
|
45 |
+
p, r, f, _ = precision_recall_fscore_support(info['labels'], info['predicted'], average='binary')
|
46 |
+
ps.append(p)
|
47 |
+
rs.append(r)
|
48 |
+
fs.append(f)
|
49 |
+
print(filein)
|
50 |
+
print('Precision: {}'.format(sum(ps) / len(ps)))
|
51 |
+
print('Recall: {}'.format(sum(rs) / len(rs)))
|
52 |
+
print('F1: {}'.format(sum(fs) / len(fs)))
|
53 |
+
|
54 |
+
return sum(ps) / len(ps), sum(rs) / len(rs), sum(fs) / len(fs)
|
55 |
+
|
56 |
+
|
57 |
+
if __name__ == '__main__':
|
58 |
+
parser = argparse.ArgumentParser()
|
59 |
+
parser.add_argument('-i', type=str, help='predicted jsonl file with phrasal veracity predictions.')
|
60 |
+
args = parser.parse_args()
|
61 |
+
process(args.i)
|
src/eval_client/culprit/eval.human.ref.json
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"id": 102600, "claim": "Sausage Party was released in May of 2016 .", "questions": ["What was the name of the new album released in May 2016? or <mask> was released in May of 2016 .", "When was Sausage Party released? or Sausage Party was released in <mask> of 2016 .", "When was Sausage Party released? or Sausage Party was released in May of <mask> .", "What was Sausage Party's release date? or Sausage Party was <mask> in May of 2016 ."], "answers": [["Sausage Party", 0, 13], ["May", 30, 33], ["2016", 37, 41], ["released", 18, 26]], "evidential": [["Sausage Party", "The Sausage Party", "A Sausage Party", "Sausage party"], ["August", "the summer", "March", "the fall"], ["2016", "the year 2016", "March 2016", "2015"], ["released", "announced", "premiered", "released domestically"]], "culprit": [1]}
|
2 |
+
{"id": 92833, "claim": "Anne Boleyn did not live in England in 1522 .", "questions": ["Who did not live in England in 1522? or <mask> did not live in England in 1522 .", "Where did Anne Boleyn live in 1522? or Anne Boleyn did not live in <mask> in 1522 .", "When did Anne Boleyn not live in England? or Anne Boleyn did not live in England in <mask> .", "What did Anne Boleyn not do in England? or Anne Boleyn did not <mask> in England in 1522 ."], "answers": [["Anne Boleyn", 0, 11], ["England", 28, 35], ["1522", 39, 43], ["live", 20, 24]], "evidential": [["Anne Boleyn", "Ann Boleyn", "Anne Bolyn", "A woman"], ["England", "Europe", "the world", "the UK"], ["1532", "1536", "1533", "1534"], ["live", "stay", "marry", "reside"]], "culprit": [1, 2]}
|
3 |
+
{"id": 159707, "claim": "Edgar Wright is only a producer .", "questions": ["Who is the only producer? or <mask> is only a producer .", "What is Edgar Wright's job title? or Edgar Wright is <mask> ."], "answers": [["Edgar Wright", 0, 12], ["only a producer", 16, 31]], "evidential": [["Edgar Wright", "Edgar Wright Jr.", "Edgar W. Wright", "Edgar Wayne Wright"], ["a producer", "a director", "a screenwriter", "a film producer"]], "culprit": [1]}
|
4 |
+
{"id": 146055, "claim": "The Giver is a bill .", "questions": ["What is a bill called? or <mask> is a bill .", "What is the Giver? or The Giver is <mask> ."], "answers": [["The Giver", 0, 9], ["a bill", 13, 19]], "evidential": [["The Giver", "A The Giver", "The giver", "The Giver Act"], ["a film", "a work", "a motion picture", "a movie"]], "culprit": [1]}
|
5 |
+
{"id": 8443, "claim": "A Milli is by Justin Bieber .", "questions": ["What is the name of Justin Bieber's song? or <mask> is by Justin Bieber .", "Who is A Milli by? or A Milli is by <mask> ."], "answers": [["A Milli", 0, 7], ["Justin Bieber", 14, 27]], "evidential": [["A Milli", "A Milli song", "A Milli Song", "A Milli."], ["Justin Bieber", "a Justin Bieber", "an artist", "a musician"]], "culprit": [1]}
|
6 |
+
{"id": 67833, "claim": "Shane McMahon did not win the Hardcore Championship once .", "questions": ["Who won the Hardcore Championship once? or <mask> did not win the Hardcore Championship once .", "What did Shane McMahon not win once? or Shane McMahon did not win <mask> once .", "What did Shane McMahon not do once? or Shane McMahon did not <mask> the Hardcore Championship once ."], "answers": [["Shane McMahon", 0, 13], ["the Hardcore Championship", 26, 51], ["win", 22, 25]], "evidential": [["Shane McMahon", "Shane McMahon", "Shane McMah", "Shane McMahon ("], ["the European Championship", "the Hardcore Championship", "a wrestling championship", "a championship"], ["win", "won", "achieve", "earn"]], "culprit": [1]}
|
7 |
+
{"id": 116789, "claim": "Minor League Baseball is a hierarchy of only amateur baseball leagues .", "questions": ["What is the name of the only amateur baseball league? or <mask> is a hierarchy of only amateur baseball leagues .", "What is Minor League Baseball? or Minor League Baseball is <mask> of only amateur baseball leagues .", "What is Minor League Baseball a hierarchy of? or Minor League Baseball is a hierarchy of <mask> ."], "answers": [["Minor League Baseball", 0, 21], ["a hierarchy", 25, 36], ["only amateur baseball leagues", 40, 69]], "evidential": [["Minor League Baseball", "The Minor League Baseball", "Minor league Baseball", "Major League Baseball"], ["a hierarchy", "an organization", "a system", "a structure"], ["professional baseball leagues", "minor league baseball", "professional baseball teams", "baseball leagues"]], "culprit": [2]}
|
8 |
+
{"id": 12454, "claim": "Tangled is a silent film .", "questions": ["What is the name of the film that is a silent film? or <mask> is a silent film .", "What type of film is Tangled? or Tangled is <mask> ."], "answers": [["Tangled", 0, 7], ["a silent film", 11, 24]], "evidential": [["Tangled", "Tangles", "Tangled (", "Tangling"], ["an animated film", "a musical fantasy film", "a fantasy film", "a film"]], "culprit": [1]}
|
9 |
+
{"id": 149501, "claim": "Kung Fu Panda was number three at the box office .", "questions": ["What movie was number three at the box office? or <mask> was number three at the box office .", "What was Kung Fu Panda's box office number? or Kung Fu Panda was <mask> at the box office .", "Where was Kung Fu Panda number three? or Kung Fu Panda was number three at <mask> ."], "answers": [["Kung Fu Panda", 0, 13], ["number three", 18, 30], ["the box office", 34, 48]], "evidential": [["Kung Fu Panda", "Kung fu Panda", "Kung F Panda", "Kungfu Panda"], ["the number one", "number one", "the number one movie", "the number one film"], ["the box office", "the movie box office", "the US box office", "a box office"]], "culprit": [1]}
|
10 |
+
{"id": 51962, "claim": "Mandy Moore is a Canadian film actress .", "questions": ["Who is the name of the Canadian film actress? or <mask> is a Canadian film actress .", "What nationality is Mandy Moore? or Mandy Moore is <mask> film actress .", "What is Mandy Moore's career? or Mandy Moore is a Canadian <mask> ."], "answers": [["Mandy Moore", 0, 11], ["a Canadian", 15, 25], ["film actress", 26, 38]], "evidential": [["Mandy Moore", "Mandy Moore ( choreographer )", "Mandy Moore ( dancer )", "Mandy Moore( choreographer )"], ["an American", "an american", "a North American", "an North American"], ["actress", "film actress", "actor", "singer"]], "culprit": [1]}
|
11 |
+
{"id": 217102, "claim": "Innovation is viewed as the application of better solutions that negate market needs .", "questions": ["What is viewed as the application of better solutions that negate market needs? or <mask> is viewed as the application of better solutions that negate market needs .", "Innovation is viewed as what? or Innovation is viewed as <mask> of better solutions that negate market needs .", "Innovation is viewed as the application of what? or Innovation is viewed as the application of <mask> that negate market needs .", "Innovation is viewed as the application of better solutions that negate what? or Innovation is viewed as the application of better solutions that negate <mask> .", "What is innovation <mask> as? or Innovation is <mask> as the application of better solutions that negate market needs .", "Innovation is viewed as the application of better solutions that do what to market needs? or Innovation is viewed as the application of better solutions that <mask> market needs ."], "answers": [["Innovation", 0, 10], ["the application", 24, 39], ["better solutions", 43, 59], ["market needs", 72, 84], ["viewed", 14, 20], ["negate", 65, 71]], "evidential": [["Innovation", "Technology innovation", "Insulin", "In innovation"], ["the application", "an application", "a application", "the applications"], ["solutions", "new solutions", "better solutions", "products"], ["new requirements", "existing market needs", "existing market requirements", "existing requirements"], ["viewed", "perceived", "characterized", "described"], ["meet", "meet existing", "meet current", "met"]], "culprit": [5]}
|
12 |
+
{"id": 202314, "claim": "The New Jersey Turnpike has zero shoulders .", "questions": ["What has zero shoulders? or <mask> has zero shoulders .", "What is the total length of the New Jersey Turnpike? or The New Jersey Turnpike has <mask> ."], "answers": [["The New Jersey Turnpike", 0, 23], ["zero shoulders", 28, 42]], "evidential": [["The New Jersey Turnpike", "New Jersey Turnpike", "A New Jersey Turnpike", "the New Jersey Turnpike"], ["12 ft lanes", "a total length", "12 feet long", "12 feet"]], "culprit": [1]}
|
13 |
+
{"id": 226106, "claim": "Bongwater is set outside of Oregon .", "questions": ["What is the name of the town outside of Oregon? or <mask> is set outside of Oregon .", "What state is Bongwater located outside of? or Bongwater is set outside of <mask> .", "Where is Bongwater located outside of Oregon? or Bongwater is <mask> outside of Oregon .", "Where is Bongwater located? or Bongwater is set <mask> of Oregon ."], "answers": [["Bongwater", 0, 9], ["Oregon", 28, 34], ["set", 13, 16], ["outside", 17, 24]], "evidential": [["Bongwater", "The film Bongwater", "Bongwwater", "Bongswater"], ["Oregon", "a state", "Washington State", "the Oregon"], ["set", "located", "filmed", "based"], ["the state", "outside", "the city", "the coast"]], "culprit": [3]}
|
14 |
+
{"id": 182051, "claim": "The Fly was first released in 1999 .", "questions": ["What was the name of the first film released in 1999? or <mask> was first released in 1999 .", "When was The Fly first released? or The Fly was first released in <mask> .", "When was The Fly first <mask>? or The Fly was first <mask> in 1999 .", "When was The Fly released? or The Fly was <mask> released in 1999 ."], "answers": [["The Fly", 0, 7], ["1999", 30, 34], ["released", 18, 26], ["first", 12, 17]], "evidential": [["The Fly", "The Fly 's", "A film The Fly", "The fly"], ["August 1986", "1986", "the 1980s", "the eighties"], ["released", "published", "distributed", "release"], ["first", "originally", "last", "only"]], "culprit": [1]}
|
15 |
+
{"id": 65598, "claim": "Uganda was not ruled by the British .", "questions": ["What country was not ruled by the British? or <mask> was not ruled by the British .", "Who ruled Uganda? or Uganda was not ruled by <mask> .", "What was Uganda not <mask> by the British? or Uganda was not <mask> by the British ."], "answers": [["Uganda", 0, 6], ["the British", 24, 35], ["ruled", 15, 20]], "evidential": [["Uganda", "Uganda", "Ugandan", "Uganda"], ["the British", "Britain", "a colony", "British"], ["ruled", "controlled", "governed", "owned"]], "culprit": [1]}
|
16 |
+
{"id": 117126, "claim": "Pocahontas was not the daughter of Powhatan .", "questions": ["Who was not the daughter of Powhatan? or <mask> was not the daughter of Powhatan .", "What was Pocahontas' mother's name? or Pocahontas was not <mask> of Powhatan .", "Who was Pocahontas' father? or Pocahontas was not the daughter of <mask> ."], "answers": [["Pocahontas", 0, 10], ["the daughter", 19, 31], ["Powhatan", 35, 43]], "evidential": [["Pocahontas", "Pocahonta", "Pocahontas n't", "Pocahontas Jr."], ["the daughter", "a daughter", "the granddaughter", "the child"], ["Powhatan", "a Native American", "a chief", "a person"]], "culprit": [1, 2]}
|
17 |
+
{"id": 164506, "claim": "The Nobel Prize in Chemistry was awarded to a person from anywhere except the Netherlands .", "questions": ["What award was given to a person from anywhere except the Netherlands? or <mask> was awarded to a person from anywhere except the Netherlands .", "Who was the Nobel Prize in Chemistry awarded to? or The Nobel Prize in Chemistry was awarded to <mask> from anywhere except the Netherlands .", "Where was the Nobel Prize in Chemistry awarded to? or The Nobel Prize in Chemistry was awarded to a person from <mask> except the Netherlands .", "Where was the Nobel Prize in Chemistry awarded to? or The Nobel Prize in Chemistry was awarded to a person from anywhere except <mask> .", "How is the Nobel Prize in Chemistry <mask>? or The Nobel Prize in Chemistry was <mask> to a person from anywhere except the Netherlands ."], "answers": [["The Nobel Prize in Chemistry", 0, 28], ["a person", 44, 52], ["anywhere", 58, 66], ["the Netherlands", 74, 89], ["awarded", 33, 40]], "evidential": [["The Nobel Prize in Chemistry", "A Nobel Prize in Chemistry", "Nobel Prize in Chemistry", "The Nobel prize in Chemistry"], ["scientists", "a scientist", "people", "anyone"], ["every country", "every state", "every place", "all"], ["the Netherlands", "Sweden", "Europe", "Norway"], ["awarded", "given", "presented", "distributed"]], "culprit": [2, 3]}
|
18 |
+
{"id": 113010, "claim": "Duane Chapman is not a former bail bondsman .", "questions": ["Who is not a former bail bondsman? or <mask> is not a former bail bondsman .", "What is Duane Chapman's profession? or Duane Chapman is not <mask> ."], "answers": [["Duane Chapman", 0, 13], ["a former bail bondsman", 21, 43]], "evidential": [["Duane Chapman", "Duane ChapmanI.", "Duane Chapman I.", "Duane Chapman II."], ["a bail bondsman", "a bounty hunter", "a former bail bondsman", "a bail bondsman"]], "culprit": [1]}
|
19 |
+
{"id": 109582, "claim": "US Airways Flight 1549 did not have any people on board .", "questions": ["What flight did not have any people on board? or <mask> did not have any people on board .", "What was not on board the US Airways Flight 1549? or US Airways Flight 1549 did not have any <mask> on board .", "What was the name of the aircraft that did not have any people on? or US Airways Flight 1549 did not have any people on <mask> ."], "answers": [["US Airways Flight 1549", 0, 22], ["people", 40, 46], ["board", 50, 55]], "evidential": [["US Airways Flight 1549", "The US Airways Flight 1549", "American Airways Flight 1549", "United Airways Flight 1549"], ["people", "passengers", "humans", "birds"], ["an aircraft", "an Airbus A320", "the Airbus A320", "an airliner"]], "culprit": [2]}
|
20 |
+
{"id": 23766, "claim": "Charles de Gaulle was a Polish Resistance leader .", "questions": ["Who was the leader of the Polish Resistance? or <mask> was a Polish Resistance leader .", "What nationality was Charles de Gaulle? or Charles de Gaulle was <mask> Resistance leader .", "What political party was Charles de Gaulle a leader of? or Charles de Gaulle was a Polish <mask> leader .", "What was Charles de Gaulle's role in the Polish Resistance? or Charles de Gaulle was a Polish Resistance <mask> ."], "answers": [["Charles de Gaulle", 0, 17], ["a Polish", 22, 30], ["Resistance", 31, 41], ["leader", 42, 48]], "evidential": [["Charles de Gaulle", "Charles De Gaulle", "Charles de Gaulle", "Louis de Gaulle"], ["a French", "an American", "the French", "an English"], ["French", "Nationalist", "Communist", "National Socialist"], ["leader", "chief leader", "person", "chief strategist"]], "culprit": [1]}
|
21 |
+
{"id": 94556, "claim": "Pirates of the Caribbean has yet to be opened in Disneyland Paris .", "questions": ["What is the name of the movie that has yet to be opened at Disneyland Paris? or <mask> has yet to be opened in Disneyland Paris .", "Where is Pirates of the Caribbean currently located? or Pirates of the Caribbean has yet to be opened in <mask> .", "What is the name of the first attraction to open at Disneyland Paris? or Pirates of the Caribbean has yet to be <mask> in Disneyland Paris .", "How long has it been since the Pirates of the Caribbean opened? or Pirates of the Caribbean has <mask> to be opened in Disneyland Paris ."], "answers": [["Pirates of the Caribbean", 0, 24], ["Disneyland Paris", 49, 65], ["opened", 39, 45], ["yet", 29, 32]], "evidential": [["Pirates of the Caribbean", "The Pirates of the Caribbean", "Pirates of The Caribbean", "Pirates of Caribbean"], ["Disneyland Paris", "Disney Disneyland Paris", "Disney Paris", "Disney Park"], ["an attraction", "the first attraction", "a ride", "the first ride"], ["yet", "a decade", "a year", "the time"]], "culprit": [2, 3]}
|
22 |
+
{"id": 225871, "claim": "Revolver has only ever topped a single chart .", "questions": ["What has only ever topped a single chart? or <mask> has only ever topped a single chart .", "How many charts has Revolver ever topped? or Revolver has only ever topped <mask> .", "How many times has Revolver ever <mask> a single chart? or Revolver has only ever <mask> a single chart .", "How many times has Revolver topped a single chart? or Revolver has <mask> ever topped a single chart ."], "answers": [["Revolver", 0, 8], ["a single chart", 30, 44], ["topped", 23, 29], ["only", 13, 17]], "evidential": [["Revolver", "Revololver", "Revolver Record", "The Revolver"], ["four charts", "two charts", "three charts", "zero charts"], ["topped", "charted", "reached", "appeared"], ["never", "n't", "only", "rarely"]], "culprit": [1, 3]}
|
23 |
+
{"id": 164417, "claim": "Carey Hayes was born in 1897 .", "questions": ["Who was born in 1897? or <mask> was born in 1897 .", "When was Carey Hayes born? or Carey Hayes was born in <mask> .", "What was Carey Hayes' birth year? or Carey Hayes was <mask> in 1897 ."], "answers": [["Carey Hayes", 0, 11], ["1897", 24, 28], ["born", 16, 20]], "evidential": [["Carey Hayes", "Carey Hayes (", "Carey Hayes", "Carey Hayden"], ["1961", "the 1960s", "the 1960 's", "the 20th century"], ["born", "conceived", "created", "born in"]], "culprit": [1]}
|
24 |
+
{"id": 70311, "claim": "IMDb is a professional Dota 2 player .", "questions": ["What is the name of the professional Dota 2 player? or <mask> is a professional Dota 2 player .", "What is IMDb's professional name? or IMDb is a professional <mask> 2 player .", "How many players does IMDb have? or IMDb is a professional Dota <mask> .", "IMDb is what type of player? or IMDb is <mask> Dota 2 player ."], "answers": [["IMDb", 0, 4], ["Dota", 23, 27], ["2 player", 28, 36], ["a professional", 8, 22]], "evidential": [["IMDb", "The Internet Movie Database", "The internet movie database", "The internet Movie Database"], ["Game", "Web", "video game", "Webmaster"], ["users", "one player", "one user", "user"], ["an online database", "a fictional", "a popular", "a professional"]], "culprit": [1, 2, 3]}
|
25 |
+
{"id": 123479, "claim": "The Hundred Years ' War does not include the Lancastrian War .", "questions": ["What does not include the Lancastrian War? or <mask> does not include the Lancastrian War .", "What is not included in the Hundred Years' War? or The Hundred Years ' War does not include <mask> .", "What does the Hundred Years' War not <mask>? or The Hundred Years ' War does not <mask> the Lancastrian War ."], "answers": [["The Hundred Years ' War", 0, 23], ["the Lancastrian War", 41, 60], ["include", 33, 40]], "evidential": [["The Hundred Years ' War", "The Hundred Years' War", "Hundred Years ' War", "A Hundred Years ' War"], ["a conflict", "local conflicts", "a war", "several conflicts"], ["mention", "name", "see", "include"]], "culprit": [1]}
|
26 |
+
{"id": 16811, "claim": "Efraim Diveroli is a Spaniard .", "questions": ["Who is a Spaniard? or <mask> is a Spaniard .", "What is Efraim Diveroli's nationality? or Efraim Diveroli is <mask> ."], "answers": [["Efraim Diveroli", 0, 15], ["a Spaniard", 19, 29]], "evidential": [["Efraim Diveroli", "Efranim Diveroli", "Efriim Diveroli", "Efrafri Diveroli"], ["an American", "American", "North American", "a North American"]], "culprit": [1]}
|
27 |
+
{"id": 183618, "claim": "Finding Dory was written by anyone except Andrew Stanton .", "questions": ["What was the name of the book that was written by anyone other than Andrew Stanton? or <mask> was written by anyone except Andrew Stanton .", "Who wrote Finding Dory? or Finding Dory was written by <mask> except Andrew Stanton .", "Who wrote Finding Dory? or Finding Dory was written by anyone except <mask> .", "Who else wrote Finding Dory? or Finding Dory was <mask> by anyone except Andrew Stanton ."], "answers": [["Finding Dory", 0, 12], ["anyone", 28, 34], ["Andrew Stanton", 42, 56], ["written", 17, 24]], "evidential": [["Finding Dory", "The Finding Dory", "Finding dory", "Finding Dory 2"], ["anyone", "every person", "almost anyone", "almost all"], ["Andrew Stanton", "Andrew Strouse", "Andrew Stanton", "Andy Stanton"], ["written", "penned", "directed", "authored"]], "culprit": [2]}
|
28 |
+
{"id": 125315, "claim": "Phoenix , Arizona is the most populous city in Massachusetts .", "questions": ["What is the most populous city in Massachusetts? or <mask> , Arizona is the most populous city in Massachusetts .", "What state is Phoenix located in? or Phoenix , <mask> is the most populous city in Massachusetts .", "What state is Phoenix located in? or Phoenix , Arizona is the most populous city in <mask> .", "What is the population of Phoenix in Massachusetts? or Phoenix , Arizona is <mask> populous city in Massachusetts .", "What is the population of Phoenix? or Phoenix , Arizona is the most <mask> city in Massachusetts ."], "answers": [["Phoenix", 0, 7], ["Arizona", 10, 17], ["Massachusetts", 47, 60], ["the most", 21, 29], ["populous", 30, 38]], "evidential": [["Phoenix", "The Phoenix", "Arizona Phoenix", "Tempe"], ["Arizona", "Arizona Republic", "Arizona State", "United States"], ["the United States", "the US", "Arizona", "a state"], ["the most", "the fifth most", "the 5th most", "the fourth most"], ["populous", "populous city", "populous US", "large"]], "culprit": [2]}
|
29 |
+
{"id": 216367, "claim": "All speakers of the Chagatai language lived in France .", "questions": ["Who lived in France? or All <mask> of the Chagatai language lived in France .", "What language did all French speakers speak? or All speakers of <mask> lived in France .", "Where did the Chagatai language live? or All speakers of the Chagatai language lived in <mask> .", "Where did all speakers of the Chagatai language live? or All speakers of the Chagatai language <mask> in France ."], "answers": [["speakers", 4, 12], ["the Chagatai language", 16, 37], ["France", 47, 53], ["lived", 38, 43]], "evidential": [["The authors", "An author", "People", "A person"], ["the Chagatai language", "Chagatai language", "The Chagatai language", "a Chagatai language"], ["Europe", "a place", "France", "Asia"], ["lived", "existed", "resided", "originated"]], "culprit": [2]}
|
30 |
+
{"id": 23428, "claim": "The Cincinnati Kid was directed by Norman Jewison in 1960 .", "questions": ["What movie was directed by Norman Jewison? or <mask> was directed by Norman Jewison in 1960 .", "Who directed The Cincinnati Kid? or The Cincinnati Kid was directed by <mask> in 1960 .", "When was The Cincinnati Kid directed? or The Cincinnati Kid was directed by Norman Jewison in <mask> .", "What was the name of the film that was produced by Norman Jewison? or The Cincinnati Kid was <mask> by Norman Jewison in 1960 ."], "answers": [["The Cincinnati Kid", 0, 18], ["Norman Jewison", 35, 49], ["1960", 53, 57], ["directed", 23, 31]], "evidential": [["The Cincinnati Kid", "the Cincinnati Kid", "The CincinnatiKid", "Cincinnati Kid"], ["Norman Jewison", "a man", "Norman JewISON", "Norman Jewisons"], ["1965", "the 1960s", "the 1960 's", "the late 1960s"], ["directed", "produced", "written", "filmed"]], "culprit": [2]}
|
31 |
+
{"id": 67903, "claim": "Murda Beatz 's real name is Donald Trump .", "questions": ["Who is Donald Trump's real name? or <mask> 's real name is Donald Trump .", "What is Beatz' real name? or Murda Beatz 's real name is <mask> .", "What is the <mask> name of Murda Beatz? or Murda Beatz 's <mask> name is Donald Trump ."], "answers": [["Murda Beatz", 0, 11], ["Donald Trump", 28, 40], ["real", 15, 19]], "evidential": [["Murda Beatz", "Murdas Beatz", "Murda beatz", "Murdac Beatz"], ["Donald Trump", "a Donald Trump", "Donald Donald Trump", "Donald John Trump"], ["middle", "real", "full", "legal"]], "culprit": [1]}
|
32 |
+
{"id": 45585, "claim": "Harris Jayaraj is from Idaho .", "questions": ["Who is from Idaho? or <mask> is from Idaho .", "Where is Harris Jayaraj from? or Harris Jayaraj is from <mask> ."], "answers": [["Harris Jayaraj", 0, 14], ["Idaho", 23, 28]], "evidential": [["Harris Jayaraj", "Harris Jayaram", "Harris Jayarbaj", "Harris Jayaraja"], ["a state", "Idaho", "a place", "America"]], "culprit": [1]}
|
33 |
+
{"id": 95601, "claim": "Ian Gillan is only a singer .", "questions": ["Who is the only singer? or <mask> is only a singer .", "What is Ian Gillan's job? or Ian Gillan is <mask> ."], "answers": [["Ian Gillan", 0, 10], ["only a singer", 14, 27]], "evidential": [["Ian Gillan", "Ian Gillan", "Ian Gillan", "Ian Gillans"], ["a singer", "a vocalist", "a singer and songwriter", "a performer"]], "culprit": [1]}
|
34 |
+
{"id": 122348, "claim": "Wolfgang Amadeus Mozart never married .", "questions": ["Who never married? or <mask> never married .", "What did Wolfgang Amadeus Mozart never do? or Wolfgang Amadeus Mozart never <mask> .", "How did Wolfgang Amadeus Mozart get married? or Wolfgang Amadeus Mozart <mask> married ."], "answers": [["Wolfgang Amadeus Mozart", 0, 23], ["married", 30, 37], ["never", 24, 29]], "evidential": [["Wolfgang Amadeus Mozart", "Amadeus Mozart", "Johannes Amadeus Mozart", "Wolfgang Amadeu Mozart"], ["married", "marry", "died", "live"], ["got", "eventually", "later", "was"]], "culprit": [2]}
|
35 |
+
{"id": 146157, "claim": "The New England Patriots lost five Super Bowls .", "questions": ["Who lost five Super Bowls? or <mask> lost five Super Bowls .", "What type of game did the New England Patriots lose? or The New England Patriots lost five <mask> .", "How many Super Bowls did the New England Patriots win? or The New England Patriots <mask> five Super Bowls .", "How many Super Bowls did the New England Patriots lose? or The New England Patriots lost <mask> Super Bowls ."], "answers": [["The New England Patriots", 0, 24], ["Super Bowls", 35, 46], ["lost", 25, 29], ["five", 30, 34]], "evidential": [["New England Patriots", "The Patriots", "The New Patriots", "Patriots"], ["Super Bowls", "a Super Bowl", "the Super Bowl", "a football game"], ["won", "played", "reached", "achieved"], ["five", "5", "least five", "seven"]], "culprit": [2]}
|
36 |
+
{"id": 107699, "claim": "Floyd Mayweather Jr. is incapable of boxing .", "questions": ["Who is incapable of boxing? or <mask> is incapable of boxing .", "Floyd Mayweather Jr. is incapable of what sport? or Floyd Mayweather Jr. is incapable of <mask> .", "Is Floyd Mayweather Jr. capable or <mask> of boxing? or Floyd Mayweather Jr. is <mask> of boxing ."], "answers": [["Floyd Mayweather Jr.", 0, 20], ["boxing", 37, 43], ["incapable", 24, 33]], "evidential": [["Floyd Mayweather Jr.", "Floyd Mayweather Jr .", "Floyd Mayweather Jr.?", "Floyd Mayweather Jr.:"], ["boxing", "professional boxing", "boxed", "a sport"], ["incapable", "capable", "a capable", "an athlete"]], "culprit": [2]}
|
37 |
+
{"id": 216594, "claim": "Calcaneal spurs are only detected by a dancing technique .", "questions": ["What is only detected by a dancing technique? or <mask> are only detected by a dancing technique .", "What is the only way to detect Calcaneal spurs? or Calcaneal spurs are only detected by <mask> .", "How are Calcaneal spurs <mask>? or Calcaneal spurs are only <mask> by a dancing technique .", "How are Calcaneal spurs detected by a dancing technique? or Calcaneal spurs are <mask> detected by a dancing technique ."], "answers": [["Calcaneal spurs", 0, 15], ["a dancing technique", 37, 56], ["detected", 25, 33], ["only", 20, 24]], "evidential": [["Calcaneal spurs", "Calcaneal spur", "Calcaneals spurs", "Calcane al spurs"], ["a radiographic examination", "an x ray", "radiographic examination", "a radiographic exam"], ["detected", "observed", "seen", "indicated"], ["typically", "usually", "often", "frequently"]], "culprit": [1, 3]}
|
38 |
+
{"id": 118068, "claim": "Liverpool is unrelated to The Beatles .", "questions": ["What city is not related to The Beatles? or <mask> is unrelated to The Beatles .", "Liverpool is not related to what band? or Liverpool is unrelated to <mask> .", "Is Liverpool related to The Beatles? or Liverpool is <mask> to The Beatles ."], "answers": [["Liverpool", 0, 9], ["The Beatles", 26, 37], ["unrelated", 13, 22]], "evidential": [["Liverpool", "The Liverpool", "Liverpool City", "Liverpool"], ["The Beatles", "the Beatles", "a rock band", "a band"], ["related", "connected", "a home", "home"]], "culprit": [2]}
|
39 |
+
{"id": 110504, "claim": "The Mighty Ducks was only distributed by a subsidiary of 20th Century Fox .", "questions": ["What was the name of the show that was distributed by 20th Century Fox? or <mask> was only distributed by a subsidiary of 20th Century Fox .", "Who distributed the Mighty Ducks? or The Mighty Ducks was only distributed by <mask> of 20th Century Fox .", "Who distributed the Mighty Ducks? or The Mighty Ducks was only distributed by a subsidiary of <mask> .", "How was the Mighty Ducks <mask>? or The Mighty Ducks was only <mask> by a subsidiary of 20th Century Fox .", "How many times was The Mighty Ducks distributed by 20th Century Fox? or The Mighty Ducks was <mask> distributed by a subsidiary of 20th Century Fox ."], "answers": [["The Mighty Ducks", 0, 16], ["a subsidiary", 41, 53], ["20th Century Fox", 57, 73], ["distributed", 26, 37], ["only", 21, 25]], "evidential": [["The Mighty Ducks", "The Mighty Ducks of Anaheim", "The Mighty Duck", "Mighty Ducks"], ["the parent company", "a division", "a subsidiary", "the company"], ["Walt Disney Pictures", "Disney Pictures", "a company", "Walt Disney Productions"], ["distributed", "produced", "released", "created"], ["only", "never", "twice", "previously"]], "culprit": [1, 2, 4]}
|
40 |
+
{"id": 161151, "claim": "No Strings Attached was released on May 21 .", "questions": ["What was released on May 21? or No <mask> was released on May 21 .", "When was No Strings Attached released? or No Strings Attached was released on <mask> .", "When was No Strings Attached <mask>? or No Strings Attached was <mask> on May 21 ."], "answers": [["Strings Attached", 3, 19], ["May 21", 36, 42], ["released", 24, 32]], "evidential": [["Strings Attached", "strings Attached", "Strings Attached album", "Strings Attached film"], ["January 21 , 2011", "January 21st", "January 21st 2011", "January 21"], ["released", "published", "issued", "distributed"]], "culprit": [1]}
|
41 |
+
{"id": 150099, "claim": "Sherilyn Fenn is Japanese .", "questions": ["Who is the name of the Japanese woman who is a native of Japan? or <mask> is Japanese .", "What language is Sherilyn Fenn? or Sherilyn Fenn is <mask> ."], "answers": [["Sherilyn Fenn", 0, 13], ["Japanese", 17, 25]], "evidential": [["Sherilyn Fenn", "The Sherilyn Fenn", "Sherilyn Fenna", "Cherilyn Fenn"], ["American", "English", "North American", "French"]], "culprit": [1]}
|
42 |
+
{"id": 157652, "claim": "Touchscreens are only used in gaming computers .", "questions": ["What type of screen is used in gaming computers? or <mask> are only used in gaming computers .", "What type of computers are touch screens used for? or Touchscreens are only used in <mask> .", "What is the only way a touch screen can be <mask> in gaming computers? or Touchscreens are only <mask> in gaming computers .", "How are touchscreens used in gaming computers? or Touchscreens are <mask> used in gaming computers ."], "answers": [["Touchscreens", 0, 12], ["gaming computers", 30, 46], ["used", 22, 26], ["only", 17, 21]], "evidential": [["Touchscreens", "Touchscreen", "Touchscreen devices", "Touch screens"], ["personal computers", "electronic voting machines", "computer systems", "mobile computers"], ["common", "used", "found", "prevalent"], ["commonly", "frequently", "increasingly", "widely"]], "culprit": [3]}
|
43 |
+
{"id": 209863, "claim": "In a Lonely Place had nothing to do with any novel by Dorthy B. Hughes .", "questions": ["What was the name of the book that had nothing to do with any novel by Dorthy or <mask> had nothing to do with any novel by Dorthy B. Hughes .", "What did In a Lonely Place have to do with Dorthy B. Hughes or In a Lonely Place had <mask> to do with any novel by Dorthy B. Hughes .", "What type of work did In a Lonely Place have nothing to do with? or In a Lonely Place had nothing to do with any <mask> by Dorthy B. Hughes .", "Who wrote In a Lonely Place? or In a Lonely Place had nothing to do with any novel by <mask> ."], "answers": [["In a Lonely Place", 0, 17], ["nothing", 22, 29], ["novel", 45, 50], ["Dorthy B. Hughes", 54, 70]], "evidential": [["In a Lonely Place", "in a Lonely Place", "In a Lonely place", "In a Lonely Place ."], ["a lot", "a thing", "nothing", "a script"], ["novels", "mystery work", "written work", "written works"], ["Dorothy B. Hughes", "a mystery writer", "the mystery writer", "the author"]], "culprit": [1, 2, 3]}
|
44 |
+
{"id": 3305, "claim": "Julianne Moore was not in the television series As the World Turns .", "questions": ["Who was not in the television series As The World Turns? or <mask> was not in the television series As the World Turns .", "What was Julianne Moore not in? or Julianne Moore was not in <mask> As the World Turns .", "What television series did Julianne Moore not appear in? or Julianne Moore was not in the television series As <mask> ."], "answers": [["Julianne Moore", 0, 14], ["the television series", 26, 47], ["the World Turns", 51, 66]], "evidential": [["Julianne Moore", "Juliene Moore", "Juliann Moore", "Julianna Moore"], ["the soap opera", "the television show", "the television series", "the show"], ["the World Turns", "The World Turns", "the World turns", "a World Turns"]], "culprit": [1, 2]}
|
45 |
+
{"id": 83351, "claim": "In 2015 , among Mexicans , 70 % of adults had consumed alcoholic drink in the last year .", "questions": ["In what year did 70 % of Mexican adults drink alcohol? or In <mask> , among Mexicans , 70 % of adults had consumed alcoholic drink in the last year .", "What ethnicity had the highest percentage of alcoholic beverages in 2015? or In 2015 , among <mask> , 70 % of adults had consumed alcoholic drink in the last year .", "What percentage of Mexican adults had consumed alcohol in 2015? or In 2015 , among Mexicans , <mask> of adults had consumed alcoholic drink in the last year .", "What group of Mexicans consumed alcohol in 2015? or In 2015 , among Mexicans , 70 % of <mask> had consumed alcoholic drink in the last year .", "What type of drink did 70 % of Mexican adults consume in 2015? or In 2015 , among Mexicans , 70 % of adults had consumed <mask> in the last year .", "In what year did 70 % of Mexican adults drink alcohol? or In 2015 , among Mexicans , 70 % of adults had consumed alcoholic drink in <mask> .", "What did 70 % of adults in Mexico do with alcoholic beverages? or In 2015 , among Mexicans , 70 % of adults had <mask> alcoholic drink in the last year ."], "answers": [["2015", 3, 7], ["Mexicans", 16, 24], ["70 %", 27, 31], ["adults", 35, 41], ["alcoholic drink", 55, 70], ["the last year", 74, 87], ["consumed", 46, 54]], "evidential": [["2015", "2015 's", "the 2015 year", "the last year"], ["Americans", "Mexican", "the Mexican", "Mexicans"], ["89 %", "90 %", "70 %", "87 %"], ["adults", "people", "adult", "Americans"], ["alcohol", "alcoholic drink", "alcoholic drinks", "alcoholic beverages"], ["the last year", "the past year", "the year", "2015"], ["drank", "drunk", "consumed", "drinking"]], "culprit": [1]}
|
46 |
+
{"id": 97937, "claim": "Watchmen is a film set in the future .", "questions": ["What is the name of the film set in the future? or <mask> is a film set in the future .", "What type of film is Watchmen? or Watchmen is <mask> set in the future .", "What is the setting of Watchmen? or Watchmen is a film set in <mask> .", "Where is Watchmen <mask>? or Watchmen is a film <mask> in the future ."], "answers": [["Watchmen", 0, 8], ["a film", 12, 18], ["the future", 26, 36], ["set", 19, 22]], "evidential": [["Watchmen", "Watchmen ( film )", "Watchmen( film )", "Watchmen(film )"], ["a superhero film", "a film", "a dystopian film", "a cinematic film"], ["an alternate history", "a dystopian history", "a dystopian future", "a past"], ["set", "located", "filmed", "based"]], "culprit": [2]}
|
47 |
+
{"id": 8298, "claim": "Simon Pegg is only a banker .", "questions": ["Who is a banker? or <mask> is only a banker .", "What is Simon Pegg's job title? or Simon Pegg is <mask> ."], "answers": [["Simon Pegg", 0, 10], ["only a banker", 14, 27]], "evidential": [["Simon Pegg", "Simon Pgg", "Simon pegg", "Simon Pegg"], ["a producer", "a screenwriter", "an entertainer", "an executive producer"]], "culprit": [1]}
|
48 |
+
{"id": 193862, "claim": "Barry Van Dyke is the first son of Dick Van Dyke .", "questions": ["Who is the first son of Dick Van Dyke? or <mask> is the first son of Dick Van Dyke .", "What is Barry Van Dyke's first name? or Barry Van Dyke is <mask> of Dick Van Dyke .", "Who is Barry Van Dyke's father? or Barry Van Dyke is the first son of <mask> ."], "answers": [["Barry Van Dyke", 0, 14], ["the first son", 18, 31], ["Dick Van Dyke", 35, 48]], "evidential": [["Barry Van Dyke", "Barry van Dyke", "Dick Van Dyke", "A man"], ["the second son", "the first son", "the second child", "the son"], ["Dick Van Dyke", "an entertainer", "an actor", "a comedian"]], "culprit": [1]}
|
49 |
+
{"id": 55279, "claim": "Helmand Province contains a city .", "questions": ["What province contains a city? or <mask> contains a city .", "What does Helmand Province contain? or Helmand Province contains <mask> .", "What is the name of the city in Helmand Province? or Helmand Province <mask> a city ."], "answers": [["Helmand Province", 0, 16], ["a city", 26, 32], ["contains", 17, 25]], "evidential": [["Helmand Province", "Helmand province", "Helmand Provincial", "Helmand District"], ["people", "a city", "a town", "a population"], ["contains", "includes", "possesses", "features"]], "culprit": [1]}
|
50 |
+
{"id": 69871, "claim": "Robert Zemeckis has rarely directed movies .", "questions": ["Who has rarely directed a movie? or <mask> has rarely directed movies .", "What type of film has Zemeckis rarely directed? or Robert Zemeckis has rarely directed <mask> .", "What type of movies has Zemeckis rarely made? or Robert Zemeckis has rarely <mask> movies .", "How often has Zemeckis directed movies? or Robert Zemeckis has <mask> directed movies ."], "answers": [["Robert Zemeckis", 0, 15], ["movies", 36, 42], ["directed", 27, 35], ["rarely", 20, 26]], "evidential": [["Robert Zemeckis", "Robert Zemeckis", "Robert Zemckis", "Robert Memeckis"], ["a film", "a drama film", "a comedy", "a comedy film"], ["directed", "direct", "produced", "directing"], ["never", "rarely", "always", "only"]], "culprit": [3]}
|
51 |
+
{"id": 48276, "claim": "Raees ( film ) stars an Indian film actor born in April 1965 .", "questions": ["What film stars an Indian actor? or <mask> stars an Indian film actor born in April 1965 .", "What nationality is Raees? or Raees ( film ) stars <mask> film actor born in April 1965 .", "What is Raees' career? or Raees ( film ) stars an Indian <mask> born in April 1965 .", "When was Raees born? or Raees ( film ) stars an Indian film actor born in <mask> .", "What is Raees' career? or Raees ( film ) <mask> an Indian film actor born in April 1965 .", "What is the birth year of Raees? or Raees ( film ) stars an Indian film actor <mask> in April 1965 ."], "answers": [["Raees ( film )", 0, 14], ["an Indian", 21, 30], ["film actor", 31, 41], ["April 1965", 50, 60], ["stars", 15, 20], ["born", 42, 46]], "evidential": [["Raees ( film )", "Raees", "Raees( film )", "Raes ( film )"], ["an Indian", "a Indian", "An Indian", "an India"], ["film actor", "film actor and television personality", "actor", "television personality"], ["1965", "the sixties", "the 1960s", "the year 1965"], ["stars", "features", "starred", "includes"], ["born", "birth year", "birth date", "founded"]], "culprit": [3]}
|
52 |
+
{"id": 101845, "claim": "Richard Kuklinski is a innocent man .", "questions": ["Who is an innocent man? or <mask> is a innocent man .", "What is Richard Kuklinski? or Richard Kuklinski is <mask> ."], "answers": [["Richard Kuklinski", 0, 17], ["a innocent man", 21, 35]], "evidential": [["Richard Kuklinski", "Richard Kuklinski", "Richard Kuklinsky", "Richard Kuplinski"], ["a person", "a killer", "a serial killer", "a criminal"]], "culprit": [1]}
|
53 |
+
{"id": 44240, "claim": "Amancio Ortega refuses to be a businessman .", "questions": ["Who refuses to be a businessman? or <mask> refuses to be a businessman .", "What does Amancio Ortega refuse to be? or Amancio Ortega refuses to be <mask> .", "What does Amancio Ortega do to be a businessman? or Amancio Ortega <mask> to be a businessman ."], "answers": [["Amancio Ortega", 0, 14], ["a businessman", 29, 42], ["refuses", 15, 22]], "evidential": [["Amancio Ortega", "Amancio Ortega Gaona", "Amancio Ortega Jr.", "Amancio Orlando Ortega"], ["a businessman", "a tycoon", "a person", "a businessperson"], ["wants", "used", "works", "acts"]], "culprit": [2]}
|
54 |
+
{"id": 142735, "claim": "Elizabeth I was the daughter of a salesman .", "questions": ["What was my mother's name? or <mask> I was the daughter of a salesman .", "What was Elizabeth I's mother's name? or Elizabeth I was <mask> of a salesman .", "What was Elizabeth I's father's occupation? or Elizabeth I was the daughter of <mask> ."], "answers": [["Elizabeth", 0, 9], ["the daughter", 16, 28], ["a salesman", 32, 42]], "evidential": [["Elizabeth", "Elizabeth I", "ElizabethI", "Elizabeth II"], ["the daughter", "the second daughter", "the first daughter", "the second wife"], ["a man", "a second wife", "Henry VIII", "a person"]], "culprit": [2]}
|
55 |
+
{"id": 167977, "claim": "Don Bradman was called the \" greatest living Australian \" by a President .", "questions": ["Who was called the \"greatest living Australian\" by a President? or <mask> was called the \" greatest living Australian \" by a President .", "What nationality was Don Bradman? or Don Bradman was called the \" greatest living <mask> \" by a President .", "What was Bradman called by a President? or Don Bradman was called the \" greatest living Australian <mask> by a President .", "Who called Don Bradman the \"greatest living Australian\"? or Don Bradman was called the \" greatest living Australian \" by <mask> .", "What was Don Bradman called by a President? or Don Bradman was called <mask> Australian \" by a President ."], "answers": [["Don Bradman", 0, 11], ["Australian", 45, 55], ["\"", 56, 57], ["a President", 61, 72], ["the \" greatest living", 23, 44]], "evidential": [["Don Bradman", "Donald Bradman", "Don Bradm", "An Australian"], ["Australian", "American", "an Australian", "Australia"], ["person", "\"", "honored", "icon"], ["Prime Minister John Howard", "John Howard", "a Prime Minister", "the Prime Minister"], ["the \" greatest living", "the \" great living", "the \" best living", "the \" Greatest living"]], "culprit": [3]}
|
56 |
+
{"id": 227084, "claim": "Roar ( song ) is a Katy Perry song from her fifth album .", "questions": ["What is the name of Katy Perry's fifth album? or <mask> is a Katy Perry song from her fifth album .", "What is the name of the song Roar? or Roar ( song ) is <mask> song from her fifth album .", "What is Roar? or Roar ( song ) is a Katy Perry <mask> from her fifth album .", "What album is Roar from? or Roar ( song ) is a Katy Perry song from her <mask> ."], "answers": [["Roar ( song )", 0, 13], ["a Katy Perry", 17, 29], ["song", 30, 34], ["fifth album", 44, 55]], "evidential": [["Roar", "Roars", "Roar .", "Rar"], ["a Katy Perry", "an Katy Perry", "an American", "an artist 's"], ["song", "title", "single", "track"], ["fourth studio album", "fourth album", "fourth record", "fourth studio record"]], "culprit": [3]}
|
57 |
+
{"id": 205646, "claim": "St. Anger is the second studio album by Metallica .", "questions": ["What is the name of Metallica's second album? or <mask> is the second studio album by Metallica .", "What is the name of the second album by Metallica? or St. Anger is <mask> by Metallica .", "What band released St. Anger? or St. Anger is the second studio album by <mask> ."], "answers": [["St. Anger", 0, 9], ["the second studio album", 13, 36], ["Metallica", 40, 49]], "evidential": [["St. Anger", "The St. Anger", "St . Anger", "St. Anger ."], ["the eighth studio album", "an album", "an eighth studio album", "the eighth album"], ["Metallica", "a heavy metal band", "the Metallica", "a heavy metal group"]], "culprit": [1]}
|
58 |
+
{"id": 209095, "claim": "Stadium Arcadium was released after 2009 .", "questions": ["What stadium was released after 2009? or <mask> was released after 2009 .", "In what year was Stadium Arcadium released? or Stadium Arcadium was released after <mask> .", "What happened to Stadium Arcadium after 2009? or Stadium Arcadium was <mask> after 2009 .", "When was Stadium Arcadium released? or Stadium Arcadium was released <mask> 2009 ."], "answers": [["Stadium Arcadium", 0, 16], ["2009", 36, 40], ["released", 21, 29], ["after", 30, 35]], "evidential": [["Stadium Arcadium", "Stadium Arcadia", "Stadium Arcadadium", "Stadium Arcadion"], ["2006", "the 2000s", "a different year", "a 2006 album"], ["released", "disbanded", "dropped", "cancelled"], ["before", "after", "around", "back"]], "culprit": [1, 3]}
|
59 |
+
{"id": 155657, "claim": "The Prowler was created by Stan Lee , John Buscema , and dust .", "questions": ["What was the name of the film created by Stan Lee, John Buscema and Dust or <mask> was created by Stan Lee , John Buscema , and dust .", "Who created The Prowler? or The Prowler was created by <mask> , John Buscema , and dust .", "Who created The Prowler? or The Prowler was created by Stan Lee , <mask> , and dust .", "What was the Prowler made of? or The Prowler was created by Stan Lee , John Buscema , and <mask> .", "How was The Prowler <mask>? or The Prowler was <mask> by Stan Lee , John Buscema , and dust ."], "answers": [["The Prowler", 0, 11], ["Stan Lee", 27, 35], ["John Buscema", 38, 50], ["dust", 57, 61], ["created", 16, 23]], "evidential": [["The Prowler", "The Prowler ( 1981 film )", "The Prowler( 1981 film )", "Prowler ( 1981 film )"], ["Stan Lee", "Jim Mooney", "writer editor", "writer and editor"], ["comics editor", "comics writers", "people", "comics editors"], ["a writer", "a person", "characters", "comics"], ["created", "produced", "designed", "invented"]], "culprit": [3]}
|
60 |
+
{"id": 172095, "claim": "Selena Gomez & the Scene 's debut album was released in any month except September .", "questions": ["What group's debut album was released in any month except September? or <mask> 's debut album was released in any month except September .", "Selena Gomez & the Scene's debut album was released in what <mask> or Selena Gomez & the Scene 's debut album was released in any <mask> except September .", "Selena Gomez & the Scene's debut album was released in what month or Selena Gomez & the Scene 's debut album was released in any month except <mask> .", "When was Selena Gomez's debut album <mask>? or Selena Gomez & the Scene 's debut album was <mask> in any month except September ."], "answers": [["Selena Gomez & the Scene", 0, 24], ["month", 60, 65], ["September", 73, 82], ["released", 44, 52]], "evidential": [["Selena Gomez & the Scene", "The Selena Gomez & the Scene", "Selena Gomez & The Scene", "Selena Gomez & the Scene"], ["September", "the month", "the US", "the summer"], ["September", "October", "July", "August"], ["released", "published", "issued", "launched"]], "culprit": [2]}
|
61 |
+
{"id": 191441, "claim": "Keith Urban was released by Sony Music Entertainment .", "questions": ["What artist was released by Sony Music Entertainment? or <mask> was released by Sony Music Entertainment .", "What company released Keith Urban? or Keith Urban was released by <mask> .", "When was Keith Urban <mask>? or Keith Urban was <mask> by Sony Music Entertainment ."], "answers": [["Keith Urban", 0, 11], ["Sony Music Entertainment", 28, 52], ["released", 16, 24]], "evidential": [["Keith Urban", "Keith Urban II", "Keith U.", "The Keith Urban"], ["Capitol Nashville", "Capitol Records", "a company", "Capitol"], ["released", "created", "signed", "founded"]], "culprit": [1]}
|
62 |
+
{"id": 188640, "claim": "Foot Locker operates in only 11 countries .", "questions": ["What company operates in only 11 countries? or <mask> operates in only 11 countries .", "How many countries does Foot Locker operate in? or Foot Locker operates in <mask> countries .", "How does Foot Locker operate in 11 countries? or Foot Locker <mask> in only 11 countries ."], "answers": [["Foot Locker", 0, 11], ["only 11", 24, 31], ["operates", 12, 20]], "evidential": [["Foot Locker", "Foot Locker , Inc.", "Foot Locker ( Inc.", "Foot Locker Inc."], ["28", "least 28", "27", "29"], ["operates", "operate", "exists", "runs"]], "culprit": [1]}
|
63 |
+
{"id": 164407, "claim": "Carey Hayes is only a German lawyer .", "questions": ["Who is a German lawyer? or <mask> is only a German lawyer .", "What nationality is Hayes? or Carey Hayes is only <mask> lawyer .", "What is Hayes' profession? or Carey Hayes is only a German <mask> .", "How old is Carey Hayes? or Carey Hayes is <mask> German lawyer ."], "answers": [["Carey Hayes", 0, 11], ["a German", 20, 28], ["lawyer", 29, 35], ["only a", 15, 21]], "evidential": [["Carey Hayes", "Carey Hayes Jr.", "Carey Hayes", "Carey Hayden"], ["an American", "an american", "a North American", "an Oregon"], ["writer", "screenwriter", "a writer", "author"], ["a 21st century", "a 21 year old", "a 21-year old", "a young"]], "culprit": [1, 2, 3]}
|
64 |
+
{"id": 83545, "claim": "Volkswagen Group declines financing , leasing , and fleet management .", "questions": ["Which company declines financing, leasing and fleet management? or <mask> declines financing , leasing , and fleet management .", "What does Volkswagen Group decline? or Volkswagen Group declines financing , leasing , and <mask> .", "What does Volkswagen Group do with financing, leasing and fleet management? or Volkswagen Group <mask> financing , leasing , and fleet management ."], "answers": [["Volkswagen Group", 0, 16], ["fleet management", 52, 68], ["declines", 17, 25]], "evidential": [["Volkswagen Group", "The Volkswagen Group", "VW Group", "Volkswagen group"], ["fleet management", "fleet management services", "fleets management", "vehicles fleet management"], ["offers", "provides", "performs", "facilitates"]], "culprit": [2]}
|
65 |
+
{"id": 97837, "claim": "Caroline Kennedy is against diplomacy .", "questions": ["Who is against diplomacy? or <mask> is against diplomacy .", "Caroline Kennedy is against what? or Caroline Kennedy is against <mask> ."], "answers": [["Caroline Kennedy", 0, 16], ["diplomacy", 28, 37]], "evidential": [["Caroline Kennedy", "Caroline Flemming", "Caroline Klemming", "Caroline Kennedy"], ["politics", "the Democratic Party", "a presidential election", "a presidential campaign"]], "culprit": [1]}
|
66 |
+
{"id": 229309, "claim": "A working animal is released by humans .", "questions": ["What is released by humans? or <mask> is released by humans .", "Who releases a working animal? or A working animal is released by <mask> .", "What happens to a working animal when it is <mask>? or A working animal is <mask> by humans ."], "answers": [["A working animal", 0, 16], ["humans", 32, 38], ["released", 20, 28]], "evidential": [["A working animal", "A Working animal", "Working animal", "An animal"], ["humans", "a human", "human beings", "people"], ["kept", "domesticated", "raised", "captured"]], "culprit": [2]}
|
67 |
+
{"id": 98672, "claim": "Balibo ( film ) starts in the year 1995 .", "questions": ["What film was released in 1995? or <mask> starts in the year 1995 .", "When does Balibo begin? or Balibo ( film ) starts in <mask> .", "When does Balibo begin? or Balibo ( film ) <mask> in the year 1995 ."], "answers": [["Balibo ( film )", 0, 15], ["the year 1995", 26, 39], ["starts", 16, 22]], "evidential": [["Balibo", "Balibo ( film )", "Balibo( film )", "Balibo ( films )"], ["1975", "the 1970s", "the 1980s", "the year 1975"], ["begins", "starts", "began", "begin"]], "culprit": [1]}
|
68 |
+
{"id": 55239, "claim": "Victor Frankenstein is only a romance film .", "questions": ["What is the name of the film that is a romance? or <mask> is only a romance film .", "What is the purpose of Victor Frankenstein? or Victor Frankenstein is <mask> ."], "answers": [["Victor Frankenstein", 0, 19], ["only a romance film", 23, 42]], "evidential": [["Victor Frankenstein ( film )", "Victor Frankenstein", "Victor Frankenstein( film )", "Victor Frankenstein ( films )"], ["a film", "a motion picture", "a recorded work", "directed"]], "culprit": [1]}
|
69 |
+
{"id": 7728, "claim": "Hinduism has zero textual resources .", "questions": ["What religion has zero textual resources? or <mask> has zero textual resources .", "How many textual resources does Hinduism have? or Hinduism has <mask> ."], "answers": [["Hinduism", 0, 8], ["zero textual resources", 13, 35]], "evidential": [["Hinduism", "Hindu religion", "Indianism", "Buddhism"], ["multiple textual resources", "many shared textual resources", "shared textual resources", "many textual resources"]], "culprit": [1]}
|
70 |
+
{"id": 202475, "claim": "Tinker Tailor Soldier Spy only stars Gary Oldman .", "questions": ["What movie stars Gary Oldman? or <mask> only stars Gary Oldman .", "Who stars in Tinker Tailor Soldier Spy? or Tinker Tailor Soldier Spy only stars <mask> .", "What is Gary Oldman's first name? or Tinker Tailor Soldier Spy only <mask> Gary Oldman .", "How many episodes does Tinker Tailor Soldier Spy have? or Tinker Tailor Soldier Spy <mask> stars Gary Oldman ."], "answers": [["Tinker Tailor Soldier Spy", 0, 25], ["Gary Oldman", 37, 48], ["stars", 31, 36], ["only", 26, 30]], "evidential": [["Tinker Tailor Soldier Spy", "The Tinker Tailor Soldier Spy", "Tinker Tailor Soldier Spy", "Tinker Tailor Soldier Spy movie"], ["Gary Oldman", "an actor", "George Smiley", "a man"], ["stars", "features", "includes", "contains"], ["only", "one episode", "2 episodes", "one series"]], "culprit": [2, 3]}
|
71 |
+
{"id": 159091, "claim": "Guatemala has lived without war for its entire existence .", "questions": ["What country has lived without war for its entire existence? or <mask> has lived without war for its entire existence .", "What has Guatemala lived without? or Guatemala has lived without <mask> for its entire existence .", "How long has Guatemala lived without war? or Guatemala has lived without war for its <mask> .", "How long has Guatemala been without war? or Guatemala has <mask> without war for its entire existence ."], "answers": [["Guatemala", 0, 9], ["war", 28, 31], ["entire existence", 40, 56], ["lived", 14, 19]], "evidential": [["Guatemala", "Central America Guatemala", "Guatemalan", "Central America"], ["a military", "a government", "war", "a war"], ["time", "existence", "decade", "decades"], ["existed", "gone", "lived", "been"]], "culprit": [1, 2]}
|
72 |
+
{"id": 24481, "claim": "David Spade was fired from being in Joe Dirt 2 : Beautiful Loser .", "questions": ["Who was fired from being in Joe Dirt 2? or <mask> was fired from being in Joe Dirt 2 : Beautiful Loser .", "What was David Spade's first role in? or David Spade was fired from being in <mask> 2 : Beautiful Loser .", "How many episodes of Joe Dirt did Spade have? or David Spade was fired from being in Joe Dirt <mask> : Beautiful Loser .", "What was the title of Joe Dirt 2? or David Spade was fired from being in Joe Dirt 2 : <mask> .", "How did David Spade react to being in Joe Dirt 2? or David Spade was <mask> from being in Joe Dirt 2 : Beautiful Loser ."], "answers": [["David Spade", 0, 11], ["Joe Dirt", 36, 44], ["2", 45, 46], ["Beautiful Loser", 49, 64], ["fired", 16, 21]], "evidential": [["David Spade", "David Spades", "David Spade", "David Spader"], ["Joe Dirt", "the comedy Joe Dirt", "the film Joe Dirt", "the movie Joe Dirt"], ["2", "two episodes", "two", "2 :"], ["Beautiful Loser", "Beautiful Ler", "BeautifulLoser", "Beautiful Losers"], ["distracted", "banned", "traumatized", "disheartened"]], "culprit": [4]}
|
73 |
+
{"id": 67876, "claim": "Britt Robertson was not in the television series Girlboss .", "questions": ["Who was not in the television series Girlboss? or <mask> was not in the television series Girlboss .", "What television series did Britt Robertson not appear in? or Britt Robertson was not in the television series <mask> .", "What was Britt Robertson not in? or Britt Robertson was not in <mask> Girlboss ."], "answers": [["Britt Robertson", 0, 15], ["Girlboss", 49, 57], ["the television series", 27, 48]], "evidential": [["Britt Robertson", "Brittany Robertson", "Britt Roberts", "Brit Robertson"], ["Girlboss", "The Secret Circle", "Girlsboss", "a Netflix comedy"], ["the comedy television series", "the show", "the comedy TV series", "the TV series"]], "culprit": [1, 2]}
|
74 |
+
{"id": 76324, "claim": "Richard Dawson is still alive .", "questions": ["Who is still alive? or <mask> is still alive .", "How old is Richard Dawson? or Richard Dawson is <mask> alive .", "What is Richard Dawson's age? or Richard Dawson is still <mask> ."], "answers": [["Richard Dawson", 0, 14], ["still", 18, 23], ["alive", 24, 29]], "evidential": [["Richard Dawson", "Richard Dwayne Dawson", "Richard Dawsons", "Richard D Dawson"], ["still", "alive", "barely", "currently"], ["dead", "deceased", "alive", "63"]], "culprit": [1, 2]}
|
75 |
+
{"id": 104710, "claim": "Miranda Otto is the son of Barry Otto .", "questions": ["Who is the son of Barry Otto? or <mask> is the son of Barry Otto .", "What is Miranda Otto's biological name? or Miranda Otto is <mask> of Barry Otto .", "Who is Miranda Otto's father? or Miranda Otto is the son of <mask> ."], "answers": [["Miranda Otto", 0, 12], ["the son", 16, 23], ["Barry Otto", 27, 37]], "evidential": [["Miranda Otto", "Miriam Otto", "Miranda Oster", "Miranda Oste"], ["the daughter", "the sister", "the biological daughter", "the granddaughter"], ["an actor", "Barry Otto", "an actress", "a man"]], "culprit": [1]}
|
76 |
+
{"id": 92988, "claim": "See You on the Other Side is a boat .", "questions": ["What side of the boat is See You on? or See You on the Other <mask> is a boat .", "What is See You on the Other Side? or See You on the Other Side is <mask> .", "What is the name of the boat that is on the other side? or <mask> You on the Other Side is a boat ."], "answers": [["Side", 21, 25], ["a boat", 29, 35], ["See", 0, 3]], "evidential": [["Side", "side", "side 2", "Side 2"], ["an album", "a recorded work", "a record", "a work"], ["See", "The album", "See '", "see"]], "culprit": [1]}
|
77 |
+
{"id": 150834, "claim": "Tool has not produced albums .", "questions": ["Which tool has not produced an album? or <mask> has not produced albums .", "Tool has not produced what? or Tool has not produced <mask> .", "Tool has not what type of albums? or Tool has not <mask> albums ."], "answers": [["Tool", 0, 4], ["albums", 22, 28], ["produced", 13, 21]], "evidential": [["Tool", "Tool ( band )", "Tool( band )", "Tool(band )"], ["albums", "an album", "music", "records"], ["produced", "released", "created", "published"]], "culprit": [1]}
|
78 |
+
{"id": 135684, "claim": "Elizabeth I was the son of Anne Boleyn .", "questions": ["Who was the son of Anne Boleyn? or <mask> I was the son of Anne Boleyn .", "What was Elizabeth I's father's name? or Elizabeth I was <mask> of Anne Boleyn .", "Who was Elizabeth I's mother? or Elizabeth I was the son of <mask> ."], "answers": [["Elizabeth", 0, 9], ["the son", 16, 23], ["Anne Boleyn", 27, 38]], "evidential": [["Elizabeth", "Elizabeth I", "Queen Elizabeth", "ElizabethI"], ["the daughter", "the child", "the son", "a daughter"], ["Anne Boleyn", "Ann Boleyn", "Anne Bolyn", "a woman"]], "culprit": [1]}
|
79 |
+
{"id": 124045, "claim": "Ron Weasley was denied membership to Gryffindor house .", "questions": ["Who was denied membership to Gryffindor house? or <mask> was denied membership to Gryffindor house .", "What was Ron Weasley denied? or Ron Weasley was denied <mask> to Gryffindor house .", "What house was Ron Weasley denied membership to? or Ron Weasley was denied membership to <mask> house .", "What was Ron Weasley denied membership to? or Ron Weasley was denied membership to Gryffindor <mask> .", "What was Ron Weasley's status as a member of Gryffindor or Ron Weasley was <mask> membership to Gryffindor house ."], "answers": [["Ron Weasley", 0, 11], ["membership", 23, 33], ["Gryffindor", 37, 47], ["house", 48, 53], ["denied", 16, 22]], "evidential": [["Ron Weasley", "The Ron Weasley", "A Ron Weasley", "Ronald Weasley"], ["access", "a visit", "membership", "a membership"], ["the Gryffindor", "a Gryffindor", "The Gryffindor", "the Gryfindor"], ["house", "family", "houses", "home"], ["given", "granted", "denied", "required"]], "culprit": [4]}
|
80 |
+
{"id": 56381, "claim": "Lorelai Gilmore 's uncle was played by Edward Herrmann .", "questions": ["Who was the uncle of Edward Herrmann? or <mask> 's uncle was played by Edward Herrmann .", "Who played Lorelai Gilmore's uncle? or Lorelai Gilmore 's uncle was played by <mask> .", "What role did Edward Herrmann play in Lorelai Gilmore's uncle? or Lorelai Gilmore 's uncle was <mask> by Edward Herrmann ."], "answers": [["Lorelai Gilmore", 0, 15], ["Edward Herrmann", 39, 54], ["played", 29, 35]], "evidential": [["Lorelai Gilmore", "Lorelai Gilmore", "Lorelai Gilpin", "Lorelai Glyn"], ["Edward Herrmann", "an actor", "Edward Herrman", "a man"], ["played", "portrayed", "performed", "voiced"]], "culprit": [1]}
|
81 |
+
{"id": 78742, "claim": "Tim Roth is not an English actor .", "questions": ["Who is an English actor? or <mask> is not an English actor .", "What is Tim Roth's nationality? or Tim Roth is not <mask> actor .", "What is Tim Roth's profession? or Tim Roth is not an English <mask> ."], "answers": [["Tim Roth", 0, 8], ["an English", 16, 26], ["actor", 27, 32]], "evidential": [["Tim Roth", "Timothy Roth", "Tim Roth", "Tim R Roth"], ["an English", "a European", "a British", "an European"], ["actor", "director", "film actor", "film director"]], "culprit": [1, 2]}
|
82 |
+
{"id": 180717, "claim": "Victoria ( Dance Exponents song ) was released in New Zealand in 1980 .", "questions": ["What song was released in New Zealand in 1980? or <mask> was released in New Zealand in 1980 .", "Where was Victoria released? or Victoria ( Dance Exponents song ) was released in <mask> in 1980 .", "When was Victoria released in New Zealand? or Victoria ( Dance Exponents song ) was released in New Zealand in <mask> .", "What was the name of Victoria's song? or Victoria ( Dance Exponents song ) was <mask> in New Zealand in 1980 ."], "answers": [["Victoria ( Dance Exponents song )", 0, 33], ["New Zealand", 50, 61], ["1980", 65, 69], ["released", 38, 46]], "evidential": [["Victoria / Dance Exponents song", "Victoria", "Victoria Song", "Victoria song"], ["New Zealand", "China", "Australia", "the world"], ["1982", "the 1980s", "the eighties", "the nineties"], ["released", "performed", "played", "recorded"]], "culprit": [2]}
|
83 |
+
{"id": 125491, "claim": "Hot Right Now is from the album Escape from Planet Monday .", "questions": ["What is the name of the song from the album Escape from Planet Monday? or <mask> is from the album Escape from Planet Monday .", "What is the name of the album that Hot Right Now is from? or Hot Right Now is from <mask> from Planet Monday .", "What album is Hot Right Now from? or Hot Right Now is from the album <mask> ."], "answers": [["Hot Right Now", 0, 13], ["the album Escape", 22, 38], ["Escape from Planet Monday", 32, 57]], "evidential": [["Hot Right Now", "Hot right Now", "Hit Right Now", "Hot Right now"], ["Escape", "the album Escape", "an album", "the single Escape"], ["Escape from Planet Monday", "Nextlevelism", "Escape From Planet Monday", "Next Levelism"]], "culprit": [1, 2]}
|
84 |
+
{"id": 100204, "claim": "Shadowhunters did not premiere in 2016 .", "questions": ["What movie did not premiere in 2016? or <mask> did not premiere in 2016 .", "When did Shadowhunters not premiere? or Shadowhunters did not premiere in <mask> .", "What did Shadowhunters not do in 2016? or Shadowhunters did not <mask> in 2016 ."], "answers": [["Shadowhunters", 0, 13], ["2016", 34, 38], ["premiere", 22, 30]], "evidential": [["Shadowhunters", "The Shadowhunters", "Shadowshunters", "Shadowhunterters"], ["2016", "2015", "January 2016", "the 2010s"], ["premiere", "air", "start", "launch"]], "culprit": [1]}
|
85 |
+
{"id": 73208, "claim": "Reign Over Me was written and directed by Spike Lee .", "questions": ["What movie was directed by Spike Lee? or <mask> was written and directed by Spike Lee .", "Who directed Reign Over Me? or Reign Over Me was written and directed by <mask> .", "What was the name of the film that directed it? or Reign Over Me was <mask> and directed by Spike Lee .", "What was the film <mask> by Spike Lee? or Reign Over Me was written and <mask> by Spike Lee ."], "answers": [["Reign Over Me", 0, 13], ["Spike Lee", 42, 51], ["written", 18, 25], ["directed", 30, 38]], "evidential": [["Reign Over Me", "Reign over Me", "Reign of Me", "Reign Over me"], ["a man", "an American", "a person", "an actor"], ["written", "penned", "authored", "wrote"], ["directed", "produced", "written", "created"]], "culprit": [1]}
|
86 |
+
{"id": 225871, "claim": "Revolver has only ever topped a single chart .", "questions": ["What has only ever topped a single chart? or <mask> has only ever topped a single chart .", "How many charts has Revolver ever topped? or Revolver has only ever topped <mask> .", "How many times has Revolver ever <mask> a single chart? or Revolver has only ever <mask> a single chart .", "How many times has Revolver topped a single chart? or Revolver has <mask> ever topped a single chart ."], "answers": [["Revolver", 0, 8], ["a single chart", 30, 44], ["topped", 23, 29], ["only", 13, 17]], "evidential": [["Revolver", "Revololver", "Revolver Record", "The Revolver"], ["four charts", "two charts", "three charts", "zero charts"], ["topped", "charted", "reached", "appeared"], ["never", "n't", "only", "rarely"]], "culprit": [1, 3]}
|
87 |
+
{"id": 125225, "claim": "Omar Khadr has always been free .", "questions": ["Who has always been free? or <mask> has always been free .", "How long has Omar Khadr been free? or Omar Khadr has <mask> been free .", "Omar Khadr has always been what? or Omar Khadr has always been <mask> ."], "answers": [["Omar Khadr", 0, 10], ["always", 15, 21], ["free", 27, 31]], "evidential": [["Omar Khadr", "Omar Khadri", "Omar Khadr", "Om Khadr"], ["yet", "never", "always", "since"], ["a prisoner", "a person", "a human", "a detainee"]], "culprit": [1, 2]}
|
88 |
+
{"id": 174514, "claim": "Red Bull Racing was eradicated in the United Kingdom .", "questions": ["What was the name of the race that was eradicated in the UK? or <mask> was eradicated in the United Kingdom .", "Where was Red Bull Racing eradicated? or Red Bull Racing was eradicated in <mask> .", "What happened to Red Bull Racing in the UK? or Red Bull Racing was <mask> in the United Kingdom ."], "answers": [["Red Bull Racing", 0, 15], ["the United Kingdom", 34, 52], ["eradicated", 20, 30]], "evidential": [["Red Bull Racing", "Red Bull R&B Racing", "Red Bull Racing", "Red Bull racing"], ["Austria", "Europe", "the UK", "England"], ["acquired", "founded", "established", "created"]], "culprit": [2]}
|
89 |
+
{"id": 67464, "claim": "Louie ( season 1 ) was created by David Benioff .", "questions": ["What was the name of the show created by David Benioff? or <mask> was created by David Benioff .", "Who created Louie? or Louie ( season 1 ) was created by <mask> .", "What was the name of Louie? or Louie ( season 1 ) was <mask> by David Benioff ."], "answers": [["Louie ( season 1 )", 0, 18], ["David Benioff", 34, 47], ["created", 23, 30]], "evidential": [["Louie", "Louie ( season 1 )", "Louis C.K.", "The show Louie"], ["Louis C.K", "a person", "a series creator", "a man"], ["created", "written", "penned", "produced"]], "culprit": [1, 2]}
|
90 |
+
{"id": 84710, "claim": "Buffy the Vampire Slayer is created by Joss Whedon in 1990 .", "questions": ["What movie was created by Joss Whedon? or <mask> is created by Joss Whedon in 1990 .", "Who created Buffy the Vampire Slayer? or Buffy the Vampire Slayer is created by <mask> in 1990 .", "When was Buffy the Vampire Slayer created? or Buffy the Vampire Slayer is created by Joss Whedon in <mask> .", "What was the name of the film that made Buffy the Vampire Slayer? or Buffy the Vampire Slayer is <mask> by Joss Whedon in 1990 ."], "answers": [["Buffy the Vampire Slayer", 0, 24], ["Joss Whedon", 39, 50], ["1990", 54, 58], ["created", 28, 35]], "evidential": [["Buffy the Vampire Slayer", "The Buffy the Vampire Slayer", "Buffy The Vampire Slayer", "Buffy of the Vampire Slayer"], ["Joss Whedon", "a person", "a man", "an American"], ["the 1990s", "the 2000s", "the nineties", "1992"], ["created", "produced", "directed", "a film"]], "culprit": [2]}
|
91 |
+
{"id": 198041, "claim": "The New York City Landmarks Preservation Commission includes zero architects .", "questions": ["What organization has zero architects? or <mask> includes zero architects .", "How many architects does the New York City Landmarks Preservation Commission have? or The New York City Landmarks Preservation Commission includes <mask> .", "How many architects does the New York City Landmarks Preservation Commission have? or The New York City Landmarks Preservation Commission <mask> zero architects ."], "answers": [["The New York City Landmarks Preservation Commission", 0, 51], ["zero architects", 61, 76], ["includes", 52, 60]], "evidential": [["The New York City Landmarks Preservation Commission", "New York City Landmarks Preservation Commission", "The New York City Landmarks Preservation commission", "A New York City Landmarks Preservation Commission"], ["11 architects", "three architects", "11 commissioners", "ten architects"], ["includes", "contains", "consists", "involves"]], "culprit": [1]}
|
92 |
+
{"id": 42390, "claim": "Jack Falahee is Mongolian .", "questions": ["Who is the Mongolian whose name is? or <mask> is Mongolian .", "What nationality is Jack Falahee? or Jack Falahee is <mask> ."], "answers": [["Jack Falahee", 0, 12], ["Mongolian", 16, 25]], "evidential": [["Jack Falahee", "Jack Falahe", "John Falahee", "Jack Falaefhee"], ["American", "an American", "North American", "European"]], "culprit": [1]}
|
93 |
+
{"id": 175736, "claim": "The Cry of the Owl is based on Patricia Highsmith 's eighth movie .", "questions": ["What is the name of the movie based on Patricia Highsmith's eighth film? or <mask> is based on Patricia Highsmith 's eighth movie .", "Who wrote the movie The Cry of the Owl? or The Cry of the Owl is based on <mask> 's eighth movie .", "What is the story of The Cry Of The Owl? or The Cry of the Owl is <mask> on Patricia Highsmith 's eighth movie .", "What was the first movie based on? or The Cry of the Owl is based on Patricia Highsmith 's <mask> movie ."], "answers": [["The Cry of the Owl", 0, 18], ["Patricia Highsmith", 31, 49], ["based", 22, 27], ["eighth", 53, 59]], "evidential": [["The Cry of the Owl", "The Cry of the Owl ( 2009 film )", "The Cry of the Owl( 2009 film )", "The Cry of the Owl(2009 film )"], ["Patricia Highsmith", "an author", "a writer", "a novelist"], ["based", "a story", "a novel", "loosely"], ["first", "book", "novel", "a novel"]], "culprit": [3]}
|
94 |
+
{"id": 152929, "claim": "Firefox is an operating system shell .", "questions": ["What is the name of the operating system shell? or <mask> is an operating system shell .", "What is Firefox? or Firefox is <mask> ."], "answers": [["Firefox", 0, 7], ["an operating system shell", 11, 36]], "evidential": [["Firefox", "Mozilla", "Mozilla Firefox", "The Firefox"], ["a web browser", "a free web browser", "open source", "a free software application"]], "culprit": [1]}
|
95 |
+
{"id": 183589, "claim": "Finding Dory was directed by Ingmar Bergman .", "questions": ["What movie was directed by Ingmar Bergman? or <mask> was directed by Ingmar Bergman .", "Who directed Finding Dory? or Finding Dory was directed by <mask> .", "What was the name of the film that starred in Finding Dory? or Finding Dory was <mask> by Ingmar Bergman ."], "answers": [["Finding Dory", 0, 12], ["Ingmar Bergman", 29, 43], ["directed", 17, 25]], "evidential": [["Finding Dory", "The Finding Dory", "Finding dory", "Finding Dory movie"], ["Andrew Stanton", "Angus MacLane", "a person", "Angus Maclane"], ["directed", "written", "produced", "penned"]], "culprit": [1]}
|
96 |
+
{"id": 108957, "claim": "Agent Raghav \u2013 Crime Branch is a phone .", "questions": ["What is the name of the agent that is on the phone? or <mask> is a phone .", "What is the name of the agent in the Crime Branch? or Agent Raghav \u2013 Crime Branch is <mask> ."], "answers": [["Agent Raghav \u2013 Crime Branch", 0, 27], ["a phone", 31, 38]], "evidential": [["Agent Raghav \u2013 Crime Branch", "Agent Raghav - Crime Branch", "Agent Raghav", "Agent Raghav \u2014 Crime Branch"], ["an anthology television series", "a serial", "a television serial", "a television series"]], "culprit": [1]}
|
97 |
+
{"id": 3160, "claim": "University of Chicago Law School is ranked first in the 2016 QS World University Rankings .", "questions": ["What is the name of the law school that is ranked first in the 2016 QS World or <mask> is ranked first in the 2016 QS World University Rankings .", "What is the name of the organization that ranks law schools in the world? or University of Chicago Law School is ranked first in the 2016 <mask> .", "What is the ranking of University of Chicago Law School in the 2016 QS World University Rankings or University of Chicago Law School is <mask> first in the 2016 QS World University Rankings .", "What is the ranking of University of Chicago Law School in the 2016 QS World University Rankings or University of Chicago Law School is ranked <mask> in the 2016 QS World University Rankings .", "In what year did the University of Chicago Law School rank first in the QS World University Ranking or University of Chicago Law School is ranked first in <mask> QS World University Rankings ."], "answers": [["University of Chicago Law School", 0, 32], ["QS World University Rankings", 61, 89], ["ranked", 36, 42], ["first", 43, 48], ["the 2016", 52, 60]], "evidential": [["University of Chicago Law School", "The University of Chicago Law School", "the University of Chicago Law School", "University of Chicago law School"], ["QS World University Rankings", "the QS World University Rankings", "S&S World University Rankings", "QS World University Rankings."], ["ranked", "listed", "placed", "Ranked"], ["12th", "11th", "twelveth", "ninth"], ["the 2016", "the 2015", "2016", "The 2016"]], "culprit": [3]}
|
98 |
+
{"id": 148309, "claim": "The Adventures of Pluto Nash failed to be a released film .", "questions": ["What failed to be a released film? or <mask> failed to be a released film .", "What did The Adventures of Pluto Nash fail to be? or The Adventures of Pluto Nash failed to be <mask> .", "What was the result of The Adventures of Pluto Nash? or The Adventures of Pluto Nash <mask> to be a released film ."], "answers": [["The Adventures of Pluto Nash", 0, 28], ["a released film", 42, 57], ["failed", 29, 35]], "evidential": [["The Adventures of Pluto Nash", "The adventures of Pluto Nash", "The Adventures of Pluto N", "An Adventures of Pluto Nash"], ["a release", "released", "an release", "release"], ["happened", "ceased", "turned", "failed"]], "culprit": [2]}
|
99 |
+
{"id": 227135, "claim": "The New Orleans Pelicans only play in the NHL .", "questions": ["Who plays in the NHL? or <mask> only play in the NHL .", "What league do the New Orleans Pelicans play in? or The New Orleans Pelicans only play in <mask> .", "What do the New Orleans Pelicans only do in the NHL? or The New Orleans Pelicans only <mask> in the NHL .", "How many of the New Orleans Pelicans play in the NHL? or The New Orleans Pelicans <mask> play in the NHL ."], "answers": [["The New Orleans Pelicans", 0, 24], ["the NHL", 38, 45], ["play", 30, 34], ["only", 25, 29]], "evidential": [["The New Orleans Pelicans", "New Orleans Pelicans", "the New Orleans Pelicans", "The New Orleans Saints"], ["the National Basketball Association", "the NBA", "a league", "the Western Conference"], ["play", "compete", "participate", "plays"], ["only", "two", "one", "currently"]], "culprit": [1, 3]}
|
100 |
+
{"id": 126678, "claim": "The Colosseum is a wrestler from Italy .", "questions": ["What is the name of the wrestler from Italy? or <mask> is a wrestler from Italy .", "Who is the Colosseum? or The Colosseum is <mask> from Italy .", "Where is The Colosseum? or The Colosseum is a wrestler from <mask> ."], "answers": [["The Colosseum", 0, 13], ["a wrestler", 17, 27], ["Italy", 33, 38]], "evidential": [["The Colosseum", "Colosseum", "The colosseum", "A Colosseum"], ["a tourist attraction", "an attraction", "an amphitheater", "a popular tourist attraction"], ["Rome", "Italy", "a city", "the city"]], "culprit": [1]}
|
src/eval_client/fever_scorer.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding:utf-8 -*-
|
2 |
+
|
3 |
+
"""
|
4 |
+
@Author : Bao
|
5 |
+
@Date : 2020/8/24
|
6 |
+
@Desc :
|
7 |
+
@Last modified by : Bao
|
8 |
+
@Last modified date : 2020/9/1
|
9 |
+
"""
|
10 |
+
|
11 |
+
import os
|
12 |
+
import json
|
13 |
+
import numpy as np
|
14 |
+
from collections import defaultdict
|
15 |
+
import tensorflow as tf
|
16 |
+
from sklearn.metrics import precision_recall_fscore_support
|
17 |
+
try:
|
18 |
+
from .scorer import fever_score
|
19 |
+
except:
|
20 |
+
from scorer import fever_score
|
21 |
+
|
22 |
+
|
23 |
+
prefix = os.environ['PJ_HOME']
|
24 |
+
|
25 |
+
|
26 |
+
class FeverScorer:
|
27 |
+
def __init__(self):
|
28 |
+
self.id2label = {2: 'SUPPORTS', 0: 'REFUTES', 1: 'NOT ENOUGH INFO'}
|
29 |
+
self.label2id = {value: key for key, value in self.id2label.items()}
|
30 |
+
|
31 |
+
def get_scores(self, predicted_file, actual_file=f'{prefix}/data/fever/shared_task_dev.jsonl'):
|
32 |
+
id2results = defaultdict(dict)
|
33 |
+
|
34 |
+
with tf.io.gfile.GFile(predicted_file) as f:
|
35 |
+
for line in f:
|
36 |
+
js = json.loads(line)
|
37 |
+
guid = js['id']
|
38 |
+
id2results[guid] = js
|
39 |
+
|
40 |
+
with tf.io.gfile.GFile(actual_file) as fin:
|
41 |
+
for line in fin:
|
42 |
+
line = json.loads(line)
|
43 |
+
guid = line['id']
|
44 |
+
evidence = line['evidence']
|
45 |
+
label = line['label']
|
46 |
+
id2results[guid]['evidence'] = evidence
|
47 |
+
id2results[guid]['label'] = label
|
48 |
+
|
49 |
+
results = self.label_score(list(id2results.values()))
|
50 |
+
score, accuracy, precision, recall, f1 = fever_score(list(id2results.values()))
|
51 |
+
results.update({
|
52 |
+
'Evidence Precision': precision,
|
53 |
+
'Evidence Recall': recall,
|
54 |
+
'Evidence F1': f1,
|
55 |
+
'FEVER Score': score,
|
56 |
+
'Label Accuracy': accuracy
|
57 |
+
})
|
58 |
+
|
59 |
+
return results
|
60 |
+
|
61 |
+
def label_score(self, results):
|
62 |
+
truth = np.array([v['label'] for v in results])
|
63 |
+
prediction = np.array([v['predicted_label'] for v in results])
|
64 |
+
labels = list(self.label2id.keys())
|
65 |
+
results = {}
|
66 |
+
p, r, f, _ = precision_recall_fscore_support(truth, prediction, labels=labels)
|
67 |
+
for i, label in enumerate(self.label2id.keys()):
|
68 |
+
results['{} Precision'.format(label)] = p[i]
|
69 |
+
results['{} Recall'.format(label)] = r[i]
|
70 |
+
results['{} F1'.format(label)] = f[i]
|
71 |
+
|
72 |
+
return results
|
73 |
+
|
74 |
+
|
75 |
+
if __name__ == '__main__':
|
76 |
+
import argparse
|
77 |
+
|
78 |
+
parser = argparse.ArgumentParser()
|
79 |
+
parser.add_argument("--predicted_file", '-i', type=str)
|
80 |
+
args = parser.parse_args()
|
81 |
+
|
82 |
+
scorer = FeverScorer()
|
83 |
+
results = scorer.get_scores(args.predicted_file)
|
84 |
+
print(json.dumps(results, ensure_ascii=False, indent=4))
|
src/eval_client/scorer.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import six
|
2 |
+
|
3 |
+
def check_predicted_evidence_format(instance):
|
4 |
+
if 'predicted_evidence' in instance.keys() and len(instance['predicted_evidence']):
|
5 |
+
assert all(isinstance(prediction, list)
|
6 |
+
for prediction in instance["predicted_evidence"]), \
|
7 |
+
"Predicted evidence must be a list of (page,line) lists"
|
8 |
+
|
9 |
+
assert all(len(prediction) == 2
|
10 |
+
for prediction in instance["predicted_evidence"]), \
|
11 |
+
"Predicted evidence must be a list of (page,line) lists"
|
12 |
+
|
13 |
+
assert all(isinstance(prediction[0], six.string_types)
|
14 |
+
for prediction in instance["predicted_evidence"]), \
|
15 |
+
"Predicted evidence must be a list of (page<string>,line<int>) lists"
|
16 |
+
|
17 |
+
assert all(isinstance(prediction[1], int)
|
18 |
+
for prediction in instance["predicted_evidence"]), \
|
19 |
+
"Predicted evidence must be a list of (page<string>,line<int>) lists"
|
20 |
+
|
21 |
+
|
22 |
+
def is_correct_label(instance):
|
23 |
+
return instance["label"].upper() == instance["predicted_label"].upper()
|
24 |
+
|
25 |
+
|
26 |
+
def is_strictly_correct(instance, max_evidence=None):
|
27 |
+
#Strict evidence matching is only for NEI class
|
28 |
+
check_predicted_evidence_format(instance)
|
29 |
+
|
30 |
+
if instance["label"].upper() != "NOT ENOUGH INFO" and is_correct_label(instance):
|
31 |
+
assert 'predicted_evidence' in instance, "Predicted evidence must be provided for strict scoring"
|
32 |
+
|
33 |
+
if max_evidence is None:
|
34 |
+
max_evidence = len(instance["predicted_evidence"])
|
35 |
+
|
36 |
+
|
37 |
+
for evience_group in instance["evidence"]:
|
38 |
+
#Filter out the annotation ids. We just want the evidence page and line number
|
39 |
+
actual_sentences = [[e[2], e[3]] for e in evience_group]
|
40 |
+
#Only return true if an entire group of actual sentences is in the predicted sentences
|
41 |
+
if all([actual_sent in instance["predicted_evidence"][:max_evidence] for actual_sent in actual_sentences]):
|
42 |
+
return True
|
43 |
+
|
44 |
+
#If the class is NEI, we don't score the evidence retrieval component
|
45 |
+
elif instance["label"].upper() == "NOT ENOUGH INFO" and is_correct_label(instance):
|
46 |
+
return True
|
47 |
+
|
48 |
+
return False
|
49 |
+
|
50 |
+
|
51 |
+
def evidence_macro_precision(instance, max_evidence=None):
|
52 |
+
this_precision = 0.0
|
53 |
+
this_precision_hits = 0.0
|
54 |
+
|
55 |
+
if instance["label"].upper() != "NOT ENOUGH INFO":
|
56 |
+
all_evi = [[e[2], e[3]] for eg in instance["evidence"] for e in eg if e[3] is not None]
|
57 |
+
|
58 |
+
predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
|
59 |
+
instance["predicted_evidence"][:max_evidence]
|
60 |
+
|
61 |
+
for prediction in predicted_evidence:
|
62 |
+
if prediction in all_evi:
|
63 |
+
this_precision += 1.0
|
64 |
+
this_precision_hits += 1.0
|
65 |
+
|
66 |
+
return (this_precision / this_precision_hits) if this_precision_hits > 0 else 1.0, 1.0
|
67 |
+
|
68 |
+
return 0.0, 0.0
|
69 |
+
|
70 |
+
def evidence_macro_recall(instance, max_evidence=None):
|
71 |
+
# We only want to score F1/Precision/Recall of recalled evidence for NEI claims
|
72 |
+
if instance["label"].upper() != "NOT ENOUGH INFO":
|
73 |
+
# If there's no evidence to predict, return 1
|
74 |
+
if len(instance["evidence"]) == 0 or all([len(eg) == 0 for eg in instance]):
|
75 |
+
return 1.0, 1.0
|
76 |
+
|
77 |
+
predicted_evidence = instance["predicted_evidence"] if max_evidence is None else \
|
78 |
+
instance["predicted_evidence"][:max_evidence]
|
79 |
+
|
80 |
+
for evidence_group in instance["evidence"]:
|
81 |
+
evidence = [[e[2], e[3]] for e in evidence_group]
|
82 |
+
if all([item in predicted_evidence for item in evidence]):
|
83 |
+
# We only want to score complete groups of evidence. Incomplete groups are worthless.
|
84 |
+
return 1.0, 1.0
|
85 |
+
return 0.0, 1.0
|
86 |
+
return 0.0, 0.0
|
87 |
+
|
88 |
+
|
89 |
+
# Micro is not used. This code is just included to demostrate our model of macro/micro
|
90 |
+
def evidence_micro_precision(instance):
|
91 |
+
this_precision = 0
|
92 |
+
this_precision_hits = 0
|
93 |
+
|
94 |
+
# We only want to score Macro F1/Precision/Recall of recalled evidence for NEI claims
|
95 |
+
if instance["label"].upper() != "NOT ENOUGH INFO":
|
96 |
+
all_evi = [[e[2], e[3]] for eg in instance["evidence"] for e in eg if e[3] is not None]
|
97 |
+
|
98 |
+
for prediction in instance["predicted_evidence"]:
|
99 |
+
if prediction in all_evi:
|
100 |
+
this_precision += 1.0
|
101 |
+
this_precision_hits += 1.0
|
102 |
+
|
103 |
+
return this_precision, this_precision_hits
|
104 |
+
|
105 |
+
|
106 |
+
def fever_score(predictions,actual=None, max_evidence=5):
|
107 |
+
correct = 0
|
108 |
+
strict = 0
|
109 |
+
|
110 |
+
macro_precision = 0
|
111 |
+
macro_precision_hits = 0
|
112 |
+
|
113 |
+
macro_recall = 0
|
114 |
+
macro_recall_hits = 0
|
115 |
+
|
116 |
+
for idx,instance in enumerate(predictions):
|
117 |
+
assert 'predicted_evidence' in instance.keys(), 'evidence must be provided for the prediction'
|
118 |
+
|
119 |
+
#If it's a blind test set, we need to copy in the values from the actual data
|
120 |
+
if 'evidence' not in instance or 'label' not in instance:
|
121 |
+
assert actual is not None, 'in blind evaluation mode, actual data must be provided'
|
122 |
+
assert len(actual) == len(predictions), 'actual data and predicted data length must match'
|
123 |
+
assert 'evidence' in actual[idx].keys(), 'evidence must be provided for the actual evidence'
|
124 |
+
instance['evidence'] = actual[idx]['evidence']
|
125 |
+
instance['label'] = actual[idx]['label']
|
126 |
+
|
127 |
+
assert 'evidence' in instance.keys(), 'gold evidence must be provided'
|
128 |
+
|
129 |
+
if is_correct_label(instance):
|
130 |
+
correct += 1.0
|
131 |
+
|
132 |
+
if is_strictly_correct(instance, max_evidence):
|
133 |
+
strict+=1.0
|
134 |
+
|
135 |
+
macro_prec = evidence_macro_precision(instance, max_evidence)
|
136 |
+
macro_precision += macro_prec[0]
|
137 |
+
macro_precision_hits += macro_prec[1]
|
138 |
+
|
139 |
+
macro_rec = evidence_macro_recall(instance, max_evidence)
|
140 |
+
macro_recall += macro_rec[0]
|
141 |
+
macro_recall_hits += macro_rec[1]
|
142 |
+
|
143 |
+
total = len(predictions)
|
144 |
+
|
145 |
+
strict_score = strict / total
|
146 |
+
acc_score = correct / total
|
147 |
+
|
148 |
+
pr = (macro_precision / macro_precision_hits) if macro_precision_hits > 0 else 1.0
|
149 |
+
rec = (macro_recall / macro_recall_hits) if macro_recall_hits > 0 else 0.0
|
150 |
+
|
151 |
+
f1 = 2.0 * pr * rec / (pr + rec)
|
152 |
+
|
153 |
+
return strict_score, acc_score, pr, rec, f1
|