Spaces:
Runtime error
Runtime error
# | |
# Pyserini: Python interface to the Anserini IR toolkit built on Lucene | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
"""Compute various statistics for qrels.""" | |
import argparse | |
import collections | |
def is_relevant(grade: int, threshold: int): | |
return grade > threshold | |
def main(): | |
parser = argparse.ArgumentParser(description=__doc__, | |
formatter_class=lambda prog: argparse.HelpFormatter(prog, width=100)) | |
parser.add_argument('--qrels', metavar='FILE', type=str, required=True, help='Qrels file.') | |
parser.add_argument('--output', metavar='FILE', type=str, help='File to store per-topic statistics.') | |
parser.add_argument('--relevance-threshold', metavar='INT', type=int, default=0, | |
help='Threshold for considering a judgment "relevant". ' + | |
'Default is zero, which means any relevance grade above zero is considered relevant.') | |
args = parser.parse_args() | |
qrels = collections.defaultdict(dict) | |
with open(args.qrels) as f: | |
for i, line in enumerate(f): | |
qid, _, docid, relevance = line.rstrip().split() | |
qrels[qid][docid] = int(relevance) | |
total_judgments_count = 0 | |
total_topics_count = 0 | |
total_rel_count = 0 | |
max_per_topic_judged = 0 | |
min_per_topic_judged = 10000000 | |
max_per_topic_relevant = 0 | |
min_per_topic_relevant = 10000000 | |
per_topic_output = ['qid,rel_count,not_rel_count,total'] | |
for qid in sorted(qrels): | |
total_topics_count += 1 | |
rel_count = 0 | |
not_rel_count = 0 | |
for docid in qrels[qid]: | |
if is_relevant(qrels[qid][docid], args.relevance_threshold): | |
rel_count += 1 | |
total_rel_count += 1 | |
else: | |
not_rel_count += 1 | |
total_judgments_count += 1 | |
assert len(qrels[qid]) == rel_count + not_rel_count | |
if rel_count > max_per_topic_relevant: | |
max_per_topic_relevant = rel_count | |
if rel_count + not_rel_count > max_per_topic_judged: | |
max_per_topic_judged = rel_count + not_rel_count | |
if rel_count < min_per_topic_relevant: | |
min_per_topic_relevant = rel_count | |
if rel_count + not_rel_count < min_per_topic_judged: | |
min_per_topic_judged = rel_count + not_rel_count | |
per_topic_output.append(f'{qid},{rel_count},{not_rel_count},{rel_count+not_rel_count}') | |
per_topic_output.append(f'total,{total_rel_count},{total_judgments_count-total_rel_count},{total_judgments_count}') | |
per_topic_output.append(f'avg,{total_rel_count/total_topics_count:.2f},' + | |
f'{(total_judgments_count-total_rel_count)/total_topics_count:.2f},' + | |
f'{total_judgments_count/total_topics_count:.2f}') | |
if args.output: | |
with open(args.output, 'w') as f: | |
for line in per_topic_output: | |
f.write(line + '\n') | |
print('# Summary Statistics') | |
print(f'Total number of topics: {total_topics_count}') | |
print(f'Total number of judgments: {total_judgments_count}') | |
print(f'Total number of relevant labels: {total_rel_count}') | |
print(f'Avg. judgments per topic: {total_judgments_count/total_topics_count:.2f}' + | |
f' (max = {max_per_topic_judged}, min = {min_per_topic_judged})') | |
print(f'Avg. relevant labels per topic: {total_rel_count/total_topics_count:.2f}' + | |
f' (max = {max_per_topic_relevant}, min = {min_per_topic_relevant})') | |
if __name__ == "__main__": | |
main() | |