import os | |
import ujson | |
from collections import defaultdict | |
from colbert.utils.utils import print_message, file_tqdm | |
def load_collection_(path, retain_titles):
    """Read a tab-separated collection file into a list of passage strings.

    The open file is iterated through ``file_tqdm`` (presumably a
    progress-reporting line iterator -- confirm against its definition).
    Every yielded line is stripped of surrounding whitespace and split on
    tabs into exactly three fields: the first is discarded, the second is
    the passage and the third is its title. A line that does not produce
    exactly three fields raises ``ValueError`` from the unpacking.

    Args:
        path: Location of the collection file.
        retain_titles: When truthy, each entry is stored as
            ``title + ' | ' + passage``; otherwise only the passage is kept.

    Returns:
        A list of strings, one per processed line, in iteration order.
    """
    def render(raw_line):
        # Field order on disk: <ignored first column>, passage, title.
        _first, text, heading = raw_line.strip().split('\t')
        return heading + ' | ' + text if retain_titles else text

    with open(path) as handle:
        return [render(raw_line) for raw_line in file_tqdm(handle)]
def load_qas_(path):
    """Load reference question/answer records from a JSON-lines file.

    A status message naming the file is emitted through ``print_message``
    before the file is opened. Each line is then decoded with
    ``ujson.loads`` and reduced to a 3-tuple.

    Args:
        path: Location of the file. Every line must decode to a mapping
            that has the keys ``qid``, ``question`` and ``answers``.

    Returns:
        A list of ``(qid, question, answers)`` tuples in file order.
    """
    print_message("#> Loading the reference QAs from", path)

    with open(path) as handle:
        # ``map`` is lazy, so each line is decoded and unpacked before the
        # next one is read -- the same per-line order as an explicit loop.
        return [
            (record['qid'], record['question'], record['answers'])
            for record in map(ujson.loads, handle)
        ]