Spaces:
Runtime error
Runtime error
import argparse | |
import xml.etree.ElementTree as ET | |
import re | |
from ftfy import fix_text | |
def load_topic_trec(args):
    """Convert a TREC topic XML file into a ``qid<TAB>query`` TSV file.

    Every direct child of the XML root is treated as one topic: its
    ``number`` attribute becomes the query id and the element's own text
    becomes the query. The text is passed through ``fix_text``, then every
    run of whitespace (newlines and tabs included) is collapsed to a single
    space and leading/trailing whitespace is removed, so each topic occupies
    exactly one output line and the only tab on it is the column separator.

    Args:
        args: Object exposing two string attributes: ``topics`` (path of the
            XML file to read) and ``queries`` (path of the TSV file to write;
            it is opened in ``'w'`` mode, so existing content is replaced).

    Raises:
        KeyError: If a child of the root has no ``number`` attribute.
    """
    root = ET.parse(args.topics).getroot()
    # Write UTF-8 explicitly so non-ASCII topics do not depend on the
    # platform's default locale encoding.
    with open(args.queries, 'w', encoding='utf-8') as fout:
        for child in root:
            qid = child.attrib['number']
            # An element without text has ``.text`` set to None; treat it as
            # an empty query. Work on the real string rather than its repr(),
            # which would also escape backslashes, quotes, tabs and
            # non-printable characters into the output.
            topic = fix_text(child.text or '')
            # Flatten after fix_text so any line breaks still present in its
            # result are collapsed as well.
            topic = re.sub(r'\s+', ' ', topic).strip()
            fout.write(f"{qid}\t{topic}\n")
if __name__ == '__main__':
    # Script entry point: read the two required paths from the command line,
    # run the conversion, then report completion on stdout.
    cli = argparse.ArgumentParser()
    # Both options are mandatory strings; declare them from one table.
    for flag, description in (
        ("--topics", 'topic file'),
        ('--queries', 'convert to qid\\tquery tsv format'),
    ):
        cli.add_argument(flag, required=True, type=str, help=description)
    load_topic_trec(cli.parse_args())
    print('Done!')