Spaces:
Runtime error
Runtime error
File size: 841 Bytes
d6585f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import argparse
import xml.etree.ElementTree as ET
import re
from ftfy import fix_text
def load_topic_trec(args):
    """Convert an XML topic file into a ``qid<TAB>query`` TSV file.

    Every direct child of the XML root is treated as one topic: its
    ``number`` attribute becomes the query id and its text content becomes
    the query. The text is cleaned with ``ftfy.fix_text`` and every run of
    whitespace (newlines, tabs, carriage returns, ...) is collapsed to a
    single space, so each topic occupies exactly one two-column line.

    Args:
        args: Object exposing ``topics`` (path of the XML file to read) and
            ``queries`` (path of the TSV file to write; an existing file is
            overwritten).

    Raises:
        KeyError: If a topic element has no ``number`` attribute.
        xml.etree.ElementTree.ParseError: If the topic file is not
            well-formed XML.
    """
    root = ET.parse(args.topics).getroot()
    # Compiled once, outside the loop; a raw string avoids the
    # invalid-escape warning that a plain '\s' literal triggers.
    whitespace_run = re.compile(r'\s+')
    # Explicit UTF-8 so non-ASCII queries do not depend on the platform's
    # default encoding.
    with open(args.queries, 'w', encoding='utf-8') as fout:
        for child in root:
            qid = child.attrib['number']
            # ``text`` is None for an element without text content; treat
            # that as an empty query. Operating on the real string (rather
            # than on its ``repr``) keeps backslashes and quotes intact.
            topic = fix_text(child.text or '')
            # Leading/trailing whitespace is reduced to one space, not
            # stripped, which matches the output for ordinary input before
            # this change. Tabs are collapsed too, so the query can never
            # introduce an extra TSV column.
            topic = whitespace_run.sub(' ', topic)
            fout.write(f"{qid}\t{topic}\n")
if __name__ == '__main__':
    # Script entry point: read the two required paths from the command
    # line, run the conversion, then report completion.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ('--topics', 'topic file'),
        ('--queries', 'convert to qid\\tquery tsv format'),
    ):
        cli.add_argument(flag, type=str, required=True, help=description)
    load_topic_trec(cli.parse_args())
    print('Done!')
|