def add_eos_to_arpa(src_path="5gram.arpa", dst_path="5gram_correct.arpa"):
    """Copy *src_path* to *dst_path*, inserting a ``</s>`` entry.

    Lines are copied through unchanged except for two edits, both applied
    only until the ``</s>`` line has been inserted:

    * A line containing ``"ngram 1="`` has the integer after its last
      ``=`` incremented by one.
    * The first line containing ``"<s>"`` is written as-is, followed by a
      copy of it with every ``<s>`` replaced by ``</s>``.

    NOTE(review): presumably the input is an ARPA language-model file whose
    unigram section lacks the end-of-sentence token -- confirm with the
    producer of the file.

    Args:
        src_path: Path of the file to read.
        dst_path: Path of the file to write (overwritten if it exists).

    Raises:
        ValueError: If the text after ``=`` on a ``"ngram 1="`` line is not
            an integer.
    """
    with open(src_path, "r") as read_file, open(dst_path, "w") as write_file:
        has_added_eos = False
        for line in read_file:
            if not has_added_eos and "ngram 1=" in line:
                # Only rewrite the text after the last "=". Replacing the
                # count anywhere in the line would also hit the "1" of
                # "ngram 1=" when the count itself is 1.
                head, sep, tail = line.rpartition("=")
                count = tail.strip()
                # Replace within `tail` so the original line ending and any
                # surrounding whitespace are preserved.
                bumped = tail.replace(count, str(int(count) + 1), 1)
                write_file.write(head + sep + bumped)
            elif not has_added_eos and "<s>" in line:
                write_file.write(line)
                # Duplicate the "<s>" line as the new "</s>" line.
                write_file.write(line.replace("<s>", "</s>"))
                has_added_eos = True
            else:
                write_file.write(line)


if __name__ == "__main__":
    add_eos_to_arpa()