Lemswasabi's picture
add create lm scripts
98591ec
raw
history blame contribute delete
610 Bytes
#!/usr/bin/env python3
#
# Created by lemswasabi on 24/05/2022.
# Copyright © 2022 letzspeak. All rights reserved.
#
from typing import Iterable, Iterator

# Default file names used when the script is run directly.
INPUT_ARPA = "5gram.arpa"
OUTPUT_ARPA = "5gram_correct.arpa"


def add_eos_token(lines: Iterable[str]) -> Iterator[str]:
    """Yield the lines of an ARPA file with a ``</s>`` entry added.

    Two edits are made; every other line is passed through unchanged:

    * the count on the ``ngram 1=<count>`` header line is incremented by one;
    * the first line containing ``<s>`` is emitted as-is and followed by a
      copy of itself in which ``<s>`` is replaced by ``</s>``.

    Once the ``</s>`` line has been emitted no further line is modified, so
    later lines that mention ``<s>`` are not duplicated.

    Args:
        lines: Lines of the input file, each including its line terminator.

    Yields:
        The corrected lines, in order.

    Raises:
        ValueError: If the text after ``=`` on the ``ngram 1=`` header line
            is not an integer.
    """
    has_added_eos = False
    for line in lines:
        if not has_added_eos and "ngram 1=" in line:
            # Rewrite only the text after the last "=". Replacing the count
            # anywhere in the line would also change the order digit when the
            # count is "1" ("ngram 1=1" would become "ngram 2=2").
            head, sep, tail = line.rpartition("=")
            count = tail.strip()
            yield head + sep + tail.replace(count, str(int(count) + 1), 1)
        elif not has_added_eos and "<s>" in line:
            yield line
            yield line.replace("<s>", "</s>")
            has_added_eos = True
        else:
            yield line


def main(input_path: str = INPUT_ARPA, output_path: str = OUTPUT_ARPA) -> None:
    """Read ``input_path``, add the ``</s>`` entry and write ``output_path``.

    Both files are opened as UTF-8 explicitly so the result does not depend
    on the platform's default locale encoding.
    """
    with open(input_path, "r", encoding="utf-8") as read_file, \
            open(output_path, "w", encoding="utf-8") as write_file:
        write_file.writelines(add_eos_token(read_file))


if __name__ == "__main__":
    main()