|
|
|
|
|
|
|
|
|
|
|
"""Reduce an ngrams file for training nplm to a smaller version of it. |
|
|
|
The smaller version will have fewer ngrams. |
|
""" |
|
|
|
from sys import argv |
|
|
|
def reduce_line(line, start_idx, ngrams):
    """Return one input line reduced to a window of its tokens.

    The line is split on whitespace, the tokens in the half-open range
    ``[start_idx, start_idx + ngrams)`` are kept (ordinary Python slice
    semantics, so an out-of-range window simply yields fewer or no tokens),
    and the kept tokens are joined by single spaces.

    Args:
        line: One line of the input file, with or without its newline.
        start_idx: Index of the first token to keep.
        ngrams: Number of tokens to keep.

    Returns:
        The kept tokens joined by single spaces, terminated by a newline.
        A line with no kept tokens becomes a bare newline.
    """
    tokens = line.split()
    return " ".join(tokens[start_idx:start_idx + ngrams]) + "\n"


def main():
    """Run the command-line tool: INFILE OUTFILE START_IDX NGRAMS.

    Reads INFILE line by line and writes the reduced form of each line
    to OUTFILE.

    Raises:
        SystemExit: With status 1 when the number of arguments is wrong,
            so calling shells and pipelines can detect the usage error.
        ValueError: When START_IDX or NGRAMS is not an integer.
    """
    if len(argv) != 5:
        print("Wrong number of args, got: " + str(len(argv) - 1) + " expected 4.")
        print("Usage: reduce_ngrams.py INFILE OUTFILE START_IDX NGRAMS")
        raise SystemExit(1)

    # Parse the numeric arguments before touching any file: opening OUTFILE
    # in 'w' mode truncates it, which must not happen on a bad argument.
    start_idx = int(argv[3])
    ngrams = int(argv[4])

    # The context manager closes both files even if reading or writing fails.
    # INFILE is opened first so a missing input does not truncate OUTFILE.
    with open(argv[1], 'r') as infile, open(argv[2], 'w') as outfile:
        outfile.writelines(
            reduce_line(line, start_idx, ngrams) for line in infile
        )


if __name__ == "__main__":
    main()
|
|