Update app.py
Browse files
app.py
CHANGED
@@ -234,12 +234,12 @@ def text_to_kenlm(
|
|
234 |
|
235 |
# Commands to run in the container
|
236 |
cmd = (
|
237 |
-
f"{kenlm_bin}/lmplz --temp_prefix
|
238 |
)
|
239 |
print(subprocess.run(cmd, shell=True))
|
240 |
|
241 |
-
file_name =
|
242 |
-
file_name_fixed =
|
243 |
|
244 |
# Fix the ARPA file
|
245 |
with (
|
@@ -262,7 +262,7 @@ def text_to_kenlm(
|
|
262 |
file_name = file_name_fixed
|
263 |
|
264 |
if _do_limit_topk:
|
265 |
-
file_name = f"
|
266 |
|
267 |
_, vocab_str = convert_and_filter_topk(app_dir, intermediate_file, _topk_words)
|
268 |
|
@@ -281,14 +281,14 @@ def text_to_kenlm(
|
|
281 |
|
282 |
if _do_quantize:
|
283 |
file_name_quantized = (
|
284 |
-
f"
|
285 |
)
|
286 |
|
287 |
cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name} {file_name_quantized}"
|
288 |
print(subprocess.run(cmd, shell=True))
|
289 |
else:
|
290 |
if _do_quantize:
|
291 |
-
file_name = f"
|
292 |
|
293 |
cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name_fixed} {file_name}"
|
294 |
print(subprocess.run(cmd, shell=True))
|
|
|
234 |
|
235 |
# Commands to run in the container
|
236 |
cmd = (
|
237 |
+
f"{kenlm_bin}/lmplz --temp_prefix /tmp --memory 90% --text {intermediate_file} --arpa /tmp/my_model.arpa -o {_order} --prune {_arpa_prune} --discount_fallback",
|
238 |
)
|
239 |
print(subprocess.run(cmd, shell=True))
|
240 |
|
241 |
+
file_name = "/tmp/my_model.arpa"
|
242 |
+
file_name_fixed = "/tmp/my_model_correct.arpa"
|
243 |
|
244 |
# Fix the ARPA file
|
245 |
with (
|
|
|
262 |
file_name = file_name_fixed
|
263 |
|
264 |
if _do_limit_topk:
|
265 |
+
file_name = f"/tmp/my_model-{_topk_words}-words.arpa"
|
266 |
|
267 |
_, vocab_str = convert_and_filter_topk(app_dir, intermediate_file, _topk_words)
|
268 |
|
|
|
281 |
|
282 |
if _do_quantize:
|
283 |
file_name_quantized = (
|
284 |
+
f"/tmp/my_model-{_binary_type}-{_topk_words}-words.bin"
|
285 |
)
|
286 |
|
287 |
cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name} {file_name_quantized}"
|
288 |
print(subprocess.run(cmd, shell=True))
|
289 |
else:
|
290 |
if _do_quantize:
|
291 |
+
file_name = f"/tmp/my_model-{_binary_type}.bin"
|
292 |
|
293 |
cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name_fixed} {file_name}"
|
294 |
print(subprocess.run(cmd, shell=True))
|