Update app.py
app.py CHANGED
@@ -263,9 +263,12 @@ def text_to_kenlm(
 
     # Commands to run in the container
     cmd = (
-        f"{kenlm_bin}/lmplz
+        f"{kenlm_bin}/lmplz -T /tmp -S 80% --text {intermediate_file} --arpa /tmp/my_model.arpa -o {_order} --prune {_arpa_prune} --discount_fallback",
     )
-
+    r = subprocess.run(cmd, shell=True)
+    print(r)
+    if r.returncode != 0:
+        raise gr.Error("Failed to create the model.")
 
     file_name = "/tmp/my_model.arpa"
     file_name_fixed = "/tmp/my_model_correct.arpa"
@@ -295,18 +298,19 @@ def text_to_kenlm(
 
     _, vocab_str = convert_and_filter_topk(intermediate_file, _topk_words)
 
-
-
-
-
-
-
-
-
-
-        check=True,
-    )
+    r = subprocess.run(
+        [
+            os.path.join(kenlm_bin, "filter"),
+            "single",
+            "model:{}".format(file_name_fixed),
+            file_name,
+        ],
+        input=vocab_str.encode("utf-8"),
+        check=True,
     )
+    print(r)
+    if r.returncode != 0:
+        raise gr.Error("Failed to filter the model.")
 
     generate_files(vocab_str.split("\n"))
 
@@ -316,7 +320,10 @@ def text_to_kenlm(
         )
 
         cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name} {file_name_quantized}"
-
+        r = subprocess.run(cmd, shell=True)
+        print(r)
+        if r.returncode != 0:
+            raise gr.Error("Failed to quantize the model.")
 
         file_name = file_name_quantized
     else:
@@ -324,7 +331,10 @@ def text_to_kenlm(
         file_name = f"/tmp/my_model-{_binary_type}.bin"
 
        cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name_fixed} {file_name}"
-
+        r = subprocess.run(cmd, shell=True)
+        print(r)
+        if r.returncode != 0:
+            raise gr.Error("Failed to quantize the model.")
 
     gr.Success("Model created.")
 
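One detail worth noting across these hunks: the shell-string call sites rely on the explicit returncode test, while the filter call passes check=True, which makes subprocess.run raise subprocess.CalledProcessError on a non-zero exit before any returncode test runs. A minimal sketch of a helper that folds the repeated run/print/raise pattern into one place, assuming the same Gradio context; the name _run_or_error is hypothetical and not part of this commit:

import subprocess

import gradio as gr


def _run_or_error(cmd, failure_msg, **kwargs):
    """Run a KenLM command and surface failures as a Gradio error."""
    try:
        # check=True makes subprocess.run raise CalledProcessError on a
        # non-zero exit, so no separate returncode test is needed.
        r = subprocess.run(cmd, check=True, **kwargs)
    except subprocess.CalledProcessError as e:
        print(e)
        raise gr.Error(failure_msg)
    print(r)
    return r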
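With such a helper, each of the four sites above would reduce to a single call. A usage sketch, with cmd, kenlm_bin, file_name, file_name_fixed, and vocab_str as defined in the diff:

import os

# lmplz and the two build_binary invocations run as shell strings:
_run_or_error(cmd, "Failed to create the model.", shell=True)

# filter takes an argument list; the vocabulary to keep is fed on stdin,
# which is what the input= argument in the commit supplies:
_run_or_error(
    [os.path.join(kenlm_bin, "filter"), "single",
     "model:{}".format(file_name_fixed), file_name],
    "Failed to filter the model.",
    input=vocab_str.encode("utf-8"),
)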