Yehor committed on
Commit
5296ffa
·
verified ·
1 Parent(s): d2e8728

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -32
app.py CHANGED
@@ -261,37 +261,38 @@ def text_to_kenlm(
261
  with open(intermediate_file, "w") as f:
262
  f.write(" ".join(results))
263
 
264
- # Commands to run in the container
265
- cmd = (
266
- f"{kenlm_bin}/lmplz -T /tmp -S 80% --text {intermediate_file} --arpa /tmp/my_model.arpa -o {_order} --prune {_arpa_prune} --discount_fallback",
267
- )
268
- r = subprocess.run(cmd, shell=True)
269
- print(r)
270
- if r.returncode != 0:
271
- raise gr.Error("Failed to create the model.")
272
-
273
  file_name = "/tmp/my_model.arpa"
274
- file_name_fixed = "/tmp/my_model_correct.arpa"
275
-
276
- # Fix the ARPA file
277
- with (
278
- open(file_name, "r") as read_file,
279
- open(file_name_fixed, "w") as write_file,
280
- ):
281
- has_added_eos = False
282
- for line in read_file:
283
- if not has_added_eos and "ngram 1=" in line:
284
- count = line.strip().split("=")[-1]
285
- write_file.write(line.replace(f"{count}", f"{int(count) + 1}"))
286
- elif not has_added_eos and "<s>" in line:
287
- write_file.write(line)
288
- write_file.write(line.replace("<s>", "</s>"))
289
- has_added_eos = True
290
- else:
291
- write_file.write(line)
292
-
293
- # Replace the file name
294
- file_name = file_name_fixed
 
 
 
 
 
 
 
 
 
 
295
 
296
  if _do_limit_topk:
297
  file_name = f"/tmp/my_model-{_topk_words}-words.arpa"
@@ -302,7 +303,7 @@ def text_to_kenlm(
302
  [
303
  os.path.join(kenlm_bin, "filter"),
304
  "single",
305
- "model:{}".format(file_name_fixed),
306
  file_name,
307
  ],
308
  input=vocab_str.encode("utf-8"),
@@ -330,7 +331,7 @@ def text_to_kenlm(
330
  if _do_quantize:
331
  file_name = f"/tmp/my_model-{_binary_type}.bin"
332
 
333
- cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name_fixed} {file_name}"
334
  r = subprocess.run(cmd, shell=True)
335
  print(r)
336
  if r.returncode != 0:
 
261
  with open(intermediate_file, "w") as f:
262
  f.write(" ".join(results))
263
 
 
 
 
 
 
 
 
 
 
264
  file_name = "/tmp/my_model.arpa"
265
+
266
+ # Commands to run in the container
267
+ if not _do_limit_topk:
268
+ cmd = (
269
+ f"{kenlm_bin}/lmplz -T /tmp -S 80% --text {intermediate_file} --arpa /tmp/my_model.arpa -o {_order} --prune {_arpa_prune} --discount_fallback",
270
+ )
271
+ r = subprocess.run(cmd, shell=True)
272
+ print(r)
273
+ if r.returncode != 0:
274
+ raise gr.Error("Failed to create the model.")
275
+
276
+ file_name_fixed = "/tmp/my_model_correct.arpa"
277
+
278
+ # Fix the ARPA file
279
+ with (
280
+ open(file_name, "r") as read_file,
281
+ open(file_name_fixed, "w") as write_file,
282
+ ):
283
+ has_added_eos = False
284
+ for line in read_file:
285
+ if not has_added_eos and "ngram 1=" in line:
286
+ count = line.strip().split("=")[-1]
287
+ write_file.write(line.replace(f"{count}", f"{int(count) + 1}"))
288
+ elif not has_added_eos and "<s>" in line:
289
+ write_file.write(line)
290
+ write_file.write(line.replace("<s>", "</s>"))
291
+ has_added_eos = True
292
+ else:
293
+ write_file.write(line)
294
+ # Replace the file name
295
+ file_name = file_name_fixed
296
 
297
  if _do_limit_topk:
298
  file_name = f"/tmp/my_model-{_topk_words}-words.arpa"
 
303
  [
304
  os.path.join(kenlm_bin, "filter"),
305
  "single",
306
+ "model:{}".format(file_name),
307
  file_name,
308
  ],
309
  input=vocab_str.encode("utf-8"),
 
331
  if _do_quantize:
332
  file_name = f"/tmp/my_model-{_binary_type}.bin"
333
 
334
+ cmd = f"{kenlm_bin}/build_binary -a {_binary_a_bits} -b {_binary_b_bits} -q {_binary_q_bits} -v {_binary_type} {file_name} {file_name}"
335
  r = subprocess.run(cmd, shell=True)
336
  print(r)
337
  if r.returncode != 0: