Yehor committed on
Commit
e534ea9
·
verified ·
1 Parent(s): e6d2f62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -53,11 +53,11 @@ tech_libraries = f"""
53
  """.strip()
54
 
55
 
56
- def convert_and_filter_topk(output_dir, input_txt, top_k):
57
  """Convert to lowercase, count word occurrences and save top-k words to a file"""
58
 
59
  counter = Counter()
60
- data_lower = os.path.join(output_dir, "lower.txt.gz")
61
 
62
  print("\nConverting to lowercase and counting word occurrences ...")
63
  with io.TextIOWrapper(
@@ -83,8 +83,7 @@ def convert_and_filter_topk(output_dir, input_txt, top_k):
83
  print("\nSaving top {} words ...".format(top_k))
84
  top_counter = counter.most_common(top_k)
85
  vocab_str = "\n".join(word for word, count in top_counter)
86
- vocab_path = "vocab-{}.txt".format(top_k)
87
- vocab_path = os.path.join(output_dir, vocab_path)
88
  with open(vocab_path, "w+") as file:
89
  file.write(vocab_str)
90
 
@@ -294,7 +293,7 @@ def text_to_kenlm(
294
  if _do_limit_topk:
295
  file_name = f"/tmp/my_model-{_topk_words}-words.arpa"
296
 
297
- _, vocab_str = convert_and_filter_topk(app_dir, intermediate_file, _topk_words)
298
 
299
  print(
300
  subprocess.run(
 
53
  """.strip()
54
 
55
 
56
+ def convert_and_filter_topk(input_txt, top_k):
57
  """Convert to lowercase, count word occurrences and save top-k words to a file"""
58
 
59
  counter = Counter()
60
+ data_lower = "/tmp/lower.txt.gz"
61
 
62
  print("\nConverting to lowercase and counting word occurrences ...")
63
  with io.TextIOWrapper(
 
83
  print("\nSaving top {} words ...".format(top_k))
84
  top_counter = counter.most_common(top_k)
85
  vocab_str = "\n".join(word for word, count in top_counter)
86
+ vocab_path = "/tmp/vocab-{}.txt".format(top_k)
 
87
  with open(vocab_path, "w+") as file:
88
  file.write(vocab_str)
89
 
 
293
  if _do_limit_topk:
294
  file_name = f"/tmp/my_model-{_topk_words}-words.arpa"
295
 
296
+ _, vocab_str = convert_and_filter_topk(intermediate_file, _topk_words)
297
 
298
  print(
299
  subprocess.run(