fewe32 committed on
Commit
eef1213
·
verified ·
1 Parent(s): 3926a0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from infer import inference
3
  import unicodedata
4
  import regex
 
5
 
6
  description = '''
7
  Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
@@ -35,7 +36,7 @@ def adjust_case(original, replacement):
35
  adjusted += replacement[len(original):]
36
  return adjusted
37
 
38
- def replace_with_custom_dict(text, custom_dict):
39
  text = normalize_text(text)
40
  tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
41
  new_tokens = []
@@ -51,6 +52,7 @@ def replace_with_custom_dict(text, custom_dict):
51
  new_tokens.append(adjusted_replacement)
52
  else:
53
  new_tokens.append(token)
 
54
  else:
55
  new_tokens.append(token)
56
  return ''.join(new_tokens)
@@ -87,13 +89,37 @@ with open('dict.txt', 'r', encoding='utf-8') as f:
87
  base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
88
  custom_dict[base_word] = line_normalized
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def transform_text(text, apply_custom_dict, add_pauses_flag):
 
91
  text = normalize_text(text)
92
  if apply_custom_dict:
93
- text = replace_with_custom_dict(text, custom_dict)
94
  text = convert_accented_text(text)
95
  if add_pauses_flag:
96
  text = add_pauses_to_text(text)
 
 
 
 
 
 
 
 
 
97
  return text
98
 
99
  def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
 
2
  from infer import inference
3
  import unicodedata
4
  import regex
5
+ import threading
6
 
7
  description = '''
8
  Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
 
36
  adjusted += replacement[len(original):]
37
  return adjusted
38
 
39
+ def replace_with_custom_dict(text, custom_dict, unknown_words):
40
  text = normalize_text(text)
41
  tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
42
  new_tokens = []
 
52
  new_tokens.append(adjusted_replacement)
53
  else:
54
  new_tokens.append(token)
55
+ unknown_words.add(base_token)
56
  else:
57
  new_tokens.append(token)
58
  return ''.join(new_tokens)
 
89
  base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
90
  custom_dict[base_word] = line_normalized
91
 
92
# Words already persisted to new_dict.txt, loaded once at startup so that
# later requests only append genuinely new entries.
existing_new_dict_words = set()
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as f:
        existing_new_dict_words.update(
            word for word in (line.strip() for line in f) if word
        )
except FileNotFoundError:
    # No dictionary file yet -- it will be created on the first append.
    pass

# Guards concurrent appends to new_dict.txt from parallel request threads.
file_lock = threading.Lock()
105
+
106
def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Normalize *text*, optionally apply the custom accent dictionary and
    pause insertion, and persist any words missing from the dictionary.

    Args:
        text: Raw input text.
        apply_custom_dict: When True, replace words via ``custom_dict`` and
            collect words that had no dictionary entry into ``unknown_words``.
        add_pauses_flag: When True, run ``add_pauses_to_text`` on the result.

    Returns:
        The transformed text.

    Side effects:
        Appends newly seen unknown words to ``new_dict.txt`` and to the
        module-level ``existing_new_dict_words`` cache (under ``file_lock``).
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)

    if unknown_words:
        # Compute the delta *inside* the lock. Computing it outside (as the
        # original did) is a check-then-act race: two concurrent requests
        # could both see a word as new and write duplicate lines to the file.
        with file_lock:
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    for word in sorted(new_words_to_add):
                        f.write(word + '\n')
                existing_new_dict_words.update(new_words_to_add)
    return text
124
 
125
  def synthesise(transformed_text, speed, steps, progress=gr.Progress()):