fewe32 committed on
Commit
eef1213
·
verified ·
1 Parent(s): 3926a0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  from infer import inference
3
  import unicodedata
4
  import regex
 
5
 
6
  description = '''
7
  Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
@@ -35,7 +36,7 @@ def adjust_case(original, replacement):
35
  adjusted += replacement[len(original):]
36
  return adjusted
37
 
38
- def replace_with_custom_dict(text, custom_dict):
39
  text = normalize_text(text)
40
  tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
41
  new_tokens = []
@@ -51,6 +52,7 @@ def replace_with_custom_dict(text, custom_dict):
51
  new_tokens.append(adjusted_replacement)
52
  else:
53
  new_tokens.append(token)
 
54
  else:
55
  new_tokens.append(token)
56
  return ''.join(new_tokens)
@@ -87,13 +89,37 @@ with open('dict.txt', 'r', encoding='utf-8') as f:
87
  base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
88
  custom_dict[base_word] = line_normalized
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def transform_text(text, apply_custom_dict, add_pauses_flag):
 
91
  text = normalize_text(text)
92
  if apply_custom_dict:
93
- text = replace_with_custom_dict(text, custom_dict)
94
  text = convert_accented_text(text)
95
  if add_pauses_flag:
96
  text = add_pauses_to_text(text)
 
 
 
 
 
 
 
 
 
97
  return text
98
 
99
  def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
 
2
  from infer import inference
3
  import unicodedata
4
  import regex
5
+ import threading
6
 
7
  description = '''
8
  Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
 
36
  adjusted += replacement[len(original):]
37
  return adjusted
38
 
39
+ def replace_with_custom_dict(text, custom_dict, unknown_words):
40
  text = normalize_text(text)
41
  tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
42
  new_tokens = []
 
52
  new_tokens.append(adjusted_replacement)
53
  else:
54
  new_tokens.append(token)
55
+ unknown_words.add(base_token)
56
  else:
57
  new_tokens.append(token)
58
  return ''.join(new_tokens)
 
89
  base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
90
  custom_dict[base_word] = line_normalized
91
 
92
# Words already persisted to new_dict.txt, loaded once at startup so that
# later requests only append genuinely new entries.
existing_new_dict_words = set()
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as f:
        existing_new_dict_words.update(
            word for word in (line.strip() for line in f) if word
        )
except FileNotFoundError:
    # No dictionary file yet -- it will be created on the first append.
    pass

# Guards concurrent appends to new_dict.txt from parallel request threads.
file_lock = threading.Lock()
105
+
106
def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Normalize *text*, optionally apply the custom accent dictionary and
    pause insertion, and persist any words missing from the dictionary.

    Args:
        text: Raw input text.
        apply_custom_dict: When True, replace words via ``custom_dict`` and
            collect words that had no dictionary entry into ``unknown_words``.
        add_pauses_flag: When True, run ``add_pauses_to_text`` on the result.

    Returns:
        The transformed text.

    Side effects:
        Appends newly seen unknown words to ``new_dict.txt`` and to the
        module-level ``existing_new_dict_words`` cache (under ``file_lock``).
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)

    if unknown_words:
        # Compute the delta *inside* the lock. Computing it outside (as the
        # original did) is a check-then-act race: two concurrent requests
        # could both see a word as new and write duplicate lines to the file.
        with file_lock:
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    for word in sorted(new_words_to_add):
                        f.write(word + '\n')
                existing_new_dict_words.update(new_words_to_add)
    return text
124
 
125
  def synthesise(transformed_text, speed, steps, progress=gr.Progress()):