Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
from infer import inference
|
3 |
import unicodedata
|
4 |
import regex
|
|
|
5 |
|
6 |
description = '''
|
7 |
Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
|
@@ -35,7 +36,7 @@ def adjust_case(original, replacement):
|
|
35 |
adjusted += replacement[len(original):]
|
36 |
return adjusted
|
37 |
|
38 |
-
def replace_with_custom_dict(text, custom_dict):
|
39 |
text = normalize_text(text)
|
40 |
tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
|
41 |
new_tokens = []
|
@@ -51,6 +52,7 @@ def replace_with_custom_dict(text, custom_dict):
|
|
51 |
new_tokens.append(adjusted_replacement)
|
52 |
else:
|
53 |
new_tokens.append(token)
|
|
|
54 |
else:
|
55 |
new_tokens.append(token)
|
56 |
return ''.join(new_tokens)
|
@@ -87,13 +89,37 @@ with open('dict.txt', 'r', encoding='utf-8') as f:
|
|
87 |
base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
|
88 |
custom_dict[base_word] = line_normalized
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Run the input text through the pre-synthesis processing steps.

    Steps, in order: ``normalize_text``; ``replace_with_custom_dict``
    against the module-level ``custom_dict`` (only when
    ``apply_custom_dict`` is truthy); ``convert_accented_text``; and
    ``add_pauses_to_text`` (only when ``add_pauses_flag`` is truthy).

    Args:
        text: Raw input text.
        apply_custom_dict: Whether to apply the custom dictionary step.
        add_pauses_flag: Whether to apply the pause-insertion step.

    Returns:
        The processed text.
    """
    processed = normalize_text(text)
    if apply_custom_dict:
        processed = replace_with_custom_dict(processed, custom_dict)
    processed = convert_accented_text(processed)
    if not add_pauses_flag:
        return processed
    return add_pauses_to_text(processed)
|
98 |
|
99 |
def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
|
|
|
2 |
from infer import inference
|
3 |
import unicodedata
|
4 |
import regex
|
5 |
+
import threading
|
6 |
|
7 |
description = '''
|
8 |
Програма може не коректно визначати деякі наголоси і не перетворює цифри, акроніми і різні скорочення в словесну форму.
|
|
|
36 |
adjusted += replacement[len(original):]
|
37 |
return adjusted
|
38 |
|
39 |
+
def replace_with_custom_dict(text, custom_dict, unknown_words):
|
40 |
text = normalize_text(text)
|
41 |
tokens = regex.findall(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+', text)
|
42 |
new_tokens = []
|
|
|
52 |
new_tokens.append(adjusted_replacement)
|
53 |
else:
|
54 |
new_tokens.append(token)
|
55 |
+
unknown_words.add(base_token)
|
56 |
else:
|
57 |
new_tokens.append(token)
|
58 |
return ''.join(new_tokens)
|
|
|
89 |
base_word = remove_combining_chars(line_normalized.replace('+', '').lower())
|
90 |
custom_dict[base_word] = line_normalized
|
91 |
|
92 |
+
# Words already recorded in new_dict.txt (one per line, blank lines ignored),
# read once when the module is imported.
existing_new_dict_words = set()
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as f:
        existing_new_dict_words.update(
            entry for entry in map(str.strip, f) if entry
        )
except FileNotFoundError:
    # No file yet: keep the empty set. Opening the file in append mode
    # later on will create it.
    pass

# Lock used to serialise writes to new_dict.txt.
file_lock = threading.Lock()
|
105 |
+
|
106 |
def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Run the text processing steps and record words missing from the dictionary.

    Steps, in order: ``normalize_text``; ``replace_with_custom_dict``
    against the module-level ``custom_dict`` (only when
    ``apply_custom_dict`` is truthy), which also collects unmatched words
    into ``unknown_words``; ``convert_accented_text``; and
    ``add_pauses_to_text`` (only when ``add_pauses_flag`` is truthy).

    Any collected word not yet present in ``existing_new_dict_words`` is
    appended to ``new_dict.txt`` (one word per line, in sorted order) and
    added to that in-memory set.

    Args:
        text: Raw input text.
        apply_custom_dict: Whether to apply the custom dictionary step.
        add_pauses_flag: Whether to apply the pause-insertion step.

    Returns:
        The processed text.
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)

    # Skip taking the lock entirely when nothing was collected.
    if unknown_words:
        with file_lock:
            # The membership check must happen under the same lock as the
            # write: otherwise two concurrent calls can both see a word as
            # new and append it to the file twice.
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    f.writelines(
                        word + '\n' for word in sorted(new_words_to_add)
                    )
                # Update the in-memory set only after the write succeeded.
                existing_new_dict_words.update(new_words_to_add)
    return text
|
124 |
|
125 |
def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
|