text_to_speech_ukr

Sleeping

File size: 6,843 Bytes

81470e4
 
61c3568
2fea0af
eef1213
81470e4
c52b425
c9e36ee
81470e4
 
2fea0af
61c3568
 
 
2fea0af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef1213
2fea0af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef1213
2fea0af
 
 
 
61c3568
 
 
 
 
 
 
 
 
 
 
6219873
30f3747
 
 
 
 
 
 
 
 
 
2fea0af
 
 
 
 
 
 
 
 
61c3568
eef1213
 
 
 
 
 
 
 
 
 
 
 
 
 
6219873
eef1213
2fea0af
c1bd305
eef1213
2fea0af
6219873
 
eef1213
 
 
 
 
 
 
 
 
c1bd305
2fea0af
c1bd305
 
 
 
 
4a3db98
81470e4
c1bd305
81470e4
4a3db98
c1bd305
30f3747
81470e4
c1bd305
 
 
 
4a3db98
c1bd305
 
 
 
 
 
4a3db98
 
 
c1bd305
4a3db98
6219873
 
c1bd305
4a3db98
 
 
 
c1bd305
 
4a3db98
c1bd305
4a3db98
c1bd305

import gradio as gr
from infer import inference
import unicodedata
import regex
import threading

# User-facing intro text (Ukrainian); rendered at the top of the UI through
# gr.Markdown(description) in the __main__ block.
description = '''
Програма для перетворення текста в мову. Озвучування тексту українською мовою за допомогою штучного інтелекту
'''

# Text Pre-processing Functions
def normalize_text(text):
    """Return *text* converted to Unicode NFC (canonical composed) form."""
    composed = unicodedata.normalize('NFC', text)
    return composed

def remove_combining_chars(text):
    """Strip combining marks (e.g. stress accents) from *text*.

    The text is first composed to NFC. Precomposed Cyrillic letters such as
    'й', 'ї' or 'ё' are kept intact: their breve/diaeresis is part of the
    letter itself, so decomposing them would silently turn them into a
    different letter ('и', 'і', 'е'). Every other character is decomposed and
    its non-spacing marks (Unicode category ``Mn``) are dropped, which removes
    stand-alone stress marks and accents on non-Cyrillic letters.

    Returns the result in NFC form.
    """
    kept = []
    for char in unicodedata.normalize('NFC', text):
        if unicodedata.name(char, '').startswith('CYRILLIC'):
            # A real alphabet letter, not "base letter + accent".
            kept.append(char)
            continue
        kept.extend(
            part
            for part in unicodedata.normalize('NFD', char)
            if unicodedata.category(part) != 'Mn'
        )
    return unicodedata.normalize('NFC', ''.join(kept))

def adjust_case(original, replacement):
    """Return *replacement* re-cased to follow the casing of *original*.

    * empty *original*      -> *replacement* unchanged
    * all upper case        -> ``replacement.upper()``
    * Capitalised           -> ``replacement.capitalize()``
    * all lower case        -> ``replacement.lower()``
    * anything else (mixed) -> casing is copied letter by letter

    In the mixed case, '+' stress markers and non-spacing combining marks are
    skipped on both sides, so a marker present in only one of the strings does
    not shift the letter alignment. Letters of *replacement* beyond the last
    letter of *original* are kept as they are.
    """
    if not original:
        # Nothing to copy the case from; also avoids indexing original[0].
        return replacement
    if original.isupper():
        return replacement.upper()
    if original[0].isupper() and original[1:].islower():
        return replacement.capitalize()
    if original.islower():
        return replacement.lower()

    def is_marker(char):
        return char == '+' or unicodedata.category(char) == 'Mn'

    # One flag per real letter of the original, in order.
    upper_flags = iter([char.isupper() for char in original if not is_marker(char)])
    adjusted = []
    for r_char in replacement:
        if is_marker(r_char):
            adjusted.append(r_char)
            continue
        make_upper = next(upper_flags, None)
        if make_upper is None:
            # Original has no more letters: leave the tail untouched.
            adjusted.append(r_char)
        else:
            adjusted.append(r_char.upper() if make_upper else r_char.lower())
    return ''.join(adjusted)

# Tokeniser: runs of letters/marks/'+' (words), runs of whitespace, runs of anything else.
_TOKEN_PATTERN = regex.compile(r'[\p{L}\p{M}\+]+|\s+|[^\s\p{L}\p{M}]+')
# A token that consists solely of letters, combining marks and '+'.
_WORD_PATTERN = regex.compile(r'^[\p{L}\p{M}\+]+$')


def replace_with_custom_dict(text, custom_dict, unknown_words):
    """Replace every word of *text* that has an entry in *custom_dict*.

    Args:
        text: Input text; it is NFC-normalised before tokenising.
        custom_dict: Mapping from a base word (lower case, without '+' and
            combining marks) to the replacement string.
        unknown_words: Set that is mutated in place; the base form of every
            word token not found in *custom_dict* is added to it.

    Returns:
        The text with known words replaced (case adapted through
        ``adjust_case``); whitespace, punctuation and unknown words are
        passed through unchanged.
    """
    text = normalize_text(text)
    new_tokens = []
    for token in _TOKEN_PATTERN.findall(text):
        token_normalized = normalize_text(token)
        if not _WORD_PATTERN.match(token_normalized):
            new_tokens.append(token)
            continue

        base_token = remove_combining_chars(token_normalized).replace('+', '').lower()
        base_token = normalize_text(base_token)
        if not base_token:
            # Token was only '+' signs and/or combining marks (e.g. the '+'
            # in "a + b"): it is not a word, so do not record it as unknown.
            new_tokens.append(token)
            continue

        if base_token in custom_dict:
            new_tokens.append(adjust_case(token, custom_dict[base_token]))
        else:
            new_tokens.append(token)
            unknown_words.add(base_token)
    return ''.join(new_tokens)

def convert_accented_text(text):
    """Rewrite acute-accent stress marks as a trailing '+' marker.

    Each character is inspected in its NFD form. If any component's Unicode
    name contains 'COMBINING ACUTE ACCENT', those components are removed, the
    remainder is recomposed (NFC) and a '+' is appended. Every other character
    is emitted in NFC form.
    """
    def is_acute(mark):
        return 'COMBINING ACUTE ACCENT' in unicodedata.name(mark, '')

    pieces = []
    for symbol in text:
        parts = unicodedata.normalize('NFD', symbol)
        if any(is_acute(part) for part in parts):
            bare = ''.join(part for part in parts if not is_acute(part))
            pieces.append(unicodedata.normalize('NFC', bare) + "+")
        else:
            pieces.append(unicodedata.normalize('NFC', symbol))
    return ''.join(pieces)

# Per-character pause markup. No replacement introduces a character that a
# later rule in the original chain would expand again, so one translate pass
# yields the same result as applying the rules one after another.
_PAUSE_MARKUP = str.maketrans({
    ':': ':::',
    ',': ',:::',
    ';': ';:::',
    '—': '—:::',
    '–': '–:::',
    '.': '. ... ... ',
    '!': '! ... ...',
    '?': '? ... ...',
})


def add_pauses_to_text(text):
    """Return *text* with pause markup inserted after punctuation marks."""
    return text.translate(_PAUSE_MARKUP)

# Build the replacement dictionary from dict.txt: one entry per non-blank
# line. The key is the entry lower-cased with '+' and combining marks removed;
# the value is the NFC-normalised entry itself. Later duplicates win.
with open('dict.txt', 'r', encoding='utf-8') as dict_file:
    dict_entries = [
        normalize_text(entry)
        for entry in (raw_line.strip() for raw_line in dict_file)
        if entry
    ]
custom_dict = {
    remove_combining_chars(entry.replace('+', '').lower()): entry
    for entry in dict_entries
}

# Words already recorded in new_dict.txt (one per line, blank lines ignored).
# A missing file simply means nothing has been recorded yet; it is created on
# the first append.
try:
    with open('new_dict.txt', 'r', encoding='utf-8') as known_file:
        existing_new_dict_words = {
            word
            for word in (raw_line.strip() for raw_line in known_file)
            if word
        }
except FileNotFoundError:
    existing_new_dict_words = set()

# Lock for thread-safe file writing: transform_text holds it while appending
# newly seen words to new_dict.txt and updating existing_new_dict_words.
file_lock = threading.Lock()

def transform_text(text, apply_custom_dict, add_pauses_flag):
    """Prepare raw user text for synthesis.

    Steps: NFC-normalise, optionally replace words using ``custom_dict``,
    convert acute-accent stress marks to '+' markers, and optionally insert
    pause markup. Words not found in the dictionary are appended to
    ``new_dict.txt`` (once each) as a side effect.

    Args:
        text: Raw input text.
        apply_custom_dict: When true, run the dictionary replacement.
        add_pauses_flag: When true, insert pause markup after punctuation.

    Returns:
        The transformed text.
    """
    unknown_words = set()
    text = normalize_text(text)
    if apply_custom_dict:
        text = replace_with_custom_dict(text, custom_dict, unknown_words)
    text = convert_accented_text(text)
    if add_pauses_flag:
        text = add_pauses_to_text(text)

    # Record unknown words in new_dict.txt. The "is it new?" check must happen
    # under the same lock as the write; otherwise two concurrent requests can
    # both decide a word is new and append it twice.
    if unknown_words:
        with file_lock:
            new_words_to_add = unknown_words - existing_new_dict_words
            if new_words_to_add:
                with open('new_dict.txt', 'a', encoding='utf-8') as f:
                    f.writelines(word + '\n' for word in sorted(new_words_to_add))
                existing_new_dict_words.update(new_words_to_add)
    return text

def synthesise(transformed_text, speed, steps, progress=gr.Progress()):
    """Run TTS inference on already-transformed text.

    Args:
        transformed_text: Text to speak; must be non-blank and at most
            50000 characters long.
        speed: Forwarded to ``inference`` as ``speed``.
        steps: Forwarded to ``inference`` as ``diffusion_steps``.
        progress: Progress object forwarded to ``inference``.

    Returns:
        A ``(24000, audio)`` tuple where ``audio`` is the first element of the
        ``inference`` result (24000 is presumably the sample rate in Hz —
        confirm against ``infer.inference``).

    Raises:
        gr.Error: If the text is blank or longer than 50000 characters.
    """
    has_content = bool(transformed_text.strip())
    if not has_content:
        raise gr.Error("Ви повинні ввести текст")
    if len(transformed_text) > 50000:
        raise gr.Error("Текст повинен бути менше 50 000 символів")

    # Echo what is about to be spoken to stdout.
    for log_line in ("*** saying ***", transformed_text, "*** end ***"):
        print(log_line)

    inference_result = inference(
        transformed_text,
        progress,
        speed=speed,
        alpha=1.0,
        diffusion_steps=steps,
        embedding_scale=1.0,
    )
    return 24000, inference_result[0]

if __name__ == "__main__":
    # Build the Gradio UI: raw text -> "Transform Text" -> editable transformed
    # text -> "Згенерувати аудіо" -> audio player.
    with gr.Blocks() as demo:
        gr.Markdown(description)
        with gr.Row():
            text_input = gr.Textbox(label='Text:', lines=5, max_lines=10)
            # Editable so the user can correct stress marks before synthesis.
            transformed_text_output = gr.Textbox(label='Transformed Text:', lines=5, max_lines=10, interactive=True)
        with gr.Row():
            apply_custom_dict_checkbox = gr.Checkbox(label='Замінити слова за словником', value=True)
            add_pauses_checkbox = gr.Checkbox(label='Додати паузи', value=False)
        with gr.Row():
            speed_slider = gr.Slider(label='Швидкість:', maximum=1.3, minimum=0.7, value=1.0)
            steps_slider = gr.Slider(label='Кількість кроків дифузії:', minimum=3, maximum=20, step=1, value=3)
        with gr.Row():
            transform_button = gr.Button('Transform Text')
            generate_button = gr.Button('Згенерувати аудіо')
        audio_output = gr.Audio(label="Audio:", autoplay=False, streaming=False, type="numpy")

        def update_transformed_text(text, apply_custom_dict, add_pauses_flag):
            """Event handler: fill the transformed-text box from the raw input."""
            transformed_text = transform_text(text, apply_custom_dict, add_pauses_flag)
            return transformed_text

        # Set up transformation on button click
        transform_button.click(fn=update_transformed_text, inputs=[text_input, apply_custom_dict_checkbox, add_pauses_checkbox], outputs=transformed_text_output)

        def generate_audio(transformed_text, speed, steps, progress=gr.Progress()):
            """Event handler: synthesise audio from the transformed text.

            Gradio only injects a tracked progress object into the function
            registered as the event handler, and only when that function
            declares a ``gr.Progress`` default. It is therefore declared here
            and forwarded, instead of letting ``synthesise`` fall back to its
            own untracked default.
            """
            return synthesise(transformed_text, speed, steps, progress)

        generate_button.click(fn=generate_audio, inputs=[transformed_text_output, speed_slider, steps_slider], outputs=audio_output)

    # Listen on all interfaces; no public share link.
    demo.launch(share=False, server_name="0.0.0.0")