# Source: Hugging Face Space "jhlfrfufyfn/bel-tts" (status: Running).
# File size: 2,859 bytes.
# The identifiers below were captured together with the file from its web view
# (presumably per-line commit hashes from the blame gutter; they are not part
# of the program):
# 8cb1844, 5dfe98a, 75ccc38, b23a251, 5a090e9, 6b48298, 1897f0d, 3442ac2,
# 99db302, fc985f9, 19b7774, b0966fd, 17ac8e8

from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download
import gradio as gr
import tempfile
import os
import requests

# Hugging Face Hub repository that tts() downloads the model, vocoder and
# config files from.
REPO_ID = "jhlfrfufyfn/bel-tts"

# Passed as the `title` of the Gradio interface
# (roughly: "Belarusian text-to-speech").
my_title = "Беларускі тэкст-у-маўленне"
# Passed as the `description` of the Gradio interface
# (roughly: "Belarusian-language model for voicing text (May 2023).").
my_description = "Беларускамоўная мадэль для агучвання тэксту (травень 2023)."

# Initial value of the input textbox (roughly: "Cheetahs live in open and
# spacious places where there is plenty of prey.").
be_text = "Гепарды жывуць у адкрытых і прасторных месцах, дзе ёсць шмат здабычы."

def belarusify_russian_text(text: str):
    """Rewrite Russian-style spellings in *text* with Belarusian ones.

    The substitutions are applied one after another in the order listed, so
    each rule sees the output of the previous ones (for example "иу" first
    becomes "іу" and only then "іў"). Only the exact lowercase sequences
    listed are replaced; everything else is left untouched.

    Returns the converted string.
    """
    # (sequence to find, replacement) -- order matters, see the docstring.
    substitutions = (
        ("и", "і"),
        ("іу", "іў"),
        ("оу", "оў"),
        ("ау", "аў"),
        ("ыу", "ыў"),
        ("уу", "уў"),
        ("юу", "юў"),
        ("еу", "еў"),
        ("ёу", "ёў"),
        ("щ", "шч"),
    )
    converted = text
    for found, replacement in substitutions:
        converted = converted.replace(found, replacement)
    return converted

# Upper bound, in seconds, on waiting for the fonemizer service. Without a
# timeout requests waits forever, so a stalled service would hang the handler.
_FONEMIZER_TIMEOUT_SECONDS = 30


def _fonemize(text: str) -> str:
    """Send *text* to the remote fonemizer service and return its reply text.

    Raises:
        RuntimeError: if the service answers with a status code other than 200.
        requests.exceptions.RequestException: if the request cannot be
            completed (connection failure, timeout, ...).
    """
    # Specify the charset as UTF-8
    headers = {'Content-Type': 'text/plain; charset=utf-8'}
    response = requests.post(
        "https://fonemizer.nikuchin.fun/processText",
        data=text.encode('utf-8'),  # Encode the text as UTF-8
        headers=headers,
        timeout=_FONEMIZER_TIMEOUT_SECONDS,
    )

    if response.status_code != 200:
        raise RuntimeError(f"Request to fonemizer failed with status code {response.status_code}")

    # Diagnostics: raw body and the content type reported by the service.
    print(response.content)
    content_type = response.headers.get('Content-Type')
    print(content_type)

    # When a text/* response declares no charset, requests decodes it as
    # ISO-8859-1, which would garble a UTF-8 body. The request itself is sent
    # as UTF-8, so fall back to UTF-8 unless the service says otherwise.
    if "charset" not in (content_type or "").lower():
        response.encoding = "utf-8"
    return response.text


def _build_synthesizer():
    """Fetch the model files from REPO_ID and build a Synthesizer from them."""
    best_model_path = hf_hub_download(repo_id=REPO_ID, filename="model.pth")
    config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
    vocoder_path = hf_hub_download(repo_id=REPO_ID, filename="vocoder.pth")
    # Fetched but not handed to Synthesizer directly; presumably referenced
    # from one of the config files -- TODO confirm before removing.
    hf_hub_download(repo_id=REPO_ID, filename="scale_stats.npy")
    vocoder_config_path = hf_hub_download(repo_id=REPO_ID, filename="vocoder_config.json")

    # Positional arguments are kept exactly as before; the None slots and the
    # trailing False (presumably "do not use CUDA") should be checked against
    # the installed Synthesizer signature.
    return Synthesizer(
        best_model_path,
        config_path,
        None,
        None,
        vocoder_path,
        vocoder_config_path,
        None,
        None,
        False,
    )


def tts(text: str):
    """Synthesize speech for *text* and return the path of the resulting WAV file.

    The text is first passed through belarusify_russian_text(), then sent to
    the remote fonemizer service, and the service's reply is what gets
    synthesized.

    Returns:
        The name of a temporary ".wav" file. It is created with delete=False,
        so it stays on disk after this function returns.

    Raises:
        RuntimeError: if the fonemizer answers with a non-200 status code.
        requests.exceptions.RequestException: if the fonemizer request fails
            or exceeds the timeout.
    """
    print("Original text: ", text)
    text = belarusify_russian_text(text)
    print("Belarusified text: ", text)

    # Sending a request to the fonemizer
    text = _fonemize(text)

    # NOTE(review): the synthesizer is rebuilt on every call. Reusing one
    # instance would avoid that, but only if concurrent use is confirmed safe.
    synthesizer = _build_synthesizer()

    # create audio file
    wavs = synthesizer.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name

# Startup diagnostics: show the working directory and what it contains.
working_dir = os.getcwd()
print("CWD IS ", working_dir)
print("LIST IS", os.listdir())

# Input widget: a five-line textbox pre-filled with the sample sentence.
text_input = gr.Textbox(lines=5, label="Input Text", value=be_text)
# Output widget: an audio player fed with the file path returned by tts().
audio_output = gr.Audio(type="filepath", label="Output Audio")

iface = gr.Interface(
    fn=tts,
    inputs=text_input,
    outputs=audio_output,
    title=my_title,
    description=my_description,
    article="",
    examples=None,
    allow_flagging="never",
)

# Start serving the interface.
iface.launch()