Spaces:

mbarnig
/

MULTI_TTS_for_LOD

Running

File size: 5,587 Bytes

e027fad
 
 
 
 
7eb9a60
e027fad
887f428
bd60f01
23ca51e
c65b0b0
 
 
 
 
c96ccce
 
c65b0b0
e22351c
 
 
e027fad
 
e22351c
 
ba9e88e
 
 
 
e027fad
 
c65b0b0
23ca51e
 
ba9e88e
a8d4cda
 
 
 
 
 
23ca51e
ba9e88e
e703177
bd60f01
ba9e88e
 
 
 
 
 
 
e027fad
 
 
e22351c
 
e027fad
 
 
 
 
e22351c
 
 
 
 
 
c842240
e22351c
c842240
 
 
 
 
 
 
9ae2efe
e027fad
3199016
 
e027fad
 
 
 
0de8d9d
3199016
 
e027fad
 
 
 
 
 
c233c11
e22351c
e027fad
 
 
 
 
 
 
 
 
 
 
 
9aa3304
e027fad
e22351c

import functools
import tempfile
import threading

import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer

# HuggingFace Hub repository that hosts the model files.
REPO_ID = "denZLS/MULTI_LOD_TTS"

# Header texts shown by the Gradio interface.
my_title = "🇱🇺 🇩🇪 🇫🇷 🇬🇧 🇵🇹 Mir schwätzen wéi e Lëtzebuerger ! "
my_description = (
    "Multilingual-Multispeaker Text-to-Speech (TTS) synthesizer speaking the five current languages in Luxembourg. "
    "This model is based on VITS, thanks to 🐸 [Coqui.ai](https://coqui.ai/). "
    "I forked the [Coqui-TTS](https://github.com/mbarnig/TTS) project and did some modifications and workarounds to run the inference in this HuggingFace space."
)

# Sample sentences, one per language, plus two short Luxembourgish phrases.
lb_text = "An der Zäit hunn sech den Nordwand an d'Sonn gestridden, wie vun hinnen zwee wuel méi staark wier, wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum."
de_text = "Einst stritten sich Nordwind und Sonne, wer von ihnen beiden wohl der Stärkere wäre, als ein Wanderer, der in einen warmen Mantel gehüllt war, des Weges daherkam."
fr_text = "La bise et le soleil se disputaient, chacun assurant qu'il était le plus fort, quand ils ont vu un voyageur qui s'avançait, enveloppé dans son manteau."
en_text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak."
pt_text = "O vento norte e o Sol discutiam quem era o mais forte, quando surgiu um viajante envolvido numa capa."
schwäin_text = "Patrull Wëllschwäin, duerch déck an dënn."
wisel_text = "Patrull Wisel, ëmmer flénk."

# Choices offered by the "Speaker" and "Language" radio buttons.
my_voices = ["Male", "Female"]
my_languages = ["Lëtzebuergesch", "Deutsch", "Français", "English", "Português"]

# Clickable examples; each row is [text, speaker, language].
my_examples = [
    [schwäin_text, "Male", "Lëtzebuergesch"],
    [wisel_text, "Female", "Lëtzebuergesch"],
    [lb_text, "Male", "Lëtzebuergesch"],
    [de_text, "Female", "Deutsch"],
    [fr_text, "Male", "Français"],
    [en_text, "Female", "English"],
    [pt_text, "Male", "Português"],
]
    
# HTML footer rendered below the interface: a book-cover thumbnail next to a
# short user guide and pointers to further information.
# The thumbnail uses the standard <img> element; the previous <image> tag is
# not a valid HTML element and only worked through legacy parser fallbacks.
my_article = (
    "<h3>More Infos</h3>"
    "<table><tr>"
    "<td><a href = 'https://www.web3.lu/wp-content/uploads/2024/07/qubit-bookcovers.png'><img src = 'https://www.web3.lu/wp-content/uploads/2024/07/qubit-bookcovers-200.png' alt = 'bookcovers'></a></td>"
    "<td><p><b>User guide :</b></p><p>1. Click an example below the input field and click the play button in the audio field at the right side of the screen.</p>"
    "<p>2. Enter your own text in the input field, select a voice and the related language of the text, click the submit button, wait for the audio generation and click the play button in the audio field at the right side of the screen.</p>"
    "<p>Technical informations about the development, the training, the model and the dataset are available on my <a href = 'https://github.com/mbarnig/TTS-for-LOD'>Github repository.</a></p>"
    "<p> If you are interested in knowing the whole history of technology projects in relation to the Luxembourgish language, the first volume of my book Qubit Lëtzebuerg is made for you."
    " In chapters 2.1.3. to 2.1.8. you will discover EPISTOLE-PC, CORTINA, SpellChecker, LOD, SYSTRAN, EUROTRA, CRETA, eTranslation, Google Translate, Yandex Translate, Euroscript,"
    " Wordbee, LuNa, Strips, Spellux, Spacy, CyanogenMod, Gruut, eSpeak-NG-lb, MaryLux , lb_de_fr_en_pt_COQUI_VITS_TTS, Wav2Vec-XLS-R, Coqui STT, schreimaschinn.lu, Whisper, etc."
    " You will find all the details about this book on my website <a href = 'https://www.web3.lu'>web3.lu : Internet with a Brain.</a></p></td>"
    "</tr></table>"
)

# Input widgets: a text box plus one radio group each for speaker and
# language, pre-selected to "Male" and "Lëtzebuergesch".
my_inputs = [
    gr.Textbox(lines=5, label="Input Text"),
    gr.Radio(choices=my_voices, value="Male", label="Speaker"),
    gr.Radio(choices=my_languages, value="Lëtzebuergesch", label="Language"),
]

# Output widget; configured with type="filepath".
my_outputs = gr.Audio(type="filepath", label="Output Audio")
    
# UI speaker label -> speaker name passed to the synthesizer.
# Any label other than "Male" falls back to _DEFAULT_SPEAKER.
_SPEAKER_NAMES = {"Male": "Max"}
_DEFAULT_SPEAKER = "Mod"

# UI language label -> language identifier passed to the synthesizer.
# The identifiers are Windows-style paths; presumably they are the keys stored
# in language_ids.json of the model repository (confirm against the repo).
# Any label not listed here falls back to _DEFAULT_LANGUAGE.
_LANGUAGE_NAMES = {
    "Lëtzebuergesch": "D:\\MULTI-LOD\\mailabs\\x-lb",
    "Deutsch": "D:\\MULTI-LOD\\mailabs\\x-de",
    "Français": "D:\\MULTI-LOD\\mailabs\\fr-fr",
    "English": "D:\\MULTI-LOD\\mailabs\\en",
}
_DEFAULT_LANGUAGE = "D:\\MULTI-LOD\\mailabs\\pt-br"

# Files fetched from REPO_ID, in the positional order Synthesizer receives them.
_MODEL_FILES = (
    "checkpoint_180000.pth",
    "config.json",
    "speakers.pth",
    "language_ids.json",
)

# The synthesizer is shared by all requests, so loading it and running
# inference are serialized through this lock.
_SYNTHESIZER_LOCK = threading.Lock()


@functools.lru_cache(maxsize=1)
def _get_synthesizer():
    """Download the model files and build the Synthesizer once per process.

    Subsequent calls return the cached instance instead of re-reading the
    checkpoint for every request.
    """
    model_path, config_path, speakers_path, languages_path = (
        hf_hub_download(repo_id=REPO_ID, filename=filename)
        for filename in _MODEL_FILES
    )
    # The three trailing positional arguments are passed through unchanged;
    # their meaning depends on the Synthesizer signature of the installed
    # TTS package.
    return Synthesizer(
        model_path,
        config_path,
        speakers_path,
        languages_path,
        None,
        None,
        False,
    )


def tts(text: str, speaker_idx: str, language_idx: str) -> str:
    """Synthesize ``text`` and return the path of the resulting WAV file.

    Args:
        text: The text to speak.
        speaker_idx: Speaker label from the UI; "Male" selects the speaker
            "Max", every other value selects "Mod".
        language_idx: Language label from the UI; labels that are not
            Lëtzebuergesch, Deutsch, Français or English select the
            "pt-br" identifier.

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    speaker_name = _SPEAKER_NAMES.get(speaker_idx, _DEFAULT_SPEAKER)
    language_name = _LANGUAGE_NAMES.get(language_idx, _DEFAULT_LANGUAGE)

    with _SYNTHESIZER_LOCK:
        synthesizer = _get_synthesizer()
        wavs = synthesizer.tts(text, speaker_name, language_name)

    # Write the audio into a named temporary file that outlives this call so
    # the caller can still read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
 
# Wire the tts() callback, the widgets and the descriptive texts into a single
# Gradio interface (flagging is set to "never"), then start it.
iface = gr.Interface(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    article=my_article,
    examples=my_examples,
    allow_flagging="never",
)
iface.launch()