Spaces:

projecte-aina
/

transcripcio-fonetica-catala

Running

App Files Files Community

ccoreilly committed on Apr 30, 2023

Commit

53016e3

•

1 Parent(s): f47653c

Mostra fonemes

Browse files

Files changed (1) hide show

app.py +23 -5

app.py CHANGED Viewed

@@ -8,11 +8,14 @@ import os
 import json
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 MAX_TXT_LEN = 100
 SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']
 def carrega_bsc():
     model_path = os.getcwd() + "/models/bsc/best_model.pth"
     config_path = os.getcwd() + "/models/bsc/config.json"
@@ -77,15 +80,29 @@ def tts(text, speaker_idx):
         fp.write(wav_piper)
         fp_piper = fp.name
-    return fp_bsc, fp_coll, fp_piper
 description="""
-1️⃣ Introdueix el text a sintetitzar.
-2️⃣ Selecciona una veu en el desplegable.
-3️⃣ Gaudeix!
 """
 article= ""
@@ -99,11 +116,12 @@ iface = gr.Interface(
         gr.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default="ona")
     ],
     outputs=[
         gr.Audio(label="BSC VITS",type="filepath"),
         gr.Audio(label="Collectivat Fastspeech",type="filepath"),
         gr.Audio(label="Piper VITS",type="filepath")
     ],
-    title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
     description=description,
     article=article,
     allow_flagging="never",

 import json
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
+from espeak_phonemizer import Phonemizer
 MAX_TXT_LEN = 100
 SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']
+fonemitzador = Phonemizer("ca")
 def carrega_bsc():
     model_path = os.getcwd() + "/models/bsc/best_model.pth"
     config_path = os.getcwd() + "/models/bsc/config.json"
         fp.write(wav_piper)
         fp_piper = fp.name
+    fonemes = fonemitzador.phonemize(text)
+    return fonemes, fp_bsc, fp_coll, fp_piper
 description="""
+Amb aquesta aplicació podeu sintetitzar text a veu amb els últims models lliures pel català.
+1. Model multi-parlant VITS entrenat pel BSC (Projecte Aina)
+https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker
+2. Model Fastspeech entrenat per Col·lectivat
+https://github.com/CollectivaT-dev/TTS-API
+3. Model VITS entrenat per Piper/Home Assistant
+https://github.com/rhasspy/piper
+Els dos últims models han estat entrenats amb la veu d'Ona de FestCAT, que va servir com a base per a les veus catalanes de Festival.
+El primer model conté moltes veus de qualitat variable. Podeu seleccionar-ne una altra al desplegable. La veu d'Ona està seleccionada per defecte per a la comparativa.
+Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
+https://github.com/projecte-aina/espeak-ng
 """
 article= ""
         gr.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default="ona")
     ],
     outputs=[
+        gr.Textbox(label="Fonemes"),
         gr.Audio(label="BSC VITS",type="filepath"),
         gr.Audio(label="Collectivat Fastspeech",type="filepath"),
         gr.Audio(label="Piper VITS",type="filepath")
     ],
+    title="Comparativa de síntesi lliure en català",
     description=description,
     article=article,
     allow_flagging="never",