Spaces:

projecte-aina
/

transcripcio-fonetica-catala

Running

App Files Files Community

ccoreilly committed on Apr 30, 2023

Commit

c19313c

•

1 Parent(s): db540c2

Afegeix Festival

Browse files

Files changed (3) hide show

Dockerfile +8 -1
app.py +9 -7
festival.py +65 -0

Dockerfile CHANGED Viewed

@@ -1,6 +1,11 @@
 FROM python:3.9
-RUN apt update && apt install -y git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev
 RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
@@ -14,6 +19,7 @@ RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
 	PATH=/home/user/.local/bin:$PATH
@@ -26,6 +32,7 @@ COPY --chown=user models models
 RUN pip install -r requirements.txt
 COPY --chown=user engine.py .
 COPY --chown=user app.py .
 RUN mkdir -p cache && chmod 777 cache

 FROM python:3.9
+RUN apt-get update && apt-get install -y gnupg && \
+     apt-key adv --recv-keys --keyserver hkp://keyserver.ubuntu.com:80 A3A48C4A && \
+     echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
+     echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
+     apt-get update && \
+     apt-get -y install festival festvox-ca-ona-hts festvox-ca-pau-hts lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev
 RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
 USER user
 ENV HOME=/home/user \
 	PATH=/home/user/.local/bin:$PATH
 RUN pip install -r requirements.txt
 COPY --chown=user engine.py .
+COPY --chown=user festival.py .
 COPY --chown=user app.py .
 RUN mkdir -p cache && chmod 777 cache

app.py CHANGED Viewed

@@ -1,19 +1,17 @@
-from engine import Piper
 import tempfile
 from typing import Optional
 from TTS.config import load_config
 import gradio as gr
 import numpy as np
 import os
-import json
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 from espeak_phonemizer import Phonemizer
 MAX_TXT_LEN = 325
-SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']
 fonemitzador = Phonemizer("ca")
 def carrega_bsc():
@@ -51,7 +49,7 @@ model_collectivat = carrega_collectivat()
 model_piper = carrega_piper()
-def tts(text, speaker_idx):
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
@@ -82,7 +80,9 @@ def tts(text, speaker_idx):
     fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
-    return fonemes, fp_bsc, fp_coll, fp_piper
 description="""
@@ -113,10 +113,12 @@ iface = gr.Interface(
             label="Text",
             value="L'Èlia i l'Alí a l'aula.  L'oli i l'ou.  Lulú olorava la lila.",
         ),
-        gr.Dropdown(label="Selecciona un parlant pel model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
     ],
     outputs=[
         gr.Markdown(label="Fonemes"),
         gr.Audio(label="BSC VITS",type="filepath"),
         gr.Audio(label="Collectivat Fastspeech",type="filepath"),
         gr.Audio(label="Piper VITS",type="filepath")

 import tempfile
 from typing import Optional
 from TTS.config import load_config
 import gradio as gr
 import numpy as np
 import os
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 from espeak_phonemizer import Phonemizer
+from engine import Piper
+from festival import festival_synthesize
 MAX_TXT_LEN = 325
 fonemitzador = Phonemizer("ca")
 def carrega_bsc():
 model_piper = carrega_piper()
+def tts(text, festival_voice, speaker_idx):
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
     fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
+    fp_festival = festival_synthesize(text, festival_voice)
+    return fonemes, fp_festival, fp_bsc, fp_coll, fp_piper
 description="""
             label="Text",
             value="L'Èlia i l'Alí a l'aula.  L'oli i l'ou.  Lulú olorava la lila.",
         ),
+        gr.Dropdown(label="Parlant del motor Festival", choices=["ona", "pau"], value="ona"),
+        gr.Dropdown(label="Parlant del model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
     ],
     outputs=[
         gr.Markdown(label="Fonemes"),
+        gr.Audio(label="Festival",type="filepath"),
         gr.Audio(label="BSC VITS",type="filepath"),
         gr.Audio(label="Collectivat Fastspeech",type="filepath"),
         gr.Audio(label="Piper VITS",type="filepath")

festival.py ADDED Viewed

	@@ -0,0 +1,65 @@

+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+#
+# Copyright (c) 2016 Jordi Mas i Hernandez <[email protected]>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+import subprocess
+import tempfile
+festival_voices = {
+    "ona": "voice_upc_ca_ona_hts",
+    "pau": "voice_upc_ca_pau_hts"
+}
+def _normalize(result):
+    mapping = {
+                '’' : '\'',
+                'à' : 'à',
+                'í' : 'í',
+                'ó' : 'ó',
+                'è' : 'è',
+                'ò' : 'ò',
+                'ú' : 'ú',
+              }
+    for char in mapping.keys():
+        result = result.replace(char, mapping[char])
+    return result
+def festival_synthesize(text, voice):
+    if voice not in ["ona", "pau"]:
+        raise Error
+    txt2wave = '/usr/bin/text2wave'
+    with tempfile.NamedTemporaryFile() as encoded_file,\
+         tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wave_file:
+        text = _normalize(text)
+        f = open(encoded_file.name, 'wb')
+        f.write(text.encode('ISO-8859-15', 'ignore'))
+        f.close()
+        cmd = '{0} -o {1} {2} -eval "({3})"'.\
+              format(txt2wave, wave_file.name, encoded_file.name, festival_voices[voice])
+        p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
+        p.wait()
+        return wave_file.name