Afegeix Festival
Browse files- Dockerfile +8 -1
- app.py +9 -7
- festival.py +65 -0
Dockerfile
CHANGED
@@ -1,6 +1,11 @@
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
-
RUN apt update && apt install -y
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
|
6 |
|
@@ -14,6 +19,7 @@ RUN useradd -m -u 1000 user
|
|
14 |
|
15 |
USER user
|
16 |
|
|
|
17 |
ENV HOME=/home/user \
|
18 |
PATH=/home/user/.local/bin:$PATH
|
19 |
|
@@ -26,6 +32,7 @@ COPY --chown=user models models
|
|
26 |
RUN pip install -r requirements.txt
|
27 |
|
28 |
COPY --chown=user engine.py .
|
|
|
29 |
COPY --chown=user app.py .
|
30 |
|
31 |
RUN mkdir -p cache && chmod 777 cache
|
|
|
1 |
FROM python:3.9
|
2 |
|
3 |
+
RUN apt-get update && apt-get install -y gnupg && \
|
4 |
+
apt-key adv --recv-keys --keyserver hkp://keyserver.ubuntu.com:80 A3A48C4A && \
|
5 |
+
echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
|
6 |
+
echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
|
7 |
+
apt-get update && \
|
8 |
+
apt-get -y install festival festvox-ca-ona-hts festvox-ca-pau-hts lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev
|
9 |
|
10 |
RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
|
11 |
|
|
|
19 |
|
20 |
USER user
|
21 |
|
22 |
+
|
23 |
ENV HOME=/home/user \
|
24 |
PATH=/home/user/.local/bin:$PATH
|
25 |
|
|
|
32 |
RUN pip install -r requirements.txt
|
33 |
|
34 |
COPY --chown=user engine.py .
|
35 |
+
COPY --chown=user festival.py .
|
36 |
COPY --chown=user app.py .
|
37 |
|
38 |
RUN mkdir -p cache && chmod 777 cache
|
app.py
CHANGED
@@ -1,19 +1,17 @@
|
|
1 |
-
from engine import Piper
|
2 |
import tempfile
|
3 |
from typing import Optional
|
4 |
from TTS.config import load_config
|
5 |
import gradio as gr
|
6 |
import numpy as np
|
7 |
import os
|
8 |
-
import json
|
9 |
from TTS.utils.manage import ModelManager
|
10 |
from TTS.utils.synthesizer import Synthesizer
|
11 |
from espeak_phonemizer import Phonemizer
|
|
|
|
|
12 |
|
13 |
MAX_TXT_LEN = 325
|
14 |
|
15 |
-
SPEAKERS = ['f_cen_05', 'f_cen_81', 'f_occ_31', 'f_occ_de', 'f_sep_31', 'm_cen_08', 'm_occ_44', 'm_val_89']
|
16 |
-
|
17 |
fonemitzador = Phonemizer("ca")
|
18 |
|
19 |
def carrega_bsc():
|
@@ -51,7 +49,7 @@ model_collectivat = carrega_collectivat()
|
|
51 |
|
52 |
model_piper = carrega_piper()
|
53 |
|
54 |
-
def tts(text, speaker_idx):
|
55 |
if len(text) > MAX_TXT_LEN:
|
56 |
text = text[:MAX_TXT_LEN]
|
57 |
print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
|
@@ -82,7 +80,9 @@ def tts(text, speaker_idx):
|
|
82 |
|
83 |
fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
|
84 |
|
85 |
-
|
|
|
|
|
86 |
|
87 |
|
88 |
description="""
|
@@ -113,10 +113,12 @@ iface = gr.Interface(
|
|
113 |
label="Text",
|
114 |
value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
|
115 |
),
|
116 |
-
gr.Dropdown(label="
|
|
|
117 |
],
|
118 |
outputs=[
|
119 |
gr.Markdown(label="Fonemes"),
|
|
|
120 |
gr.Audio(label="BSC VITS",type="filepath"),
|
121 |
gr.Audio(label="Collectivat Fastspeech",type="filepath"),
|
122 |
gr.Audio(label="Piper VITS",type="filepath")
|
|
|
|
|
1 |
import tempfile
|
2 |
from typing import Optional
|
3 |
from TTS.config import load_config
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
6 |
import os
|
|
|
7 |
from TTS.utils.manage import ModelManager
|
8 |
from TTS.utils.synthesizer import Synthesizer
|
9 |
from espeak_phonemizer import Phonemizer
|
10 |
+
from engine import Piper
|
11 |
+
from festival import festival_synthesize
|
12 |
|
13 |
MAX_TXT_LEN = 325
|
14 |
|
|
|
|
|
15 |
fonemitzador = Phonemizer("ca")
|
16 |
|
17 |
def carrega_bsc():
|
|
|
49 |
|
50 |
model_piper = carrega_piper()
|
51 |
|
52 |
+
def tts(text, festival_voice, speaker_idx):
|
53 |
if len(text) > MAX_TXT_LEN:
|
54 |
text = text[:MAX_TXT_LEN]
|
55 |
print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
|
|
|
80 |
|
81 |
fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
|
82 |
|
83 |
+
fp_festival = festival_synthesize(text, festival_voice)
|
84 |
+
|
85 |
+
return fonemes, fp_festival, fp_bsc, fp_coll, fp_piper
|
86 |
|
87 |
|
88 |
description="""
|
|
|
113 |
label="Text",
|
114 |
value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
|
115 |
),
|
116 |
+
gr.Dropdown(label="Parlant del motor Festival", choices=["ona", "pau"], value="ona"),
|
117 |
+
gr.Dropdown(label="Parlant del model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
|
118 |
],
|
119 |
outputs=[
|
120 |
gr.Markdown(label="Fonemes"),
|
121 |
+
gr.Audio(label="Festival",type="filepath"),
|
122 |
gr.Audio(label="BSC VITS",type="filepath"),
|
123 |
gr.Audio(label="Collectivat Fastspeech",type="filepath"),
|
124 |
gr.Audio(label="Piper VITS",type="filepath")
|
festival.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# -*- encoding: utf-8 -*-
|
3 |
+
#
|
4 |
+
# Copyright (c) 2016 Jordi Mas i Hernandez <[email protected]>
|
5 |
+
#
|
6 |
+
# This program is free software; you can redistribute it and/or
|
7 |
+
# modify it under the terms of the GNU Lesser General Public
|
8 |
+
# License as published by the Free Software Foundation; either
|
9 |
+
# version 2.1 of the License, or (at your option) any later version.
|
10 |
+
#
|
11 |
+
# This program is distributed in the hope that it will be useful,
|
12 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
+
# Lesser General Public License for more details.
|
15 |
+
#
|
16 |
+
# You should have received a copy of the GNU Lesser General Public
|
17 |
+
# License along with this program; if not, write to the
|
18 |
+
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
19 |
+
# Boston, MA 02111-1307, USA.
|
20 |
+
|
21 |
+
import subprocess
|
22 |
+
import tempfile
|
23 |
+
|
24 |
+
# Maps the short speaker name accepted by festival_synthesize() to the
# Festival voice-selection function that is evaluated by text2wave.
festival_voices = dict(
    ona="voice_upc_ca_ona_hts",
    pau="voice_upc_ca_pau_hts",
)
|
28 |
+
|
29 |
+
def _normalize(result):
|
30 |
+
mapping = {
|
31 |
+
'’' : '\'',
|
32 |
+
'à' : 'à',
|
33 |
+
'í' : 'í',
|
34 |
+
'ó' : 'ó',
|
35 |
+
'è' : 'è',
|
36 |
+
'ò' : 'ò',
|
37 |
+
'ú' : 'ú',
|
38 |
+
}
|
39 |
+
|
40 |
+
for char in mapping.keys():
|
41 |
+
result = result.replace(char, mapping[char])
|
42 |
+
|
43 |
+
return result
|
44 |
+
|
45 |
+
|
46 |
+
def festival_synthesize(text, voice):
    """Synthesize *text* with Festival's ``text2wave`` and return a WAV path.

    The text is normalized, written to a temporary file encoded as
    ISO-8859-15 (characters outside that charset are dropped), and passed to
    ``/usr/bin/text2wave`` together with the Festival voice selected by
    *voice*.

    Args:
        text: The text to speak.
        voice: Short speaker name; must be ``"ona"`` or ``"pau"``.

    Returns:
        Path of the generated ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        ValueError: If *voice* is not one of the supported speaker names.
    """
    if voice not in ("ona", "pau"):
        raise ValueError("Unsupported Festival voice: {0!r}".format(voice))

    txt2wave = '/usr/bin/text2wave'

    with tempfile.NamedTemporaryFile() as encoded_file, \
            tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wave_file:

        text = _normalize(text)
        # Write through the handle we already hold and flush, so text2wave
        # sees the full content when it opens the file by name.
        encoded_file.write(text.encode('ISO-8859-15', 'ignore'))
        encoded_file.flush()

        # Argument list (no shell): nothing in the command line is subject
        # to shell parsing or quoting.
        cmd = [
            txt2wave,
            '-o', wave_file.name,
            encoded_file.name,
            '-eval', '({0})'.format(festival_voices[voice]),
        ]
        # stdout is discarded rather than piped: waiting on a child whose
        # stdout is an unread pipe can block once the pipe buffer fills.
        # As before, the exit status of text2wave is not inspected.
        subprocess.run(cmd, stdout=subprocess.DEVNULL)

    return wave_file.name
|