te-ch committed on
Commit
23ce701
·
1 Parent(s): f64d86f

removed junk

Browse files
Files changed (3) hide show
  1. Dockerfile +2 -12
  2. app.py +6 -92
  3. requirements.txt +0 -2
Dockerfile CHANGED
@@ -5,9 +5,9 @@ RUN apt-get update && apt-get install -y gnupg && \
5
  echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
6
  echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
7
  apt-get update && \
8
- apt-get -y install festival festvox-ca-ona-hts festvox-ca-pau-hts lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev libatlas-base-dev gfortran
9
 
10
- RUN git clone -b ca-to-pr https://github.com/projecte-aina/espeak-ng
11
 
12
  RUN cd espeak-ng && \
13
  ./autogen.sh && \
@@ -19,7 +19,6 @@ RUN useradd -m -u 1000 user
19
 
20
  USER user
21
 
22
-
23
  ENV HOME=/home/user \
24
  PATH=/home/user/.local/bin:$PATH
25
 
@@ -31,15 +30,6 @@ COPY --chown=user models models
31
 
32
  RUN pip install -r requirements.txt
33
 
34
- RUN git clone https://github.com/jaywalnut310/vits.git && \
35
- cd vits && sed s/torch==1.6.0/torch==1.7.0/ requirements.txt > requirements.txt && pip install -r requirements.txt && cd monotonic_align && \
36
- python setup.py build_ext --inplace && cd /home/user
37
-
38
- ENV PYTHONPATH=$PYTHONPATH:/home/user/app/vits
39
-
40
- COPY --chown=user engine.py .
41
- COPY --chown=user mms.py .
42
- COPY --chown=user festival.py .
43
  COPY --chown=user app.py .
44
 
45
  RUN mkdir -p cache && chmod 777 cache
 
5
  echo "deb http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
6
  echo "deb-src http://ppa.launchpad.net/zeehio/festcat/ubuntu bionic main" >> /etc/apt/sources.list && \
7
  apt-get update && \
8
+ apt-get -y install lame git make autoconf automake libtool pkg-config gcc libsonic-dev ronn kramdown libpcaudio-dev libatlas-base-dev gfortran
9
 
10
+ RUN git clone -b ca-pr https://github.com/projecte-aina/espeak-ng
11
 
12
  RUN cd espeak-ng && \
13
  ./autogen.sh && \
 
19
 
20
  USER user
21
 
 
22
  ENV HOME=/home/user \
23
  PATH=/home/user/.local/bin:$PATH
24
 
 
30
 
31
  RUN pip install -r requirements.txt
32
 
 
 
 
 
 
 
 
 
 
33
  COPY --chown=user app.py .
34
 
35
  RUN mkdir -p cache && chmod 777 cache
app.py CHANGED
@@ -1,111 +1,30 @@
1
  import tempfile
2
  import gradio as gr
3
  import os
4
- from TTS.utils.synthesizer import Synthesizer
5
  from espeak_phonemizer import Phonemizer
6
- from engine import Piper
7
- from festival import festival_synthesize
8
- from mms import MMS
9
 
10
  MAX_TXT_LEN = 325
11
 
12
  fonemitzador = Phonemizer("ca")
13
 
14
- def carrega_bsc():
15
- model_path = os.getcwd() + "/models/bsc/best_model.pth"
16
- config_path = os.getcwd() + "/models/bsc/config.json"
17
- speakers_file_path = os.getcwd() + "/models/bsc/speakers.pth"
18
- vocoder_path = None
19
- vocoder_config_path = None
20
-
21
- synthesizer = Synthesizer(
22
- model_path, config_path, speakers_file_path, None, vocoder_path, vocoder_config_path,
23
- )
24
-
25
- return synthesizer
26
-
27
- def carrega_collectivat():
28
- model_path = os.getcwd() + "/models/collectivat/fast-speech_best_model.pth"
29
- config_path = os.getcwd() + "/models/collectivat/fast-speech_config.json"
30
- vocoder_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_model_file.pth"
31
- vocoder_config_path = os.getcwd() + "/models/collectivat/ljspeech--hifigan_v2_config.json"
32
- synthesizer = Synthesizer(
33
- model_path, config_path, None, None, vocoder_path, vocoder_config_path
34
- )
35
-
36
- return synthesizer
37
-
38
- def carrega_piper():
39
- return Piper(os.getcwd() + "/models/piper/ca-upc_ona-x-low.onnx")
40
-
41
- def carrega_mms():
42
- return MMS(os.getcwd() + "/models/mms")
43
-
44
-
45
- model_bsc = carrega_bsc()
46
- SPEAKERS = model_bsc.tts_model.speaker_manager.speaker_names
47
-
48
- model_collectivat = carrega_collectivat()
49
-
50
- model_piper = carrega_piper()
51
-
52
- model_mms = carrega_mms()
53
-
54
  request_count = 0
55
 
56
- def tts(text, festival_voice, speaker_idx):
57
  if len(text) > MAX_TXT_LEN:
58
  text = text[:MAX_TXT_LEN]
59
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
60
  print(text)
61
 
62
  # synthesize
63
- wav_bsc = model_bsc.tts(text, speaker_idx)
64
- wav_coll = model_collectivat.tts(text)
65
- wav_piper = model_piper.synthesize(text)
66
-
67
- fp_bsc = ""
68
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
69
- model_bsc.save_wav(wav_bsc, fp)
70
- fp_bsc = fp.name
71
-
72
- fp_coll = ""
73
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
74
- model_collectivat.save_wav(wav_coll, fp)
75
- fp_coll = fp.name
76
-
77
- fp_piper = ""
78
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
79
- fp.write(wav_piper)
80
- fp_piper = fp.name
81
-
82
- fp_mms = ""
83
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
84
- model_mms.synthesize(fp.name, text)
85
- fp_mms = fp.name
86
 
87
  fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
88
 
89
- fp_festival = festival_synthesize(text, festival_voice)
90
-
91
  global request_count
92
  request_count += 1
93
  print(f"Requests: {request_count}")
94
- return fonemes, fp_festival, fp_bsc, fp_coll, fp_piper, fp_mms
95
-
96
 
97
  description="""
98
- Amb aquesta aplicació podeu sintetitzar text a veu amb els últims models neuronals lliures pel català i amb el motor Festival.
99
-
100
- 1. Model multi-parlant VITS entrenat pel BSC (Projecte Aina) [enllaç](https://huggingface.co/projecte-aina/tts-ca-coqui-vits-multispeaker)
101
- 2. Model Fastspeech entrenat per Col·lectivat [enllaç](https://github.com/CollectivaT-dev/TTS-API)
102
- 3. Model VITS entrenat per Piper/Home Assistant [enllaç](https://github.com/rhasspy/piper)
103
- 3. Model VITS entrenat per Meta (llicència CC-BY-NC) [enllaç](https://github.com/facebookresearch/fairseq/tree/main/examples/mms)
104
-
105
- El primer model ha estat entrenat amb totes les veus de FestCAT, els talls de Common Voice 8 i un altre corpus pel que conté moltes veus de qualitat variable. La veu d'Ona està seleccionada per defecte per la comparativa però podeu provar les altres.
106
- Els models 2 i 3 han estat entrenats amb la veu d'Ona de FestCAT.
107
- El model 4, anomenat MMS, de Meta (Facebook) ha estat entrenat a partir de dades d'un [audiollibre](http://live.bible.is/bible/CATBSS/LUK/1) de la Bíblia
108
-
109
  Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
110
  https://github.com/projecte-aina/espeak-ng
111
 
@@ -114,23 +33,18 @@ NOTA: El model de col·lectivat treballa amb grafemes pel que no fa servir espea
114
  article= ""
115
 
116
  iface = gr.Interface(
117
- fn=tts,
118
  inputs=[
119
  gr.Textbox(
120
  label="Text",
121
  value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
122
  ),
123
- gr.Dropdown(label="Parlant del motor Festival", choices=["ona", "pau"], value="ona"),
124
- gr.Dropdown(label="Parlant del model VITS multi-parlant del BSC", choices=SPEAKERS, value="ona")
125
  ],
126
  outputs=[
127
- gr.Markdown(label="Fonemes"),
128
- gr.Audio(label="Festival",type="filepath"),
129
- gr.Audio(label="BSC VITS",type="filepath"),
130
- gr.Audio(label="Collectivat Fastspeech",type="filepath"),
131
- gr.Audio(label="Piper VITS",type="filepath"),
132
- gr.Audio(label="Meta MMS VITS",type="filepath")
133
  ],
 
134
  title="Comparativa de síntesi lliure en català️",
135
  description=description,
136
  article=article,
 
1
  import tempfile
2
  import gradio as gr
3
  import os
 
4
  from espeak_phonemizer import Phonemizer
 
 
 
5
 
6
  MAX_TXT_LEN = 325
7
 
8
  fonemitzador = Phonemizer("ca")
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  request_count = 0
11
 
12
+ def phonemiser(text):
13
  if len(text) > MAX_TXT_LEN:
14
  text = text[:MAX_TXT_LEN]
15
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
16
  print(text)
17
 
18
  # synthesize
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  fonemes = fonemitzador.phonemize(text, keep_clause_breakers=True)
21
 
 
 
22
  global request_count
23
  request_count += 1
24
  print(f"Requests: {request_count}")
25
+ return fonemes
 
26
 
27
  description="""
 
 
 
 
 
 
 
 
 
 
 
28
  Aquesta aplicació fa servir l'últim estat de l'espeak millorat per Carme Armentano del BSC
29
  https://github.com/projecte-aina/espeak-ng
30
 
 
33
  article= ""
34
 
35
  iface = gr.Interface(
 
36
  inputs=[
37
  gr.Textbox(
38
  label="Text",
39
  value="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
40
  ),
41
+ gr.Dropdown(label="dialect", choices="")
42
+
43
  ],
44
  outputs=[
45
+ gr.Markdown(label="Fonemes")
 
 
 
 
 
46
  ],
47
+
48
  title="Comparativa de síntesi lliure en català️",
49
  description=description,
50
  article=article,
requirements.txt CHANGED
@@ -1,4 +1,2 @@
1
- git+https://github.com/coqui-ai/TTS@dev#egg=TTS
2
  gradio
3
  espeak-phonemizer>=1.1.0,<2
4
- onnxruntime~=1.11.0
 
 
1
  gradio
2
  espeak-phonemizer>=1.1.0,<2