Pranjal12345 committed on
Commit
964c670
·
1 Parent(s): 368344c

Upload 5 files

Browse files
Files changed (5) hide show
  1. app.py +258 -0
  2. examples/.DS_Store +0 -0
  3. examples/female.wav +3 -0
  4. examples/male.wav +3 -0
  5. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ # By using XTTS you agree to CPML license https://coqui.ai/cpml
4
+ os.environ["COQUI_TOS_AGREED"] = "1"
5
+
6
+ import gradio as gr
7
+ from TTS.api import TTS
8
+
9
# Ask the TTS API which models exist, log the list, and load the first entry.
model_names = TTS().list_models()
m = model_names[0]
print(model_names)

# The model is created without GPU support and pinned to the CPU
# (no GPU or AMD). On a CUDA-only host, use tts.to("cuda") instead.
tts = TTS(m, gpu=False)
tts.to("cpu")
15
+
16
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesise ``prompt`` to ``output.wav`` using a reference voice.

    Each failed input check emits a ``gr.Warning`` and returns
    ``(None, None)`` so the UI shows the message instead of an output.

    Args:
        prompt: Text to speak; must be 2 to 10000 characters long.
        language: Language code selected in the UI (for example ``"en"``).
        audio_file_pth: Path of the reference audio file.
        mic_file_path: Path of the microphone recording, or ``None``.
        use_mic: When true, ``mic_file_path`` is used as the reference
            voice instead of ``audio_file_pth``.
        agree: Whether the terms checkbox is ticked.

    Returns:
        A 2-tuple: the result of ``gr.make_waveform`` for ``output.wav``
        and the string ``"output.wav"``; or ``(None, None)`` when a check
        fails.

    Raises:
        RuntimeError: Re-raised from ``tts.tts_to_file`` unless the message
            contains ``"device-assert"``, in which case ``sys.exit`` is
            called.
    """
    no_output = (None, None)

    if not agree:
        gr.Warning("Please accept the Terms & Condition!")
        return no_output

    if use_mic:
        if mic_file_path is None:
            gr.Warning("Please record your voice with Microphone, or uncheck Use Microphone to use reference audios")
            return no_output
        speaker_wav = mic_file_path
    else:
        speaker_wav = audio_file_pth

    # Guard against a None prompt so the length checks below cannot raise.
    prompt = prompt or ""
    if len(prompt) < 2:
        gr.Warning("Please give a longer prompt text")
        return no_output
    if len(prompt) > 10000:
        gr.Warning("Text length limited to 10000 characters for this demo, please try shorter text")
        return no_output

    try:
        # Model names containing "your" get "fr-fr" instead of "fr".
        if language == "fr" and "your" in m:
            language = "fr-fr"
        # Model names containing "/fr/" are called without a language.
        # NOTE(review): assumed to apply regardless of the selected
        # language -- confirm against the intended nesting.
        if "/fr/" in m:
            language = None
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" not in str(e):
            # Bare raise keeps the original traceback intact.
            raise
        # Nothing can be done about a CUDA device-side error; a restart is
        # needed.
        gr.Warning("Unhandled Exception encounter, please retry in a minute")
        print("Cuda device-assert Runtime encountered need restart")
        sys.exit("Exit due to cuda device-assert")

    return (
        gr.make_waveform(audio="output.wav"),
        "output.wav",
    )
76
+
77
+
78
# Heading shown at the top of the demo page.
title = "XTTS Glz's remake (Fonctional Text-2-Speech)"

# HTML blurb passed to the interface as its description.
description = """
<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 3-second audio clip.
<br/>
XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
<br/>
This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
<br/>
Leave a star on the Github <a href="https://github.com/coqui-ai/TTS">TTS</a>, where our open-source inference and training code lives.
<br/>
<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
<br/>
<a href="https://huggingface.co/spaces/coqui/xtts?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""

# HTML footer passed to the interface as its article.
article = """
<div style='margin:20px auto;'>
<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
</div>
"""

_FEMALE_WAV = "examples/female.wav"
_MALE_WAV = "examples/male.wav"

# (prompt text, language code, reference audio) for every example row.
_EXAMPLE_ROWS = [
    (
        "Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
        "en",
        _FEMALE_WAV,
    ),
    (
        "Je suis un lycéen français de 17 ans, passioner par la Cyber-Sécuritée et les models d'IA.",
        "fr",
        _MALE_WAV,
    ),
    ("Als ich sechs war, sah ich einmal ein wunderbares Bild", "de", _FEMALE_WAV),
    ("Cuando tenía seis años, vi una vez una imagen magnífica", "es", _MALE_WAV),
    ("Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica", "pt", _FEMALE_WAV),
    ("Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek", "pl", _MALE_WAV),
    ("Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno", "it", _FEMALE_WAV),
    ("Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm", "tr", _FEMALE_WAV),
    ("Когда мне было шесть лет, я увидел однажды удивительную картинку", "ru", _FEMALE_WAV),
    ("Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat", "nl", _MALE_WAV),
    ("Když mi bylo šest let, viděl jsem jednou nádherný obrázek", "cs", _FEMALE_WAV),
    ("当我还只有六岁的时候, 看到了一副精彩的插画", "zh-cn", _FEMALE_WAV),
]

# Every row ends with the same three values: no microphone recording,
# "use microphone" unchecked, and the agreement box checked.
examples = [
    [text, lang, wav, None, False, True] for text, lang, wav in _EXAMPLE_ROWS
]
199
+
200
+
201
+
202
# Input widgets, in the positional order of predict()'s parameters:
# prompt, language, audio_file_pth, mic_file_path, use_mic, agree.
input_components = [
    gr.Textbox(
        label="Text Prompt",
        info="One or two sentences at a time is better",
        value="Hello, World !, here is an example of light voice cloning. Try to upload your best audio samples quality",
    ),
    gr.Dropdown(
        label="Language",
        info="Select an output language for the synthesised speech",
        choices=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn"],
        max_choices=1,
        value="en",
    ),
    gr.Audio(
        label="Reference Audio",
        info="Click on the ✎ button to upload your own target speaker audio",
        type="filepath",
        value="examples/female.wav",
    ),
    gr.Audio(
        source="microphone",
        type="filepath",
        info="Use your microphone to record audio",
        label="Use Microphone for Reference",
    ),
    gr.Checkbox(
        label="Check to use Microphone as Reference",
        value=False,
        info="Notice: Microphone input may not work properly under traffic",
    ),
    gr.Checkbox(
        label="Agree",
        value=True,
        info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
    ),
]

# Output widgets matching predict()'s two return values.
output_components = [
    gr.Video(label="Waveform Visual"),
    gr.Audio(label="Synthesised Audio"),
]

demo = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Enable the request queue, then start the server in debug mode.
demo.queue().launch(debug=True)
examples/.DS_Store ADDED
Binary file (6.15 kB). View file
 
examples/female.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
3
+ size 1002030
examples/male.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d801f5e6af56675e7ff642351c8a2f6d8d6ccfd9c30d9f80632cb5f2ef2b00b
3
+ size 131
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ TTS@ git+https://github.com/coqui-ai/TTS.git@dev
2
+ gradio==3.41.2