renator committed on
Commit
2d610a5
1 Parent(s): 152904d

Complete the endpoint

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ venv
2
+ env
3
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Use Python 3.10.5 as the base image
FROM python:3.10.5-slim

# Set the working directory in the container
WORKDIR /app

# Upgrade pip, install git, MeCab and its dependencies (needed for Japanese
# text processing used by the TTS stack)
RUN apt-get update \
    && apt-get install -y git mecab libmecab-dev mecab-ipadic mecab-ipadic-utf8 \
    && pip install --upgrade pip

# Install PyTorch
# Note: Replace the next line with the correct command to install the PyTorch version compatible with your deepspeed version
RUN pip install torch
RUN pip install librosa -U

# Install other dependencies from requirements.txt
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# List installed packages for debugging
RUN pip list

# Copy the rest of your application's code
COPY . /app/

# RUN cd /tmp && mkdir cache1

# Numba needs a writable cache directory; /tmp is writable in the container
ENV NUMBA_CACHE_DIR=/tmp


# Expose the port your app runs on
EXPOSE 7860

# Download UniDic for MeCab
RUN pip install unidic \
    && python -m unidic download

# Set the environment variable for Coqui TTS (accept CPML non-interactively)
ENV COQUI_TOS_AGREED=1
# NOTE(review): numba==0.48 predates Python 3.10 support (wheels exist only
# up to 3.8), so this step will likely fail on the 3.10.5 base image — and it
# also downgrades the numba already pulled in via requirements. Confirm, and
# pin a 3.10-compatible release (e.g. >=0.55) if so.
RUN pip install numba==0.48

# Apply migrations
# NOTE(review): running migrate at build time bakes the SQLite DB into the
# image; consider moving this into the container entrypoint instead.
RUN python manage.py migrate

# Use Django's built-in server to serve the app
# NOTE(review): runserver is a development server; for production prefer
# gunicorn/uvicorn behind the exposed port.
CMD ["python", "manage.py", "runserver", "0.0.0.0:7860"]


# # Fast api
# # Use the official Python image as a parent image
# FROM python:3.10.5-slim

# # Set the working directory in the container
# WORKDIR /app

# # Copy the requirements file into the container at /app
# COPY requirements.txt .

# # Install any needed packages specified in requirements.txt
# RUN pip install -r requirements.txt

# # Copy the current directory contents into the container at /app
# COPY . .

# # Expose port 7860 to the outside world
# EXPOSE 7860

# # Define the command to run your FastAPI application using uvicorn
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Module-level setup for the Gradio XTTS demo.
# NOTE: importing this module has heavy side effects — it unzips a bundled
# ffmpeg binary, downloads the XTTS v2 checkpoint, and moves the model to GPU.
import sys
from TTS.api import TTS
import io, os, stat
import subprocess
import random
from zipfile import ZipFile
import uuid
import time
import torch
import torchaudio


# download for mecab
# os.system('python -m unidic download')

# By using XTTS you agree to CPML license https://coqui.ai/cpml
os.environ["COQUI_TOS_AGREED"] = "1"

# langid is used to detect language for longer text
# Most users expect text to be their own language, there is checkbox to disable it
import langid
import base64
import csv
from io import StringIO
import datetime
import re

import gradio as gr
from scipy.io.wavfile import write
from pydub import AudioSegment

from TTS.api import TTS
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from TTS.utils.generic_utils import get_user_data_dir

# Hugging Face token; may be None outside a configured Space.
HF_TOKEN = os.environ.get("HF_TOKEN")

from huggingface_hub import HfApi

# will use api to restart space on a unrecoverable error
api = HfApi(token=HF_TOKEN)
repo_id = "coqui/xtts"

# Use newer ffmpeg binary for Ubuntu20 to use denoising for microphone input
# (the bundled ffmpeg.zip must be present in the working directory)
print("Export newer ffmpeg binary for denoise filter")
ZipFile("ffmpeg.zip").extractall()
print("Make ffmpeg binary executable")
st = os.stat("ffmpeg")
os.chmod("ffmpeg", st.st_mode | stat.S_IEXEC)

# This will trigger downloading model
print("Downloading if not downloaded Coqui XTTS V2")
from TTS.utils.manage import ModelManager

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
ModelManager().download_model(model_name)
# The manager stores checkpoints under the user data dir with "/" -> "--".
model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
print("XTTS downloaded")

config = XttsConfig()
config.load_json(os.path.join(model_path, "config.json"))

model = Xtts.init_from_config(config)
model.load_checkpoint(
    config,
    checkpoint_path=os.path.join(model_path, "model.pth"),
    vocab_path=os.path.join(model_path, "vocab.json"),
    eval=True,
    use_deepspeed=True,
)
# NOTE(review): requires a CUDA device — this line fails on CPU-only hosts.
model.cuda()

# This is for debugging purposes only
# Flags recording the first CUDA device-side assert so the Space can be
# restarted and the offending input logged.
DEVICE_ASSERT_DETECTED = 0
DEVICE_ASSERT_PROMPT = None
DEVICE_ASSERT_LANG = None

# Languages supported by the loaded XTTS checkpoint (from its config).
supported_languages = config.languages
def predict(
    prompt,
    language,
    audio_file_pth,
    mic_file_path,
    use_mic,
    voice_cleanup,
    no_lang_auto_detect,
    agree,
):
    """Synthesise `prompt` in `language` using the reference speaker audio.

    Returns a 4-tuple for the Gradio outputs:
    (waveform video, "output.wav" path, metrics text, reference wav used).
    Every validation/early-exit path emits a gr.Warning and returns
    (None, None, None, None).
    """
    if agree == True:
        if language not in supported_languages:
            gr.Warning(
                f"Language you put {language} in is not in is not in our Supported Languages, please choose from dropdown"
            )

            return (
                None,
                None,
                None,
                None,
            )

        language_predicted = langid.classify(prompt)[
            0
        ].strip()  # strip need as there is space at end!

        # tts expects chinese as zh-cn
        if language_predicted == "zh":
            # we use zh-cn
            language_predicted = "zh-cn"

        print(f"Detected language:{language_predicted}, Chosen language:{language}")

        # After text character length 15 trigger language detection
        if len(prompt) > 15:
            # allow any language for short text as some may be common
            # If user unchecks language autodetection it will not trigger
            # You may remove this completely for own use
            if language_predicted != language and not no_lang_auto_detect:
                # Please duplicate and remove this check if you really want this
                # Or auto-detector fails to identify language (which it can on pretty short text or mixed text)
                gr.Warning(
                    f"It looks like your text isn’t the language you chose , if you’re sure the text is the same language you chose, please check disable language auto-detection checkbox"
                )

                return (
                    None,
                    None,
                    None,
                    None,
                )

        if use_mic == True:
            if mic_file_path is not None:
                speaker_wav = mic_file_path
            else:
                gr.Warning(
                    "Please record your voice with Microphone, or uncheck Use Microphone to use reference audios"
                )
                return (
                    None,
                    None,
                    None,
                    None,
                )

        else:
            speaker_wav = audio_file_pth

        # Filtering for microphone input, as it has BG noise, maybe silence in beginning and end
        # This is fast filtering not perfect

        # Apply all on demand
        lowpassfilter = denoise = trim = loudness = True

        if lowpassfilter:
            lowpass_highpass = "lowpass=8000,highpass=75,"
        else:
            lowpass_highpass = ""

        if trim:
            # better to remove silence in beginning and end for microphone
            trim_silence = "areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,areverse,silenceremove=start_periods=1:start_silence=0:start_threshold=0.02,"
        else:
            trim_silence = ""

        if voice_cleanup:
            try:
                out_filename = (
                    speaker_wav + str(uuid.uuid4()) + ".wav"
                )  # ffmpeg to know output format

                # we will use newer ffmpeg as that has afftn denoise filter
                # NOTE(review): command is built via str.split(" "), so a
                # speaker path containing spaces would break the argv list.
                shell_command = f"./ffmpeg -y -i {speaker_wav} -af {lowpass_highpass}{trim_silence} {out_filename}".split(
                    " "
                )

                command_result = subprocess.run(
                    [item for item in shell_command],
                    capture_output=False,
                    text=True,
                    check=True,
                )
                speaker_wav = out_filename
                print("Filtered microphone input")
            except subprocess.CalledProcessError:
                # There was an error - command exited with non-zero code
                print("Error: failed filtering, use original microphone input")
        else:
            speaker_wav = speaker_wav

        if len(prompt) < 2:
            gr.Warning("Please give a longer prompt text")
            return (
                None,
                None,
                None,
                None,
            )
        if len(prompt) > 200:
            gr.Warning(
                "Text length limited to 200 characters for this demo, please try shorter text. You can clone this space and edit code for your own usage"
            )
            return (
                None,
                None,
                None,
                None,
            )
        global DEVICE_ASSERT_DETECTED
        if DEVICE_ASSERT_DETECTED:
            global DEVICE_ASSERT_PROMPT
            global DEVICE_ASSERT_LANG
            # It will likely never come here as we restart space on first unrecoverable error now
            print(
                f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}"
            )

            # HF Space specific.. This error is unrecoverable need to restart space
            space = api.get_space_runtime(repo_id=repo_id)
            if space.stage!="BUILDING":
                api.restart_space(repo_id=repo_id)
            else:
                print("TRIED TO RESTART but space is building")

        try:
            metrics_text = ""
            t_latent = time.time()

            # note diffusion_conditioning not used on hifigan (default mode), it will be empty but need to pass it to model.inference
            try:
                (
                    gpt_cond_latent,
                    speaker_embedding,
                ) = model.get_conditioning_latents(audio_path=speaker_wav, gpt_cond_len=30, gpt_cond_chunk_len=4, max_ref_length=60)
            except Exception as e:
                print("Speaker encoding error", str(e))
                gr.Warning(
                    "It appears something wrong with reference, did you unmute your microphone?"
                )
                return (
                    None,
                    None,
                    None,
                    None,
                )

            latent_calculation_time = time.time() - t_latent
            # metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"

            # temporary comma fix: double sentence-final punctuation after
            # non-ASCII/word chars to improve XTTS pausing
            prompt= re.sub("([^\x00-\x7F]|\w)(\.|\。|\?)",r"\1 \2\2",prompt)

            wav_chunks = []
            ## Direct mode

            print("I: Generating new audio...")
            t0 = time.time()
            out = model.inference(
                prompt,
                language,
                gpt_cond_latent,
                speaker_embedding,
                repetition_penalty=5.0,
                temperature=0.75,
            )
            inference_time = time.time() - t0
            print(f"I: Time to generate audio: {round(inference_time*1000)} milliseconds")
            metrics_text+=f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
            # RTF relative to the model's 24 kHz output sample rate.
            real_time_factor= (time.time() - t0) / out['wav'].shape[-1] * 24000
            print(f"Real-time factor (RTF): {real_time_factor}")
            metrics_text+=f"Real-time factor (RTF): {real_time_factor:.2f}\n"
            torchaudio.save("output.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)


            """
            print("I: Generating new audio in streaming mode...")
            t0 = time.time()
            chunks = model.inference_stream(
                prompt,
                language,
                gpt_cond_latent,
                speaker_embedding,
                repetition_penalty=7.0,
                temperature=0.85,
            )

            first_chunk = True
            for i, chunk in enumerate(chunks):
                if first_chunk:
                    first_chunk_time = time.time() - t0
                    metrics_text += f"Latency to first audio chunk: {round(first_chunk_time*1000)} milliseconds\n"
                    first_chunk = False
                wav_chunks.append(chunk)
                print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
            inference_time = time.time() - t0
            print(
                f"I: Time to generate audio: {round(inference_time*1000)} milliseconds"
            )
            #metrics_text += (
            #    f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
            #)

            wav = torch.cat(wav_chunks, dim=0)
            print(wav.shape)
            real_time_factor = (time.time() - t0) / wav.shape[0] * 24000
            print(f"Real-time factor (RTF): {real_time_factor}")
            metrics_text += f"Real-time factor (RTF): {real_time_factor:.2f}\n"

            torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
            """

        except RuntimeError as e:
            if "device-side assert" in str(e):
                # cannot do anything on cuda device side error, need tor estart
                print(
                    f"Exit due to: Unrecoverable exception caused by language:{language} prompt:{prompt}",
                    flush=True,
                )
                gr.Warning("Unhandled Exception encounter, please retry in a minute")
                print("Cuda device-assert Runtime encountered need restart")
                if not DEVICE_ASSERT_DETECTED:
                    DEVICE_ASSERT_DETECTED = 1
                    DEVICE_ASSERT_PROMPT = prompt
                    DEVICE_ASSERT_LANG = language

                # just before restarting save what caused the issue so we can handle it in future
                # Uploading Error data only happens for unrecovarable error
                error_time = datetime.datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
                error_data = [
                    error_time,
                    prompt,
                    language,
                    audio_file_pth,
                    mic_file_path,
                    use_mic,
                    voice_cleanup,
                    no_lang_auto_detect,
                    agree,
                ]
                error_data = [str(e) if type(e) != str else e for e in error_data]
                print(error_data)
                print(speaker_wav)
                write_io = StringIO()
                csv.writer(write_io).writerows([error_data])
                csv_upload = write_io.getvalue().encode()

                filename = error_time + "_" + str(uuid.uuid4()) + ".csv"
                print("Writing error csv")
                error_api = HfApi()
                error_api.upload_file(
                    path_or_fileobj=csv_upload,
                    path_in_repo=filename,
                    repo_id="coqui/xtts-flagged-dataset",
                    repo_type="dataset",
                )

                # speaker_wav
                print("Writing error reference audio")
                speaker_filename = (
                    error_time + "_reference_" + str(uuid.uuid4()) + ".wav"
                )
                error_api = HfApi()
                error_api.upload_file(
                    path_or_fileobj=speaker_wav,
                    path_in_repo=speaker_filename,
                    repo_id="coqui/xtts-flagged-dataset",
                    repo_type="dataset",
                )

                # HF Space specific.. This error is unrecoverable need to restart space
                space = api.get_space_runtime(repo_id=repo_id)
                if space.stage!="BUILDING":
                    api.restart_space(repo_id=repo_id)
                else:
                    print("TRIED TO RESTART but space is building")

            else:
                if "Failed to decode" in str(e):
                    print("Speaker encoding error", str(e))
                    gr.Warning(
                        "It appears something wrong with reference, did you unmute your microphone?"
                    )
                else:
                    print("RuntimeError: non device-side assert error:", str(e))
                    gr.Warning("Something unexpected happened please retry again.")
                return (
                    None,
                    None,
                    None,
                    None,
                )
        return (
            gr.make_waveform(
                audio="output.wav",
            ),
            "output.wav",
            metrics_text,
            speaker_wav,
        )
    else:
        gr.Warning("Please accept the Terms & Condition!")
        return (
            None,
            None,
            None,
            None,
        )
411
+
412
+
# Demo example rows passed to gr.Examples below — one value per predict()
# input, in the same order as the `inputs=` list.
# FIX: the original code passed an undefined name `examples` to gr.Examples,
# which raises NameError as soon as the module is imported.
examples = [
    [
        "Hi there, I'm your new voice clone. Try your best to upload quality audio.",
        "en",
        "examples/female.wav",
        None,   # no microphone recording
        False,  # do not use microphone
        False,  # no reference cleanup
        False,  # keep language auto-detection enabled
        True,   # agree to CPML
    ],
]

# Build the Gradio UI: inputs on the left, synthesis results on the right.
with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        with gr.Column():
            input_text_gr = gr.Textbox(
                label="Text Prompt",
                info="One or two sentences at a time is better. Up to 200 text characters.",
                value="Hi there, I'm your new voice clone. Try your best to upload quality audio.",
            )
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "tr",
                    "ru",
                    "nl",
                    "cs",
                    "ar",
                    "zh-cn",
                    "ja",
                    "ko",
                    "hu",
                    "hi"
                ],
                max_choices=1,
                value="en",
            )
            ref_gr = gr.Audio(
                label="Reference Audio",
                info="Click on the ✎ button to upload your own target speaker audio",
                type="filepath",
                value="examples/female.wav",
            )
            mic_gr = gr.Audio(
                source="microphone",
                type="filepath",
                info="Use your microphone to record audio",
                label="Use Microphone for Reference",
            )
            use_mic_gr = gr.Checkbox(
                label="Use Microphone",
                value=False,
                info="Notice: Microphone input may not work properly under traffic",
            )
            clean_ref_gr = gr.Checkbox(
                label="Cleanup Reference Voice",
                value=False,
                info="This check can improve output if your microphone or reference voice is noisy",
            )
            auto_det_lang_gr = gr.Checkbox(
                label="Do not use language auto-detect",
                value=False,
                info="Check to disable language auto-detection",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )

            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)


        with gr.Column():
            video_gr = gr.Video(label="Waveform Visual")
            audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
            out_text_gr = gr.Text(label="Metrics")
            ref_audio_gr = gr.Audio(label="Reference Audio Used")

    with gr.Row():
        gr.Examples(examples,
                    label="Examples",
                    inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
                    outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
                    fn=predict,
                    cache_examples=False,)

    tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr])

# Queue requests so concurrent users are serialised against the single model.
demo.queue()
demo.launch(debug=True, show_api=True)
config/__init__.py ADDED
File without changes
config/asgi.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
ASGI config for config project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

# setdefault (not assignment) so an externally provided settings module wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')

# Module-level ASGI entry point used by the ASGI server.
application = get_asgi_application()
config/settings.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Django settings for config project.

Generated by 'django-admin startproject' using Django 5.0.1.

For more information on this file, see
https://docs.djangoproject.com/en/5.0/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/5.0/ref/settings/
"""

import os
import pprint
# NOTE(review): dumping the full environment at import time will print any
# secrets (e.g. HF_TOKEN) into the logs — remove before production use.
pprint.pprint(os.environ)

# Give numba a writable cache dir (containers may have a read-only app dir).
os.environ[ 'NUMBA_CACHE_DIR' ] = '/tmp/'

# NOTE(review): debug probe of a private numba API (`numba.caching` exists
# only in old numba releases); remove once the cache-dir issue is resolved.
from numba.caching import _UserProvidedCacheLocator
print("@@@@@@@@@@@@@",_UserProvidedCacheLocator(lambda x:x, 'string').get_cache_path())



from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-0%9gzn*t$0ca^@v@*-!cmjw_&*ig&8jp_26b=m-&9!$fc3wdy$'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

# NOTE(review): wildcard host is convenient for a HF Space demo but unsafe
# for a public deployment.
ALLOWED_HOSTS = ["*"]


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'rest_framework',
    'drf_yasg',
    'texttovoice'
]

# Allow the app to be embedded in an iframe on huggingface.co (Spaces UI).
X_FRAME_OPTIONS = 'ALLOW-FROM https://huggingface.co/'

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'config.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]


WSGI_APPLICATION = 'config.wsgi.application'


# Database
# https://docs.djangoproject.com/en/5.0/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': BASE_DIR / 'db.sqlite3',
    }
}


# Password validation
# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/5.0/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/5.0/howto/static-files/

STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
config/urls.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""URL configuration: admin, the speech-generation endpoint and Swagger docs."""
from django.contrib import admin
from django.urls import path, re_path, include
from rest_framework import permissions
from drf_yasg.views import get_schema_view
from drf_yasg import openapi

# Moved to the top import group (PEP 8); it previously sat between the
# schema view and urlpatterns.
from texttovoice.views import TextToSpeechCreateView

# drf-yasg schema view backing the Swagger UI and the raw schema endpoints.
schema_view = get_schema_view(
    openapi.Info(
        title="Your API",
        default_version='v1',
        description="Your API Description",
        terms_of_service="https://www.yourwebsite.com/terms/",
        contact=openapi.Contact(email="[email protected]"),
        license=openapi.License(name="Your License"),
    ),
    public=True,
    permission_classes=(permissions.AllowAny,),
)


urlpatterns = [
    path('admin/', admin.site.urls),
    # POST text + reference audio -> synthesised speech file.
    path('generate-speech/', TextToSpeechCreateView.as_view(), name='generate-speech-create'),
    # Interactive API docs.
    path('swagger/', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
    # Raw schema as swagger.json / swagger.yaml.
    re_path(r'^swagger(?P<format>\.json|\.yaml)$', schema_view.without_ui(cache_timeout=0), name='schema-json'),

]
config/wsgi.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
WSGI config for config project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# setdefault (not assignment) so an externally provided settings module wins.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')

# Module-level WSGI entry point used by the WSGI server.
application = get_wsgi_application()
db.sqlite3 ADDED
Binary file (131 kB). View file
 
manage.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks."""
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings')
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        message = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        raise ImportError(message) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TTS
2
+ pydantic==1.10.13
3
+ python-multipart==0.0.6
4
+ typing-extensions>=4.8.0
5
+ cutlet
6
+ mecab-python3==1.0.6
7
+ unidic-lite==1.0.8
8
+ unidic==1.1.0
9
+ langid
10
+ deepspeed
11
+ pydub
12
+ Django
13
+ djangorestframework
14
+ drf-yasg
15
+
16
+ # fastapi==0.70.0
17
+ # uvicorn==0.15.0
texttovoice/__init__.py ADDED
File without changes
texttovoice/admin.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.contrib import admin
2
+
3
+ # Register your models here.
texttovoice/apps.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
from django.apps import AppConfig


class TexttovoiceConfig(AppConfig):
    """App configuration for the text-to-voice (speech synthesis) app."""
    # Use 64-bit auto-incrementing primary keys for models in this app.
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'texttovoice'
texttovoice/migrations/__init__.py ADDED
File without changes
texttovoice/models.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.db import models
2
+
3
+ # Create your models here.
texttovoice/serializers.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
from rest_framework import serializers

class TextToSpeechSerializer(serializers.Serializer):
    """Validates the payload for the speech-generation endpoint."""
    # Text to synthesise.
    text = serializers.CharField()
    # Reference speaker audio upload used for voice cloning.
    speaker_wav = serializers.FileField()
    # XTTS language code; defaults to English.
    language = serializers.CharField(default="en")
texttovoice/tests.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from django.test import TestCase
2
+
3
+ # Create your tests here.
texttovoice/views.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import uuid
from django.http import FileResponse
from rest_framework import status
from rest_framework.response import Response
from rest_framework.generics import CreateAPIView
from TTS.api import TTS
from .serializers import TextToSpeechSerializer

# Lazily-initialised, process-wide model instance.
# FIX: the original constructed a new TTS model (checkpoint load) inside
# every request, which is extremely slow; cache it once per process.
_TTS_MODEL = None


def _get_tts():
    """Return the cached XTTS model, loading it on first use."""
    global _TTS_MODEL
    if _TTS_MODEL is None:
        _TTS_MODEL = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
    return _TTS_MODEL


class TextToSpeechCreateView(CreateAPIView):
    """POST endpoint: accepts text, a reference speaker WAV and a language
    code, and returns the synthesised speech as an ``audio/wav`` attachment.
    """

    serializer_class = TextToSpeechSerializer

    def create(self, request, *args, **kwargs):
        serializer = self.get_serializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        text = serializer.validated_data.get("text")
        speaker_wav = serializer.validated_data.get("speaker_wav")
        language = serializer.validated_data.get("language")
        output_filename = f"output_{uuid.uuid4()}.wav"

        # FIX: use a random server-side name for the uploaded reference audio
        # instead of the client-supplied filename — avoids collisions between
        # concurrent requests and path traversal via a crafted name.
        speaker_file_path = os.path.join("/tmp", f"speaker_{uuid.uuid4()}.wav")
        try:
            # Persist the upload so TTS can read it from disk.
            with open(speaker_file_path, "wb") as destination:
                for chunk in speaker_wav.chunks():
                    destination.write(chunk)

            # Generate speech using tts.tts_to_file
            tts = _get_tts()
            tts.tts_to_file(
                text=text,
                file_path=output_filename,
                speaker_wav=speaker_file_path,
                language=language,
            )

            def file_iterator(file_name):
                """Stream the output file, then delete it once fully sent."""
                with open(file_name, 'rb') as f:
                    yield from f
                try:
                    os.remove(file_name)
                except OSError:
                    # Best-effort cleanup; nothing useful to do on failure.
                    pass

            # Use the iterator so the output file is removed after streaming.
            return FileResponse(
                file_iterator(output_filename),
                as_attachment=True,
                content_type='audio/wav',
            )

        except Exception as e:
            return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
        finally:
            # FIX: the original never deleted the uploaded reference audio;
            # remove it whether synthesis succeeded or failed.
            try:
                os.remove(speaker_file_path)
            except OSError:
                pass