Aseem Gupta committed on
Commit
d8fe51c
·
1 Parent(s): 9581ca3
Files changed (3) hide show
  1. .gitignore +2 -0
  2. app.py +13 -13
  3. requirements.txt +2 -3
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.wav
2
+ __pycache__/
app.py CHANGED
@@ -4,38 +4,38 @@ import os
4
 
5
  os.environ["COQUI_TOS_AGREED"] = "1"
6
 
7
- # Load the XTTS-v2 model and set it to use CPU
8
- tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
9
- tts.to("cpu") # Set the model to run on CPU
10
 
11
- # Define the function for voice cloning
12
  def generate_voice(text, speaker_audio):
13
  output_path = "output.wav"
14
  tts.tts_to_file(
15
  text=text,
16
- speaker_wav=speaker_audio.name,
17
  file_path=output_path,
18
  language="en"
19
  )
20
  return output_path
21
 
22
- # Gradio Interface
23
  with gr.Blocks() as demo:
24
  gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")
25
-
26
  with gr.Row():
27
  text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
28
- speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="file")
29
-
30
  output_audio = gr.Audio(label="Generated Voice", type="filepath")
31
-
32
  generate_button = gr.Button("Generate Voice")
33
-
34
  generate_button.click(
35
  fn=generate_voice,
36
  inputs=[text_input, speaker_audio_input],
37
  outputs=output_audio
38
  )
39
 
40
- # Launch the Gradio app
41
- demo.launch()
 
4
 
5
  os.environ["COQUI_TOS_AGREED"] = "1"
6
 
7
+ # Load the XTTS-v2 model with trust_remote_code=True
8
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", trust_remote_code=True)
9
+ tts.to("cpu")
10
 
11
+ # Function to synthesize voice
12
  def generate_voice(text, speaker_audio):
13
  output_path = "output.wav"
14
  tts.tts_to_file(
15
  text=text,
16
+ speaker_wav=speaker_audio,
17
  file_path=output_path,
18
  language="en"
19
  )
20
  return output_path
21
 
22
+ # Gradio interface
23
  with gr.Blocks() as demo:
24
  gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")
25
+
26
  with gr.Row():
27
  text_input = gr.Textbox(label="Enter Text", placeholder="Type the text you want to synthesize...")
28
+ speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")
29
+
30
  output_audio = gr.Audio(label="Generated Voice", type="filepath")
31
+
32
  generate_button = gr.Button("Generate Voice")
33
+
34
  generate_button.click(
35
  fn=generate_voice,
36
  inputs=[text_input, speaker_audio_input],
37
  outputs=output_audio
38
  )
39
 
40
+ # Launch the app
41
+ demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
  gradio
2
  torch
3
- transformers
4
- langdetect
5
- TTS
 
1
  gradio
2
  torch
3
+ TTS
4
+ soundfile