Aseem Gupta committed on
Commit
a416ccf
·
1 Parent(s): c636952
Files changed (2) hide show
  1. app.py +29 -41
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,49 +1,37 @@
1
  import gradio as gr
2
- import torch
3
- from transformers import pipeline
4
- from langdetect import detect
5
-
6
- # Load the Coqui XTTS model
7
- tts = pipeline("text-to-speech", model="coqui/XTTS-v2", device=0 if torch.cuda.is_available() else -1)
8
-
9
- # Helper function to clone voice and generate speech
10
- def clone_and_generate(audio, text_prompt, language):
11
- if audio is None or text_prompt.strip() == "":
12
- return "Please provide both audio input and text prompt.", None
13
-
14
- # Check if language is supported
15
- supported_languages = {"english": "en", "hindi": "hi"}
16
- if language not in supported_languages:
17
- return f"Language {language} not supported yet.", None
18
-
19
- # Convert text to the target language (if needed)
20
- if detect(text_prompt) != supported_languages[language]:
21
- # For now, we assume text is already in the desired language
22
- pass
23
-
24
- # Generate speech
25
- try:
26
- result = tts(text=text_prompt, speaker=audio)
27
- return "Speech generated successfully!", result["audio"]
28
- except Exception as e:
29
- return f"Error: {str(e)}", None
30
 
31
  # Gradio Interface
32
  with gr.Blocks() as demo:
33
- gr.Markdown("## 🎀 Voice Cloning & Text-to-Speech with Language Translation")
34
 
35
  with gr.Row():
36
- with gr.Column():
37
- audio_input = gr.Audio(source="microphone", type="filepath", label="πŸŽ™οΈ Record or Upload Voice")
38
- text_input = gr.Textbox(label="πŸ“ Enter Text to Generate Speech")
39
- language_input = gr.Dropdown(choices=["english", "hindi"], value="english", label="🌐 Select Language")
40
-
41
- with gr.Column():
42
- output_message = gr.Textbox(label="πŸ“’ Status")
43
- output_audio = gr.Audio(label="πŸ”Š Generated Speech")
44
-
45
- generate_button = gr.Button("πŸš€ Generate Speech")
46
- generate_button.click(clone_and_generate, inputs=[audio_input, text_input, language_input], outputs=[output_message, output_audio])
 
47
 
48
- # Launch the app
49
  demo.launch()
 
import gradio as gr
import torch
from TTS.api import TTS

# Load the XTTS-v2 model once at startup.
# Use gpu=torch.cuda.is_available() instead of a hard-coded gpu=True so the
# app also starts on CPU-only machines (gpu=True raises when CUDA is absent).
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    gpu=torch.cuda.is_available(),
)


def generate_voice(text, speaker_audio, language="en"):
    """Synthesize *text* in the voice of *speaker_audio* and return a WAV path.

    Parameters
    ----------
    text : str
        Text to synthesize. Must be non-empty.
    speaker_audio : str | file-like
        Path to the reference speaker WAV (Gradio ``type="filepath"``), or a
        tempfile-like object with a ``.name`` attribute (legacy
        ``type="file"``). Both forms are accepted.
    language : str, optional
        XTTS language code (default ``"en"``). Kept as a defaulted parameter
        so existing two-argument callers keep working.

    Returns
    -------
    str
        Path of the generated audio file ("output.wav").

    Raises
    ------
    ValueError
        If *text* is empty/blank or no speaker audio was provided.
    """
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")
    if speaker_audio is None:
        raise ValueError("Please provide a reference speaker recording.")

    # Gradio may hand us either a plain filepath string or an object with a
    # .name attribute depending on the Audio component's `type` setting.
    speaker_path = getattr(speaker_audio, "name", speaker_audio)

    output_path = "output.wav"
    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_path,
        file_path=output_path,
        language=language,
    )
    return output_path


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ Voice Cloning with Coqui XTTS-v2")

    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text",
            placeholder="Type the text you want to synthesize...",
        )
        # type="filepath" (the removed type="file" raises in Gradio 4.x);
        # generate_voice accepts the plain path string this yields.
        speaker_audio_input = gr.Audio(label="Upload Speaker Audio (WAV)", type="filepath")

    output_audio = gr.Audio(label="Generated Voice", type="filepath")

    generate_button = gr.Button("Generate Voice")

    generate_button.click(
        fn=generate_voice,
        inputs=[text_input, speaker_audio_input],
        outputs=output_audio,
    )

# Launch the Gradio app
demo.launch()
requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio
2
  torch
3
  transformers
4
  langdetect
 
 
2
  torch
3
  transformers
4
  langdetect
5
+ TTS