Aseem Gupta committed on
Commit
c636952
·
1 Parent(s): 2f8acff
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+ from langdetect import detect
5
+
6
# Build the Coqui XTTS text-to-speech pipeline once, at import time, so every
# request reuses the same loaded model. The pipeline is placed on device index 0
# when CUDA is available and on -1 (the pipeline's CPU setting) otherwise.
# NOTE(review): confirm that the "coqui/XTTS-v2" checkpoint can actually be
# loaded through transformers.pipeline in the deployed environment.
tts = pipeline(
    "text-to-speech",
    model="coqui/XTTS-v2",
    device=0 if torch.cuda.is_available() else -1,
)
8
+
9
+ # Helper function to clone voice and generate speech
10
def clone_and_generate(audio, text_prompt, language):
    """Generate speech for ``text_prompt`` using ``audio`` as the reference voice.

    Args:
        audio: Reference voice recording, forwarded to the TTS pipeline as
            ``speaker``. ``None`` means nothing was recorded or uploaded.
        text_prompt: Text to synthesise. ``None`` and whitespace-only strings
            are treated as missing input.
        language: Key of a supported language (``"english"`` or ``"hindi"``).

    Returns:
        A ``(status_message, audio)`` tuple. ``audio`` is ``None`` whenever
        validation or synthesis fails. On success it is
        ``(sampling_rate, waveform)``, the tuple form a ``gr.Audio`` output
        component accepts.
    """
    # `text_prompt or ""` keeps a None prompt from raising AttributeError.
    if audio is None or not (text_prompt or "").strip():
        return "Please provide both audio input and text prompt.", None

    # Check if language is supported
    supported_languages = {"english": "en", "hindi": "hi"}
    if language not in supported_languages:
        return f"Language {language} not supported yet.", None

    # Translation is not implemented yet: the text is assumed to already be in
    # the selected language. Detection is kept as the hook for that future
    # step, but langdetect raises on text without usable features (digits,
    # punctuation only), and that must not abort the whole request.
    # Imported locally so the narrow exception type is in scope without
    # changing the file's import block.
    from langdetect.lang_detect_exception import LangDetectException

    try:
        needs_translation = detect(text_prompt) != supported_languages[language]
    except LangDetectException:
        needs_translation = False
    if needs_translation:
        # TODO: translate text_prompt into the target language before synthesis.
        pass

    # Generate speech. This is the request boundary, so any failure is turned
    # into a status message instead of propagating into the UI.
    try:
        # The pipeline takes the text as its positional argument.
        result = tts(text_prompt, speaker=audio)
        waveform = result["audio"]
        # The pipeline documents the waveform as (channels, samples), while
        # gr.Audio expects (samples,) or (samples, channels).
        if getattr(waveform, "ndim", 1) == 2:
            waveform = waveform.T
        return "Speech generated successfully!", (result["sampling_rate"], waveform)
    except Exception as e:
        return f"Error: {str(e)}", None
30
+
31
# Gradio Interface: voice sample, text and language on the left; status message
# and generated audio on the right; one button wires them to clone_and_generate.
with gr.Blocks() as demo:
    gr.Markdown("## 🎀 Voice Cloning & Text-to-Speech with Language Translation")

    with gr.Row():
        with gr.Column():
            # Gradio 4+ takes a `sources` list instead of the old single
            # `source` string. Both inputs are enabled because the label offers
            # recording as well as uploading.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="🎙️ Record or Upload Voice",
            )
            text_input = gr.Textbox(label="📝 Enter Text to Generate Speech")
            language_input = gr.Dropdown(
                choices=["english", "hindi"],
                value="english",
                label="🌐 Select Language",
            )

        with gr.Column():
            output_message = gr.Textbox(label="📒 Status")
            output_audio = gr.Audio(label="🔊 Generated Speech")

    generate_button = gr.Button("🚀 Generate Speech")
    generate_button.click(
        clone_and_generate,
        inputs=[audio_input, text_input, language_input],
        outputs=[output_message, output_audio],
    )

# Launch the app only when run as a script, so importing this module (for
# example from a test or another entry point) does not start a server.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ transformers
4
+ langdetect