import gradio as gr
import torch
from TTS.api import TTS  # Coqui TTS package (pip install TTS)
from langdetect import detect

# Load the Coqui XTTS-v2 model. XTTS is served by the Coqui TTS library;
# it is not a transformers-pipeline model.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Helper function to clone a voice and generate speech
def clone_and_generate(audio, text_prompt, language):
    if audio is None or text_prompt.strip() == "":
        return "Please provide both an audio input and a text prompt.", None

    # Check if the language is supported
    supported_languages = {"english": "en", "hindi": "hi"}
    if language not in supported_languages:
        return f"Language {language} is not supported yet.", None
    lang_code = supported_languages[language]

    # langdetect returns ISO 639-1 codes (e.g. "en", "hi"). If the prompt's
    # detected language differs from the target, proceed anyway: translation
    # is out of scope here, so we assume the text is already in the desired
    # language. langdetect can also raise on very short or ambiguous prompts.
    try:
        if detect(text_prompt) != lang_code:
            pass
    except Exception:
        pass

    # Generate speech, cloning the voice from the reference recording
    try:
        output_path = "output.wav"
        tts.tts_to_file(
            text=text_prompt,
            speaker_wav=audio,  # path to the reference voice recording
            language=lang_code,
            file_path=output_path,
        )
        return "Speech generated successfully!", output_path
    except Exception as e:
        return f"Error: {str(e)}", None

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 Voice Cloning & Text-to-Speech with Language Translation")
    with gr.Row():
        with gr.Column():
            # Gradio 4.x takes `sources` (a list); Gradio 3.x used `source`
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="🎙️ Record or Upload Voice")
            text_input = gr.Textbox(label="📝 Enter Text to Generate Speech")
            language_input = gr.Dropdown(choices=["english", "hindi"], value="english", label="🌐 Select Language")
        with gr.Column():
            output_message = gr.Textbox(label="📢 Status")
            output_audio = gr.Audio(label="🔊 Generated Speech")
    generate_button = gr.Button("🚀 Generate Speech")
    generate_button.click(clone_and_generate, inputs=[audio_input, text_input, language_input], outputs=[output_message, output_audio])

# Launch the app (blocks until the server is stopped)
demo.launch()
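
# --- Optional headless smoke test (a sketch; not wired into the app) ---
# To verify voice cloning without the UI, clone_and_generate can be called
# directly. "reference.wav" is a hypothetical sample clip, not shipped with
# this script; point it at any short, clean recording of the voice to clone.
#
#   status, wav_path = clone_and_generate("reference.wav", "Hello from XTTS!", "english")
#   print(status, wav_path)  # expect "Speech generated successfully!" and "output.wav"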