"""Gradio front end for XTTS v2 voice cloning.

Upload a UTF-8 transcript and a reference voice recording; the app
synthesizes the transcript in the cloned voice and offers the resulting
WAV file for download.
"""

import contextlib
import functools
import os
import tempfile
from datetime import datetime

import gradio as gr
import torch
from TTS.api import TTS

MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"


@functools.lru_cache(maxsize=1)
def _load_tts():
    """Load the XTTS model once and reuse it for every request.

    Loading the model is by far the most expensive step, so it is cached
    instead of being repeated on each call to ``tts_process``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return TTS(MODEL_NAME).to(device)


def _upload_path(upload):
    """Return the on-disk path behind *upload*, or ``None`` if it has none.

    Depending on the Gradio version, a file input arrives either as a plain
    path string or as a temp-file wrapper whose ``name`` attribute holds the
    path of the uploaded data.
    """
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    name = getattr(upload, "name", None)
    if isinstance(name, str) and os.path.isfile(name):
        return name
    return None


def _read_upload_bytes(upload):
    """Return the raw bytes of an uploaded file.

    The on-disk path is preferred because the file handle supplied by
    Gradio may be empty or already positioned at end-of-file; ``read()`` is
    used only for file-like objects that have no backing path.
    """
    path = _upload_path(upload)
    if path is not None:
        with open(path, "rb") as handle:
            return handle.read()
    return upload.read()


def tts_process(transcript_file, voice_file):
    """Synthesize the uploaded transcript in the uploaded speaker's voice.

    Args:
        transcript_file: Uploaded UTF-8 text file (path or file-like object).
        voice_file: Uploaded reference recording used for voice cloning
            (path or file-like object).

    Returns:
        The name of the generated WAV file, ``Download_<timestamp>.wav``,
        written to the current working directory.

    Raises:
        ValueError: If either upload is missing or the transcript is empty.
        UnicodeDecodeError: If the transcript is not valid UTF-8.
    """
    if transcript_file is None or voice_file is None:
        raise ValueError("Both a transcript file and a voice file are required.")

    # "utf-8-sig" behaves like "utf-8" but also strips a leading BOM, which
    # text editors on Windows commonly prepend.
    text = _read_upload_bytes(transcript_file).decode("utf-8-sig")
    if not text.strip():
        raise ValueError("The transcript file is empty.")

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_file_name = f"Download_{timestamp}.wav"

    # The model takes the reference voice as a path, so stage the upload in
    # a uniquely named temp file; mkstemp avoids collisions between requests
    # that arrive within the same second.
    fd, voice_path = tempfile.mkstemp(
        prefix=f"temp_voice_{timestamp}_", suffix=".wav"
    )
    try:
        with os.fdopen(fd, "wb") as handle:
            handle.write(_read_upload_bytes(voice_file))

        _load_tts().tts_to_file(
            text=text,
            speaker_wav=voice_path,
            language="en",
            file_path=output_file_name,
        )
    finally:
        # Remove the staged voice file even when synthesis fails.
        with contextlib.suppress(FileNotFoundError):
            os.remove(voice_path)

    return output_file_name


# Gradio interface setup. gr.File replaces the deprecated gr.inputs.File /
# gr.outputs.File namespaces.
iface = gr.Interface(
    fn=tts_process,
    inputs=[
        gr.File(label="Upload Transcript Text File"),
        gr.File(label="Upload Voice File for Cloning"),
    ],
    outputs=gr.File(label="Download Speech Output"),
    title="TTS Voice Cloning",
    description=(
        "Upload a transcript text file and a voice file to clone the voice "
        "and generate speech."
    ),
)

# Execute only if run as a script
if __name__ == "__main__":
    iface.launch(share=True)