File size: 1,782 Bytes
38282c7 822ab4f 38282c7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import gradio as gr
import torch
from TTS.api import TTS
import os
from datetime import datetime
# Function to process text and voice input, then generate speech
_TTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
# Already-built models keyed by device string, so a model is constructed only
# once per process instead of once per request.
_TTS_MODELS = {}


def _get_tts(device):
    """Return the TTS model for *device*, building it on first use."""
    if device not in _TTS_MODELS:
        _TTS_MODELS[device] = TTS(_TTS_MODEL_NAME).to(device)
    return _TTS_MODELS[device]


def _read_upload(upload):
    """Return the raw content of an uploaded file.

    Accepts a filesystem path, an object whose ``name`` attribute is the path
    of an existing file, or any other object with a ``read()`` method. When a
    path is available the file is read from disk in binary mode, so the result
    does not depend on the current position of an already-open handle.

    NOTE(review): which of these shapes Gradio passes to the callback depends
    on the installed Gradio version and the File component's ``type`` setting;
    confirm against the deployed version.
    """
    if isinstance(upload, (str, os.PathLike)):
        path = upload
    else:
        name = getattr(upload, "name", None)
        is_real_file = isinstance(name, (str, os.PathLike)) and os.path.isfile(name)
        path = name if is_real_file else None

    if path is None:
        # Plain file-like object (no backing path): fall back to read().
        return upload.read()
    with open(path, "rb") as fh:
        return fh.read()


def tts_process(transcript_file, voice_file):
    """Synthesize the transcript as speech using the uploaded voice sample.

    Args:
        transcript_file: Uploaded UTF-8 text file (path or file-like object)
            containing the text to speak.
        voice_file: Uploaded audio file (path or file-like object) passed to
            the model as ``speaker_wav``.

    Returns:
        Name of the generated file, ``Download_<YYYYmmddHHMMSS>.wav``, written
        relative to the current working directory.

    Raises:
        UnicodeDecodeError: If the transcript is not valid UTF-8.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tts = _get_tts(device)

    # Read transcript text from uploaded file
    text = _read_upload(transcript_file).decode("utf-8")

    # Generate output file name with timestamp
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_file_name = f"Download_{timestamp}.wav"

    # The model is given the speaker sample as a path, so the upload is copied
    # to a temporary file. The transcript is passed as a string and therefore
    # needs no temporary file of its own.
    voice_path = f"temp_voice_{timestamp}.wav"
    try:
        with open(voice_path, "wb") as f:
            f.write(_read_upload(voice_file))
        # Generate speech and save to a file (language is fixed to English)
        tts.tts_to_file(
            text=text,
            speaker_wav=voice_path,
            language="en",
            file_path=output_file_name,
        )
    finally:
        # Remove the temporary copy even when reading or synthesis fails.
        if os.path.exists(voice_path):
            os.remove(voice_path)

    return output_file_name
# Gradio interface setup
# Components are created with gr.File: the legacy gr.inputs / gr.outputs
# namespaces are deprecated in Gradio 3 and no longer exist in Gradio 4.
# NOTE(review): depending on the Gradio version, a File input hands the
# callback either a temp-file object or a path string; confirm tts_process
# handles what the installed version passes.
iface = gr.Interface(
    fn=tts_process,
    inputs=[
        gr.File(label="Upload Transcript Text File"),
        gr.File(label="Upload Voice File for Cloning"),
    ],
    outputs=gr.File(label="Download Speech Output"),
    title="TTS Voice Cloning",
    description="Upload a transcript text file and a voice file to clone the voice and generate speech.",
)
# Execute only if run as a script. Python sets __name__ to "__main__" for the
# file being executed; the previous comparison against "__app__" never
# matched, so the interface was never launched.
if __name__ == "__main__":
    iface.launch(share=True)
|