|
import os
import tempfile
from datetime import datetime
from functools import lru_cache

import gradio as gr
import torch
from TTS.api import TTS
|
|
|
|
|
@lru_cache(maxsize=1)
def _load_model():
    """Load the XTTS v2 model once and reuse it for every later request."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


def _upload_path(upload):
    """Return the on-disk path behind an upload, or None if it has none."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    # NOTE(review): older Gradio releases appear to hand over a temp-file
    # wrapper whose ``.name`` is the uploaded file's path -- confirm against
    # the installed Gradio version.
    name = getattr(upload, "name", None)
    if isinstance(name, str) and os.path.isfile(name):
        return name
    return None


def _read_upload(upload):
    """Return an upload's content, preferring its on-disk copy."""
    path = _upload_path(upload)
    if path is None:
        # Plain in-memory file-like object: fall back to reading it directly.
        return upload.read()
    with open(path, "rb") as handle:
        return handle.read()


def tts_process(transcript_file, voice_file):
    """Synthesize the transcript in the voice of the uploaded sample.

    Args:
        transcript_file: UTF-8 text upload. Accepted as a path
            (``str``/``os.PathLike``), an object whose ``.name`` is an
            existing file, or a file-like object with ``.read()``.
        voice_file: Reference audio upload, accepted in the same forms.

    Returns:
        The name of the generated file, ``Download_<YYYYmmddHHMMSS>.wav``,
        written relative to the current working directory.

    Raises:
        ValueError: If either upload is missing or the transcript contains
            no text. Both checks run before the model is loaded.
    """
    if transcript_file is None or voice_file is None:
        raise ValueError(
            "Both a transcript file and a voice sample must be uploaded."
        )

    raw = _read_upload(transcript_file)
    # "utf-8-sig" decodes plain UTF-8 as before and also drops a leading BOM.
    text = raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw
    if not text.strip():
        raise ValueError("The transcript file contains no text.")

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_file_name = f"Download_{timestamp}.wav"

    # Use the uploaded file in place when it already exists on disk, so its
    # real extension is kept and no copy is made.
    voice_path = _upload_path(voice_file)
    scratch_path = None
    try:
        if voice_path is None:
            # In-memory upload: spill it to a uniquely named scratch file.
            fd, scratch_path = tempfile.mkstemp(
                prefix="temp_voice_", suffix=".wav"
            )
            with os.fdopen(fd, "wb") as handle:
                handle.write(voice_file.read())
            voice_path = scratch_path

        _load_model().tts_to_file(
            text=text,
            speaker_wav=voice_path,
            language="en",
            file_path=output_file_name,
        )
    finally:
        # Remove the scratch copy even when synthesis fails.
        if scratch_path is not None:
            os.remove(scratch_path)

    return output_file_name
|
|
|
|
|
# Gradio front end: two upload buttons feed tts_process, and the generated
# audio file is offered back to the user as a download.
iface = gr.Interface(
    fn=tts_process,
    inputs=[
        gr.UploadButton(
            label="Click to Upload a Transcript (.txt) File",
            file_types=["text"],
        ),
        gr.UploadButton(
            label="Click to Upload a Voice to be Cloned (.mp3) File",
            file_types=["audio"],
        ),
    ],
    outputs=gr.File(label="Download Speech Output"),
    title="TTS Voice Cloning",
    description="Upload a transcript text file and a voice file to clone the voice and generate speech.",
)


if __name__ == "__main__":
    # Start the app only when run as a script; share=True is passed to
    # launch() exactly as before.
    iface.launch(share=True)
|
|