File size: 1,887 Bytes
38282c7
 
84a73f0
38282c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84a73f0
 
 
38282c7
 
 
 
84a73f0
38282c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import functools
import os
import tempfile
from datetime import datetime

import gradio as gr
import torch
from TTS.api import TTS  # Ensure this import matches your TTS library

@functools.lru_cache(maxsize=1)
def _load_tts_model():
    """Load the XTTS v2 voice-cloning model once and reuse it afterwards."""
    # Constructing the model is the expensive part of a request, so the
    # instance is cached rather than rebuilt on every call to tts_process.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


def _upload_path(upload):
    """Return the on-disk path behind *upload*, or None if it has none."""
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    # Temp-file style wrappers expose the backing path as ``.name``.
    candidate = getattr(upload, "name", None)
    if isinstance(candidate, str) and os.path.isfile(candidate):
        return candidate
    return None


def _read_upload_bytes(upload):
    """Return the full contents of *upload* as bytes."""
    path = _upload_path(upload)
    if path is not None:
        with open(path, "rb") as handle:
            return handle.read()
    data = upload.read()
    return data.encode("utf-8") if isinstance(data, str) else data


def tts_process(transcript_file, voice_file):
    """Synthesize the transcript in the voice of the uploaded sample.

    Each argument may be a filesystem path or a file-like object with a
    ``read()`` method. NOTE(review): which of the two Gradio passes
    depends on its version and the component's ``type`` setting; both are
    handled here.

    Args:
        transcript_file: UTF-8 text upload holding the words to speak.
        voice_file: Audio upload used as the speaker reference.

    Returns:
        The name of the generated file, ``Download_<timestamp>.wav``,
        written to the current working directory.

    Raises:
        ValueError: If the transcript contains no text.
        UnicodeDecodeError: If the transcript is not valid UTF-8.
    """
    # Validate the cheap input before paying for model loading.
    # "utf-8-sig" also strips a leading BOM so it is not fed to the model.
    text = _read_upload_bytes(transcript_file).decode("utf-8-sig")
    if not text.strip():
        raise ValueError("The transcript file is empty.")

    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    output_file_name = f"Download_{timestamp}.wav"

    # tts_to_file is given a path for the speaker sample. Reuse the upload's
    # own path when it has one (this keeps the real extension); otherwise
    # spool the bytes into a uniquely named temporary file.
    speaker_path = _upload_path(voice_file)
    temp_voice_path = None
    try:
        if speaker_path is None:
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                temp_voice_path = tmp.name
                tmp.write(_read_upload_bytes(voice_file))
            speaker_path = temp_voice_path

        _load_tts_model().tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            language="en",
            file_path=output_file_name,
        )
    finally:
        # Remove the spooled sample even when synthesis fails.
        if temp_voice_path is not None:
            os.remove(temp_voice_path)

    return output_file_name

# Build the two upload controls first, then wire them into the interface.
_transcript_upload = gr.UploadButton(
    "Click to Upload a Transcript (.txt) File",
    file_types=["text"],
)
_voice_upload = gr.UploadButton(
    "Click to Upload a Voice to be Cloned (.mp3) File",
    file_types=["audio"],
)

# The interface maps both uploads onto tts_process and offers the returned
# file for download.
iface = gr.Interface(
    fn=tts_process,
    inputs=[_transcript_upload, _voice_upload],
    outputs=gr.File(label="Download Speech Output"),
    title="TTS Voice Cloning",
    description="Upload a transcript text file and a voice file to clone the voice and generate speech.",
)

# Start the web UI only when this file is run directly, not when imported.
if __name__ == "__main__":
    iface.launch(share=True)