Amelia-James's picture
Create app.py
f276f86 verified
raw
history blame
2.22 kB
import streamlit as st
import librosa
import soundfile as sf
from TTS.api import TTS
# Load the pre-trained TTS model
MODEL_NAME = "tts_models/en/vctk/vits"  # Change this to other models if desired
# NOTE(review): this checkpoint is a multi-speaker VITS model; confirm that it
# actually accepts `speaker_wav` for cloning. If it does not, switch MODEL_NAME
# to a model that supports reference-audio cloning.


@st.cache_resource
def _load_tts(model_name):
    """Build the TTS model once and reuse it across Streamlit reruns.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, the model would be reloaded each time.
    """
    return TTS(model_name=model_name, progress_bar=True, gpu=False)


tts = _load_tts(MODEL_NAME)

# Title and description
st.title("Voice Cloning Tool")
st.markdown("""
Upload a sample of your voice and type text to generate a cloned output.
This tool uses a pre-trained voice synthesis model.
""")

# Step 1: Upload an audio file
uploaded_file = st.file_uploader("Upload your voice sample (WAV format preferred):", type=["wav", "mp3"])
if uploaded_file:
    # Use the upload's own MIME type so MP3 samples are not labelled as WAV;
    # fall back to WAV if the browser did not report one.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav", start_time=0)

# Step 2: Enter text for synthesis
text_input = st.text_area("Enter text to synthesize with the cloned voice:")
if not text_input:
    st.warning("Please enter text to generate cloned voice output.")
# Process the audio input (decode, downmix to mono, resample)
def preprocess_audio(file, target_sr=16000):
    """Load an audio file as a mono waveform at a fixed sampling rate.

    Args:
        file: Path (or file-like object) passed straight to ``librosa.load``.
        target_sr: Sampling rate in Hz to resample to. Defaults to 16000,
            the rate this function always used before it was configurable.

    Returns:
        A ``(samples, sampling_rate)`` tuple as returned by ``librosa.load``.
        No file is written here; the caller is responsible for saving the
        waveform if a WAV file is needed.
    """
    samples, sampling_rate = librosa.load(file, sr=target_sr, mono=True)
    return samples, sampling_rate
# Save the uploaded sample to disk so it can be loaded by path
if uploaded_file:
    input_audio_path = "input_audio.wav"
    with open(input_audio_path, "wb") as f:
        # getvalue() returns the whole buffer regardless of the current read
        # position, so a previously consumed stream cannot yield an empty file.
        f.write(uploaded_file.getvalue())

# Step 3: Clone voice and synthesize speech
if st.button("Clone Voice"):
    # Whitespace-only text is treated the same as no text.
    if uploaded_file and text_input and text_input.strip():
        output_path = "cloned_output.wav"
        try:
            # Process the input audio. Kept inside the try so an unreadable
            # upload is reported in the UI instead of crashing the script.
            audio, sr = preprocess_audio(input_audio_path)
            sf.write("processed_audio.wav", audio, sr)

            # Clone the voice and synthesize speech. tts_to_file writes the
            # result at the model's own output sample rate, rather than a
            # hard-coded rate that may not match what the model produces.
            tts.tts_to_file(
                text=text_input,
                speaker_wav="processed_audio.wav",
                file_path=output_path,
            )

            # Read the result once and close the handle; the same bytes feed
            # both the player and the download button.
            with open(output_path, "rb") as f:
                output_bytes = f.read()
        except Exception as e:
            st.error(f"Error during voice cloning: {e}")
        else:
            st.success("Voice cloning complete! Listen to the output below:")
            st.audio(output_bytes, format="audio/wav")
            st.download_button(
                "Download Cloned Voice",
                data=output_bytes,
                file_name="cloned_output.wav",
                mime="audio/wav",
            )
    else:
        st.error("Please upload a voice sample and enter text for synthesis.")