Spaces:
Build error
Build error
import streamlit as st | |
import moviepy.editor as mp | |
import soundfile as sf | |
from io import BytesIO | |
import subprocess | |
from TTS.api import TTS | |
# Text-to-speech (TTS) model shared by the whole app.
# MODEL_NAME is an example model; adjust as needed.
MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(
    model_name=MODEL_NAME,
    gpu=False,
    progress_bar=True,
)
# Function to extract audio from an MP4 file, falling back to ffmpeg if moviepy fails
def extract_audio_from_mp4(mp4_file):
    """Extract the audio track of an MP4 video into ``temp_audio.wav``.

    moviepy is tried first; if it raises, the ``ffmpeg`` command-line tool is
    run as a fallback. Failures are reported to the page with ``st.error``.

    Args:
        mp4_file: Either a filesystem path to the video, or an object that
            exposes ``getbuffer()`` (such as an uploaded-file buffer), whose
            bytes are first written to ``uploaded_video.mp4``.

    Returns:
        The path of the extracted WAV file, or ``None`` if the video could
        not be saved or both extraction methods failed.
    """
    audio_path = "temp_audio.wav"

    # Normalise the input to a path on disk before trying either back end.
    # The fallback previously called ``getbuffer()`` unconditionally, which
    # always failed when the caller passed a path string.
    if hasattr(mp4_file, "getbuffer"):
        mp4_path = "uploaded_video.mp4"
        try:
            with open(mp4_path, "wb") as f:
                f.write(mp4_file.getbuffer())
        except OSError as write_error:
            st.error(f"Error saving uploaded video: {write_error}")
            return None
    else:
        mp4_path = mp4_file

    try:
        # Attempt to use moviepy to extract audio
        video = mp.VideoFileClip(mp4_path)
        try:
            if video.audio is None:
                raise ValueError("video has no audio track")
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the reader resources held by the clip.
            video.close()
        return audio_path
    except Exception as e:
        st.error(f"Error extracting audio with moviepy: {e}. Trying ffmpeg extraction...")

    # Fallback to using ffmpeg for audio extraction if moviepy fails
    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",  # overwrite a stale temp_audio.wav instead of prompting
                "-i", mp4_path,
                "-vn",
                "-acodec", "pcm_s16le",
                "-ar", "44100",
                "-ac", "2",
                audio_path,
            ],
            check=True,  # a non-zero exit must not be reported as success
        )
        return audio_path
    except Exception as ffmpeg_error:
        st.error(f"Error with ffmpeg extraction: {ffmpeg_error}")
        return None
# Function to read an audio file into memory
def load_audio(file):
    """Read ``file`` with soundfile and return ``(audio_data, sample_rate)``.

    Any exception raised while reading is shown on the page via ``st.error``
    and ``(None, None)`` is returned instead.
    """
    try:
        samples, rate = sf.read(file)
    except Exception as e:
        st.error(f"Error loading audio: {e}")
        return None, None
    else:
        return samples, rate
# Function to write the generated audio to disk
def save_audio(output_audio, sample_rate):
    """Write ``output_audio`` to ``output_cloned_voice.wav`` and return that path.

    ``sample_rate`` is passed through to ``sf.write`` unchanged.
    """
    destination = "output_cloned_voice.wav"
    sf.write(destination, output_audio, sample_rate)
    return destination
# Streamlit app
def main():
    """Render the page: upload a file, preview it, run the TTS model, offer the result.

    MP4 uploads are saved to ``uploaded_video.mp4`` and their audio track is
    extracted; WAV/MP3 uploads are saved to ``temp_audio.wav``. The loaded
    samples are passed to the module-level ``tts`` model and the result is
    written with ``save_audio``. All failures are reported with ``st.error``.
    """
    st.title("Voice Cloning Tool")
    st.markdown("Upload an MP4, WAV, or MP3 file, and get the cloned voice output.")

    # File upload
    audio_file = st.file_uploader("Upload your audio file", type=["wav", "mp3", "mp4"])
    if audio_file is None:
        return

    # Start from "nothing loaded" so that a failed MP4 extraction reaches the
    # "No audio data to process." message instead of raising NameError.
    audio_data, sample_rate = None, None

    if audio_file.type == "video/mp4":
        # Save the uploaded file to a temporary location
        with open("uploaded_video.mp4", "wb") as f:
            f.write(audio_file.getbuffer())
        # Extract audio from MP4
        audio_path = extract_audio_from_mp4("uploaded_video.mp4")
        if audio_path:
            st.audio(audio_path, format="audio/wav")
            # Load audio for TTS processing
            audio_data, sample_rate = load_audio(audio_path)
    else:
        # For audio files directly (WAV/MP3)
        st.audio(audio_file, format=f"audio/{audio_file.type.split('/')[1]}")
        with open("temp_audio.wav", "wb") as f:
            f.write(audio_file.getbuffer())
        audio_data, sample_rate = load_audio("temp_audio.wav")

    if audio_data is None:
        st.error("No audio data to process.")
        return

    try:
        st.text("Processing your input...")
        # NOTE(review): this passes raw audio samples as the first argument of
        # ``tts.tts``; the Coqui TTS API appears to expect the text to
        # synthesise there, and the output rate may differ from the input's
        # ``sample_rate`` - confirm against the TTS documentation.
        output_audio = tts.tts(audio_data)
        output_path = save_audio(output_audio, sample_rate)
        st.audio(output_path, format="audio/wav")
        # Offer the file through Streamlit's download widget; the previous
        # markdown link pointed at a root-relative URL, not at the saved file.
        with open(output_path, "rb") as f:
            st.download_button(
                "Download Cloned Voice",
                data=f.read(),
                file_name=output_path,
                mime="audio/wav",
            )
    except Exception as e:
        st.error(f"Error processing audio: {e}")
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()