Spaces:
Build error
Build error
File size: 3,776 Bytes
f276f86 561e8cb 5ad2404 550fb6b 5ad2404 f276f86 89c6ab7 f276f86 550fb6b 561e8cb 5ad2404 550fb6b 5ad2404 550fb6b 561e8cb 89c6ab7 5ad2404 89c6ab7 561e8cb 89c6ab7 561e8cb 89c6ab7 561e8cb 5ad2404 561e8cb 550fb6b 561e8cb 89c6ab7 5ad2404 561e8cb 5ad2404 89c6ab7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import streamlit as st
import moviepy.editor as mp
import soundfile as sf
from io import BytesIO
import subprocess
from TTS.api import TTS
# Text-to-speech (TTS) setup. The model object is created at module level,
# so it is built whenever this script is executed.
# MODEL_NAME is only an example identifier; change it to use another model.
MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(
    model_name=MODEL_NAME,
    progress_bar=True,
    gpu=False,
)
# Extract the audio track of an MP4 to a WAV file (moviepy first, ffmpeg as fallback)
def extract_audio_from_mp4(mp4_file):
    """Extract the audio track of an MP4 video into ``temp_audio.wav``.

    moviepy is tried first. If it raises for any reason, the ``ffmpeg``
    command-line tool is run on the same video file as a fallback.

    Args:
        mp4_file: Either a filesystem path to the video, or an object that
            exposes ``getbuffer()`` (as the uploaded file used in ``main``
            does). In the latter case the bytes are first written to
            ``uploaded_video.mp4``.

    Returns:
        The path of the WAV file that was written, or ``None`` when the
        video could not be saved or both extraction attempts failed.
        Failures are reported through ``st.error`` instead of being raised.
    """
    audio_path = "temp_audio.wav"

    # Resolve the input to a path on disk exactly once, up front. Only
    # buffer-like inputs are written out; a path is used as-is so the
    # caller's file is never reopened for writing (which would truncate it).
    if hasattr(mp4_file, "getbuffer"):
        mp4_path = "uploaded_video.mp4"
        try:
            with open(mp4_path, "wb") as f:
                f.write(mp4_file.getbuffer())
        except Exception as save_error:
            st.error(f"Error saving uploaded video: {save_error}")
            return None
    else:
        mp4_path = str(mp4_file)

    try:
        # Attempt to use moviepy to extract audio
        video = mp.VideoFileClip(mp4_path)
        try:
            # video.audio is None when there is no audio track; the resulting
            # AttributeError is handled below like any other moviepy failure.
            video.audio.write_audiofile(audio_path)
        finally:
            # Release the reader resources held by the clip.
            video.close()
        return audio_path
    except Exception as e:
        st.error(f"Error extracting audio with moviepy: {e}. Trying ffmpeg extraction...")

    # Fallback to using ffmpeg for audio extraction if moviepy fails
    try:
        subprocess.run(
            [
                "ffmpeg",
                "-y",  # overwrite a stale temp_audio.wav instead of refusing
                "-i", mp4_path,
                "-vn",
                "-acodec", "pcm_s16le",
                "-ar", "44100",
                "-ac", "2",
                audio_path,
            ],
            check=True,  # a non-zero exit status must not be reported as success
        )
        return audio_path
    except Exception as ffmpeg_error:
        st.error(f"Error with ffmpeg extraction: {ffmpeg_error}")
        return None
# Read an audio file into memory
def load_audio(file):
    """Read ``file`` with soundfile and return ``(audio_data, sample_rate)``.

    Any exception raised while reading is shown with ``st.error`` and
    ``(None, None)`` is returned instead.
    """
    try:
        samples, rate = sf.read(file)
    except Exception as e:
        st.error(f"Error loading audio: {e}")
        return None, None
    return samples, rate
# Write generated audio to a fixed output file
def save_audio(output_audio, sample_rate):
    """Write ``output_audio`` to ``output_cloned_voice.wav`` and return that path.

    Args:
        output_audio: Audio samples to write.
        sample_rate: Sample rate passed to soundfile for the written file.
    """
    destination = "output_cloned_voice.wav"
    sf.write(file=destination, data=output_audio, samplerate=sample_rate)
    return destination
# Streamlit app
def main():
    """Render the app: accept an upload, run it through ``tts``, offer the result.

    Flow:
      1. Show the title, instructions and a file uploader (WAV, MP3, MP4).
      2. For an MP4 upload, save it to ``uploaded_video.mp4`` and extract its
         audio with ``extract_audio_from_mp4``; for other uploads, save the
         bytes to ``temp_audio.wav``. Either way the audio is then loaded
         with ``load_audio``.
      3. Pass the loaded audio to ``tts.tts``, save the result with
         ``save_audio``, play it back and offer it for download.

    Errors are shown with ``st.error``; nothing is returned.
    """
    st.title("Voice Cloning Tool")
    st.markdown("Upload an MP4, WAV, or MP3 file, and get the cloned voice output.")

    # File upload
    audio_file = st.file_uploader("Upload your audio file", type=["wav", "mp3", "mp4"])
    if audio_file is None:
        return

    # Bind these up front: when MP4 extraction fails nothing else assigns
    # them, and the check further down must not hit an unbound name.
    audio_data, sample_rate = None, None

    if audio_file.type == "video/mp4":
        # Save the uploaded file to a temporary location
        with open("uploaded_video.mp4", "wb") as f:
            f.write(audio_file.getbuffer())

        # Extract audio from MP4
        audio_path = extract_audio_from_mp4("uploaded_video.mp4")
        if audio_path:
            st.audio(audio_path, format="audio/wav")
            # Load audio for TTS processing
            audio_data, sample_rate = load_audio(audio_path)
    else:
        # For audio files directly (WAV/MP3)
        st.audio(audio_file, format=f"audio/{audio_file.type.split('/')[1]}")
        # Persist the upload so it can be loaded from a path
        with open("temp_audio.wav", "wb") as f:
            f.write(audio_file.getbuffer())
        audio_data, sample_rate = load_audio("temp_audio.wav")

    if audio_data is None:
        st.error("No audio data to process.")
        return

    try:
        st.text("Processing your input...")
        # NOTE(review): this hands the loaded audio samples to tts.tts();
        # confirm the configured model accepts audio here rather than text.
        output_audio = tts.tts(audio_data)
        # NOTE(review): sample_rate is the *input* file's rate; the model's
        # output rate may differ - confirm before relying on playback speed.
        output_path = save_audio(output_audio, sample_rate)

        st.audio(output_path, format="audio/wav")
        # Serve the file bytes through a download button; a markdown link to
        # "/<filename>" points at a URL that nothing here arranges to serve.
        with open(output_path, "rb") as f:
            st.download_button(
                label="Download Cloned Voice",
                data=f.read(),
                file_name=output_path,
                mime="audio/wav",
            )
    except Exception as e:
        st.error(f"Error processing audio: {e}")
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|