File size: 3,776 Bytes
f276f86
561e8cb
 
5ad2404
550fb6b
5ad2404
f276f86
89c6ab7
 
f276f86
 
550fb6b
561e8cb
5ad2404
550fb6b
5ad2404
 
 
 
 
 
550fb6b
 
 
 
 
 
 
 
 
 
 
 
 
 
561e8cb
89c6ab7
 
5ad2404
 
 
 
 
 
89c6ab7
 
 
 
 
 
 
 
 
 
561e8cb
89c6ab7
 
561e8cb
 
89c6ab7
561e8cb
 
 
 
 
 
 
 
5ad2404
 
 
 
 
561e8cb
550fb6b
561e8cb
 
 
 
 
 
 
89c6ab7
5ad2404
 
 
 
 
 
561e8cb
5ad2404
 
 
 
 
 
 
89c6ab7
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
import moviepy.editor as mp
import soundfile as sf
from io import BytesIO
import subprocess
from TTS.api import TTS

# Set up the model for text-to-speech (TTS).
# The model is instantiated at module level, so it is built whenever this
# script is executed/imported; gpu=False is passed to the TTS constructor.
# NOTE(review): Streamlit presumably re-executes the script on every user
# interaction, which would rebuild the model each time — confirm and consider
# caching the instance.
MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"  # Example TTS model; adjust as needed
tts = TTS(model_name=MODEL_NAME, progress_bar=True, gpu=False)

# Function to extract audio from MP4 file using ffmpeg if moviepy fails
def extract_audio_from_mp4(mp4_file):
    """Extract the audio track of an MP4 into ``temp_audio.wav``.

    moviepy is tried first; if it raises for any reason the function falls
    back to invoking the ``ffmpeg`` executable directly.

    Args:
        mp4_file: Either a filesystem path to the video, or an uploaded-file
            object exposing ``getbuffer()``. In the latter case the bytes are
            written to ``uploaded_video.mp4`` before ffmpeg is invoked.

    Returns:
        The path of the written WAV file, or ``None`` when both extraction
        strategies fail (the errors are reported through ``st.error``).
    """
    audio_file = "temp_audio.wav"

    try:
        # Attempt to use moviepy to extract audio
        video = mp.VideoFileClip(mp4_file)
        try:
            audio = video.audio
            if audio is None:
                # Surface a readable reason instead of an AttributeError on None.
                raise ValueError("video has no audio track")
            audio.write_audiofile(audio_file)
        finally:
            # Release the reader/ffmpeg subprocess moviepy keeps open per clip.
            video.close()
        return audio_file
    except Exception as e:
        st.error(f"Error extracting audio with moviepy: {e}. Trying ffmpeg extraction...")

        # Fallback to using ffmpeg for audio extraction if moviepy fails
        try:
            if hasattr(mp4_file, "getbuffer"):
                # Uploaded-file object: persist it so ffmpeg can read from disk.
                mp4_path = "uploaded_video.mp4"
                with open(mp4_path, "wb") as f:
                    f.write(mp4_file.getbuffer())
            else:
                # Already a path on disk (this is what main() passes in).
                mp4_path = mp4_file

            subprocess.run(
                [
                    "ffmpeg",
                    "-y",  # overwrite a stale temp_audio.wav instead of prompting
                    "-i", mp4_path,
                    "-vn",
                    "-acodec", "pcm_s16le",
                    "-ar", "44100",
                    "-ac", "2",
                    audio_file,
                ],
                check=True,  # a non-zero ffmpeg exit must not be reported as success
            )
            return audio_file
        except Exception as ffmpeg_error:
            st.error(f"Error with ffmpeg extraction: {ffmpeg_error}")
            return None

# Function to load audio file
def load_audio(file):
    """Read an audio file with soundfile.

    Returns a ``(samples, sample_rate)`` pair as produced by ``sf.read``.
    If reading raises, the error is shown via ``st.error`` and
    ``(None, None)`` is returned instead.
    """
    try:
        samples, rate = sf.read(file)
    except Exception as e:
        st.error(f"Error loading audio: {e}")
        return None, None
    else:
        return samples, rate

# Function to save the generated audio to a file
def save_audio(output_audio, sample_rate):
    """Write ``output_audio`` to ``output_cloned_voice.wav`` and return that path.

    The samples and ``sample_rate`` are handed to ``sf.write`` unchanged.
    """
    destination = "output_cloned_voice.wav"
    sf.write(destination, output_audio, sample_rate)
    return destination

# Streamlit app
def main():
    """Render the page: upload a file, preview it, run the model, offer the result.

    MP4 uploads are saved to ``uploaded_video.mp4`` and their audio is
    extracted with ``extract_audio_from_mp4``; other uploads are saved to
    ``temp_audio.wav`` and loaded directly. The loaded samples are passed to
    ``tts.tts`` and the result is written with ``save_audio``. Failures are
    reported through ``st.error``; nothing is returned.
    """
    st.title("Voice Cloning Tool")
    st.markdown("Upload an MP4, WAV, or MP3 file, and get the cloned voice output.")

    # File upload
    audio_file = st.file_uploader("Upload your audio file", type=["wav", "mp3", "mp4"])
    if audio_file is None:
        return

    # Start from "nothing loaded" so a failed MP4 extraction reaches the
    # "No audio data" message below instead of raising UnboundLocalError.
    audio_data, sample_rate = None, None

    # Handle MP4 file by extracting audio
    if audio_file.type == "video/mp4":
        # Save the uploaded file to a temporary location
        with open("uploaded_video.mp4", "wb") as f:
            f.write(audio_file.getbuffer())

        # Extract audio from MP4
        audio_path = extract_audio_from_mp4("uploaded_video.mp4")
        if audio_path:
            st.audio(audio_path, format="audio/wav")

            # Load audio for TTS processing
            audio_data, sample_rate = load_audio(audio_path)
    else:
        # For audio files directly (WAV/MP3)
        st.audio(audio_file, format=f"audio/{audio_file.type.split('/')[1]}")

        # Persist the upload so it can be read back by path.
        with open("temp_audio.wav", "wb") as f:
            f.write(audio_file.getbuffer())

        audio_data, sample_rate = load_audio("temp_audio.wav")

    if audio_data is None:
        st.error("No audio data to process.")
        return

    # Perform voice cloning (This assumes your TTS model supports some form of input)
    try:
        st.text("Processing your input...")
        # NOTE(review): tts.tts presumably expects text rather than audio
        # samples — confirm against the TTS API; the call is kept as-is.
        output_audio = tts.tts(audio_data)
        # NOTE(review): the input file's sample rate is reused for the output;
        # verify it matches the rate the model actually generates at.
        output_path = save_audio(output_audio, sample_rate)

        st.audio(output_path, format="audio/wav")

        # Provide the download through Streamlit itself: a markdown link to
        # "/<file>" points at a URL the Streamlit server does not serve.
        with open(output_path, "rb") as result_file:
            st.download_button(
                "Download Cloned Voice",
                data=result_file.read(),
                file_name=output_path,
                mime="audio/wav",
            )
    except Exception as e:
        st.error(f"Error processing audio: {e}")

# Run the app only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()