File size: 6,034 Bytes
c14d84c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import streamlit as st
import requests
import io
from gradio_client import Client, handle_file
import tempfile
import os
from utils import clean_response, get_translation, get_image_prompts, generate_images, generate_video  # Import generate_video
import constants  

# Build the transcription client only when this session does not hold one yet
if 'client' not in st.session_state:
    st.session_state.client = Client(
        "habib926653/openai-whisper-large-v3-turbo",
        hf_token=constants.HF_TOKEN,
    )

# Seed the remaining session keys with their starting values; keys that are
# already present keep whatever value they currently hold.
for _state_key, _initial_value in (
    ('transcript_visible', False),
    ('translation_visible', False),
    ('uploaded_file_name', None),
    ('converted_audio', None),
    ('was_converted', False),
    ('transcript', None),
    ('translation', None),
    ('generated_video', None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

def convert_to_mp3(audio_file, timeout=120):
    """Return *audio_file* as MP3, converting via the external API if needed.

    Args:
        audio_file: File-like object with a ``name`` attribute (the caller
            passes the object returned by ``st.file_uploader``).
        timeout: Seconds to wait for the converter endpoint before the
            request is abandoned.

    Returns:
        A ``(file, was_converted)`` tuple:

        * ``(audio_file, False)`` when the name already ends in ``.mp3``
          (any letter case); no request is made.
        * ``(io.BytesIO named "converted.mp3", True)`` when the endpoint
          answers with HTTP 200.
        * ``(None, None)`` when the request fails or returns another status;
          an error message is shown with ``st.error``.
    """
    # Compare case-insensitively so names such as "TRACK.MP3" are not
    # uploaded to the converter for no reason.
    if audio_file.name.lower().endswith(".mp3"):
        return audio_file, False

    url = constants.AUDIO_CONVERTER_ENDPOINT
    # NOTE(review): the part is labelled "audio/mpeg" whatever the source
    # format is; kept as-is because the endpoint's expectations are not
    # visible here - confirm against the converter service.
    files = {"file": (audio_file.name, audio_file, "audio/mpeg")}

    try:
        with st.spinner("Converting audio to MP3... Please wait."):
            # Without a timeout a stalled endpoint would block the app forever.
            response = requests.post(url, files=files, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts follow the same failure path as a
        # rejected conversion instead of crashing the script.
        st.error("Could not reach the audio converter. Please try again later.")
        return None, None

    if response.status_code != 200:
        st.error("Conversion failed. Please try another format.")
        return None, None

    # Wrap the returned bytes in a named in-memory file for later use.
    converted_file = io.BytesIO(response.content)
    converted_file.name = "converted.mp3"
    return converted_file, True

# Streamlit UI: upload audio -> MP3 -> transcript -> translation -> image
# prompts -> images -> video. Each result is kept in st.session_state so it
# is produced once per uploaded file.
st.markdown(
    "<h1 style='text-align: center;'>AI Video Generator</h1>",
    unsafe_allow_html=True
)

# Upload audio file
audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)

if audio_file:
    # A different file name is treated as a new upload: convert it and drop
    # every result that was derived from the previous file.
    if st.session_state.uploaded_file_name != audio_file.name:
        st.session_state.uploaded_file_name = audio_file.name
        st.session_state.converted_audio, st.session_state.was_converted = convert_to_mp3(audio_file)
        st.session_state.transcript = None
        st.session_state.translation = None
        st.session_state.image_prompts = None
        st.session_state.generated_video = None

    # Display uploaded file name
    st.info(f"Uploaded file: **{audio_file.name}**")

    if st.session_state.converted_audio:
        if not st.session_state.was_converted:
            st.success("🎧 The uploaded file is already in MP3 format.")
        else:
            st.success("✅ File successfully converted to MP3!")

        # Transcribe only when no transcript is stored for this file
        if st.session_state.transcript is None:
            mp3_stream = st.session_state.converted_audio
            # Rewind first: the stream may already have been consumed by an
            # earlier attempt, in which case read() would return nothing.
            mp3_stream.seek(0)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                tmp_file.write(mp3_stream.read())
                tmp_file_path = tmp_file.name

            try:
                with st.spinner("Transcribing audio..."):
                    result = st.session_state.client.predict(
                        param_0=handle_file(tmp_file_path),
                        api_name="/predict"
                    )
                st.session_state.transcript = clean_response(result)
            finally:
                # Remove the temporary file even when transcription raises
                os.remove(tmp_file_path)

        # Ensure translation is always generated after transcription
        if st.session_state.transcript and st.session_state.translation is None:
            with st.spinner("Generating translation..."):
                st.session_state.translation = get_translation(st.session_state.transcript)

        # Display and allow playback of the MP3 file
        st.audio(st.session_state.converted_audio, format="audio/mp3")

        # Toggle to show or hide the transcript; the choice is remembered
        toggle_transcript = st.checkbox("Show Transcript", value=st.session_state.transcript_visible)
        st.session_state.transcript_visible = toggle_transcript
        if toggle_transcript:
            st.write("### Transcription:")
            st.write(st.session_state.transcript)

        # Toggle to show or hide the translation; the choice is remembered
        toggle_translation = st.checkbox("Show Translation", value=st.session_state.translation_visible)
        st.session_state.translation_visible = toggle_translation
        if toggle_translation:
            st.write("### Translation:")
            st.write(st.session_state.translation)

        # Image prompts - generated once translation is available
        if st.session_state.translation:
            st.write("### Image Prompts")
            # .get() is used because this key is only created inside this
            # section (and cleared on a new upload above).
            image_prompts = st.session_state.get('image_prompts')
            if image_prompts is None:
                result = get_image_prompts(st.session_state.translation)
                image_prompts = result['image_prompts']
                st.session_state.image_prompts = image_prompts
            for prompt in image_prompts:
                st.write(prompt)

            if st.session_state.generated_video:
                # Reuse the stored video instead of regenerating images and
                # video on every rerun (e.g. when a checkbox is toggled).
                # NOTE(review): assumes the stored value stays playable for
                # the session - confirm what generate_video returns.
                st.video(st.session_state.generated_video)
            else:
                # Generate images for prompts
                images_folder = generate_images(image_prompts)

                # Generate the video based on the images and translation
                if images_folder:
                    st.write("### Generating Video...")
                    with st.spinner("Creating video..."):
                        video_file = generate_video(images_folder, st.session_state.translation)
                    if video_file:
                        st.session_state.generated_video = video_file
                        st.video(video_file)  # Display the video
                    else:
                        st.error("Failed to generate the video.")

else:
    # If no file is uploaded yet
    st.warning("Please upload an audio file to proceed.")