File size: 9,038 Bytes
c14d84c
78b717c
 
 
a46fd4b
c14d84c
a46fd4b
c14d84c
e611ea3
a46fd4b
e0f02e8
78b717c
 
 
 
 
 
a46fd4b
78b717c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c14d84c
 
 
 
 
 
 
78b717c
 
 
 
c14d84c
e611ea3
 
 
 
 
 
 
c14d84c
a46fd4b
 
 
c14d84c
698fb1d
78b717c
 
 
 
 
 
 
 
 
c14d84c
 
 
a46fd4b
 
 
 
 
 
 
e952cc2
a46fd4b
 
 
78b717c
 
a46fd4b
 
78b717c
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
 
78b717c
 
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
 
78b717c
 
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
 
78b717c
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
78b717c
a46fd4b
78b717c
 
a46fd4b
 
78b717c
e952cc2
 
78b717c
e952cc2
 
78b717c
 
e952cc2
 
 
 
 
 
a46fd4b
 
78b717c
e952cc2
78b717c
 
 
 
 
e952cc2
78b717c
e952cc2
78b717c
e952cc2
78b717c
e952cc2
78b717c
 
e952cc2
a46fd4b
 
78b717c
 
e952cc2
 
78b717c
e952cc2
 
 
78b717c
e952cc2
 
c14d84c
 
 
698fb1d
a46fd4b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import streamlit as st
import os
import tempfile
import uuid
from utils import get_translation, get_image_prompts, segments_to_chunks, generate_images, generate_video
import constants  
from groq import Groq


# Groq API client shared by the rest of the script (no explicit credentials are passed here).
client = Groq()

# Look up this browser session's ID, minting a fresh UUID the first time the script runs for it.
# The ID is used as a suffix on every session_state key below.
session_id = st.session_state.get('session_id')
if session_id is None:
    session_id = str(uuid.uuid4())
    st.session_state['session_id'] = session_id

# Initialize per-session state variables if not already set.
# Each entry maps a state name to its first-run default; the actual session_state key is
# "<name>_<session_id>". Existing values are never overwritten, so they survive reruns.
_STATE_DEFAULTS = {
    'transcript_visible': False,
    'translation_visible': False,
    'uploaded_file_name': None,
    'audio': None,
    'was_converted': False,
    'transcript': None,
    'translation': None,
    'generated_video': None,
    'image_prompts': None,
    'generated_images': None,
    'video_generated': False,
    # Written after transcription and read by image-prompt / video generation; initialized
    # here so it is always present like the other keys.
    'segments': None,
}

for _state_name, _state_default in _STATE_DEFAULTS.items():
    _state_key_name = f'{_state_name}_{session_id}'
    if _state_key_name not in st.session_state:
        st.session_state[_state_key_name] = _state_default


# Streamlit UI: page title, status banner, and audio input selection.
st.markdown(
    "<h1 style='text-align: center;'>AI Video Generator</h1>",
    unsafe_allow_html=True
)
# Encourage users to like the app
st.markdown("<p style='text-align: center;'>Leave a Like if it works for you! ❤️</p>", unsafe_allow_html=True)
st.info("**Video Generation Feature** - Functional But Can be Buggy")

audio_option = st.radio("Choose audio input method:", ("Upload Audio File", "Record Audio"), horizontal=True)

# Both widgets produce the `audio_file` value consumed by the processing section below;
# it is falsy until the user has provided audio.
if audio_option == "Upload Audio File":
    # Upload audio file
    audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)
else:
    # Record audio in the browser
    audio_file = st.audio_input("🔊 Record Audio")


def _state_key(name):
    """Return the session_state key for ``name``, suffixed with this session's ID."""
    return f'{name}_{session_id}'


# Main pipeline: transcribe -> translate -> image prompts -> images -> video.
# Each stage stores its result in session_state and is skipped on later reruns once that
# result exists, so widget interactions (e.g. the checkboxes) do not redo finished work.
if audio_file:
    state = st.session_state

    # getvalue() returns the full buffer without moving the stream position, so the same
    # file object can still be passed to st.audio / generate_video afterwards.
    file_bytes = audio_file.getvalue()

    # Identify the input by name AND content. Comparing the name alone misses a new input
    # that reuses a previous name (NOTE(review): recordings from st.audio_input presumably
    # share one fixed name - confirm). hash() of bytes is stable within one server process,
    # which is all that is needed for an in-memory session.
    file_identity = (audio_file.name, hash(file_bytes))

    # Reset derived state only when a different file arrives
    if state[_state_key('uploaded_file_name')] != file_identity:
        state[_state_key('uploaded_file_name')] = file_identity
        state[_state_key('audio')] = audio_file
        for stale_name in (
            'transcript',
            'segments',
            'translation',
            'image_prompts',
            'generated_images',
            'generated_video',
        ):
            state[_state_key(stale_name)] = None
        state[_state_key('video_generated')] = False

    st.info(f"Uploaded file: **{audio_file.name}**")

    # Transcribe once per file via the Groq API; reruns reuse the stored result instead of
    # issuing another API call.
    if state[_state_key('transcript')] is None:
        with st.spinner("Transcribing audio... Please wait."):
            result = client.audio.transcriptions.create(
                file=(audio_file.name, file_bytes),  # Send the audio content directly to the API
                model="whisper-large-v3-turbo",  # Model to use for transcription
                prompt="Take Note of Overall Context of the Audio",  # Optional context hint
                response_format="verbose_json",  # Detailed JSON response (text + segments)
                temperature=0.0,  # Control randomness in the transcription output
            )
        state[_state_key('transcript')] = result.text
        state[_state_key('segments')] = result.segments

    # Translation logic
    if state[_state_key('transcript')] and state[_state_key('translation')] is None:
        with st.spinner("Generating translation... Please wait."):
            state[_state_key('translation')] = get_translation(state[_state_key('transcript')])

    # NOTE(review): assumes audio_file.type is already a MIME type (e.g. "audio/wav"), so it
    # is passed through as-is rather than prefixed with "audio/" again - confirm.
    st.audio(state[_state_key('audio')], format=audio_file.type or "audio/wav")

    # Toggle transcript visibility
    toggle_transcript = st.checkbox(
        "Show Transcript",
        value=state[_state_key('transcript_visible')],
        key="toggle_transcript",
    )
    state[_state_key('transcript_visible')] = toggle_transcript

    if toggle_transcript:
        st.write("### Transcription:")
        st.write(state[_state_key('transcript')])

    # Toggle translation visibility
    toggle_translation = st.checkbox(
        "Show Translation",
        value=state[_state_key('translation_visible')],
        key="toggle_translation",
    )
    state[_state_key('translation_visible')] = toggle_translation

    if toggle_translation:
        st.write("### Translation:")
        st.write(state[_state_key('translation')])

    # Image prompt generation. The prompts are always built from the transcription segments;
    # the translation text only decides whether the "already in English" notice is shown.
    translation = state[_state_key('translation')]
    if translation and state[_state_key('image_prompts')] is None:
        with st.spinner("Generating image prompts... Please wait."):
            if 'Already in English' in translation:
                st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
            chunks = segments_to_chunks(state.get(_state_key('segments')))
            state[_state_key('image_prompts')] = get_image_prompts(chunks)['image_prompts']

    # Ensure that generated_images is always a list
    if state.get(_state_key('generated_images')) is None:
        state[_state_key('generated_images')] = []

    # Generate images only if they have not been generated already
    image_prompts = state[_state_key('image_prompts')]
    if image_prompts and not state[_state_key('generated_images')]:
        progress_placeholder = st.empty()
        progress_bar = st.progress(0)
        total_images = len(image_prompts)
        progress_placeholder.text("Generating images. Please be patient...")

        # Collect into a local list and publish it only after the loop finishes, so a failure
        # part-way through does not leave a partial list that later reruns treat as complete.
        finished_images = []
        for count, (prompt, image_path) in enumerate(generate_images(image_prompts), start=1):
            finished_images.append((prompt, image_path))
            # Clamp in case the generator yields more items than there are prompts.
            progress_bar.progress(min(count / total_images, 1.0))
            progress_placeholder.text(f"Generated image {count} of {total_images}: {prompt[:50]}...")
        state[_state_key('generated_images')] = finished_images

        progress_placeholder.text("✅ All images generated successfully!")
        progress_bar.empty()

    # Generate video when all images are generated
    if (
        state[_state_key('generated_images')]
        and state[_state_key('audio')]
        and not state[_state_key('video_generated')]
    ):
        with st.spinner("Generating video... Please wait."):
            # Each stored entry is a (prompt, image_path) pair; only the paths are needed.
            image_paths = [path for _, path in state[_state_key('generated_images')]]
            generated_video_path = generate_video(
                audio_file=state[_state_key('audio')],
                images=image_paths,
                segments=state.get(_state_key('segments')),
            )
            state[_state_key('generated_video')] = generated_video_path
            state[_state_key('video_generated')] = True  # Prevent regeneration on rerun
            st.success("Video generated successfully!")

    # Display the generated video
    if state[_state_key('generated_video')]:
        st.video(state[_state_key('generated_video')])

        # Add a download button for the generated video
        with open(state[_state_key('generated_video')], "rb") as file:
            st.download_button(
                label="Download Video",
                data=file,
                file_name=f"generated_video_{session_id}.mp4",
                mime="video/mp4"
            )

else:
    st.warning("Please upload an audio file to proceed.")