# NOTE: non-code capture artifacts (file size, git-blame hashes, line-number row)
# were stripped from this spot; they were not part of the original Python source.
import streamlit as st
# Project-local helpers: translation, image-prompt generation, and image/video synthesis.
from utils import get_translation, get_image_prompts, segments_to_chunks, generate_images, generate_video
import constants
from groq import Groq
# Module-level Groq client used for audio transcription below.
# NOTE(review): presumably picks up credentials (e.g. GROQ_API_KEY) from the environment — confirm.
client = Groq()
# Initialize session-state variables once per browser session.
# (The captured source had the if-bodies flattened to column 0 — syntactically
# invalid Python — and repeated the same guard ten times; a defaults table is
# behaviorally identical and keeps the key list in one place.)
_SESSION_DEFAULTS = {
    "transcript_visible": False,   # whether the transcript expander is shown
    "translation_visible": False,  # whether the translation expander is shown
    "uploaded_file_name": None,    # name of the last uploaded audio file
    "audio": None,                 # the uploaded audio file object
    "was_converted": False,        # NOTE(review): set nowhere in this file — presumably used elsewhere
    "transcript": None,            # transcription text from Groq
    "translation": None,           # translation text from get_translation()
    "generated_video": None,       # path of the rendered video
    "image_prompts": None,         # prompts derived from transcript segments
    "generated_images": None,      # list of (prompt, image_path) tuples
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# ---- Streamlit UI ----
st.markdown(
    "<h1 style='text-align: center;'>AI Video Generator</h1>",
    unsafe_allow_html=True,
)
st.info("Video Generation Feature - Functional But Can be Buggy")

# Upload audio file; accepted extensions come from constants.SUPPORTED_FORMATS.
# (Removed a leftover debug `print(audio_file, 'is the upload')` that wrote to stdout.)
audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUPPORTED_FORMATS)
if audio_file:
    # Reset cached results only when a NEW file is uploaded, so normal widget
    # reruns reuse what was already computed.
    if st.session_state.uploaded_file_name != audio_file.name:
        st.session_state.uploaded_file_name = audio_file.name
        st.session_state.audio = audio_file
        st.session_state.transcript = None
        st.session_state.translation = None
        st.session_state.image_prompts = None
        st.session_state.generated_images = None
        # BUG FIX: the old reset never cleared the video, so a second upload
        # kept showing the previous file's video.
        st.session_state.generated_video = None

    st.info(f"Uploaded file: **{audio_file.name}**")

    # BUG FIX: transcription was unguarded, so the Groq API was called again on
    # every Streamlit rerun (every checkbox click). Run it once per file, in the
    # same style as the translation / image-prompt guards below.
    if st.session_state.transcript is None:
        file_bytes = audio_file.read()
        result = client.audio.transcriptions.create(
            file=(audio_file.name, file_bytes),          # send the audio content directly
            model="whisper-large-v3-turbo",              # transcription model
            prompt="Take Note of Overall Context of the Audio",  # optional context hint
            response_format="verbose_json",              # detailed response incl. segments
            temperature=0.0,                             # deterministic output
        )
        st.session_state.transcript = result.text
        st.session_state.segments = result.segments

    # Translate once per file.
    if st.session_state.transcript and st.session_state.translation is None:
        with st.spinner("Generating translation... Please wait."):
            st.session_state.translation = get_translation(st.session_state.transcript)

    # BUG FIX: UploadedFile.type is already a full MIME type (e.g. "audio/mpeg"),
    # so the old f"audio/{audio_file.type}" produced "audio/audio/mpeg".
    st.audio(st.session_state.audio, format=audio_file.type)

    # Toggle transcript visibility.
    st.session_state.transcript_visible = st.checkbox(
        "Show Transcript", value=st.session_state.transcript_visible, key="toggle_transcript"
    )
    if st.session_state.transcript_visible:
        st.write("### Transcription:")
        st.write(st.session_state.transcript)

    # Toggle translation visibility.
    st.session_state.translation_visible = st.checkbox(
        "Show Translation", value=st.session_state.translation_visible, key="toggle_translation"
    )
    if st.session_state.translation_visible:
        st.write("### Translation:")
        st.write(st.session_state.translation)

    # Derive image prompts once per file. The original if/else branches were
    # identical calls — only the informational message differed — so they are
    # collapsed into one call.
    if st.session_state.translation and st.session_state.image_prompts is None:
        with st.spinner("Generating image prompts... Please wait."):
            if 'Already in English' in st.session_state.translation:
                st.info("Audio is Already in English. Using Transcription to generate Image Prompts")
            st.session_state.image_prompts = get_image_prompts(
                segments_to_chunks(st.session_state.segments)
            )['image_prompts']

    # Ensure generated_images is always a list.
    if st.session_state.generated_images is None:
        st.session_state.generated_images = []

    # Generate images only if they have not been generated already.
    if st.session_state.image_prompts and not st.session_state.generated_images:
        progress_placeholder = st.empty()
        progress_bar = st.progress(0)
        total_images = len(st.session_state.image_prompts)
        progress_placeholder.text("Generating images. Please be patient...")
        # generate_images is consumed lazily so progress can update per image.
        for idx, (prompt, image_path) in enumerate(generate_images(st.session_state.image_prompts)):
            st.session_state.generated_images.append((prompt, image_path))
            progress_bar.progress((idx + 1) / total_images)
            progress_placeholder.text(f"Generated image {idx + 1} of {total_images}: {prompt[:50]}...")
        progress_placeholder.text("✅ All images generated successfully!")
        progress_bar.empty()

    # Render the video once per file. BUG FIX: previously this re-ran
    # generate_video() on every rerun after the images existed.
    if st.session_state.generated_images and st.session_state.audio and st.session_state.generated_video is None:
        with st.spinner("Generating video... Please wait."):
            # Map images to segments (image order matches prompt/segment order).
            image_paths = [img[1] for img in st.session_state.generated_images]
            st.session_state.generated_video = generate_video(
                audio_file=st.session_state.audio,
                images=image_paths,
                segments=st.session_state.segments,
            )
        st.success("Video generated successfully!")

    # Display the generated video with a download button.
    if st.session_state.generated_video:
        st.video(st.session_state.generated_video)
        with open(st.session_state.generated_video, "rb") as file:
            st.download_button(
                label="Download Video",
                data=file,
                file_name="generated_video.mp4",
                mime="video/mp4",
            )
else:
    st.warning("Please upload an audio file to proceed.")