wower99 committed on
Commit e952cc2 · 1 Parent(s): 7aef441

Better UI with progress bar and download button

Files changed (2):
  1. app.py   +33 -32
  2. utils.py +51 -97

app.py CHANGED
@@ -40,15 +40,6 @@ audio_file = st.file_uploader("🔼 Upload your audio file:", type=constants.SUP
 
 print(audio_file,'is the upload')
 
-# if audio_file is not None:
-#     # Check the duration of the uploaded audio file
-#     duration = get_audio_duration(audio_file)
-
-#     # Allow only files up to 5 minutes (300 seconds)
-#     if duration > 300:
-#         st.error("The uploaded audio file exceeds the 5-minute limit. Please upload a shorter file.")
-#     else:
-#         st.success(f"Audio file uploaded successfully! Duration: {duration/60:.2f} minutes")
 
 if audio_file:
     # Reset states only when a new file is uploaded
@@ -69,7 +60,7 @@ if audio_file:
     result = client.audio.transcriptions.create(
         file=(audio_file.name, file_bytes),  # Send the audio file content directly to the API
         model="whisper-large-v3-turbo",  # Model to use for transcription
-        prompt="Specify context or spelling",  # Optional context for better transcription accuracy
+        prompt="Take Note of Overall Context of the Audio",  # Optional context for better transcription accuracy
         response_format="verbose_json",  # Return detailed JSON response
         temperature=0.0,  # Control randomness in the transcription output
     )
@@ -115,35 +106,45 @@ if audio_file:
 
     # Generate images only if they have not been generated already
     if st.session_state.image_prompts and not st.session_state.generated_images:
-        with st.spinner("Generating images... Please wait."):
-            for prompt, image_path in generate_images(st.session_state.image_prompts):
-                # # Display each image as soon as it's generated
-                # st.image(image_path, caption=f"{prompt}", use_container_width=True)
-                # Append the generated image to the session state
-                st.session_state.generated_images.append((prompt, image_path))
-
-    # # Display all previously generated images (including newly generated ones)
-    # else:
-    #     for prompt, image_path in st.session_state.generated_images:
-    #         st.image(image_path, caption=f"{prompt}", use_container_width=True)
+        progress_placeholder = st.empty()
+        progress_bar = st.progress(0)
+        total_images = len(st.session_state.image_prompts)
+        progress_placeholder.text(f"Generating images. Please be patient...")
+
+        for idx, (prompt, image_path) in enumerate(generate_images(st.session_state.image_prompts)):
+            st.session_state.generated_images.append((prompt, image_path))
+            progress = (idx + 1) / total_images
+            progress_bar.progress(progress)
+            progress_placeholder.text(f"Generated image {idx + 1} of {total_images}: {prompt[:50]}...")
+
+        progress_placeholder.text("✅ All images generated successfully!")
+        progress_bar.empty()
 
     # Generate video when all images are generated
     if st.session_state.generated_images and st.session_state.audio:
-        if st.button("Generate Video"):
-            with st.spinner("Generating video... Please wait."):
-                # Map images to segments
-                image_paths = [img[1] for img in st.session_state.generated_images]
-                generated_video_path = generate_video(
-                    audio_file=st.session_state.audio,
-                    images=image_paths,
-                    segments=st.session_state.segments
-                )
-                st.session_state.generated_video = generated_video_path
-                st.success("Video generated successfully!")
+        with st.spinner("Generating video... Please wait."):
+            # Map images to segments
+            image_paths = [img[1] for img in st.session_state.generated_images]
+            generated_video_path = generate_video(
+                audio_file=st.session_state.audio,
+                images=image_paths,
+                segments=st.session_state.segments
+            )
+            st.session_state.generated_video = generated_video_path
+            st.success("Video generated successfully!")
 
     # Display the generated video
     if st.session_state.generated_video:
         st.video(st.session_state.generated_video)
+
+        # Add a download button for the generated video
+        with open(st.session_state.generated_video, "rb") as file:
+            st.download_button(
+                label="Download Video",
+                data=file,
+                file_name="generated_video.mp4",
+                mime="video/mp4"
+            )
 
 else:
     st.warning("Please upload an audio file to proceed.")
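
Note on the pattern: the progress UI added above is Streamlit's standard st.empty() placeholder plus st.progress() pairing, a reusable text slot for status messages and a bar updated with a fraction between 0.0 and 1.0. A minimal self-contained sketch of the same idea, where fake_generate is a hypothetical stand-in for the app's generate_images generator:

import time
import streamlit as st

def fake_generate(prompts):
    # Hypothetical stand-in for generate_images():
    # yields (prompt, image_path) pairs one at a time.
    for p in prompts:
        time.sleep(0.5)  # simulate slow image generation
        yield p, f"/tmp/{p}.png"

prompts = ["a red fox", "a snowy forest", "a mountain lake"]
placeholder = st.empty()  # reusable slot for the status line
bar = st.progress(0)      # bar starts at 0%

for idx, (prompt, path) in enumerate(fake_generate(prompts)):
    bar.progress((idx + 1) / len(prompts))  # accepts a float in [0.0, 1.0]
    placeholder.text(f"Generated image {idx + 1} of {len(prompts)}: {prompt}")

placeholder.text("✅ All images generated!")
bar.empty()  # remove the bar once finished

The download button works the same way as in the diff: st.download_button accepts bytes or an open binary file object, and the data is read when the button is rendered, so the finished MP4 can be offered for download on every rerun without regenerating the video.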
utils.py CHANGED
@@ -1,4 +1,3 @@
-
 import requests
 import constants
 import os
@@ -54,33 +53,7 @@ def get_translation(text: str):
         print(f"An exception occurred: {e}")
         return {"error_occured" : e}
 
-
-
-def old_get_image_prompts(text_input):
-    headers = {
-        "Authorization": f"Bearer {constants.HF_TOKEN}",  # Replace with your token
-        "Content-Type": "application/json"  # Optional, ensures JSON payload
-    }
-
-    endpoint = f"{constants.PROMPT_GENERATION_ENDPOINT}"
-    payload = {"text_input": text_input}
-
-    try:
-        # Send the POST request
-        print("making post request for image prompts", endpoint)
-        response = requests.post(endpoint, json=payload, headers=headers)
-
-        # Raise an exception for HTTP errors
-        response.raise_for_status()
-
-        # Parse JSON response
-        result = response.json()
-        return result
-
-    except requests.exceptions.RequestException as e:
-        print(f"Error during request: {e}")
-        return {"error": str(e)}
-
+
 def segments_to_chunks(segments):
     chunks = []
     for segment in segments:
@@ -98,7 +71,7 @@ def get_image_prompts(text_input : List):
     extractor = StructuredOutputExtractor(response_schema=ImagePromptResponseSchema)
     chunks_count = len(text_input)
     chunks = "chunk: " + "\nchunk: ".join(text_input)
-    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer
+    prompt = f"""ROLE: You are a Highly Experienced Image Prompt Sythesizer (try to avoid explicit unethical prompt gracefully as much as possible)
     TASK: Generate {chunks_count} image prompts, Each per chunk\n\n {chunks}"""
     result = extractor.extract(prompt)
     return result.model_dump()  # returns dictionary version pydantic model
@@ -158,62 +131,15 @@ def tmp_folder(folder_name: str) -> str:
 
 
 
-def old_generate_video(audio_file, images, segments):
-    print(f"images: {images}")
-    print(f"segments: {segments}")
-    print(f"audio file: {audio_file.name}")
-    try:
-        # Save the uploaded audio file to a temporary location
-        file_extension = os.path.splitext(audio_file.name)[1]
-        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
-        temp_audio_path.write(audio_file.read())
-        temp_audio_path.close()
-
-        # Load the audio file using MoviePy
-        audio = mp.AudioFileClip(temp_audio_path.name)
-        audio_duration = audio.duration
-
-        # Create video clips for each segment using the corresponding image
-        video_clips = []
-        for i, segment in enumerate(segments):
-            start_time = segment["start"]
-            end_time = segment["end"]
-
-            # Ensure the image index is within bounds
-            image_path = images[min(i, len(images) - 1)]
-
-            # Create an ImageClip for the current segment
-            image_clip = ImageClip(image_path, duration=end_time - start_time)
-            image_clip = image_clip.set_start(start_time).set_end(end_time)
-            video_clips.append(image_clip)
-
-        # Concatenate all the image clips to form the video
-        video = mp.concatenate_videoclips(video_clips, method="compose")
-
-        # Add the audio to the video
-        video = video.set_audio(audio)
-
-        # Save the video to a temporary file
-        temp_dir = tempfile.gettempdir()
-        video_path = os.path.join(temp_dir, "generated_video.mp4")
-        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
-
-        # Clean up the temporary audio file
-        os.remove(temp_audio_path.name)
-
-        return video_path
-
-    except Exception as e:
-        print(f"Error generating video: {e}")
-        return
-
-
-from moviepy.editor import *
+from moviepy.editor import *
+
+
+import os
+import tempfile
+from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips
 
 
 def generate_video(audio_file, images, segments):
-    print(f"images: {images}")
-    print(f"segments: {segments}")
-    print(f"audio file: {audio_file.name}")
     try:
         # Save the uploaded audio file to a temporary location
         file_extension = os.path.splitext(audio_file.name)[1]
@@ -223,36 +149,58 @@ def generate_video(audio_file, images, segments):
 
         # Load the audio file using MoviePy
         audio = AudioFileClip(temp_audio_path.name)
-        audio_duration = audio.duration
 
-        # Define YouTube-like dimensions (16:9 aspect ratio, e.g., 1920x1080)
-        frame_width = 1920
-        frame_height = 1080
+        # Define YouTube-like dimensions (16:9 aspect ratio)
+        frame_width = 1280
+        frame_height = 720
 
-        # Create video clips for each segment using the corresponding image
         video_clips = []
-        for i, segment in enumerate(segments):
-            start_time = segment["start"]
-            end_time = segment["end"]
-
+        total_segments = len(segments)
+
+        for i, current_segment in enumerate(segments):
+            start_time = current_segment["start"]
+            end_time = current_segment["end"]
+
+            # Calculate the actual duration including any gap until the next segment
+            if i < total_segments - 1:
+                # If there's a next segment, extend until it starts
+                next_segment = segments[i + 1]
+                actual_end_time = next_segment["start"]
+            else:
+                # For the last segment, use its end time
+                actual_end_time = end_time
+
+            # Calculate total duration including any gap
+            segment_duration = actual_end_time - start_time
+
+            print(f"\nProcessing segment {i + 1}/{total_segments}:")
+            print(f"  Start time: {start_time}s")
+            print(f"  Base end time: {end_time}s")
+            print(f"  Actual end time: {actual_end_time}s")
+            print(f"  Total duration: {segment_duration}s")
+            print(f"  Text: '{current_segment['text']}'")
+
             # Ensure the image index is within bounds
             image_path = images[min(i, len(images) - 1)]
-
+
             # Create an ImageClip for the current segment
-            image_clip = ImageClip(image_path, duration=end_time - start_time)
-
+            image_clip = ImageClip(image_path)
+
             # Resize and pad the image to fit a 16:9 aspect ratio
             image_clip = image_clip.resize(height=frame_height).on_color(
                 size=(frame_width, frame_height),
                 color=(0, 0, 0),  # Black background
                 pos="center"  # Center the image
            )
-
-            # Set the timing of the clip
-            image_clip = image_clip.set_start(start_time).set_end(end_time)
+
+            # Set the duration and start time for the clip
+            image_clip = image_clip.set_duration(segment_duration)
+            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly
+
            video_clips.append(image_clip)
 
         # Concatenate all the image clips to form the video
+        print("Concatenating video clips...")
         video = concatenate_videoclips(video_clips, method="compose")
 
         # Add the audio to the video
@@ -261,16 +209,22 @@ def generate_video(audio_file, images, segments):
         # Save the video to a temporary file
         temp_dir = tempfile.gettempdir()
         video_path = os.path.join(temp_dir, "generated_video.mp4")
-        video.write_videofile(video_path, fps=24, codec="libx264", audio_codec="aac")
+        print(f"Writing video file to {video_path}...")
+        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")
 
         # Clean up the temporary audio file
         os.remove(temp_audio_path.name)
+        print("Temporary audio file removed.")
 
         return video_path
 
     except Exception as e:
         print(f"Error generating video: {e}")
-        return
+        return None
 
 
 # Example usage:
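
The substantive change in generate_video is the timing logic: each image clip is now held on screen through any silent gap until the next segment begins, instead of ending at its own segment's end time, which could leave uncovered stretches on the composed timeline. A minimal sketch of just that calculation, assuming Whisper-style segment dicts with "start" and "end" keys:

from typing import Dict, List, Tuple

def clip_windows(segments: List[Dict]) -> List[Tuple[float, float]]:
    # For each segment return (start, duration), extending every clip
    # through the gap until the next segment begins; the last clip
    # simply keeps its own end time.
    windows = []
    for i, seg in enumerate(segments):
        start = seg["start"]
        end = segments[i + 1]["start"] if i < len(segments) - 1 else seg["end"]
        windows.append((start, end - start))
    return windows

# The 1.0 s pause between these two segments is absorbed by the first clip:
segs = [{"start": 0.0, "end": 2.0}, {"start": 3.0, "end": 5.0}]
print(clip_windows(segs))  # [(0.0, 3.0), (3.0, 2.0)]

Because each clip's duration runs exactly until the next clip's start, the windows are back-to-back, and the concatenated video spans the full audio with no dead time between images.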