PriyankaSatish committed on
Commit
0a9d475
·
verified ·
1 Parent(s): c3eb6ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -11
app.py CHANGED
@@ -10,7 +10,7 @@ from io import BytesIO
10
  from openai import OpenAI
11
  import whisper
12
  from google.cloud import vision
13
-
14
  # st.set_page_config(layout="wide")
15
 
16
  load_dotenv()
@@ -196,13 +196,13 @@ def search_keyword(keyword, frame_texts):
196
 
197
 
198
  # Function to generate description for video frames
199
- def generate_description(base64_frames):
200
  try:
201
  prompt_messages = [
202
  {
203
  "role": "user",
204
  "content": [
205
- "1. Generate a description for this sequence of video frames in about 90 words. 2.Return the following: i. List of objects in the video ii. Any restrictive content or sensitive content and if so which frame. iii. The frames is supposed to contain news content and we want to detect non-news content such as an advertisement. So analyze specifically for any indications that the content might be promotional or an advertisement.",
206
  *map(lambda x: {"image": x, "resize": 428}, base64_frames),
207
  ],
208
  },
@@ -212,10 +212,24 @@ def generate_description(base64_frames):
212
  messages=prompt_messages,
213
  max_tokens=3000,
214
  )
215
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  except Exception as e:
217
  print(f"Error in generate_description: {e}")
218
- return None
219
 
220
  def generate_overall_description(transcript_text, video_description):
221
  try:
@@ -251,8 +265,21 @@ with col1:
251
  keyword = st.text_input("Enter a keyword to filter the frames (optional):")
252
  extract_frames_button = st.button("Extract Frames")
253
  uploaded_video = st.file_uploader("Or upload a video file (MP4):", type=["mp4"])
254
-
255
-
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  # Slider to select the number of seconds for extraction
257
  seconds = st.slider("Select the number of seconds for extraction:", min_value=1, max_value=60, value=10)
258
 
@@ -375,7 +402,7 @@ with col1:
375
 
376
  # Get consolidated description for all frames
377
  if ffmpeg_output:
378
- description = generate_description(base64_frames)
379
  if description:
380
  st.markdown("**Frame Description:**")
381
  st.write(description)
@@ -391,7 +418,7 @@ with col1:
391
 
392
  # Get the transcript from whisper
393
  transcript_text = get_transcript_from_audio(audio_tempfile.name)
394
- description = generate_description(base64_frames)
395
  # Generate overall description using transcript and video description
396
  overall_description = generate_overall_description(transcript_text, description)
397
  if overall_description:
@@ -429,6 +456,7 @@ with col1:
429
  n_frames = len(frame_bytes_list)
430
  base64_frames = [base64.b64encode(b'\xff\xd8' + frame_bytes).decode('utf-8') for frame_bytes in frame_bytes_list]
431
 
 
432
  categories_results = []
433
  frame_texts = {}
434
 
@@ -439,6 +467,7 @@ with col1:
439
  col1, col2 = st.columns([3, 2])
440
  with col1:
441
  frame_bytes = base64.b64decode(frame_base64)
 
442
  st.image(Image.open(BytesIO(frame_bytes)), caption=f'Frame {idx + 1}', use_column_width=True)
443
  with col2:
444
  st.write(f"Extracted Text: {extracted_text}")
@@ -482,7 +511,7 @@ with col1:
482
 
483
  # Get consolidated description for all frames
484
  if ffmpeg_output:
485
- description = generate_description(base64_frames)
486
  if description:
487
  st.markdown("**Frame Description:**")
488
  st.write(description)
@@ -503,7 +532,59 @@ with col1:
503
 
504
  # Get the transcript from whisper
505
  transcript_text = get_transcript_from_audio(audio_tempfile.name)
506
- description = generate_description(base64_frames)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  # Generate overall description using transcript and video description
508
  overall_description = generate_overall_description(transcript_text, description)
509
  if overall_description:
 
10
  from openai import OpenAI
11
  import whisper
12
  from google.cloud import vision
13
+ import re
14
  # st.set_page_config(layout="wide")
15
 
16
  load_dotenv()
 
196
 
197
 
198
  # Function to generate description for video frames
199
+ def generate_description(base64_frames,prompt):
200
  try:
201
  prompt_messages = [
202
  {
203
  "role": "user",
204
  "content": [
205
+ prompt ,
206
  *map(lambda x: {"image": x, "resize": 428}, base64_frames),
207
  ],
208
  },
 
212
  messages=prompt_messages,
213
  max_tokens=3000,
214
  )
215
+ description = response.choices[0].message.content
216
+
217
+ # Use regular expression to find frame numbers
218
+ frame_numbers = re.findall(r'Frames\s*:\s*(\d+(?:,\s*\d+)*)', response.choices[0].message.content)
219
+
220
+ # Convert the string of numbers into a list of integers
221
+ if frame_numbers:
222
+ frame_numbers = [int(num) for num in frame_numbers[0].split(',')]
223
+ else:
224
+ frame_numbers = []
225
+
226
+ print("Frame numbers to extract:", frame_numbers)
227
+
228
+ return description, frame_numbers
229
+
230
  except Exception as e:
231
  print(f"Error in generate_description: {e}")
232
+ return None, []
233
 
234
  def generate_overall_description(transcript_text, video_description):
235
  try:
 
265
  keyword = st.text_input("Enter a keyword to filter the frames (optional):")
266
  extract_frames_button = st.button("Extract Frames")
267
  uploaded_video = st.file_uploader("Or upload a video file (MP4):", type=["mp4"])
268
+ prompt1 = "keyword is " + st.text_input("Enter a keyword for analysis:")
269
+ prompt2 = "1. Generate a description for this sequence of video frames in about 90 words. 2.Return the following:\
270
+ i. List of objects in the video \
271
+ ii. Any restrictive content or sensitive content and if so which frame. \
272
+ iii. The frames is supposed to contain news content and we want to detect non-news content such as an advertisement. \
273
+ So analyze specifically for any indications that the content might be promotional or an advertisement. \
274
+ Find the most portions of a video related to the keyword.  \
275
+ The output will be targeted towards social media (like TikTok or Reels) or to news broadcasts. \
276
+ For the provided frames return the frames related to the keyword\
277
+ I am trying to fill these frames for a TikTok video. \
278
+ Hence while selecting the frames keep that in mind. \
279
+ You do not have to give me the script of the Tiktok video. \
280
+ Just return the most interesting frames in a sequence that will come for a tiktok video. \
281
+ List all frame numbers separated by commas at the end like this for eg, Frames : 1,2,4,7,9"
282
+ prompt = prompt2 + prompt1
283
  # Slider to select the number of seconds for extraction
284
  seconds = st.slider("Select the number of seconds for extraction:", min_value=1, max_value=60, value=10)
285
 
 
402
 
403
  # Get consolidated description for all frames
404
  if ffmpeg_output:
405
+ description = generate_description(base64_frames,prompt)
406
  if description:
407
  st.markdown("**Frame Description:**")
408
  st.write(description)
 
418
 
419
  # Get the transcript from whisper
420
  transcript_text = get_transcript_from_audio(audio_tempfile.name)
421
+ description = generate_description(base64_frames,prompt)
422
  # Generate overall description using transcript and video description
423
  overall_description = generate_overall_description(transcript_text, description)
424
  if overall_description:
 
456
  n_frames = len(frame_bytes_list)
457
  base64_frames = [base64.b64encode(b'\xff\xd8' + frame_bytes).decode('utf-8') for frame_bytes in frame_bytes_list]
458
 
459
+ frame_dict = {}
460
  categories_results = []
461
  frame_texts = {}
462
 
 
467
  col1, col2 = st.columns([3, 2])
468
  with col1:
469
  frame_bytes = base64.b64decode(frame_base64)
470
+ frame_dict[idx + 1] = frame_bytes
471
  st.image(Image.open(BytesIO(frame_bytes)), caption=f'Frame {idx + 1}', use_column_width=True)
472
  with col2:
473
  st.write(f"Extracted Text: {extracted_text}")
 
511
 
512
  # Get consolidated description for all frames
513
  if ffmpeg_output:
514
+ description,frame_numbers = generate_description(base64_frames,prompt)
515
  if description:
516
  st.markdown("**Frame Description:**")
517
  st.write(description)
 
532
 
533
  # Get the transcript from whisper
534
  transcript_text = get_transcript_from_audio(audio_tempfile.name)
535
+ description = generate_description(base64_frames,prompt)
536
+
537
+ if frame_numbers:
538
+ print("Frame numbers to extract:", frame_numbers) # Check frame numbers
539
+
540
+ # Create a mapping from original frame numbers to sequential numbers
541
+ frame_mapping = {}
542
+ new_frame_numbers = []
543
+ for idx, frame_number in enumerate(sorted(frame_numbers)):
544
+ frame_mapping[frame_number] = idx + 1
545
+ new_frame_numbers.append(idx + 1)
546
+
547
+ print("New frame numbers:", new_frame_numbers)
548
+ print("Frame mapping:", frame_mapping)
549
+
550
+ # Create a temporary directory to store images
551
+ with tempfile.TemporaryDirectory() as temp_dir:
552
+ image_paths = []
553
+ for frame_number in frame_numbers:
554
+ if frame_number in frame_dict:
555
+ frame_path = os.path.join(temp_dir, f'frame_{frame_mapping[frame_number]:03}.jpg') # Updated file naming
556
+ image_paths.append(frame_path)
557
+ with open(frame_path, 'wb') as f:
558
+ f.write(frame_dict[frame_number])
559
+
560
+ #image = Image.open(BytesIO(frame_bytes))
561
+ #st.image(image, caption='Selected Frame', use_column_width=True)
562
+ #with open(frame_path, "rb") as file:
563
+ # btn = st.download_button(
564
+ # label="Download Frame",
565
+ # data=file,
566
+ # file_name=f'frame_{frame_number}.jpg',
567
+ # mime="image/jpeg"
568
+ # )
569
+ # Once all selected frames are saved as images, create a video from them using FFmpeg
570
+ video_output_path = os.path.join(temp_dir, 'output5.mp4')
571
+ framerate = 1 # Adjust framerate based on the number of frames
572
+ ffmpeg_command = [
573
+ 'ffmpeg',
574
+ '-framerate', str(framerate), # Set framerate based on the number of frames
575
+ '-i', os.path.join(temp_dir, 'frame_%03d.jpg'), # Input pattern for all frame files
576
+ '-c:v', 'libx264',
577
+ '-pix_fmt', 'yuv420p',
578
+ video_output_path
579
+ ]
580
+
581
+ print("FFmpeg command:", ' '.join(ffmpeg_command)) # Debug FFmpeg command
582
+
583
+ subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
584
+
585
+ # Display or provide a download link for the created video
586
+ st.header("Final Video")
587
+ st.video(video_output_path)
588
  # Generate overall description using transcript and video description
589
  overall_description = generate_overall_description(transcript_text, description)
590
  if overall_description: