CosmickVisions committed on
Commit c6ab0b8 · verified · 1 Parent(s): faa229e

Update app.py

Files changed (1)
  1. app.py +1259 -267
app.py CHANGED
@@ -36,6 +36,8 @@ from langchain.memory import ConversationBufferMemory
36
  from langchain_community.document_loaders import TextLoader
37
  import re
38
  import base64
 
 
39
 
40
  # Set page config
41
  st.set_page_config(
@@ -431,34 +433,209 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None):
431
  return summary_img
432
 
433
  class VideoProcessor(VideoProcessorBase):
434
- """Process video frames for real-time analysis"""
435
 
436
- def __init__(self, analysis_types: List[str]):
 
437
  self.analysis_types = analysis_types
438
  self.frame_counter = 0
439
- self.process_every_n_frames = 5 # Process every 5th frame
440
  self.vision_client = client # Store client reference
441
  self.last_results = {} # Cache results between processed frames
442
  self.last_processed_time = time.time()
443
  self.processing_active = True
444
 
445
  def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
446
  img = frame.to_ndarray(format="bgr24")
447
  self.frame_counter += 1
448
 
449
  # Add status display on all frames
450
  cv2.putText(img,
451
- f"Vision AI: {'Active' if self.processing_active else 'Paused'}",
452
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
453
 
454
- # Process at regular intervals
455
  current_time = time.time()
456
- if current_time - self.last_processed_time > 1.0 and self.processing_active: # Process max once per second
457
  self.last_processed_time = current_time
458
 
459
  try:
460
  # Convert to PIL Image for Vision API
461
- pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
462
 
463
  # Process with Vision API
464
  img_byte_arr = io.BytesIO()
@@ -467,56 +644,36 @@ class VideoProcessor(VideoProcessorBase):
467
  vision_image = vision.Image(content=content)
468
 
469
  # Update status text
470
- cv2.putText(img, "Processing...", (10, 60),
471
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
472
 
473
  # Process according to selected analysis types
474
- if "Objects" in self.analysis_types:
475
- objects = self.vision_client.object_localization(image=vision_image)
476
- self.last_results["objects"] = objects.localized_object_annotations
477
-
478
- if "Face Detection" in self.analysis_types:
479
- faces = self.vision_client.face_detection(image=vision_image)
480
- self.last_results["faces"] = faces.face_annotations
481
-
482
  if "Text" in self.analysis_types:
483
  text = self.vision_client.text_detection(image=vision_image)
484
  self.last_results["text"] = text.text_annotations
485
 
486
  except Exception as e:
487
- error_msg = str(e)[:50]
488
- cv2.putText(img, f"Error: {error_msg}", (10, 60),
489
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
490
 
491
- # Always draw the cached results for smooth display
492
  try:
493
- # Draw object detections
494
- if "objects" in self.last_results and "Objects" in self.analysis_types:
495
- for obj in self.last_results["objects"]:
496
- box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
497
- for vertex in obj.bounding_poly.normalized_vertices]
498
- box = np.array(box, np.int32).reshape((-1, 1, 2))
499
- cv2.polylines(img, [box], True, (0, 255, 0), 2)
500
- # Add label
501
- cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
502
- (int(box[0][0][0]), int(box[0][0][1]) - 10),
503
- cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
504
-
505
- # Draw face detections
506
- if "faces" in self.last_results and "Face Detection" in self.analysis_types:
507
- for face in self.last_results["faces"]:
508
- vertices = face.bounding_poly.vertices
509
- points = [(vertex.x, vertex.y) for vertex in vertices]
510
- pts = np.array(points, np.int32).reshape((-1, 1, 2))
511
- cv2.polylines(img, [pts], True, (0, 0, 255), 2)
512
-
513
- # Draw landmarks
514
- for landmark in face.landmarks:
515
- px = int(landmark.position.x)
516
- py = int(landmark.position.y)
517
- cv2.circle(img, (px, py), 2, (255, 255, 0), -1)
518
-
519
- # Draw text detections
520
  if "text" in self.last_results and "Text" in self.analysis_types:
521
  if len(self.last_results["text"]) > 1: # Skip the first one (full text)
522
  for text_annot in self.last_results["text"][1:]:
@@ -524,15 +681,103 @@ class VideoProcessor(VideoProcessorBase):
524
  pts = np.array(box, np.int32).reshape((-1, 1, 2))
525
  cv2.polylines(img, [pts], True, (255, 0, 0), 1)
526
 
527
  except Exception as e:
528
- error_msg = str(e)[:50]
529
- cv2.putText(img, f"Display Error: {error_msg}", (10, 90),
530
- cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
531
 
532
- # Add which analysis types are active
533
- y_pos = img.shape[0] - 10
534
- cv2.putText(img, f"Analyzing: {', '.join(self.analysis_types)}",
535
- (10, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
536
 
537
  return av.VideoFrame.from_ndarray(img, format="bgr24")
538
 
@@ -758,8 +1003,9 @@ def list_bigquery_resources():
758
 
759
  return resources
760
 
761
- def process_video_file(video_file, analysis_types):
762
- """Process an uploaded video file with enhanced Vision AI detection and analytics"""
 
763
  # Create a temporary file to save the uploaded video
764
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
765
  temp_file.write(video_file.read())
@@ -805,8 +1051,8 @@ def process_video_file(video_file, analysis_types):
805
  fourcc = cv2.VideoWriter_fourcc(*'DIB ') # Uncompressed RGB
806
  out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True)
807
 
808
- # Process every Nth frame to reduce API calls but increase from 10 to 5 for more detail
809
- process_every_n_frames = 5
810
 
811
  # Create a progress bar
812
  progress_bar = st.progress(0)
@@ -818,13 +1064,26 @@ def process_video_file(video_file, analysis_types):
818
  "faces": 0,
819
  "text_blocks": 0,
820
  "labels": {},
821
- # New advanced tracking
822
- "object_tracking": {}, # Track object appearances by frame
823
- "activity_metrics": [], # Track frame-to-frame differences
824
- "scene_changes": [] # Track major scene transitions
 
 
825
  }
826
 
827
- # For scene change detection
828
  previous_frame_gray = None
829
  scene_change_threshold = 40.0 # Threshold for scene change detection
830
 
@@ -846,184 +1105,274 @@ def process_video_file(video_file, analysis_types):
846
  cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
847
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
848
 
849
- # Activity detection and scene change detection
850
  current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
851
  current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
852
-
853
  if previous_frame_gray is not None:
854
- # Calculate frame difference for activity detection
855
- frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
856
- activity_level = np.mean(frame_diff)
857
- detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
858
 
859
  # Scene change detection
860
- if activity_level > scene_change_threshold:
861
  detection_stats["scene_changes"].append(frame_count/fps)
862
  # Mark scene change on frame
863
  cv2.putText(frame, "SCENE CHANGE",
864
  (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
865
 
866
  previous_frame_gray = current_frame_gray
867
 
868
- # Process frames with Vision API
869
- if frame_count % process_every_n_frames == 0:
870
  try:
871
- # Convert OpenCV frame to PIL Image for Vision API
872
- pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
873
 
874
- # Create vision image
875
- img_byte_arr = io.BytesIO()
876
- pil_img.save(img_byte_arr, format='PNG')
877
- content = img_byte_arr.getvalue()
878
- vision_image = vision.Image(content=content)
879
 
880
- # Apply analysis based on selected types with enhanced detail
881
- if "Objects" in analysis_types:
882
- objects = client.object_localization(image=vision_image)
883
- # Draw boxes around detected objects with enhanced info
884
- for obj in objects.localized_object_annotations:
885
- obj_name = obj.name
886
- # Update basic stats
887
- if obj_name in detection_stats["objects"]:
888
- detection_stats["objects"][obj_name] += 1
889
- else:
890
- detection_stats["objects"][obj_name] = 1
891
-
892
- # Enhanced object tracking
893
- timestamp = frame_count/fps
894
- if obj_name not in detection_stats["object_tracking"]:
895
- detection_stats["object_tracking"][obj_name] = {
896
- "first_seen": timestamp,
897
- "last_seen": timestamp,
898
- "frames_present": 1,
899
- "timestamps": [timestamp]
900
- }
901
  else:
902
- tracking = detection_stats["object_tracking"][obj_name]
903
- tracking["frames_present"] += 1
904
- tracking["last_seen"] = timestamp
905
- tracking["timestamps"].append(timestamp)
906
-
907
- # Calculate box coordinates
908
- box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
909
- for vertex in obj.bounding_poly.normalized_vertices]
910
- box = np.array(box, np.int32).reshape((-1, 1, 2))
911
 
912
- # Draw more noticeable box with thicker lines
913
- cv2.polylines(frame, [box], True, (0, 255, 0), 3)
914
 
915
- # Calculate box size for better placement of labels
916
- x_min = min([p[0][0] for p in box])
917
- y_min = min([p[0][1] for p in box])
918
- confidence = int(obj.score * 100)
919
-
920
- # Enhanced label with confidence and border - larger text for visibility
921
- label_text = f"{obj.name}: {confidence}%"
922
- text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
923
-
924
- # Larger background rectangle for text visibility
925
- cv2.rectangle(frame,
926
- (int(x_min), int(y_min) - text_size[1] - 10),
927
- (int(x_min) + text_size[0] + 10, int(y_min)),
928
- (0, 0, 0), -1)
929
-
930
- # Draw the label text with larger font
931
  cv2.putText(frame, label_text,
932
- (int(x_min) + 5, int(y_min) - 5),
933
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
 
 
 
934
 
935
- if "Face Detection" in analysis_types:
936
- faces = client.face_detection(image=vision_image)
937
- # Track statistics
938
- detection_stats["faces"] += len(faces.face_annotations)
 
 
 
939
 
940
- for face in faces.face_annotations:
941
- vertices = face.bounding_poly.vertices
942
- points = [(vertex.x, vertex.y) for vertex in vertices]
943
- # Draw face box with thicker lines
944
- pts = np.array(points, np.int32).reshape((-1, 1, 2))
945
- cv2.polylines(frame, [pts], True, (0, 0, 255), 3)
946
-
947
- # Enhanced face info
948
- emotions = []
949
- if face.joy_likelihood >= 3:
950
- emotions.append("Joy")
951
- if face.anger_likelihood >= 3:
952
- emotions.append("Anger")
953
- if face.surprise_likelihood >= 3:
954
- emotions.append("Surprise")
955
- if face.sorrow_likelihood >= 3:
956
- emotions.append("Sorrow")
957
-
958
- emotion_text = ", ".join(emotions) if emotions else "Neutral"
959
- x_min = min([p[0] for p in points])
960
- y_min = min([p[1] for p in points])
961
-
962
- # Add detailed emotion text
963
- cv2.putText(frame, emotion_text,
964
- (int(x_min), int(y_min) - 10),
965
- cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
966
 
967
- # Draw enhanced landmarks
968
- for landmark in face.landmarks:
969
- px = int(landmark.position.x)
970
- py = int(landmark.position.y)
971
- cv2.circle(frame, (px, py), 3, (255, 255, 0), -1) # Larger circles
972
 
973
  if "Text" in analysis_types:
974
  text = client.text_detection(image=vision_image)
 
975
  # Update stats
976
- if len(text.text_annotations) > 1:
977
  detection_stats["text_blocks"] += len(text.text_annotations) - 1
978
 
979
- # Add overall text summary to the frame
980
981
  full_text = text.text_annotations[0].description
982
  words = full_text.split()
983
  short_text = " ".join(words[:5])
984
  if len(words) > 5:
985
  short_text += "..."
986
-
987
- # Add text summary to top of frame with better visibility
988
- cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1)
989
  cv2.putText(frame, f"Text: {short_text}",
990
- (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
991
-
992
- # Draw text boxes with improved visibility
993
- for text_annot in text.text_annotations[1:]:
994
- box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
995
- pts = np.array(box, np.int32).reshape((-1, 1, 2))
996
- cv2.polylines(frame, [pts], True, (255, 0, 0), 2) # Thicker lines
997
 
998
- # Add Labels analysis for more detail
999
  if "Labels" in analysis_types:
1000
  labels = client.label_detection(image=vision_image, max_results=5)
1001
 
1002
- # Add labels to the frame with better visibility
1003
- y_pos = 120
1004
- cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1)
1005
- cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
1006
-
1007
- # Track stats and show labels
1008
  for i, label in enumerate(labels.label_annotations):
1009
- # Update stats
1010
  if label.description in detection_stats["labels"]:
1011
  detection_stats["labels"][label.description] += 1
1012
  else:
1013
  detection_stats["labels"][label.description] = 1
1014
 
1015
- # Display on frame with larger text
1016
- cv2.putText(frame, f"- {label.description}: {int(label.score*100)}%",
1017
- (15, y_pos + 20*(i+1)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
1018
-
1019
  except Exception as e:
1020
  # Show error on frame
1021
  cv2.putText(frame, f"API Error: {str(e)[:30]}",
1022
- (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
1023
-
1024
- # Add hint about slowed down speed
1025
- cv2.putText(frame, "Playback: 60% speed for better visualization",
1026
- (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
1027
 
1028
  # Write the frame to output video
1029
  out.write(frame)
@@ -1036,6 +1385,21 @@ def process_video_file(video_file, analysis_types):
1036
  progress_bar.empty()
1037
  status_text.empty()
1038
 
1039
  # Read the processed video as bytes for download
1040
  with open(output_path, 'rb') as file:
1041
  processed_video_bytes = file.read()
@@ -1044,24 +1408,13 @@ def process_video_file(video_file, analysis_types):
1044
  os.unlink(temp_video_path)
1045
  os.unlink(output_path)
1046
 
1047
- # Calculate additional statistics
1048
- for obj_name, tracking in detection_stats["object_tracking"].items():
1049
- # Calculate total screen time
1050
- tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2)
1051
- # Calculate average confidence if available
1052
- if "confidences" in tracking and tracking["confidences"]:
1053
- tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"])
1054
-
1055
- # Return enhanced results
1056
- results = {"detection_stats": detection_stats}
1057
-
1058
  # Store results in session state for chatbot context
1059
- st.session_state.analysis_results = results
1060
 
1061
  # Update vectorstore with new results
1062
- update_vectorstore_with_results(results)
1063
 
1064
- return processed_video_bytes, results
1065
 
1066
  except Exception as e:
1067
  # Clean up on error
@@ -1714,10 +2067,36 @@ def main():
1714
  st.error(f"Error processing {uploaded_file.name}: {str(e)}")
1715
 
1716
  elif selected == "Video Analysis":
1717
- st.markdown('<div class="subheader">Video Analysis</div>', unsafe_allow_html=True)
1718
 
1719
- # Analysis settings
1720
  st.sidebar.markdown("### Video Analysis Settings")
1721
  analysis_types = []
1722
  if st.sidebar.checkbox("Object Detection", value=True):
1723
  analysis_types.append("Objects")
@@ -1725,21 +2104,33 @@ def main():
1725
  analysis_types.append("Face Detection")
1726
  if st.sidebar.checkbox("Text Recognition"):
1727
  analysis_types.append("Text")
 
 
1728
 
1729
  st.sidebar.markdown("---")
1730
- st.sidebar.warning("⚠️ Video analysis may use a significant amount of API calls. Use responsibly.")
1731
 
1732
- # Upload Video mode only - removed real-time camera option
 
 
 
 
1733
  st.markdown("""
1734
- #### 📤 Video Analysis
1735
 
1736
- Upload a video file to analyze it with Google Cloud Vision AI.
1737
 
1738
  **Instructions:**
1739
- 1. Select the analysis types in the sidebar
1740
  2. Upload a video file (MP4, MOV, AVI)
1741
  3. Click "Process Video" to begin analysis
1742
- 4. Download the processed video when complete
1743
 
1744
  **Note:** Videos are limited to 10 seconds of processing to manage API usage.
1745
  """)
@@ -1759,10 +2150,15 @@ def main():
1759
  if not analysis_types:
1760
  st.warning("Please select at least one analysis type.")
1761
  else:
1762
- with st.spinner("Processing video (max 10 seconds)..."):
1763
  try:
1764
- # Process the video with enhanced detail
1765
- processed_video, results = process_video_file(uploaded_file, analysis_types)
1766
 
1767
  if processed_video:
1768
  # Offer download of processed video
@@ -1774,64 +2170,207 @@ def main():
1774
  mime="video/mp4"
1775
  )
1776
 
1777
- # Show detailed analysis results
1778
- st.markdown("### Detailed Analysis Results")
1779
 
1780
- # Display object detection summary
1781
- if "Objects" in analysis_types and results["detection_stats"]["objects"]:
1782
  st.markdown("#### 📦 Objects Detected")
1783
 
1784
- # Sort objects by frequency
1785
- sorted_objects = dict(sorted(results["detection_stats"]["objects"].items(),
1786
- key=lambda x: x[1], reverse=True))
1787
 
1788
- # Create bar chart for objects
1789
- if sorted_objects:
1790
- fig, ax = plt.subplots(figsize=(10, 5))
1791
- objects = list(sorted_objects.keys())
1792
- counts = list(sorted_objects.values())
1793
- ax.barh(objects, counts, color='skyblue')
1794
- ax.set_xlabel('Number of Detections')
1795
- ax.set_title('Objects Detected in Video')
1796
- st.pyplot(fig)
1797
 
1798
- # List with counts
1799
- col1, col2 = st.columns(2)
1800
  with col1:
1801
- st.markdown("**Top Objects:**")
1802
- for obj, count in list(sorted_objects.items())[:10]:
1803
- st.markdown(f"- {obj}: {count} occurrences")
1804
-
1805
- # Display face detection summary
1806
- if "Face Detection" in analysis_types and results["detection_stats"]["faces"] > 0:
1807
- st.markdown("#### 👤 Face Analysis")
1808
- st.markdown(f"Total faces detected: {results['detection_stats']['faces']}")
1809
 
1810
- # Display text detection summary
1811
- if "Text" in analysis_types and results["detection_stats"]["text_blocks"] > 0:
1812
- st.markdown("#### 📝 Text Analysis")
1813
- st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}")
1814
 
1815
- # Display label detection summary
1816
- if "Labels" in analysis_types and results["detection_stats"]["labels"]:
1817
- st.markdown("#### 🏷️ Scene Labels")
 
1818
 
1819
- # Sort labels by frequency
1820
- sorted_labels = dict(sorted(results["detection_stats"]["labels"].items(),
1821
- key=lambda x: x[1], reverse=True))
 
 
 
 
1822
 
1823
- # Create pie chart for top labels
1824
- if sorted_labels:
1825
- fig, ax = plt.subplots(figsize=(8, 8))
1826
- top_labels = dict(list(sorted_labels.items())[:7])
1827
- if len(sorted_labels) > 7:
1828
- other_count = sum(list(sorted_labels.values())[7:])
1829
- top_labels["Other"] = other_count
1830
 
1831
- ax.pie(top_labels.values(), labels=top_labels.keys(), autopct='%1.1f%%')
1832
- ax.set_title('Distribution of Scene Labels')
1833
- st.pyplot(fig)
1834
-
 
1835
  except Exception as e:
1836
  st.error(f"Error processing video: {str(e)}")
1837
 
@@ -2294,3 +2833,456 @@ def extract_video_frames(video_bytes, num_frames=5):
2294
  os.unlink(temp_video_path)
2295
 
2296
  return frames
 
36
  from langchain_community.document_loaders import TextLoader
37
  import re
38
  import base64
39
+ from numpy.lib.stride_tricks import as_strided
40
+ from object_tracker import ObjectTracker
41
 
42
  # Set page config
43
  st.set_page_config(
 
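The new code imports `ObjectTracker` from `object_tracker`, a module that is not part of this diff. Judging only from how it is called below (`register`, `update`, `draw_tracked_objects`, constructed with `tracker_type="CSRT"`), a minimal sketch of that interface, assuming OpenCV's contrib tracking API, could look like this:

```python
# Sketch of the assumed object_tracker.ObjectTracker interface (not in this diff).
import cv2


class ObjectTracker:
    """Multi-object tracker built on OpenCV's CSRT tracker (assumed API)."""

    def __init__(self, tracker_type="CSRT"):
        self.tracker_type = tracker_type
        self.trackers = {}   # object_id -> cv2 tracker instance
        self.labels = {}     # object_id -> class label
        self.next_id = 0

    def _create_tracker(self):
        # OpenCV moved the CSRT factory between cv2 and cv2.legacy across versions.
        if hasattr(cv2, "TrackerCSRT_create"):
            return cv2.TrackerCSRT_create()
        return cv2.legacy.TrackerCSRT_create()

    def register(self, frame, box, label):
        """Start tracking one object given its (x, y, w, h) box."""
        tracker = self._create_tracker()
        tracker.init(frame, tuple(int(v) for v in box))
        self.trackers[self.next_id] = tracker
        self.labels[self.next_id] = label
        self.next_id += 1

    def update(self, frame):
        """Advance all trackers; return {id: (x, y, w, h, label)} for live tracks."""
        results = {}
        for obj_id, tracker in list(self.trackers.items()):
            ok, box = tracker.update(frame)
            if ok:
                x, y, w, h = (int(v) for v in box)
                results[obj_id] = (x, y, w, h, self.labels[obj_id])
            else:
                # Drop objects the tracker has lost.
                del self.trackers[obj_id]
                del self.labels[obj_id]
        return results

    def draw_tracked_objects(self, frame, tracked_objects):
        """Draw a box and label for every tracked object and return the frame."""
        for obj_id, (x, y, w, h, label) in tracked_objects.items():
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 165, 0), 2)
            cv2.putText(frame, f"{label} #{obj_id}", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 165, 0), 2)
        return frame
```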
433
  return summary_img
434
 
435
  class VideoProcessor(VideoProcessorBase):
436
+ """Process video frames with hybrid local/cloud processing"""
437
 
438
+ def __init__(self, analysis_types: List[str], processing_mode="hybrid", stabilize=False,
439
+ edge_detection=None, segmentation=None, enable_tracking=False):
440
  self.analysis_types = analysis_types
441
+ self.processing_mode = processing_mode # "local", "cloud", or "hybrid"
442
+ self.stabilize = stabilize
443
+ self.edge_detection = edge_detection # None, "canny", "sobel", or "laplacian"
444
+ self.segmentation = segmentation # None, "watershed", or "grabcut"
445
+ self.enable_tracking = enable_tracking
446
+
447
  self.frame_counter = 0
448
+ self.cloud_process_interval = 10 # Process with Google Vision every 10 frames
449
  self.vision_client = client # Store client reference
450
  self.last_results = {} # Cache results between processed frames
451
  self.last_processed_time = time.time()
452
  self.processing_active = True
453
 
454
+ # Initialize motion tracking
455
+ self.prev_gray = None
456
+ self.motion_history = []
457
+ self.motion_threshold = 40.0 # Threshold for scene change detection
458
+ self.scene_changes = []
459
+
460
+ # Initialize local models if needed
461
+ if processing_mode in ["local", "hybrid"]:
462
+ self.yolo_net, self.yolo_classes, self.yolo_output_layers = load_yolo_model()
463
+ self.face_cascade = load_haar_cascades()
464
+
465
+ # Initialize object tracker if enabled
466
+ if self.enable_tracking:
467
+ self.object_tracker = ObjectTracker(tracker_type="CSRT")
468
+ self.tracking_initialized = False
469
+ self.tracked_objects = {}
470
+ # How often to reinitialize tracking with new detections (in frames)
471
+ self.detection_interval = 15
472
+
473
  def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
474
  img = frame.to_ndarray(format="bgr24")
475
  self.frame_counter += 1
476
 
477
  # Add status display on all frames
478
  cv2.putText(img,
479
+ f"Vision AI: {self.processing_mode.title()} Mode",
480
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
481
 
482
+ # Make a copy for processing that won't affect the original
483
+ processed_img = img.copy()
484
+
485
+ # Prepare grayscale image for motion tracking
486
+ current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
487
+ current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0)
488
+
489
+ # Stabilize frame if enabled
490
+ if self.stabilize and self.prev_gray is not None:
491
+ img = stabilize_frame(img, self.prev_gray, current_gray)
492
+ processed_img = img.copy()
493
+ # Update current_gray after stabilization
494
+ current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
495
+ current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0)
496
+
497
+ # Process motion if we have a previous frame
498
+ if self.prev_gray is not None:
499
+ # Calculate optical flow for motion detection
500
+ motion_level, motion_area, motion_mask, flow = calculate_optical_flow(
501
+ self.prev_gray, current_gray
502
+ )
503
+
504
+ # Store motion metrics
505
+ timestamp = time.time()
506
+ self.motion_history.append({
507
+ "timestamp": timestamp,
508
+ "frame": self.frame_counter,
509
+ "motion_level": motion_level,
510
+ "motion_area": motion_area * 100 # Convert to percentage
511
+ })
512
+
513
+ # Detect scene changes
514
+ if motion_level > self.motion_threshold:
515
+ self.scene_changes.append(self.frame_counter)
516
+ # Mark scene change on frame
517
+ cv2.putText(img, "SCENE CHANGE",
518
+ (img.shape[1] // 2 - 100, 50),
519
+ cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
520
+
521
+ # Visualize motion
522
+ motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET)
523
+ motion_overlay = cv2.resize(motion_overlay, (img.shape[1] // 4, img.shape[0] // 4))
524
+
525
+ # Add motion overlay to corner of frame
526
+ h, w = motion_overlay.shape[:2]
527
+ img[10:10+h, img.shape[1]-10-w:img.shape[1]-10] = motion_overlay
528
+
529
+ # Add motion level indicator
530
+ cv2.putText(img, f"Motion: {motion_level:.1f}",
531
+ (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
532
+
533
+ # Store current frame as previous for next iteration
534
+ self.prev_gray = current_gray
535
+
536
+ # Process with local models if in local or hybrid mode
537
+ detected_objects = []
538
+
539
+ if self.processing_mode in ["local", "hybrid"]:
540
+ # Object detection with YOLO
541
+ if "Objects" in self.analysis_types:
542
+ try:
543
+ objects = detect_objects_yolo(
544
+ processed_img, self.yolo_net, self.yolo_classes,
545
+ self.yolo_output_layers, confidence_threshold=0.4
546
+ )
547
+
548
+ # Update results cache
549
+ self.last_results["objects"] = objects
550
+
551
+ # Draw detected objects
552
+ for obj in objects:
553
+ x, y, w, h = obj["box"]
554
+ label = obj["label"]
555
+ confidence = obj["confidence"]
556
+
557
+ # Add to detected objects list for tracking
558
+ detected_objects.append((x, y, w, h, label))
559
+
560
+ # Draw box (skip if tracking is enabled, as tracker will draw boxes)
561
+ if not self.enable_tracking:
562
+ cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
563
+
564
+ # Add label with confidence
565
+ label_text = f"{label}: {int(confidence * 100)}%"
566
+ cv2.putText(img, label_text,
567
+ (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
568
+ except Exception as e:
569
+ cv2.putText(img, f"YOLO Error: {str(e)[:30]}",
570
+ (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
571
+
572
+ # Face detection with Haar cascades
573
+ if "Face Detection" in self.analysis_types:
574
+ try:
575
+ faces = detect_faces_haar(processed_img, self.face_cascade)
576
+
577
+ # Update results cache
578
+ self.last_results["faces"] = faces
579
+
580
+ # Add to detected objects list for tracking
581
+ for face in faces:
582
+ x, y, w, h = face["box"]
583
+ detected_objects.append((x, y, w, h, "Face"))
584
+
585
+ # Draw detected faces (skip if tracking is enabled)
586
+ if not self.enable_tracking:
587
+ for face in faces:
588
+ x, y, w, h = face["box"]
589
+
590
+ # Draw box
591
+ cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
592
+ except Exception as e:
593
+ cv2.putText(img, f"Face Detection Error: {str(e)[:30]}",
594
+ (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
595
+
596
+ # Handle object tracking if enabled
597
+ if self.enable_tracking:
598
+ try:
599
+ # Initialize tracking on first frame or periodically with new detections
600
+ if not self.tracking_initialized or self.frame_counter % self.detection_interval == 0:
601
+ # Reset if tracking is already initialized
602
+ if self.tracking_initialized:
603
+ self.object_tracker = ObjectTracker(tracker_type="CSRT")
604
+
605
+ # Register each detected object with the tracker
606
+ for x, y, w, h, label in detected_objects:
607
+ self.object_tracker.register(processed_img, (x, y, w, h), label)
608
+
609
+ self.tracking_initialized = True
610
+
611
+ # Update tracking on every frame
612
+ self.tracked_objects = self.object_tracker.update(processed_img)
613
+
614
+ # Draw tracked objects
615
+ img = self.object_tracker.draw_tracked_objects(img, self.tracked_objects)
616
+
617
+ # Add tracking status
618
+ cv2.putText(img, f"Tracking {len(self.tracked_objects)} objects",
619
+ (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2)
620
+ except Exception as e:
621
+ cv2.putText(img, f"Tracking Error: {str(e)[:30]}",
622
+ (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
623
+
624
+ # Process with Google Vision API periodically if in cloud or hybrid mode
625
  current_time = time.time()
626
+ should_process_cloud = (
627
+ self.processing_mode in ["cloud", "hybrid"] and
628
+ (self.frame_counter % self.cloud_process_interval == 0) and
629
+ (current_time - self.last_processed_time > 1.0) and # Max once per second
630
+ self.processing_active
631
+ )
632
+
633
+ if should_process_cloud:
634
  self.last_processed_time = current_time
635
 
636
  try:
637
  # Convert to PIL Image for Vision API
638
+ pil_img = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
639
 
640
  # Process with Vision API
641
  img_byte_arr = io.BytesIO()
 
644
  vision_image = vision.Image(content=content)
645
 
646
  # Update status text
647
+ cv2.putText(img, "Cloud Processing...", (10, 180),
648
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
649
 
650
  # Process according to selected analysis types
651
  if "Text" in self.analysis_types:
652
  text = self.vision_client.text_detection(image=vision_image)
653
  self.last_results["text"] = text.text_annotations
654
 
655
+ if "Labels" in self.analysis_types:
656
+ labels = self.vision_client.label_detection(image=vision_image, max_results=5)
657
+ self.last_results["labels"] = labels.label_annotations
658
+
659
+ # Only use Vision API for objects/faces if in cloud-only mode
660
+ if self.processing_mode == "cloud":
661
+ if "Objects" in self.analysis_types:
662
+ objects = self.vision_client.object_localization(image=vision_image)
663
+ self.last_results["objects"] = objects.localized_object_annotations
664
+
665
+ if "Face Detection" in self.analysis_types:
666
+ faces = self.vision_client.face_detection(image=vision_image)
667
+ self.last_results["faces"] = faces.face_annotations
668
+
669
  except Exception as e:
670
+ # Show error on frame
671
+ cv2.putText(img, f"API Error: {str(e)[:30]}",
672
+ (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
673
 
674
+ # Always draw the cached cloud results for smooth display
675
  try:
676
+ # Draw text detections from cloud
677
  if "text" in self.last_results and "Text" in self.analysis_types:
678
  if len(self.last_results["text"]) > 1: # Skip the first one (full text)
679
  for text_annot in self.last_results["text"][1:]:
 
681
  pts = np.array(box, np.int32).reshape((-1, 1, 2))
682
  cv2.polylines(img, [pts], True, (255, 0, 0), 1)
683
 
684
+ # Show full text summary
685
+ if self.last_results["text"]:
686
+ full_text = self.last_results["text"][0].description
687
+ words = full_text.split()
688
+ short_text = " ".join(words[:3])
689
+ if len(words) > 3:
690
+ short_text += "..."
691
+
692
+ # Display text at top of frame
693
+ cv2.putText(img, f"Text: {short_text}",
694
+ (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
695
+
696
+ # Draw labels from cloud
697
+ if "labels" in self.last_results and "Labels" in self.analysis_types:
698
+ y_pos = img.shape[0] - 50
699
+ for i, label in enumerate(self.last_results["labels"][:3]): # Show top 3 labels
700
+ label_text = f"Label: {label.description} ({int(label.score*100)}%)"
701
+ cv2.putText(img, label_text,
702
+ (img.shape[1] - 300, y_pos - i*20),
703
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
704
+
705
+ # Draw cloud-detected objects and faces only if in cloud-only mode
706
+ if self.processing_mode == "cloud" and not self.enable_tracking:
707
+ # Draw objects
708
+ if "objects" in self.last_results and "Objects" in self.analysis_types:
709
+ for obj in self.last_results["objects"]:
710
+ box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
711
+ for vertex in obj.bounding_poly.normalized_vertices]
712
+ box = np.array(box, np.int32).reshape((-1, 1, 2))
713
+ cv2.polylines(img, [box], True, (0, 255, 0), 2)
714
+ # Add label
715
+ cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
716
+ (int(box[0][0][0]), int(box[0][0][1]) - 10),
717
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
718
+
719
+ # Draw faces
720
+ if "faces" in self.last_results and "Face Detection" in self.analysis_types:
721
+ for face in self.last_results["faces"]:
722
+ vertices = face.bounding_poly.vertices
723
+ points = [(vertex.x, vertex.y) for vertex in vertices]
724
+ pts = np.array(points, np.int32).reshape((-1, 1, 2))
725
+ cv2.polylines(img, [pts], True, (0, 0, 255), 2)
726
+
727
+ # Draw landmarks
728
+ for landmark in face.landmarks:
729
+ px = int(landmark.position.x)
730
+ py = int(landmark.position.y)
731
+ cv2.circle(img, (px, py), 2, (255, 255, 0), -1)
732
+
733
  except Exception as e:
734
+ cv2.putText(img, f"Display Error: {str(e)[:30]}",
735
+ (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
736
+
737
+ # Apply edge detection if enabled
738
+ if self.edge_detection:
739
+ # Create edge detection visualization
740
+ edge_img = detect_edges(processed_img, method=self.edge_detection)
741
+
742
+ # Display edge detection mode
743
+ cv2.putText(img, f"Edge: {self.edge_detection.title()}",
744
+ (10, img.shape[0] - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
745
+
746
+ # Show edge detection in a corner (similar to motion overlay)
747
+ edge_small = cv2.resize(edge_img, (img.shape[1] // 4, img.shape[0] // 4))
748
+ h, w = edge_small.shape[:2]
749
+ img[10:10+h, 10:10+w] = edge_small
750
 
751
+ # Apply segmentation if enabled
752
+ if self.segmentation:
753
+ try:
754
+ # Create segmentation visualization
755
+ segmented_img, _ = segment_image(processed_img, method=self.segmentation)
756
+
757
+ # Display segmentation mode
758
+ cv2.putText(img, f"Segment: {self.segmentation.title()}",
759
+ (10, img.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
760
+
761
+ # Show segmentation in a corner opposite to edge detection or motion
762
+ seg_small = cv2.resize(segmented_img, (img.shape[1] // 4, img.shape[0] // 4))
763
+ h, w = seg_small.shape[:2]
764
+ img[10+h+10:10+h+10+h, 10:10+w] = seg_small
765
+ except Exception as e:
766
+ cv2.putText(img, f"Segmentation Error: {str(e)[:30]}",
767
+ (10, img.shape[0] - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
768
+
769
+ # Add processing mode and stabilization status
770
+ mode_text = f"Mode: {self.processing_mode.title()}"
771
+ features = []
772
+ if self.stabilize:
773
+ features.append("Stabilized")
774
+ if self.enable_tracking:
775
+ features.append("Tracking")
776
+ if features:
777
+ mode_text += f" | {', '.join(features)}"
778
+
779
+ cv2.putText(img, mode_text,
780
+ (10, img.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
781
 
782
  return av.VideoFrame.from_ndarray(img, format="bgr24")
783
 
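The local and hybrid paths above call `load_yolo_model()` and `detect_objects_yolo()`, which are not defined in this diff. A minimal sketch of what they could look like with OpenCV's DNN module and YOLOv4-tiny; the file names, 416×416 input size, and NMS threshold are assumptions:

```python
# Sketch of the assumed YOLO helpers (not in this diff).
import cv2
import numpy as np


def load_yolo_model(cfg_path="yolov4-tiny.cfg", weights_path="yolov4-tiny.weights",
                    names_path="coco.names"):
    net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path)
    with open(names_path) as f:
        classes = [line.strip() for line in f if line.strip()]
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]
    return net, classes, output_layers


def detect_objects_yolo(frame, net, classes, output_layers, confidence_threshold=0.4):
    """Run YOLOv4-tiny on a BGR frame; return [{'box', 'label', 'confidence'}, ...]."""
    height, width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    outputs = net.forward(output_layers)

    boxes, confidences, class_ids = [], [], []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence >= confidence_threshold:
                cx, cy, w, h = detection[0:4] * np.array([width, height, width, height])
                boxes.append([int(cx - w / 2), int(cy - h / 2), int(w), int(h)])
                confidences.append(confidence)
                class_ids.append(class_id)

    # Non-maximum suppression removes overlapping duplicate detections.
    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)
    results = []
    for i in np.array(indices).flatten():
        results.append({
            "box": tuple(boxes[int(i)]),
            "label": classes[class_ids[int(i)]],
            "confidence": confidences[int(i)],
        })
    return results
```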
 
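`calculate_optical_flow()` and `stabilize_frame()` are likewise referenced but not shown. A sketch under the assumption that motion uses dense Farneback flow and stabilization uses sparse feature tracking plus a partial affine warp; the scaling that makes `motion_level` comparable to the 40.0 scene-change threshold is a guess:

```python
# Sketch of the assumed motion/stabilization helpers (not in this diff).
import cv2
import numpy as np


def calculate_optical_flow(prev_gray, current_gray):
    """Dense Farneback flow; returns (motion_level, motion_area, motion_mask, flow)."""
    flow = cv2.calcOpticalFlowFarneback(prev_gray, current_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    motion_level = float(np.mean(magnitude)) * 10.0   # scaled so ~40 marks a scene change (assumed)
    motion_area = float(np.mean(magnitude > 1.0))     # fraction of pixels that moved
    motion_mask = cv2.normalize(magnitude, None, 0, 255,
                                cv2.NORM_MINMAX).astype(np.uint8)
    return motion_level, motion_area, motion_mask, flow


def stabilize_frame(frame, prev_gray, current_gray):
    """Estimate frame-to-frame camera shake and warp it out of the current frame."""
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200,
                                       qualityLevel=0.01, minDistance=30)
    if prev_pts is None:
        return frame
    curr_pts, status, _ = cv2.calcOpticalFlowPyrLK(prev_gray, current_gray, prev_pts, None)
    good_prev = prev_pts[status.flatten() == 1]
    good_curr = curr_pts[status.flatten() == 1]
    if len(good_prev) < 4:
        return frame
    matrix, _ = cv2.estimateAffinePartial2D(good_curr, good_prev)
    if matrix is None:
        return frame
    h, w = frame.shape[:2]
    return cv2.warpAffine(frame, matrix, (w, h))
```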
1003
 
1004
  return resources
1005
 
1006
+ def process_video_file(video_file, analysis_types, processing_mode="hybrid", stabilize=False,
1007
+ edge_detection=None, segmentation=None, enable_tracking=False):
1008
+ """Process an uploaded video file with hybrid Vision AI detection and analytics"""
1009
  # Create a temporary file to save the uploaded video
1010
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
1011
  temp_file.write(video_file.read())
 
1051
  fourcc = cv2.VideoWriter_fourcc(*'DIB ') # Uncompressed RGB
1052
  out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True)
1053
 
1054
+ # Process every Nth frame to reduce API calls
1055
+ cloud_process_interval = 10 # How often to use Google Vision API
1056
 
1057
  # Create a progress bar
1058
  progress_bar = st.progress(0)
 
1064
  "faces": 0,
1065
  "text_blocks": 0,
1066
  "labels": {},
1067
+ # Motion tracking
1068
+ "motion_data": [],
1069
+ "scene_changes": [],
1070
+ "avg_motion_level": 0,
1071
+ "processing_mode": processing_mode,
1072
+ "stabilized": stabilize
1073
  }
1074
 
1075
+ # Initialize object tracker if enabled
1076
+ if enable_tracking:
1077
+ object_tracker = ObjectTracker(tracker_type="CSRT")
1078
+ tracked_objects = {}
1079
+ detection_interval = 15 # How often to reinitialize tracking
1080
+
1081
+ # Load models based on processing mode
1082
+ if processing_mode in ["local", "hybrid"]:
1083
+ yolo_net, yolo_classes, yolo_output_layers = load_yolo_model()
1084
+ face_cascade = load_haar_cascades()
1085
+
1086
+ # For scene change detection and motion tracking
1087
  previous_frame_gray = None
1088
  scene_change_threshold = 40.0 # Threshold for scene change detection
1089
 
 
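`load_haar_cascades()` and `detect_faces_haar()` are also assumed helpers. A sketch using the frontal-face cascade that ships with OpenCV, returning boxes in the `{"box": (x, y, w, h)}` shape the code above consumes:

```python
# Sketch of the assumed Haar-cascade helpers (not in this diff).
import cv2


def load_haar_cascades():
    cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    return cv2.CascadeClassifier(cascade_path)


def detect_faces_haar(frame, face_cascade):
    """Detect faces in a BGR frame; returns [{'box': (x, y, w, h)}, ...]."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)  # improves detection under uneven lighting
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1,
                                          minNeighbors=5, minSize=(30, 30))
    return [{"box": (int(x), int(y), int(w), int(h))} for (x, y, w, h) in faces]
```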
1105
  cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
1106
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
1107
 
1108
+ # Prepare grayscale image for motion analysis
1109
  current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
1110
  current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
1111
+
1112
+ # Stabilize frame if enabled
1113
+ if stabilize and previous_frame_gray is not None:
1114
+ frame = stabilize_frame(frame, previous_frame_gray, current_frame_gray)
1115
+ # Update grayscale after stabilization
1116
+ current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
1117
+ current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
1118
+
1119
+ # Motion detection and scene change detection
1120
  if previous_frame_gray is not None:
1121
+ # Calculate optical flow for motion detection
1122
+ motion_level, motion_area, motion_mask, flow = calculate_optical_flow(
1123
+ previous_frame_gray, current_frame_gray
1124
+ )
1125
+
1126
+ # Store motion metrics
1127
+ detection_stats["motion_data"].append({
1128
+ "time": frame_count/fps,
1129
+ "motion_level": motion_level,
1130
+ "motion_area": motion_area * 100 # Convert to percentage
1131
+ })
1132
 
1133
  # Scene change detection
1134
+ if motion_level > scene_change_threshold:
1135
  detection_stats["scene_changes"].append(frame_count/fps)
1136
  # Mark scene change on frame
1137
  cv2.putText(frame, "SCENE CHANGE",
1138
  (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
1139
+
1140
+ # Visualize motion
1141
+ motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET)
1142
+ motion_overlay = cv2.resize(motion_overlay, (width // 4, height // 4))
1143
+
1144
+ # Add motion overlay to corner of frame
1145
+ h, w = motion_overlay.shape[:2]
1146
+ frame[10:10+h, width-10-w:width-10] = motion_overlay
1147
+
1148
+ # Add motion indicator
1149
+ cv2.putText(frame, f"Motion: {motion_level:.1f}",
1150
+ (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
1151
 
1152
  previous_frame_gray = current_frame_gray
1153
 
1154
+ # Apply edge detection if enabled
1155
+ if edge_detection:
1156
+ # Create edge detection visualization in a corner
1157
+ edge_img = detect_edges(frame, method=edge_detection)
1158
+
1159
+ # Display edge detection mode
1160
+ cv2.putText(frame, f"Edge: {edge_detection.title()}",
1161
+ (10, height - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
1162
+
1163
+ # Show edge detection in a corner
1164
+ edge_small = cv2.resize(edge_img, (width // 4, height // 4))
1165
+ h, w = edge_small.shape[:2]
1166
+ frame[10:10+h, 10:10+w] = edge_small
1167
+
1168
+ # Apply segmentation if enabled
1169
+ if segmentation:
1170
  try:
1171
+ # Create segmentation visualization
1172
+ segmented_img, _ = segment_image(frame, method=segmentation)
1173
 
1174
+ # Display segmentation mode
1175
+ cv2.putText(frame, f"Segment: {segmentation.title()}",
1176
+ (10, height - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
 
 
1177
 
1178
+ # Show segmentation in another corner
1179
+ seg_small = cv2.resize(segmented_img, (width // 4, height // 4))
1180
+ h, w = seg_small.shape[:2]
1181
+ frame[10+h+10:10+h+10+h, 10:10+w] = seg_small
1182
+ except Exception as e:
1183
+ cv2.putText(frame, f"Segmentation Error: {str(e)[:30]}",
1184
+ (10, height - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
1185
+
1186
+ # Add processing mode indicator
1187
+ mode_text = f"Mode: {processing_mode.title()}"
1188
+ if stabilize:
1189
+ mode_text += " | Stabilized"
1190
+ cv2.putText(frame, mode_text,
1191
+ (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
1192
+
1193
+ # Handle object tracking if enabled
1194
+ detected_objects = []
1195
+
1196
+ # Local processing (YOLOv4-tiny and Haar cascades)
1197
+ if processing_mode in ["local", "hybrid"]:
1198
+ # Object detection with YOLO
1199
+ if "Objects" in analysis_types:
1200
+ objects = detect_objects_yolo(
1201
+ frame, yolo_net, yolo_classes, yolo_output_layers
1202
+ )
1203
+
1204
+ # Collect objects for tracking
1205
+ for obj in objects:
1206
+ x, y, w, h = obj["box"]
1207
+ label = obj["label"]
1208
+ confidence = obj["confidence"]
1209
+
1210
+ # Add to detected objects list for tracking
1211
+ detected_objects.append((x, y, w, h, label))
1212
+
1213
+ # Update statistics and draw boxes (if tracking disabled)
1214
+ if not enable_tracking:
1215
+ if label in detection_stats["objects"]:
1216
+ detection_stats["objects"][label] += 1
1217
  else:
1218
+ detection_stats["objects"][label] = 1
1219
 
1220
+ # Draw box
1221
+ cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
1222
 
1223
+ # Add label with confidence
1224
+ label_text = f"{label}: {int(confidence * 100)}%"
1225
  cv2.putText(frame, label_text,
1226
+ (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
1227
+
1228
+ # Face detection with Haar cascades
1229
+ if "Face Detection" in analysis_types:
1230
+ faces = detect_faces_haar(frame, face_cascade)
1231
 
1232
+ # Update faces count and add to detected objects for tracking
1233
+ if not enable_tracking:
1234
+ detection_stats["faces"] += len(faces)
1235
+
1236
+ for face in faces:
1237
+ x, y, w, h = face["box"]
1238
+ detected_objects.append((x, y, w, h, "Face"))
1239
 
1240
+ # Draw boxes only if tracking is disabled
1241
+ if not enable_tracking:
1242
+ # Draw box
1243
+ cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
1244
+
1245
+ # Add tracking code
1246
+ if enable_tracking:
1247
+ try:
1248
+ # Initialize tracking on first frame or periodically
1249
+ if frame_count == 1 or frame_count % detection_interval == 0:
1250
+ # Reset tracker periodically
1251
+ if frame_count > 1:
1252
+ object_tracker = ObjectTracker(tracker_type="CSRT")
1253
+
1254
+ # Register each detected object
1255
+ for x, y, w, h, label in detected_objects:
1256
+ object_tracker.register(frame, (x, y, w, h), label)
1257
+
1258
+ # Update tracking on every frame
1259
+ tracked_objects = object_tracker.update(frame)
1260
+
1261
+ # Draw tracked objects
1262
+ frame = object_tracker.draw_tracked_objects(frame, tracked_objects)
1263
+
1264
+ # Add tracking status
1265
+ cv2.putText(frame, f"Tracking {len(tracked_objects)} objects",
1266
+ (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2)
1267
+
1268
+ # Count object types in tracking
1269
+ for _, (_, _, _, _, label) in tracked_objects.items():
1270
+ if label in detection_stats["objects"]:
1271
+ detection_stats["objects"][label] += 1
1272
+ else:
1273
+ detection_stats["objects"][label] = 1
1274
 
1275
+ # Update faces count if any faces are being tracked
1276
+ face_count = sum(1 for _, (_, _, _, _, label) in tracked_objects.items() if label == "Face")
1277
+ detection_stats["faces"] += face_count
1278
+ except Exception as e:
1279
+ cv2.putText(frame, f"Tracking Error: {str(e)[:30]}",
1280
+ (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
1281
+
1282
+ # Cloud processing with Google Vision API (less frequent)
1283
+ if processing_mode in ["cloud", "hybrid"] and frame_count % cloud_process_interval == 0:
1284
+ try:
1285
+ # Convert to PIL Image for Vision API
1286
+ pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
1287
 
1288
+ # Create vision image
1289
+ img_byte_arr = io.BytesIO()
1290
+ pil_img.save(img_byte_arr, format='PNG')
1291
+ content = img_byte_arr.getvalue()
1292
+ vision_image = vision.Image(content=content)
1293
+
1294
+ # Add cloud processing indicator
1295
+ cv2.putText(frame, "Cloud Processing", (width - 200, 30),
1296
+ cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
1297
+
1298
+ # Text detection
1299
  if "Text" in analysis_types:
1300
  text = client.text_detection(image=vision_image)
1301
+
1302
  # Update stats
1303
+ if text.text_annotations:
1304
  detection_stats["text_blocks"] += len(text.text_annotations) - 1
1305
 
1306
+ # Draw text boxes
1307
+ for text_annot in text.text_annotations[1:]:
1308
+ box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
1309
+ pts = np.array(box, np.int32).reshape((-1, 1, 2))
1310
+ cv2.polylines(frame, [pts], True, (255, 0, 0), 2)
1311
+
1312
+ # Show text summary
1313
  full_text = text.text_annotations[0].description
1314
  words = full_text.split()
1315
  short_text = " ".join(words[:5])
1316
  if len(words) > 5:
1317
  short_text += "..."
1318
+
 
 
1319
  cv2.putText(frame, f"Text: {short_text}",
1320
+ (10, height - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
1321
 
1322
+ # Label detection
1323
  if "Labels" in analysis_types:
1324
  labels = client.label_detection(image=vision_image, max_results=5)
1325
 
1326
+ # Update stats and show labels
1327
  for i, label in enumerate(labels.label_annotations):
 
1328
  if label.description in detection_stats["labels"]:
1329
  detection_stats["labels"][label.description] += 1
1330
  else:
1331
  detection_stats["labels"][label.description] = 1
1332
+
1333
+ # Display on frame
1334
+ cv2.putText(frame, f"Label: {label.description}",
1335
+ (width - 200, 60 + i*30),
1336
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
1337
+
1338
+ # Only do object/face detection with Vision API in cloud-only mode
1339
+ if processing_mode == "cloud" and not enable_tracking:
1340
+ if "Objects" in analysis_types:
1341
+ objects = client.object_localization(image=vision_image)
1342
+
1343
+ for obj in objects.localized_object_annotations:
1344
+ # Update stats
1345
+ if obj.name in detection_stats["objects"]:
1346
+ detection_stats["objects"][obj.name] += 1
1347
+ else:
1348
+ detection_stats["objects"][obj.name] = 1
1349
 
1350
+ # Draw box
1351
+ box = [(vertex.x * width, vertex.y * height)
1352
+ for vertex in obj.bounding_poly.normalized_vertices]
1353
+ box = np.array(box, np.int32).reshape((-1, 1, 2))
1354
+ cv2.polylines(frame, [box], True, (0, 255, 0), 2)
1355
+
1356
+ # Add label
1357
+ x_min = min([p[0][0] for p in box])
1358
+ y_min = min([p[0][1] for p in box])
1359
+ cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%",
1360
+ (int(x_min), int(y_min) - 10),
1361
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
1362
+
1363
+ if "Face Detection" in analysis_types:
1364
+ faces = client.face_detection(image=vision_image)
1365
+ detection_stats["faces"] += len(faces.face_annotations)
1366
+
1367
+ for face in faces.face_annotations:
1368
+ vertices = face.bounding_poly.vertices
1369
+ points = [(vertex.x, vertex.y) for vertex in vertices]
1370
+ pts = np.array(points, np.int32).reshape((-1, 1, 2))
1371
+ cv2.polylines(frame, [pts], True, (0, 0, 255), 2)
1372
  except Exception as e:
1373
  # Show error on frame
1374
  cv2.putText(frame, f"API Error: {str(e)[:30]}",
1375
+ (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
 
 
 
 
1376
 
1377
  # Write the frame to output video
1378
  out.write(frame)
 
1385
  progress_bar.empty()
1386
  status_text.empty()
1387
 
1388
+ # Calculate additional statistics
1389
+ if detection_stats["motion_data"]:
1390
+ detection_stats["avg_motion_level"] = sum(item["motion_level"] for item in detection_stats["motion_data"]) / len(detection_stats["motion_data"])
1391
+
1392
+ # Update the detection_stats to include the new features
1393
+ detection_stats.update({
1394
+ "edge_detection": edge_detection,
1395
+ "segmentation": segmentation,
1396
+ "tracking": {
1397
+ "enabled": enable_tracking,
1398
+ "method": "CSRT" if enable_tracking else None,
1399
+ "objects_tracked": len(tracked_objects) if enable_tracking else 0
1400
+ }
1401
+ })
1402
+
1403
  # Read the processed video as bytes for download
1404
  with open(output_path, 'rb') as file:
1405
  processed_video_bytes = file.read()
 
1408
  os.unlink(temp_video_path)
1409
  os.unlink(output_path)
1410
 
1411
  # Store results in session state for chatbot context
1412
+ st.session_state.analysis_results = {"detection_stats": detection_stats}
1413
 
1414
  # Update vectorstore with new results
1415
+ update_vectorstore_with_results({"detection_stats": detection_stats})
1416
 
1417
+ return processed_video_bytes, {"detection_stats": detection_stats}
1418
 
1419
  except Exception as e:
1420
  # Clean up on error
 
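The optional `edge_detection` and `segmentation` overlays used above rely on `detect_edges()` and `segment_image()`, which this diff does not define. A sketch of plausible implementations; the only visible constraint is that both return 3-channel images that can be pasted into a corner of the BGR frame:

```python
# Sketch of the assumed edge-detection and segmentation helpers (not in this diff).
import cv2
import numpy as np


def detect_edges(img, method="canny"):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if method == "canny":
        edges = cv2.Canny(gray, 100, 200)
    elif method == "sobel":
        sx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        edges = cv2.convertScaleAbs(cv2.magnitude(sx, sy))
    else:  # "laplacian"
        edges = cv2.convertScaleAbs(cv2.Laplacian(gray, cv2.CV_64F))
    return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)


def segment_image(img, method="watershed"):
    """Return (segmented visualization, marker/mask array)."""
    if method == "grabcut":
        mask = np.zeros(img.shape[:2], np.uint8)
        bgd, fgd = np.zeros((1, 65), np.float64), np.zeros((1, 65), np.float64)
        h, w = img.shape[:2]
        rect = (w // 10, h // 10, w * 8 // 10, h * 8 // 10)  # assumes subject near center
        cv2.grabCut(img, mask, rect, bgd, fgd, 3, cv2.GC_INIT_WITH_RECT)
        fg = np.where((mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD), 1, 0).astype(np.uint8)
        return img * fg[:, :, np.newaxis], fg

    # Watershed: threshold, estimate sure background/foreground, then flood.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    kernel = np.ones((3, 3), np.uint8)
    sure_bg = cv2.dilate(thresh, kernel, iterations=3)
    dist = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(dist, 0.5 * dist.max(), 255, 0)
    sure_fg = sure_fg.astype(np.uint8)
    unknown = cv2.subtract(sure_bg, sure_fg)
    _, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0
    segmented = img.copy()
    markers = cv2.watershed(segmented, markers)
    segmented[markers == -1] = (0, 0, 255)  # outline region boundaries in red
    return segmented, markers
```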
2067
  st.error(f"Error processing {uploaded_file.name}: {str(e)}")
2068
 
2069
  elif selected == "Video Analysis":
2070
+ st.markdown('<div class="subheader">Video Analysis with Hybrid Processing</div>', unsafe_allow_html=True)
2071
 
2072
+ # Enhanced analysis settings
2073
  st.sidebar.markdown("### Video Analysis Settings")
2074
+
2075
+ # Add processing mode selection
2076
+ processing_mode = st.sidebar.radio(
2077
+ "Processing Mode",
2078
+ ["hybrid", "local", "cloud"],
2079
+ format_func=lambda x: {
2080
+ "hybrid": "Hybrid (Local + Cloud) - Recommended",
2081
+ "local": "Local Only (Faster, Less Accurate)",
2082
+ "cloud": "Cloud Only (Slower, More Accurate)"
2083
+ }[x],
2084
+ index=0 # Default to hybrid
2085
+ )
2086
+
2087
+ # Show appropriate explanation based on selected mode
2088
+ if processing_mode == "hybrid":
2089
+ st.sidebar.info("Hybrid mode uses local processing for real-time tasks and Google Vision for detailed analysis.")
2090
+ elif processing_mode == "local":
2091
+ st.sidebar.info("Local mode runs entirely on your device using YOLOv4-tiny for object detection and Haar cascades for faces.")
2092
+ else: # cloud
2093
+ st.sidebar.info("Cloud mode sends all frames to Google Vision API for high-accuracy analysis.")
2094
+
2095
+ # Add stabilization toggle
2096
+ stabilize = st.sidebar.checkbox("Enable Video Stabilization", value=False,
2097
+ help="Reduces camera shake using optical flow")
2098
+
2099
+ # Analysis type selection
2100
  analysis_types = []
2101
  if st.sidebar.checkbox("Object Detection", value=True):
2102
  analysis_types.append("Objects")
 
2104
  analysis_types.append("Face Detection")
2105
  if st.sidebar.checkbox("Text Recognition"):
2106
  analysis_types.append("Text")
2107
+ if st.sidebar.checkbox("Label Detection"):
2108
+ analysis_types.append("Labels")
2109
 
2110
  st.sidebar.markdown("---")
 
2111
 
2112
+ # Add info about processing limits and usage
2113
+ if processing_mode in ["cloud", "hybrid"]:
2114
+ st.sidebar.warning("⚠️ Cloud analysis may use a significant amount of API calls. Use responsibly.")
2115
+
2116
+ # Main content
2117
  st.markdown("""
2118
+ #### 📤 Enhanced Video Analysis
2119
 
2120
+ Upload a video file to analyze it with hybrid AI processing.
2121
+
2122
+ **Features:**
2123
+ - **Local Processing**: Fast object & face detection using YOLOv4-tiny and Haar cascades
2124
+ - **Cloud Processing**: High-accuracy text recognition and labels with Google Vision AI
2125
+ - **Motion Analysis**: Track movement patterns with optical flow
2126
+ - **Video Stabilization**: Reduce camera shake (optional)
2127
+ - **Scene Changes**: Automatically detect major scene transitions
2128
 
2129
  **Instructions:**
2130
+ 1. Select processing mode and analysis types in the sidebar
2131
  2. Upload a video file (MP4, MOV, AVI)
2132
  3. Click "Process Video" to begin analysis
2133
+ 4. Explore the enhanced analytics and download the processed video
2134
 
2135
  **Note:** Videos are limited to 10 seconds of processing to manage API usage.
2136
  """)
 
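For reference, a hypothetical call that exercises every new parameter of `process_video_file`; the UI in this commit only passes `processing_mode` and `stabilize`, so `edge_detection`, `segmentation`, and `enable_tracking` fall back to their defaults:

```python
# Illustrative only: the full extended signature introduced by this commit.
processed_video, results = process_video_file(
    uploaded_file,                      # a Streamlit UploadedFile
    ["Objects", "Face Detection", "Text", "Labels"],
    processing_mode="hybrid",           # "local", "cloud", or "hybrid"
    stabilize=True,
    edge_detection="canny",             # or "sobel" / "laplacian"
    segmentation="watershed",           # or "grabcut"
    enable_tracking=True,
)
detection_stats = results["detection_stats"]
```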
2150
  if not analysis_types:
2151
  st.warning("Please select at least one analysis type.")
2152
  else:
2153
+ with st.spinner(f"Processing video in {processing_mode} mode (max 10 seconds)..."):
2154
  try:
2155
+ # Process the video with hybrid processing
2156
+ processed_video, results = process_video_file(
2157
+ uploaded_file,
2158
+ analysis_types,
2159
+ processing_mode=processing_mode,
2160
+ stabilize=stabilize
2161
+ )
2162
 
2163
  if processed_video:
2164
  # Offer download of processed video
 
2170
  mime="video/mp4"
2171
  )
2172
 
2173
+ # Enhanced analytics display
2174
+ detection_stats = results["detection_stats"]
2175
 
2176
+ st.markdown("### Enhanced Video Analytics")
2177
+
2178
+ # Display processing mode info
2179
+ st.info(f"Processing mode: **{detection_stats['processing_mode'].title()}**" +
2180
+ (", with video stabilization" if detection_stats['stabilized'] else ""))
2181
+
2182
+ # Create tabs for different analytics
2183
+ tab1, tab2, tab3, tab4 = st.tabs([
2184
+ "Object Detection",
2185
+ "Motion Analysis",
2186
+ "Scene Changes",
2187
+ "Text & Labels"
2188
+ ])
2189
+
2190
+ with tab1:
2191
  st.markdown("#### 📦 Objects Detected")
2192
 
2193
+ if detection_stats["objects"]:
2194
+ # Sort objects by frequency
2195
+ sorted_objects = dict(sorted(detection_stats["objects"].items(),
2196
+ key=lambda x: x[1], reverse=True))
2197
+
2198
+ # Create bar chart for objects
2199
+ if sorted_objects:
2200
+ fig = px.bar(
2201
+ x=list(sorted_objects.keys()),
2202
+ y=list(sorted_objects.values()),
2203
+ labels={"x": "Object Type", "y": "Frequency"},
2204
+ title="Objects Detected in Video",
2205
+ color=list(sorted_objects.values()),
2206
+ color_continuous_scale="Viridis"
2207
+ )
2208
+ st.plotly_chart(fig, use_container_width=True)
2209
+
2210
+ # Object statistics
2211
+ st.markdown("##### Object Detection Statistics")
2212
+ total_objects = sum(sorted_objects.values())
2213
+ unique_objects = len(sorted_objects)
2214
+
2215
+ col1, col2, col3 = st.columns(3)
2216
+ with col1:
2217
+ st.metric("Total Detections", total_objects)
2218
+ with col2:
2219
+ st.metric("Unique Objects", unique_objects)
2220
+ with col3:
2221
+ if "faces" in detection_stats:
2222
+ st.metric("Faces Detected", detection_stats["faces"])
2223
+
2224
+ # List with counts
2225
+ st.markdown("##### Top Objects")
2226
+ for obj, count in list(sorted_objects.items())[:10]:
2227
+ st.markdown(f"- **{obj}**: {count} occurrences")
2228
+ else:
2229
+ st.info("No objects detected in the video.")
2230
+
2231
+ with tab2:
2232
+ st.markdown("#### 🔄 Motion Analysis")
2233
 
2234
+ if detection_stats["motion_data"]:
2235
+ # Create a DataFrame for the motion data
2236
2237
 
2238
+ # Plot motion level over time
2239
+ st.markdown("##### Motion Intensity Over Time")
2240
+ fig = px.line(
2241
+ motion_df,
2242
+ x="time",
2243
+ y="motion_level",
2244
+ labels={"time": "Time (seconds)", "motion_level": "Motion Intensity"},
2245
+ title="Motion Intensity Throughout Video"
2246
+ )
2247
+ # Add a horizontal line for scene change threshold
2248
+ fig.add_hline(
2249
+ y=40.0,
2250
+ line_dash="dash",
2251
+ line_color="red",
2252
+ annotation_text="Scene Change Threshold"
2253
+ )
2254
+ st.plotly_chart(fig, use_container_width=True)
2255
+
2256
+ # Motion area percentage
2257
+ st.markdown("##### Motion Area Percentage")
2258
+ fig = px.area(
2259
+ motion_df,
2260
+ x="time",
2261
+ y="motion_area",
2262
+ labels={"time": "Time (seconds)", "motion_area": "% of Frame with Motion"},
2263
+ title="Percentage of Frame with Detected Motion"
2264
+ )
2265
+ st.plotly_chart(fig, use_container_width=True)
2266
+
2267
+ # Motion statistics
2268
+ st.markdown("##### Motion Statistics")
2269
+ col1, col2, col3 = st.columns(3)
2270
  with col1:
2271
+ st.metric(
2272
+ "Average Motion",
2273
+ f"{detection_stats['avg_motion_level']:.2f}"
2274
+ )
2275
+ with col2:
2276
+ st.metric(
2277
+ "Peak Motion",
2278
+ f"{max(item['motion_level'] for item in detection_stats['motion_data']):.2f}"
2279
+ )
2280
+ with col3:
2281
+ st.metric(
2282
+ "Motion Variability",
2283
+ f"{np.std([item['motion_level'] for item in detection_stats['motion_data']]):.2f}"
2284
+ )
2285
+ else:
2286
+ st.info("No motion data collected for this video.")
2287
 
+                 with tab3:
+                     st.markdown("#### 🎬 Scene Changes")
+
+                     if detection_stats["scene_changes"]:
+                         # Create a timeline of scene changes
+                         st.markdown("##### Timeline of Detected Scene Changes")
+
+                         # Create a DataFrame with scene change markers
+                         timeline_df = pd.DataFrame({
+                             "time": detection_stats["scene_changes"],
+                             "event": ["Scene Change"] * len(detection_stats["scene_changes"])
+                         })
+
+                         # Plot the timeline
+                         fig = px.scatter(
+                             timeline_df,
+                             x="time",
+                             y="event",
+                             labels={"time": "Time (seconds)"},
+                             title="Scene Change Timeline",
+                             size=[10] * len(timeline_df),
+                             color_discrete_sequence=["red"]
+                         )
+                         # Add vertical lines for each scene change
+                         # (change_time avoids shadowing the imported time module)
+                         for change_time in detection_stats["scene_changes"]:
+                             fig.add_vline(x=change_time, line_dash="solid", line_color="rgba(255,0,0,0.3)")
+
+                         # Adjust the y-axis
+                         fig.update_yaxes(showticklabels=False)
+
+                         # Show the plot
+                         st.plotly_chart(fig, use_container_width=True)
+
+                         # List scene changes
+                         st.markdown("##### Scene Changes Detected At:")
+                         for i, change_time in enumerate(sorted(detection_stats["scene_changes"])):
+                             st.markdown(f"**Scene {i+1}**: {change_time:.2f} seconds")
+
+                         # Scene statistics
+                         st.markdown("##### Scene Statistics")
+                         col1, col2 = st.columns(2)
+                         with col1:
+                             st.metric("Number of Scenes", len(detection_stats["scene_changes"]) + 1)
+                         with col2:
+                             if len(detection_stats["scene_changes"]) > 0:
+                                 # Based on the 10-second processing cap noted above
+                                 avg_scene_duration = 10.0 / (len(detection_stats["scene_changes"]) + 1)
+                                 st.metric("Average Scene Duration", f"{avg_scene_duration:.2f}s")
+                     else:
+                         st.info("No scene changes detected in this video.")
 
+                 with tab4:
+                     st.markdown("#### 📝 Text & Labels")
+
+                     col1, col2 = st.columns(2)
 
+                     with col1:
+                         st.markdown("##### Text Detection")
+                         if detection_stats["text_blocks"] > 0:
+                             st.metric("Text Blocks Detected", detection_stats["text_blocks"])
+                             st.info("Text recognition powered by Google Cloud Vision AI")
+                         else:
+                             st.info("No text detected in the video.")
 
+                     with col2:
+                         st.markdown("##### Scene Labels")
+                         if detection_stats["labels"]:
+                             # Sort labels by frequency
+                             sorted_labels = dict(sorted(detection_stats["labels"].items(),
+                                                         key=lambda x: x[1], reverse=True))
 
+                             # Create pie chart for top labels
+                             fig = px.pie(
+                                 names=list(sorted_labels.keys())[:7],
+                                 values=list(sorted_labels.values())[:7],
+                                 title="Distribution of Scene Labels",
+                                 hole=0.3
+                             )
+                             st.plotly_chart(fig, use_container_width=True)
+
+                             # List labels
+                             st.markdown("**Top Labels:**")
+                             for label, count in list(sorted_labels.items())[:7]:
+                                 st.markdown(f"- {label}: {count} occurrences")
+                         else:
+                             st.info("No labels detected in the video.")
+
          except Exception as e:
              st.error(f"Error processing video: {str(e)}")
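
For readers tracing the analytics code above, here is a sketch of the `detection_stats` dictionary those tabs consume. The real structure is built by `process_video_file` elsewhere in this file; the keys below are simply the ones accessed above, and the sample values are illustrative only.

```python
# Illustrative shape only -- real values come from process_video_file().
detection_stats = {
    "processing_mode": "hybrid",           # str, shown in the info banner
    "stabilized": True,                    # bool
    "objects": {"person": 42, "car": 7},   # label -> detection count (tab 1)
    "faces": 3,                            # int, optional key
    "motion_data": [                       # one entry per analyzed frame (tab 2)
        {"time": 0.2, "motion_level": 12.5, "motion_area": 0.08},
    ],
    "avg_motion_level": 10.1,
    "scene_changes": [2.4, 6.8],           # seconds (tab 3)
    "text_blocks": 5,                      # tab 4, left column
    "labels": {"outdoor": 12, "street": 9} # tab 4, right column
}
```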
 
 
      os.unlink(temp_video_path)
 
      return frames
+
+ def load_yolo_model():
+     """Load YOLOv4-tiny model for object detection"""
+     import urllib.request  # used for the one-time downloads below
+
+     # Create directory for models if it doesn't exist
+     models_dir = Path("models")
+     models_dir.mkdir(exist_ok=True)
+
+     # Paths for YOLO files
+     weights_path = models_dir / "yolov4-tiny.weights"
+     cfg_path = models_dir / "yolov4-tiny.cfg"
+     names_path = models_dir / "coco.names"
+
+     # Download YOLO files if they don't exist
+     if not weights_path.exists():
+         st.info("Downloading YOLOv4-tiny weights (first time only)...")
+         urllib.request.urlretrieve(
+             "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights",
+             str(weights_path)
+         )
+
+     if not cfg_path.exists():
+         st.info("Downloading YOLOv4-tiny configuration (first time only)...")
+         urllib.request.urlretrieve(
+             "https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg",
+             str(cfg_path)
+         )
+
+     if not names_path.exists():
+         st.info("Downloading COCO class names (first time only)...")
+         urllib.request.urlretrieve(
+             "https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names",
+             str(names_path)
+         )
+
+     # Load YOLO model
+     net = cv2.dnn.readNet(str(weights_path), str(cfg_path))
+
+     # Load class names
+     with open(str(names_path), "r") as f:
+         classes = [line.strip() for line in f.readlines()]
+
+     # Get output layer names
+     layer_names = net.getLayerNames()
+     try:
+         # OpenCV 4.5.4+ returns a flat array of indices
+         output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
+     except (IndexError, TypeError):
+         # Older OpenCV versions return one-element arrays
+         output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
+
+     return net, classes, output_layers
+
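
A minimal usage sketch for the loader above, assuming the downloads succeed. The blank frame is a stand-in for a decoded BGR video frame; in the app the returned handles are consumed by `detect_objects_yolo` below.

```python
import cv2
import numpy as np

net, classes, output_layers = load_yolo_model()

# Stand-in frame; in practice this is a decoded video frame (BGR).
frame = np.zeros((480, 640, 3), dtype=np.uint8)

blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
outputs = net.forward(output_layers)  # one array per YOLO output layer
print(len(classes), "classes;", len(outputs), "output layers")
```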
+ def load_haar_cascades():
+     """Load the Haar cascade classifier used for face detection"""
+     # Create directory for models if it doesn't exist
+     models_dir = Path("models")
+     models_dir.mkdir(exist_ok=True)
+
+     # Path for the Haar cascade file
+     face_cascade_path = models_dir / "haarcascade_frontalface_default.xml"
+
+     # Download the cascade file if it doesn't exist
+     if not face_cascade_path.exists():
+         st.info("Downloading Haar cascade face detector (first time only)...")
+         import urllib.request
+         urllib.request.urlretrieve(
+             "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml",
+             str(face_cascade_path)
+         )
+
+     # Load face cascade
+     face_cascade = cv2.CascadeClassifier(str(face_cascade_path))
+
+     return face_cascade
+
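
Side note: the same cascade file also ships inside the opencv-python wheel, so a download-free variant is possible. This is an alternative sketch, not what `load_haar_cascades` above does.

```python
import cv2

# Cascade bundled with opencv-python; no network access needed.
bundled = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
face_cascade = cv2.CascadeClassifier(bundled)
assert not face_cascade.empty(), "cascade failed to load"
```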
+ # Helper routines used by process_video_file for local (non-API) analysis
+
+ def detect_objects_yolo(frame, net, classes, output_layers, confidence_threshold=0.5):
+     """Detect objects in frame using YOLOv4-tiny"""
+     height, width, _ = frame.shape
+
+     # Prepare image for YOLO
+     blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
+     net.setInput(blob)
+
+     # Forward pass
+     layer_outputs = net.forward(output_layers)
+
+     # Initialize lists for detected objects
+     boxes = []
+     confidences = []
+     class_ids = []
+
+     # Process each output layer
+     for output in layer_outputs:
+         for detection in output:
+             scores = detection[5:]
+             class_id = np.argmax(scores)
+             confidence = scores[class_id]
+
+             if confidence > confidence_threshold:
+                 # Scale box coordinates to frame size
+                 center_x = int(detection[0] * width)
+                 center_y = int(detection[1] * height)
+                 w = int(detection[2] * width)
+                 h = int(detection[3] * height)
+
+                 # Rectangle coordinates
+                 x = int(center_x - w / 2)
+                 y = int(center_y - h / 2)
+
+                 # Add to lists
+                 boxes.append([x, y, w, h])
+                 confidences.append(float(confidence))
+                 class_ids.append(class_id)
+
+     # Apply non-maximum suppression
+     indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)
+
+     # Prepare results
+     results = []
+
+     if len(indexes) > 0:
+         # Ensure indexes is properly flattened (OpenCV 4.5.4+ vs older versions)
+         try:
+             flat_indexes = indexes.flatten()
+         except AttributeError:
+             flat_indexes = indexes
+
+         for i in flat_indexes:
+             box = boxes[i]
+             x, y, w, h = box
+             label = str(classes[class_ids[i]])
+             confidence = confidences[i]
+
+             results.append({
+                 "box": (x, y, w, h),
+                 "label": label,
+                 "confidence": confidence
+             })
+
+     return results
+
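
A short sketch of consuming the detection list returned above; `frame`, `net`, `classes`, and `output_layers` are assumed to come from the earlier loader sketch.

```python
import cv2

detections = detect_objects_yolo(frame, net, classes, output_layers)
for det in detections:
    x, y, w, h = det["box"]
    caption = f'{det["label"]} {det["confidence"]:.2f}'
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.putText(frame, caption, (x, max(y - 5, 10)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
```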
+ def detect_faces_haar(frame, face_cascade):
+     """Detect faces using Haar cascades"""
+     # Convert to grayscale for Haar cascade
+     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+     # Detect faces
+     faces = face_cascade.detectMultiScale(
+         gray,
+         scaleFactor=1.1,
+         minNeighbors=5,
+         minSize=(30, 30)
+     )
+
+     # Prepare results
+     results = []
+
+     for (x, y, w, h) in faces:
+         results.append({
+             "box": (x, y, w, h)
+         })
+
+     return results
+
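
And the face-detection counterpart, again assuming `frame` is any BGR image and `load_haar_cascades` has run at least once.

```python
import cv2

face_cascade = load_haar_cascades()
faces = detect_faces_haar(frame, face_cascade)
for face in faces:
    x, y, w, h = face["box"]
    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
print(f"{len(faces)} face(s) found")
```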
+ def calculate_optical_flow(prev_gray, current_gray):
+     """Calculate dense optical flow between frames for motion detection"""
+     # Dense flow via the Farneback algorithm
+     flow = cv2.calcOpticalFlowFarneback(
+         prev_gray, current_gray,
+         None, 0.5, 3, 15, 3, 5, 1.2, 0
+     )
+
+     # Calculate magnitude and angle
+     magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
+
+     # Create visualization mask
+     motion_mask = np.zeros_like(prev_gray)
+
+     # Normalize magnitude for visualization
+     norm_magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
+     motion_mask = norm_magnitude.astype(np.uint8)
+
+     # Calculate motion metrics
+     motion_level = np.mean(magnitude)
+     # Fraction (0-1) of pixels whose flow magnitude exceeds the threshold
+     motion_area = np.sum(magnitude > 0.5) / (magnitude.shape[0] * magnitude.shape[1])
+
+     return motion_level, motion_area, motion_mask, flow
+
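
A sketch of sampling these flow metrics across a clip; `input.mp4` is a placeholder path, and in the app the frames come from the uploaded video.

```python
import cv2

cap = cv2.VideoCapture("input.mp4")  # placeholder path
ok, prev = cap.read()
assert ok, "could not read the first frame"
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    motion_level, motion_area, motion_mask, flow = calculate_optical_flow(prev_gray, gray)
    print(f"motion level {motion_level:.2f}, moving area {motion_area:.1%}")
    prev_gray = gray

cap.release()
```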
+ def stabilize_frame(frame, prev_frame_gray, current_frame_gray):
+     """Stabilize video frame using optical flow"""
+     # Calculate optical flow
+     flow = cv2.calcOpticalFlowFarneback(
+         prev_frame_gray, current_frame_gray,
+         None, 0.5, 3, 15, 3, 5, 1.2, 0
+     )
+
+     # Calculate the median flow vectors
+     h, w = flow.shape[:2]
+     flow_median_x = np.median(flow[..., 0])
+     flow_median_y = np.median(flow[..., 1])
+
+     # Create transformation matrix for affine transform
+     transform = np.array([[1, 0, -flow_median_x], [0, 1, -flow_median_y]], dtype=np.float32)
+
+     # Apply affine transformation to stabilize the frame
+     stabilized_frame = cv2.warpAffine(frame, transform, (w, h))
+
+     return stabilized_frame
+
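
Because the transform above uses only the median flow vector, the stabilizer compensates for global translation (camera shake) but not rotation or zoom. A sketch of applying it over a sequence of frames:

```python
import cv2

stabilized_frames = []
prev_gray = None
for frame in frames:  # `frames` is any list of BGR frames (placeholder)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if prev_gray is None:
        stabilized_frames.append(frame)  # nothing to compare against yet
    else:
        stabilized_frames.append(stabilize_frame(frame, prev_gray, gray))
    prev_gray = gray
```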
+ def create_tracker(tracker_type="CSRT"):
+     """Create an OpenCV tracker of the specified type (cv2.legacy requires opencv-contrib-python)"""
+     if tracker_type == 'BOOSTING':
+         return cv2.legacy.TrackerBoosting_create()
+     elif tracker_type == 'MIL':
+         return cv2.legacy.TrackerMIL_create()
+     elif tracker_type == 'KCF':
+         return cv2.legacy.TrackerKCF_create()
+     elif tracker_type == 'TLD':
+         return cv2.legacy.TrackerTLD_create()
+     elif tracker_type == 'MEDIANFLOW':
+         return cv2.legacy.TrackerMedianFlow_create()
+     elif tracker_type == 'CSRT':
+         return cv2.legacy.TrackerCSRT_create()
+     elif tracker_type == 'MOSSE':
+         return cv2.legacy.TrackerMOSSE_create()
+     else:
+         return cv2.legacy.TrackerCSRT_create()  # Default
+
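
The `cv2.legacy` tracker constructors are only present in the opencv-contrib-python builds, so a defensive check like this sketch can turn a confusing `AttributeError` into a clearer message:

```python
try:
    tracker = create_tracker("CSRT")
except AttributeError:
    # cv2.legacy is missing in plain opencv-python builds.
    raise RuntimeError("Install opencv-contrib-python to use the legacy tracker API")
```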
+ class ObjectTracker:
+     """Manages object tracking across video frames"""
+
+     def __init__(self, tracker_type="CSRT", max_disappeared=30):
+         self.tracker_type = tracker_type
+         self.trackers = {}  # Dict of active trackers
+         self.disappeared = {}  # Count of frames where object disappeared
+         self.max_disappeared = max_disappeared  # Max frames to keep tracking after disappearance
+         self.next_object_id = 0  # Counter for object IDs
+         self.objects = {}  # Dict of tracked object positions {ID: (x, y, w, h, label)}
+
+     def register(self, frame, bbox, label="Object"):
+         """Register a new object to track"""
+         # Create a new tracker
+         tracker = create_tracker(self.tracker_type)
+         tracker.init(frame, bbox)
+
+         # Register the object
+         object_id = self.next_object_id
+         self.trackers[object_id] = tracker
+         self.objects[object_id] = (*bbox, label)
+         self.disappeared[object_id] = 0
+
+         # Increment the counter
+         self.next_object_id += 1
+
+         return object_id
+
+     def deregister(self, object_id):
+         """Stop tracking an object"""
+         # Remove from dictionaries
+         self.trackers.pop(object_id, None)
+         self.objects.pop(object_id, None)
+         self.disappeared.pop(object_id, None)
+
+     def update(self, frame):
+         """Update all trackers with new frame"""
+         # Check if we have no objects
+         if len(self.trackers) == 0:
+             return self.objects
+
+         # Initialize a dict of updated objects
+         updated_objects = {}
+
+         # Loop through tracked objects
+         for object_id in list(self.trackers.keys()):
+             # Get the tracker
+             tracker = self.trackers[object_id]
+
+             # Update the tracker
+             success, bbox = tracker.update(frame)
+
+             if success:
+                 # Successfully tracked, reset disappeared counter
+                 self.disappeared[object_id] = 0
+
+                 # Update object position, keeping the same label
+                 _, _, _, _, label = self.objects[object_id]
+                 self.objects[object_id] = (*bbox, label)
+                 updated_objects[object_id] = self.objects[object_id]
+             else:
+                 # Tracking failed, increment disappeared counter
+                 self.disappeared[object_id] += 1
+
+                 # If object has disappeared for too long, deregister it
+                 if self.disappeared[object_id] > self.max_disappeared:
+                     self.deregister(object_id)
+                 else:
+                     # Keep the last known position
+                     updated_objects[object_id] = self.objects[object_id]
+
+         return updated_objects
+
+     def draw_tracked_objects(self, frame, objects):
+         """Draw bounding boxes and IDs for tracked objects"""
+         for object_id, (x, y, w, h, label) in objects.items():
+             # Convert to integer coordinates
+             x, y, w, h = int(x), int(y), int(w), int(h)
+
+             # Draw bounding box
+             cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+             # Draw ID and label
+             text = f"ID:{object_id} {label}"
+             cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+         return frame
+
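
A sketch of the intended detect-then-track loop; `first_frame`, `later_frames`, and the YOLO handles are placeholders carried over from the earlier sketches.

```python
tracker = ObjectTracker(tracker_type="CSRT", max_disappeared=30)

# Seed the tracker from detections on the first frame...
for det in detect_objects_yolo(first_frame, net, classes, output_layers):
    tracker.register(first_frame, det["box"], label=det["label"])

# ...then update on subsequent frames, which is much cheaper than re-detecting.
for frame in later_frames:
    tracked = tracker.update(frame)
    frame = tracker.draw_tracked_objects(frame, tracked)
```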
+ def segment_image(frame, method="watershed", rect=None):
+     """Segment an image into foreground and background regions"""
+     if method == "watershed":
+         # Watershed segmentation
+
+         # Convert to grayscale if needed
+         if len(frame.shape) == 3:
+             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+         else:
+             gray = frame.copy()
+             frame = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
+
+         # Apply threshold
+         _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+
+         # Noise removal with morphological operations
+         kernel = np.ones((3, 3), np.uint8)
+         opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
+
+         # Sure background area
+         sure_bg = cv2.dilate(opening, kernel, iterations=3)
+
+         # Finding sure foreground area
+         dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
+         _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
+
+         # Finding unknown region
+         sure_fg = np.uint8(sure_fg)
+         unknown = cv2.subtract(sure_bg, sure_fg)
+
+         # Marker labelling
+         _, markers = cv2.connectedComponents(sure_fg)
+
+         # Add 1 to all labels so that background is 1 instead of 0
+         markers = markers + 1
+
+         # Mark the unknown region with 0
+         markers[unknown == 255] = 0
+
+         # Apply watershed
+         markers = cv2.watershed(frame, markers)
+
+         # Create visualization with boundaries
+         segmented = frame.copy()
+         segmented[markers == -1] = [0, 0, 255]  # Mark boundaries in red
+
+         # Create a colored mask for visualization
+         mask = np.zeros_like(frame)
+         for label in np.unique(markers):
+             if label > 1:  # Skip background (1) and boundaries (-1)
+                 # Create a random color for this segment
+                 color = np.random.randint(0, 255, size=3, dtype=np.uint8)
+                 mask[markers == label] = color
+
+         # Blend the original image with the segmentation mask
+         result = cv2.addWeighted(frame, 0.7, mask, 0.3, 0)
+
+         return result, markers
+
+     elif method == "grabcut":
+         # GrabCut segmentation
+
+         # Create mask and temporary arrays
+         mask = np.zeros(frame.shape[:2], np.uint8)
+         bgd_model = np.zeros((1, 65), np.float64)
+         fgd_model = np.zeros((1, 65), np.float64)
+
+         # If no rectangle provided, use center portion of image
+         if rect is None:
+             h, w = frame.shape[:2]
+             rect = (w // 4, h // 4, w // 2, h // 2)
+
+         # Apply GrabCut
+         cv2.grabCut(frame, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
+
+         # Collapse the 4-value GrabCut mask: certain/probable background -> 0, foreground -> 1
+         mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
+
+         # Create segmented image
+         segmented = frame * mask2[:, :, np.newaxis]
+
+         # Create visualization that highlights foreground
+         highlight = frame.copy()
+         highlight_mask = np.zeros_like(frame)
+         highlight_mask[mask2 == 1] = [0, 255, 0]  # Green for foreground
+         result = cv2.addWeighted(highlight, 0.7, highlight_mask, 0.3, 0)
+
+         return result, mask
+
+     else:
+         return frame, None  # Return original frame if method not recognized
+
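
Usage sketch for both segmentation paths; the GrabCut rectangle is an arbitrary example region in pixels and `frame` is any BGR image.

```python
# GrabCut seeded with a user-chosen region; rect is (x, y, w, h) in pixels.
gc_result, gc_mask = segment_image(frame, method="grabcut", rect=(50, 50, 300, 200))

# Watershed needs no seed rectangle.
ws_result, ws_markers = segment_image(frame, method="watershed")
```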
+ def detect_edges(frame, method="canny", low_threshold=100, high_threshold=200):
+     """Detect edges in an image using various methods"""
+     # Convert to grayscale if needed
+     if len(frame.shape) == 3:
+         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+     else:
+         gray = frame
+
+     # Apply Gaussian blur to reduce noise
+     blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+
+     if method == "canny":
+         # Canny edge detector
+         edges = cv2.Canny(blurred, low_threshold, high_threshold)
+         # Convert back to 3-channel for visualization
+         return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)
+
+     elif method == "sobel":
+         # Sobel edge detector
+         sobel_x = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=3)
+         sobel_y = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=3)
+
+         # Calculate magnitude and convert to uint8
+         magnitude = cv2.magnitude(sobel_x, sobel_y)
+         magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
+
+         # Convert back to 3-channel for visualization
+         return cv2.cvtColor(magnitude, cv2.COLOR_GRAY2BGR)
+
+     elif method == "laplacian":
+         # Laplacian edge detector
+         laplacian = cv2.Laplacian(blurred, cv2.CV_64F)
+         laplacian = np.uint8(np.absolute(laplacian))
+         laplacian = cv2.normalize(laplacian, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
+
+         # Convert back to 3-channel for visualization
+         return cv2.cvtColor(laplacian, cv2.COLOR_GRAY2BGR)
+
+     else:
+         return frame  # Return original frame if method not recognized
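
Finally, a sketch comparing the three edge detectors on the same frame; each branch above returns a 3-channel BGR image, so the results can be concatenated for side-by-side display.

```python
import cv2

canny = detect_edges(frame, method="canny", low_threshold=100, high_threshold=200)
sobel = detect_edges(frame, method="sobel")
laplacian = detect_edges(frame, method="laplacian")
side_by_side = cv2.hconcat([canny, sobel, laplacian])  # same height, stacked horizontally
```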