Update app.py
app.py CHANGED
@@ -36,6 +36,8 @@ from langchain.memory import ConversationBufferMemory
 from langchain_community.document_loaders import TextLoader
 import re
 import base64
+from numpy.lib.stride_tricks import as_strided
+from object_tracker import ObjectTracker
 
 # Set page config
 st.set_page_config(
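The commit imports `ObjectTracker` from a project-local `object_tracker` module that this diff does not show. As a reading aid, here is a minimal sketch of the interface the new code relies on (`register`, `update`, `draw_tracked_objects`), assuming OpenCV's contrib CSRT tracker; the method bodies are guesses from the call sites below, not the module's actual code.

# Hypothetical sketch of the object_tracker module used by app.py.
# Assumes opencv-contrib-python, which provides cv2.legacy.TrackerCSRT_create.
import cv2

class ObjectTracker:
    def __init__(self, tracker_type="CSRT"):
        self.tracker_type = tracker_type
        self.trackers = {}   # object_id -> (cv2 tracker, label)
        self.next_id = 0

    def _make_tracker(self):
        # Only CSRT is sketched here; other tracker types would dispatch similarly.
        return cv2.legacy.TrackerCSRT_create()

    def register(self, frame, box, label):
        # box is (x, y, w, h) in pixel coordinates
        tracker = self._make_tracker()
        tracker.init(frame, box)
        self.trackers[self.next_id] = (tracker, label)
        self.next_id += 1

    def update(self, frame):
        # Returns {object_id: (x, y, w, h, label)} for successfully tracked
        # objects, matching how the diff unpacks tracked_objects.items().
        results = {}
        for obj_id, (tracker, label) in self.trackers.items():
            ok, box = tracker.update(frame)
            if ok:
                x, y, w, h = [int(v) for v in box]
                results[obj_id] = (x, y, w, h, label)
        return results

    def draw_tracked_objects(self, img, tracked_objects):
        for obj_id, (x, y, w, h, label) in tracked_objects.items():
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 165, 0), 2)
            cv2.putText(img, f"{label} #{obj_id}", (x, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 165, 0), 2)
        return img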
@@ -431,34 +433,209 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None):
     return summary_img
 
 class VideoProcessor(VideoProcessorBase):
-    """Process video frames
+    """Process video frames with hybrid local/cloud processing"""
 
-    def __init__(self, analysis_types: List[str]
+    def __init__(self, analysis_types: List[str], processing_mode="hybrid", stabilize=False,
+                 edge_detection=None, segmentation=None, enable_tracking=False):
         self.analysis_types = analysis_types
+        self.processing_mode = processing_mode  # "local", "cloud", or "hybrid"
+        self.stabilize = stabilize
+        self.edge_detection = edge_detection  # None, "canny", "sobel", or "laplacian"
+        self.segmentation = segmentation  # None, "watershed", or "grabcut"
+        self.enable_tracking = enable_tracking
+
         self.frame_counter = 0
+        self.cloud_process_interval = 10  # Process with Google Vision every 10 frames
         self.vision_client = client  # Store client reference
         self.last_results = {}  # Cache results between processed frames
        self.last_processed_time = time.time()
        self.processing_active = True
 
+        # Initialize motion tracking
+        self.prev_gray = None
+        self.motion_history = []
+        self.motion_threshold = 40.0  # Threshold for scene change detection
+        self.scene_changes = []
+
+        # Initialize local models if needed
+        if processing_mode in ["local", "hybrid"]:
+            self.yolo_net, self.yolo_classes, self.yolo_output_layers = load_yolo_model()
+            self.face_cascade = load_haar_cascades()
+
+        # Initialize object tracker if enabled
+        if self.enable_tracking:
+            self.object_tracker = ObjectTracker(tracker_type="CSRT")
+            self.tracking_initialized = False
+            self.tracked_objects = {}
+            # How often to reinitialize tracking with new detections (in frames)
+            self.detection_interval = 15
+
     def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
         img = frame.to_ndarray(format="bgr24")
         self.frame_counter += 1
 
         # Add status display on all frames
         cv2.putText(img,
-                    f"Vision AI: {
+                    f"Vision AI: {self.processing_mode.title()} Mode",
                     (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
 
+        # Make a copy for processing that won't affect the original
+        processed_img = img.copy()
+
+        # Prepare grayscale image for motion tracking
+        current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0)
+
+        # Stabilize frame if enabled
+        if self.stabilize and self.prev_gray is not None:
+            img = stabilize_frame(img, self.prev_gray, current_gray)
+            processed_img = img.copy()
+            # Update current_gray after stabilization
+            current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+            current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0)
+
+        # Process motion if we have a previous frame
+        if self.prev_gray is not None:
+            # Calculate optical flow for motion detection
+            motion_level, motion_area, motion_mask, flow = calculate_optical_flow(
+                self.prev_gray, current_gray
+            )
+
+            # Store motion metrics
+            timestamp = time.time()
+            self.motion_history.append({
+                "timestamp": timestamp,
+                "frame": self.frame_counter,
+                "motion_level": motion_level,
+                "motion_area": motion_area * 100  # Convert to percentage
+            })
+
+            # Detect scene changes
+            if motion_level > self.motion_threshold:
+                self.scene_changes.append(self.frame_counter)
+                # Mark scene change on frame
+                cv2.putText(img, "SCENE CHANGE",
+                            (img.shape[1] // 2 - 100, 50),
+                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
+
+            # Visualize motion
+            motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET)
+            motion_overlay = cv2.resize(motion_overlay, (img.shape[1] // 4, img.shape[0] // 4))
+
+            # Add motion overlay to corner of frame
+            h, w = motion_overlay.shape[:2]
+            img[10:10+h, img.shape[1]-10-w:img.shape[1]-10] = motion_overlay
+
+            # Add motion level indicator
+            cv2.putText(img, f"Motion: {motion_level:.1f}",
+                        (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+
+        # Store current frame as previous for next iteration
+        self.prev_gray = current_gray
+
+        # Process with local models if in local or hybrid mode
+        detected_objects = []
+
+        if self.processing_mode in ["local", "hybrid"]:
+            # Object detection with YOLO
+            if "Objects" in self.analysis_types:
+                try:
+                    objects = detect_objects_yolo(
+                        processed_img, self.yolo_net, self.yolo_classes,
+                        self.yolo_output_layers, confidence_threshold=0.4
+                    )
+
+                    # Update results cache
+                    self.last_results["objects"] = objects
+
+                    # Draw detected objects
+                    for obj in objects:
+                        x, y, w, h = obj["box"]
+                        label = obj["label"]
+                        confidence = obj["confidence"]
+
+                        # Add to detected objects list for tracking
+                        detected_objects.append((x, y, w, h, label))
+
+                        # Draw box (skip if tracking is enabled, as tracker will draw boxes)
+                        if not self.enable_tracking:
+                            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
+
+                            # Add label with confidence
+                            label_text = f"{label}: {int(confidence * 100)}%"
+                            cv2.putText(img, label_text,
+                                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+                except Exception as e:
+                    cv2.putText(img, f"YOLO Error: {str(e)[:30]}",
+                                (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+            # Face detection with Haar cascades
+            if "Face Detection" in self.analysis_types:
+                try:
+                    faces = detect_faces_haar(processed_img, self.face_cascade)
+
+                    # Update results cache
+                    self.last_results["faces"] = faces
+
+                    # Add to detected objects list for tracking
+                    for face in faces:
+                        x, y, w, h = face["box"]
+                        detected_objects.append((x, y, w, h, "Face"))
+
+                    # Draw detected faces (skip if tracking is enabled)
+                    if not self.enable_tracking:
+                        for face in faces:
+                            x, y, w, h = face["box"]
+
+                            # Draw box
+                            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
+                except Exception as e:
+                    cv2.putText(img, f"Face Detection Error: {str(e)[:30]}",
+                                (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+        # Handle object tracking if enabled
+        if self.enable_tracking:
+            try:
+                # Initialize tracking on first frame or periodically with new detections
+                if not self.tracking_initialized or self.frame_counter % self.detection_interval == 0:
+                    # Reset if tracking is already initialized
+                    if self.tracking_initialized:
+                        self.object_tracker = ObjectTracker(tracker_type="CSRT")
+
+                    # Register each detected object with the tracker
+                    for x, y, w, h, label in detected_objects:
+                        self.object_tracker.register(processed_img, (x, y, w, h), label)
+
+                    self.tracking_initialized = True
+
+                # Update tracking on every frame
+                self.tracked_objects = self.object_tracker.update(processed_img)
+
+                # Draw tracked objects
+                img = self.object_tracker.draw_tracked_objects(img, self.tracked_objects)
+
+                # Add tracking status
+                cv2.putText(img, f"Tracking {len(self.tracked_objects)} objects",
+                            (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2)
+            except Exception as e:
+                cv2.putText(img, f"Tracking Error: {str(e)[:30]}",
+                            (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+        # Process with Google Vision API periodically if in cloud or hybrid mode
         current_time = time.time()
+        should_process_cloud = (
+            self.processing_mode in ["cloud", "hybrid"] and
+            (self.frame_counter % self.cloud_process_interval == 0) and
+            (current_time - self.last_processed_time > 1.0) and  # Max once per second
+            self.processing_active
+        )
+
+        if should_process_cloud:
             self.last_processed_time = current_time
 
             try:
                 # Convert to PIL Image for Vision API
-                pil_img = Image.fromarray(cv2.cvtColor(
+                pil_img = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
 
                 # Process with Vision API
                 img_byte_arr = io.BytesIO()
@@ -467,56 +644,36 @@ class VideoProcessor(VideoProcessorBase):
                 vision_image = vision.Image(content=content)
 
                 # Update status text
-                cv2.putText(img, "Processing...", (10,
-                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255,
+                cv2.putText(img, "Cloud Processing...", (10, 180),
+                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
 
                 # Process according to selected analysis types
-                if "Objects" in self.analysis_types:
-                    objects = self.vision_client.object_localization(image=vision_image)
-                    self.last_results["objects"] = objects.localized_object_annotations
-
-                if "Face Detection" in self.analysis_types:
-                    faces = self.vision_client.face_detection(image=vision_image)
-                    self.last_results["faces"] = faces.face_annotations
-
                 if "Text" in self.analysis_types:
                     text = self.vision_client.text_detection(image=vision_image)
                     self.last_results["text"] = text.text_annotations
 
+                if "Labels" in self.analysis_types:
+                    labels = self.vision_client.label_detection(image=vision_image, max_results=5)
+                    self.last_results["labels"] = labels.label_annotations
+
+                # Only use Vision API for objects/faces if in cloud-only mode
+                if self.processing_mode == "cloud":
+                    if "Objects" in self.analysis_types:
+                        objects = self.vision_client.object_localization(image=vision_image)
+                        self.last_results["objects"] = objects.localized_object_annotations
+
+                    if "Face Detection" in self.analysis_types:
+                        faces = self.vision_client.face_detection(image=vision_image)
+                        self.last_results["faces"] = faces.face_annotations
+
             except Exception as e:
-                cv2.putText(img, f"Error: {
+                # Show error on frame
+                cv2.putText(img, f"API Error: {str(e)[:30]}",
+                            (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
 
-        # Always draw the cached results for smooth display
+        # Always draw the cached cloud results for smooth display
         try:
-            if "objects" in self.last_results and "Objects" in self.analysis_types:
-                for obj in self.last_results["objects"]:
-                    box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
-                           for vertex in obj.bounding_poly.normalized_vertices]
-                    box = np.array(box, np.int32).reshape((-1, 1, 2))
-                    cv2.polylines(img, [box], True, (0, 255, 0), 2)
-                    # Add label
-                    cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
-                                (int(box[0][0][0]), int(box[0][0][1]) - 10),
-                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
-
-            # Draw face detections
-            if "faces" in self.last_results and "Face Detection" in self.analysis_types:
-                for face in self.last_results["faces"]:
-                    vertices = face.bounding_poly.vertices
-                    points = [(vertex.x, vertex.y) for vertex in vertices]
-                    pts = np.array(points, np.int32).reshape((-1, 1, 2))
-                    cv2.polylines(img, [pts], True, (0, 0, 255), 2)
-
-                    # Draw landmarks
-                    for landmark in face.landmarks:
-                        px = int(landmark.position.x)
-                        py = int(landmark.position.y)
-                        cv2.circle(img, (px, py), 2, (255, 255, 0), -1)
-
-            # Draw text detections
+            # Draw text detections from cloud
             if "text" in self.last_results and "Text" in self.analysis_types:
                 if len(self.last_results["text"]) > 1:  # Skip the first one (full text)
                     for text_annot in self.last_results["text"][1:]:
@@ -524,15 +681,103 @@ class VideoProcessor(VideoProcessorBase):
                         pts = np.array(box, np.int32).reshape((-1, 1, 2))
                         cv2.polylines(img, [pts], True, (255, 0, 0), 1)
 
+            # Show full text summary
+            if self.last_results["text"]:
+                full_text = self.last_results["text"][0].description
+                words = full_text.split()
+                short_text = " ".join(words[:3])
+                if len(words) > 3:
+                    short_text += "..."
+
+                # Display text at top of frame
+                cv2.putText(img, f"Text: {short_text}",
+                            (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
+
+            # Draw labels from cloud
+            if "labels" in self.last_results and "Labels" in self.analysis_types:
+                y_pos = img.shape[0] - 50
+                for i, label in enumerate(self.last_results["labels"][:3]):  # Show top 3 labels
+                    label_text = f"Label: {label.description} ({int(label.score*100)}%)"
+                    cv2.putText(img, label_text,
+                                (img.shape[1] - 300, y_pos - i*20),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2)
+
+            # Draw cloud-detected objects and faces only if in cloud-only mode
+            if self.processing_mode == "cloud" and not self.enable_tracking:
+                # Draw objects
+                if "objects" in self.last_results and "Objects" in self.analysis_types:
+                    for obj in self.last_results["objects"]:
+                        box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
+                               for vertex in obj.bounding_poly.normalized_vertices]
+                        box = np.array(box, np.int32).reshape((-1, 1, 2))
+                        cv2.polylines(img, [box], True, (0, 255, 0), 2)
+                        # Add label
+                        cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
+                                    (int(box[0][0][0]), int(box[0][0][1]) - 10),
+                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                # Draw faces
+                if "faces" in self.last_results and "Face Detection" in self.analysis_types:
+                    for face in self.last_results["faces"]:
+                        vertices = face.bounding_poly.vertices
+                        points = [(vertex.x, vertex.y) for vertex in vertices]
+                        pts = np.array(points, np.int32).reshape((-1, 1, 2))
+                        cv2.polylines(img, [pts], True, (0, 0, 255), 2)
+
+                        # Draw landmarks
+                        for landmark in face.landmarks:
+                            px = int(landmark.position.x)
+                            py = int(landmark.position.y)
+                            cv2.circle(img, (px, py), 2, (255, 255, 0), -1)
+
         except Exception as e:
+            cv2.putText(img, f"Display Error: {str(e)[:30]}",
+                        (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
 
+        # Apply edge detection if enabled
+        if self.edge_detection:
+            # Create edge detection visualization
+            edge_img = detect_edges(processed_img, method=self.edge_detection)
+
+            # Display edge detection mode
+            cv2.putText(img, f"Edge: {self.edge_detection.title()}",
+                        (10, img.shape[0] - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+            # Show edge detection in a corner (similar to motion overlay)
+            edge_small = cv2.resize(edge_img, (img.shape[1] // 4, img.shape[0] // 4))
+            h, w = edge_small.shape[:2]
+            img[10:10+h, 10:10+w] = edge_small
 
+        # Apply segmentation if enabled
+        if self.segmentation:
+            try:
+                # Create segmentation visualization
+                segmented_img, _ = segment_image(processed_img, method=self.segmentation)
+
+                # Display segmentation mode
+                cv2.putText(img, f"Segment: {self.segmentation.title()}",
+                            (10, img.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+                # Show segmentation in a corner opposite to edge detection or motion
+                seg_small = cv2.resize(segmented_img, (img.shape[1] // 4, img.shape[0] // 4))
+                h, w = seg_small.shape[:2]
+                img[10+h+10:10+h+10+h, 10:10+w] = seg_small
+            except Exception as e:
+                cv2.putText(img, f"Segmentation Error: {str(e)[:30]}",
+                            (10, img.shape[0] - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+        # Add processing mode and stabilization status
+        mode_text = f"Mode: {self.processing_mode.title()}"
+        features = []
+        if self.stabilize:
+            features.append("Stabilized")
+        if self.enable_tracking:
+            features.append("Tracking")
+        if features:
+            mode_text += f" | {', '.join(features)}"
+
+        cv2.putText(img, mode_text,
+                    (10, img.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
 
         return av.VideoFrame.from_ndarray(img, format="bgr24")
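`calculate_optical_flow` is referenced throughout `transform` but defined elsewhere in the commit. A minimal sketch of what it must return, given how the diff consumes it: a motion level compared against the 40.0 threshold, a 0-1 motion-area fraction, an 8-bit mask suitable for `cv2.applyColorMap`, and the raw flow field. The dense Farneback flow and the 0-255 scaling behind the threshold are assumptions, not the real module.

# Hypothetical sketch of calculate_optical_flow as consumed by transform().
import cv2
import numpy as np

def calculate_optical_flow(prev_gray, curr_gray):
    # Dense optical flow between two blurred grayscale frames
    flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # Map magnitudes to an 8-bit mask so it can be color-mapped later
    motion_mask = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # Overall motion intensity and the fraction of the frame that moved
    motion_level = float(np.mean(motion_mask))
    motion_area = float(np.count_nonzero(motion_mask > 25) / motion_mask.size)
    return motion_level, motion_area, motion_mask, flow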
@@ -758,8 +1003,9 @@ def list_bigquery_resources():
 
     return resources
 
-def process_video_file(video_file, analysis_types
+def process_video_file(video_file, analysis_types, processing_mode="hybrid", stabilize=False,
+                       edge_detection=None, segmentation=None, enable_tracking=False):
+    """Process an uploaded video file with hybrid Vision AI detection and analytics"""
     # Create a temporary file to save the uploaded video
     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
         temp_file.write(video_file.read())
@@ -805,8 +1051,8 @@ def process_video_file(video_file, analysis_types):
         fourcc = cv2.VideoWriter_fourcc(*'DIB ')  # Uncompressed RGB
         out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True)
 
         # Process every Nth frame to reduce API calls
+        cloud_process_interval = 10  # How often to use Google Vision API
 
         # Create a progress bar
         progress_bar = st.progress(0)
@@ -818,13 +1064,26 @@ def process_video_file(video_file, analysis_types):
             "faces": 0,
             "text_blocks": 0,
             "labels": {},
+            # Motion tracking
+            "motion_data": [],
+            "scene_changes": [],
+            "avg_motion_level": 0,
+            "processing_mode": processing_mode,
+            "stabilized": stabilize
         }
 
+        # Initialize object tracker if enabled
+        if enable_tracking:
+            object_tracker = ObjectTracker(tracker_type="CSRT")
+            tracked_objects = {}
+            detection_interval = 15  # How often to reinitialize tracking
+
+        # Load models based on processing mode
+        if processing_mode in ["local", "hybrid"]:
+            yolo_net, yolo_classes, yolo_output_layers = load_yolo_model()
+            face_cascade = load_haar_cascades()
+
+        # For scene change detection and motion tracking
         previous_frame_gray = None
         scene_change_threshold = 40.0  # Threshold for scene change detection
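`load_yolo_model`, `load_haar_cascades`, `detect_objects_yolo`, and `detect_faces_haar` are added elsewhere in this commit (the final hunk is truncated in this view). A minimal sketch matching the call sites above, assuming cv2.dnn with YOLOv4-tiny; the file names are placeholders and the real helpers may differ.

# Hypothetical sketches of the local-model helpers used by the loop below.
import cv2
import numpy as np

def load_yolo_model(cfg="yolov4-tiny.cfg", weights="yolov4-tiny.weights",
                    names="coco.names"):
    net = cv2.dnn.readNetFromDarknet(cfg, weights)
    with open(names) as f:
        classes = [line.strip() for line in f]
    output_layers = net.getUnconnectedOutLayersNames()
    return net, classes, output_layers

def load_haar_cascades():
    return cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

def detect_objects_yolo(frame, net, classes, output_layers, confidence_threshold=0.5):
    # Returns [{"box": (x, y, w, h), "label": str, "confidence": float}, ...]
    # as the caller expects; NMS is omitted for brevity.
    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = []
    for output in net.forward(output_layers):
        for row in output:
            scores = row[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence > confidence_threshold:
                cx, cy, bw, bh = row[0] * w, row[1] * h, row[2] * w, row[3] * h
                detections.append({
                    "box": (int(cx - bw / 2), int(cy - bh / 2), int(bw), int(bh)),
                    "label": classes[class_id],
                    "confidence": confidence,
                })
    return detections

def detect_faces_haar(frame, face_cascade):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
    return [{"box": (int(x), int(y), int(w), int(h))} for (x, y, w, h) in faces]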
@@ -846,184 +1105,274 @@ def process_video_file(video_file, analysis_types):
             cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
                         (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
 
+            # Prepare grayscale image for motion analysis
             current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
             current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
+
+            # Stabilize frame if enabled
+            if stabilize and previous_frame_gray is not None:
+                frame = stabilize_frame(frame, previous_frame_gray, current_frame_gray)
+                # Update grayscale after stabilization
+                current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+                current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
+
+            # Motion detection and scene change detection
             if previous_frame_gray is not None:
+                # Calculate optical flow for motion detection
+                motion_level, motion_area, motion_mask, flow = calculate_optical_flow(
+                    previous_frame_gray, current_frame_gray
+                )
+
+                # Store motion metrics
+                detection_stats["motion_data"].append({
+                    "time": frame_count/fps,
+                    "motion_level": motion_level,
+                    "motion_area": motion_area * 100  # Convert to percentage
+                })
 
                 # Scene change detection
+                if motion_level > scene_change_threshold:
                     detection_stats["scene_changes"].append(frame_count/fps)
                     # Mark scene change on frame
                     cv2.putText(frame, "SCENE CHANGE",
                                 (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
+
+                # Visualize motion
+                motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET)
+                motion_overlay = cv2.resize(motion_overlay, (width // 4, height // 4))
+
+                # Add motion overlay to corner of frame
+                h, w = motion_overlay.shape[:2]
+                frame[10:10+h, width-10-w:width-10] = motion_overlay
+
+                # Add motion indicator
+                cv2.putText(frame, f"Motion: {motion_level:.1f}",
+                            (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
 
             previous_frame_gray = current_frame_gray
 
+            # Apply edge detection if enabled
+            if edge_detection:
+                # Create edge detection visualization in a corner
+                edge_img = detect_edges(frame, method=edge_detection)
+
+                # Display edge detection mode
+                cv2.putText(frame, f"Edge: {edge_detection.title()}",
+                            (10, height - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+                # Show edge detection in a corner
+                edge_small = cv2.resize(edge_img, (width // 4, height // 4))
+                h, w = edge_small.shape[:2]
+                frame[10:10+h, 10:10+w] = edge_small
+
+            # Apply segmentation if enabled
+            if segmentation:
                 try:
+                    # Create segmentation visualization
+                    segmented_img, _ = segment_image(frame, method=segmentation)
 
+                    # Display segmentation mode
+                    cv2.putText(frame, f"Segment: {segmentation.title()}",
+                                (10, height - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
 
+                    # Show segmentation in another corner
+                    seg_small = cv2.resize(segmented_img, (width // 4, height // 4))
+                    h, w = seg_small.shape[:2]
+                    frame[10+h+10:10+h+10+h, 10:10+w] = seg_small
+                except Exception as e:
+                    cv2.putText(frame, f"Segmentation Error: {str(e)[:30]}",
+                                (10, height - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+            # Add processing mode indicator
+            mode_text = f"Mode: {processing_mode.title()}"
+            if stabilize:
+                mode_text += " | Stabilized"
+            cv2.putText(frame, mode_text,
+                        (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
+
+            # Handle object tracking if enabled
+            detected_objects = []
+
+            # Local processing (YOLOv4-tiny and Haar cascades)
+            if processing_mode in ["local", "hybrid"]:
+                # Object detection with YOLO
+                if "Objects" in analysis_types:
+                    objects = detect_objects_yolo(
+                        frame, yolo_net, yolo_classes, yolo_output_layers
+                    )
+
+                    # Collect objects for tracking
+                    for obj in objects:
+                        x, y, w, h = obj["box"]
+                        label = obj["label"]
+                        confidence = obj["confidence"]
+
+                        # Add to detected objects list for tracking
+                        detected_objects.append((x, y, w, h, label))
+
+                        # Update statistics and draw boxes (if tracking disabled)
+                        if not enable_tracking:
+                            if label in detection_stats["objects"]:
+                                detection_stats["objects"][label] += 1
                             else:
-                                tracking["frames_present"] += 1
-                                tracking["last_seen"] = timestamp
-                                tracking["timestamps"].append(timestamp)
-
-                            # Calculate box coordinates
-                            box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0])
-                                   for vertex in obj.bounding_poly.normalized_vertices]
-                            box = np.array(box, np.int32).reshape((-1, 1, 2))
+                                detection_stats["objects"][label] = 1
 
-                            y_min = min([p[0][1] for p in box])
-                            confidence = int(obj.score * 100)
-
-                            # Enhanced label with confidence and border - larger text for visibility
-                            label_text = f"{obj.name}: {confidence}%"
-                            text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0]
-
-                            # Larger background rectangle for text visibility
-                            cv2.rectangle(frame,
-                                          (int(x_min), int(y_min) - text_size[1] - 10),
-                                          (int(x_min) + text_size[0] + 10, int(y_min)),
-                                          (0, 0, 0), -1)
-
-                            # Draw the label text with larger font
+                            # Draw box
+                            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
 
+                            # Add label with confidence
+                            label_text = f"{label}: {int(confidence * 100)}%"
                             cv2.putText(frame, label_text,
+                                        (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                # Face detection with Haar cascades
+                if "Face Detection" in analysis_types:
+                    faces = detect_faces_haar(frame, face_cascade)
 
+                    # Update faces count and add to detected objects for tracking
+                    if not enable_tracking:
+                        detection_stats["faces"] += len(faces)
+
+                    for face in faces:
+                        x, y, w, h = face["box"]
+                        detected_objects.append((x, y, w, h, "Face"))
 
+                        # Draw boxes only if tracking is disabled
+                        if not enable_tracking:
+                            # Draw box
+                            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2)
+
+            # Add tracking code
+            if enable_tracking:
+                try:
+                    # Initialize tracking on first frame or periodically
+                    if frame_count == 1 or frame_count % detection_interval == 0:
+                        # Reset tracker periodically
+                        if frame_count > 1:
+                            object_tracker = ObjectTracker(tracker_type="CSRT")
+
+                        # Register each detected object
+                        for x, y, w, h, label in detected_objects:
+                            object_tracker.register(frame, (x, y, w, h), label)
+
+                    # Update tracking on every frame
+                    tracked_objects = object_tracker.update(frame)
+
+                    # Draw tracked objects
+                    frame = object_tracker.draw_tracked_objects(frame, tracked_objects)
+
+                    # Add tracking status
+                    cv2.putText(frame, f"Tracking {len(tracked_objects)} objects",
+                                (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2)
+
+                    # Count object types in tracking
+                    for _, (_, _, _, _, label) in tracked_objects.items():
+                        if label in detection_stats["objects"]:
+                            detection_stats["objects"][label] += 1
+                        else:
+                            detection_stats["objects"][label] = 1
 
+                    # Update faces count if any faces are being tracked
+                    face_count = sum(1 for _, (_, _, _, _, label) in tracked_objects.items() if label == "Face")
+                    detection_stats["faces"] += face_count
+                except Exception as e:
+                    cv2.putText(frame, f"Tracking Error: {str(e)[:30]}",
+                                (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
+
+            # Cloud processing with Google Vision API (less frequent)
+            if processing_mode in ["cloud", "hybrid"] and frame_count % cloud_process_interval == 0:
+                try:
+                    # Convert to PIL Image for Vision API
+                    pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
 
+                    # Create vision image
+                    img_byte_arr = io.BytesIO()
+                    pil_img.save(img_byte_arr, format='PNG')
+                    content = img_byte_arr.getvalue()
+                    vision_image = vision.Image(content=content)
+
+                    # Add cloud processing indicator
+                    cv2.putText(frame, "Cloud Processing", (width - 200, 30),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)
+
+                    # Text detection
                     if "Text" in analysis_types:
                         text = client.text_detection(image=vision_image)
+
                         # Update stats
+                        if text.text_annotations:
                             detection_stats["text_blocks"] += len(text.text_annotations) - 1
 
+                            # Draw text boxes
+                            for text_annot in text.text_annotations[1:]:
+                                box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
+                                pts = np.array(box, np.int32).reshape((-1, 1, 2))
+                                cv2.polylines(frame, [pts], True, (255, 0, 0), 2)
+
+                            # Show text summary
                             full_text = text.text_annotations[0].description
                             words = full_text.split()
                             short_text = " ".join(words[:5])
                             if len(words) > 5:
                                 short_text += "..."
 
-                            # Add text summary to top of frame with better visibility
-                            cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1)
                             cv2.putText(frame, f"Text: {short_text}",
+                                        (10, height - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
 
-                            # Draw text boxes with improved visibility
-                            for text_annot in text.text_annotations[1:]:
-                                box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
-                                pts = np.array(box, np.int32).reshape((-1, 1, 2))
-                                cv2.polylines(frame, [pts], True, (255, 0, 0), 2)  # Thicker lines
 
+                    # Label detection
                     if "Labels" in analysis_types:
                         labels = client.label_detection(image=vision_image, max_results=5)
 
-                        y_pos = 120
-                        cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1)
-                        cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
-
-                        # Track stats and show labels
+                        # Update stats and show labels
                         for i, label in enumerate(labels.label_annotations):
-                            # Update stats
                             if label.description in detection_stats["labels"]:
                                 detection_stats["labels"][label.description] += 1
                             else:
                                 detection_stats["labels"][label.description] = 1
+
+                            # Display on frame
+                            cv2.putText(frame, f"Label: {label.description}",
+                                        (width - 200, 60 + i*30),
+                                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
+
+                    # Only do object/face detection with Vision API in cloud-only mode
+                    if processing_mode == "cloud" and not enable_tracking:
+                        if "Objects" in analysis_types:
+                            objects = client.object_localization(image=vision_image)
+
+                            for obj in objects.localized_object_annotations:
+                                # Update stats
+                                if obj.name in detection_stats["objects"]:
+                                    detection_stats["objects"][obj.name] += 1
+                                else:
+                                    detection_stats["objects"][obj.name] = 1
 
+                                # Draw box
+                                box = [(vertex.x * width, vertex.y * height)
+                                       for vertex in obj.bounding_poly.normalized_vertices]
+                                box = np.array(box, np.int32).reshape((-1, 1, 2))
+                                cv2.polylines(frame, [box], True, (0, 255, 0), 2)
+
+                                # Add label
+                                x_min = min([p[0][0] for p in box])
+                                y_min = min([p[0][1] for p in box])
+                                cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%",
+                                            (int(x_min), int(y_min) - 10),
+                                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+                        if "Face Detection" in analysis_types:
+                            faces = client.face_detection(image=vision_image)
+                            detection_stats["faces"] += len(faces.face_annotations)
+
+                            for face in faces.face_annotations:
+                                vertices = face.bounding_poly.vertices
+                                points = [(vertex.x, vertex.y) for vertex in vertices]
+                                pts = np.array(points, np.int32).reshape((-1, 1, 2))
+                                cv2.polylines(frame, [pts], True, (0, 0, 255), 2)
                except Exception as e:
                    # Show error on frame
                    cv2.putText(frame, f"API Error: {str(e)[:30]}",
+                                (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-
-                # Add hint about slowed down speed
-                cv2.putText(frame, "Playback: 60% speed for better visualization",
-                            (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
 
             # Write the frame to output video
             out.write(frame)
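`stabilize_frame` is called in both the live and file-processing paths but defined outside the shown hunks. A minimal sketch of a sparse-optical-flow stabilizer consistent with the `(frame, prev_gray, curr_gray)` signature used above; the feature-tracking approach and all parameters are assumptions.

# Hypothetical sketch of stabilize_frame: estimate camera motion between the
# previous and current grayscale frames, then warp the frame to cancel it.
import cv2
import numpy as np

def stabilize_frame(frame, prev_gray, curr_gray):
    # Track corner features from the previous frame into the current one
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200,
                                       qualityLevel=0.01, minDistance=30)
    if prev_pts is None:
        return frame
    curr_pts, status, _ = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)
    good_prev = prev_pts[status.flatten() == 1]
    good_curr = curr_pts[status.flatten() == 1]
    if len(good_prev) < 4:
        return frame

    # Estimate a partial affine transform and apply its inverse direction
    # (current -> previous) to steady the frame
    m, _ = cv2.estimateAffinePartial2D(good_curr, good_prev)
    if m is None:
        return frame
    h, w = frame.shape[:2]
    return cv2.warpAffine(frame, m, (w, h))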
@@ -1036,6 +1385,21 @@ def process_video_file(video_file, analysis_types):
         progress_bar.empty()
         status_text.empty()
 
+        # Calculate additional statistics
+        if detection_stats["motion_data"]:
+            detection_stats["avg_motion_level"] = sum(item["motion_level"] for item in detection_stats["motion_data"]) / len(detection_stats["motion_data"])
+
+        # Update the detection_stats to include the new features
+        detection_stats.update({
+            "edge_detection": edge_detection,
+            "segmentation": segmentation,
+            "tracking": {
+                "enabled": enable_tracking,
+                "method": "CSRT" if enable_tracking else None,
+                "objects_tracked": len(tracked_objects) if enable_tracking else 0
+            }
+        })
+
         # Read the processed video as bytes for download
         with open(output_path, 'rb') as file:
             processed_video_bytes = file.read()
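The stats above record the `edge_detection` and `segmentation` settings applied per frame. The `detect_edges` and `segment_image` helpers themselves are added outside the shown hunks; here is a minimal sketch under assumptions. The method names come from the option comments in `__init__`; the implementations are guesses, and the "watershed" branch is simplified to an Otsu foreground split rather than a full watershed.

# Hypothetical sketches of detect_edges and segment_image.
import cv2
import numpy as np

def detect_edges(frame, method="canny"):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if method == "canny":
        edges = cv2.Canny(gray, 100, 200)
    elif method == "sobel":
        edges = cv2.convertScaleAbs(cv2.Sobel(gray, cv2.CV_64F, 1, 1, ksize=3))
    else:  # "laplacian"
        edges = cv2.convertScaleAbs(cv2.Laplacian(gray, cv2.CV_64F))
    # Return 3-channel so it can be pasted into a BGR frame corner
    return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

def segment_image(frame, method="grabcut"):
    mask = np.zeros(frame.shape[:2], np.uint8)
    if method == "grabcut":
        rect = (10, 10, frame.shape[1] - 20, frame.shape[0] - 20)
        bgd, fgd = np.zeros((1, 65), np.float64), np.zeros((1, 65), np.float64)
        cv2.grabCut(frame, mask, rect, bgd, fgd, 3, cv2.GC_INIT_WITH_RECT)
        fg = np.where((mask == 1) | (mask == 3), 255, 0).astype(np.uint8)
    else:
        # "watershed" sketched here as a simple Otsu foreground split
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        _, fg = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    segmented = cv2.bitwise_and(frame, frame, mask=fg)
    return segmented, fg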
@@ -1044,24 +1408,13 @@ def process_video_file(video_file, analysis_types):
         os.unlink(temp_video_path)
         os.unlink(output_path)
 
-        # Calculate additional statistics
-        for obj_name, tracking in detection_stats["object_tracking"].items():
-            # Calculate total screen time
-            tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2)
-            # Calculate average confidence if available
-            if "confidences" in tracking and tracking["confidences"]:
-                tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"])
-
-        # Return enhanced results
-        results = {"detection_stats": detection_stats}
-
         # Store results in session state for chatbot context
-        st.session_state.analysis_results =
+        st.session_state.analysis_results = {"detection_stats": detection_stats}
 
         # Update vectorstore with new results
-        update_vectorstore_with_results(
+        update_vectorstore_with_results({"detection_stats": detection_stats})
 
-        return processed_video_bytes,
+        return processed_video_bytes, {"detection_stats": detection_stats}
 
     except Exception as e:
         # Clean up on error
@@ -1714,10 +2067,36 @@ def main():
                     st.error(f"Error processing {uploaded_file.name}: {str(e)}")
 
     elif selected == "Video Analysis":
-        st.markdown('<div class="subheader">Video Analysis</div>', unsafe_allow_html=True)
+        st.markdown('<div class="subheader">Video Analysis with Hybrid Processing</div>', unsafe_allow_html=True)
 
+        # Enhanced analysis settings
         st.sidebar.markdown("### Video Analysis Settings")
+
+        # Add processing mode selection
+        processing_mode = st.sidebar.radio(
+            "Processing Mode",
+            ["hybrid", "local", "cloud"],
+            format_func=lambda x: {
+                "hybrid": "Hybrid (Local + Cloud) - Recommended",
+                "local": "Local Only (Faster, Less Accurate)",
+                "cloud": "Cloud Only (Slower, More Accurate)"
+            }[x],
+            index=0  # Default to hybrid
+        )
+
+        # Show appropriate explanation based on selected mode
+        if processing_mode == "hybrid":
+            st.sidebar.info("Hybrid mode uses local processing for real-time tasks and Google Vision for detailed analysis.")
+        elif processing_mode == "local":
+            st.sidebar.info("Local mode runs entirely on your device using YOLOv4-tiny for object detection and Haar cascades for faces.")
+        else:  # cloud
+            st.sidebar.info("Cloud mode sends all frames to Google Vision API for high-accuracy analysis.")
+
+        # Add stabilization toggle
+        stabilize = st.sidebar.checkbox("Enable Video Stabilization", value=False,
+                                        help="Reduces camera shake using optical flow")
+
+        # Analysis type selection
         analysis_types = []
         if st.sidebar.checkbox("Object Detection", value=True):
             analysis_types.append("Objects")
@@ -1725,21 +2104,33 @@ def main():
             analysis_types.append("Face Detection")
         if st.sidebar.checkbox("Text Recognition"):
             analysis_types.append("Text")
+        if st.sidebar.checkbox("Label Detection"):
+            analysis_types.append("Labels")
 
         st.sidebar.markdown("---")
-        st.sidebar.warning("⚠️ Video analysis may use a significant amount of API calls. Use responsibly.")
 
+        # Add info about processing limits and usage
+        if processing_mode in ["cloud", "hybrid"]:
+            st.sidebar.warning("⚠️ Cloud analysis may use a significant amount of API calls. Use responsibly.")
+
+        # Main content
         st.markdown("""
-        #### 📤 Video Analysis
+        #### 📤 Enhanced Video Analysis
 
-        Upload a video file to analyze it with
+        Upload a video file to analyze it with hybrid AI processing.
+
+        **Features:**
+        - **Local Processing**: Fast object & face detection using YOLOv4-tiny and Haar cascades
+        - **Cloud Processing**: High-accuracy text recognition and labels with Google Vision AI
+        - **Motion Analysis**: Track movement patterns with optical flow
+        - **Video Stabilization**: Reduce camera shake (optional)
+        - **Scene Changes**: Automatically detect major scene transitions
 
         **Instructions:**
-        1. Select
+        1. Select processing mode and analysis types in the sidebar
         2. Upload a video file (MP4, MOV, AVI)
        3. Click "Process Video" to begin analysis
+        4. Explore the enhanced analytics and download the processed video
 
        **Note:** Videos are limited to 10 seconds of processing to manage API usage.
        """)
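The note above mentions a 10-second processing cap, but the guard itself sits in a part of `process_video_file` not shown in this diff. A minimal sketch of how such a cap would typically look; the variable names here are illustrative only.

# Hypothetical sketch of the 10-second processing cap mentioned in the note.
import cv2

cap = cv2.VideoCapture("input.mp4")  # placeholder path
fps = cap.get(cv2.CAP_PROP_FPS) or 30
max_frames = int(fps * 10)  # stop after ten seconds of footage
frame_count = 0
while cap.isOpened() and frame_count < max_frames:
    ret, frame = cap.read()
    if not ret:
        break
    frame_count += 1
    # ... per-frame analysis as in the hunks above ...
cap.release()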
@@ -1759,10 +2150,15 @@ def main():
         if not analysis_types:
             st.warning("Please select at least one analysis type.")
         else:
-            with st.spinner("Processing video (max 10 seconds)..."):
+            with st.spinner(f"Processing video in {processing_mode} mode (max 10 seconds)..."):
                 try:
-                    # Process the video with
-                    processed_video, results = process_video_file(
+                    # Process the video with hybrid processing
+                    processed_video, results = process_video_file(
+                        uploaded_file,
+                        analysis_types,
+                        processing_mode=processing_mode,
+                        stabilize=stabilize
+                    )
 
                     if processed_video:
                         # Offer download of processed video
@@ -1774,64 +2170,207 @@ def main():
                             mime="video/mp4"
                         )
 
+                        # Enhanced analytics display
+                        detection_stats = results["detection_stats"]
 
+                        st.markdown("### Enhanced Video Analytics")
+
+                        # Display processing mode info
+                        st.info(f"Processing mode: **{detection_stats['processing_mode'].title()}**" +
+                                (", with video stabilization" if detection_stats['stabilized'] else ""))
+
+                        # Create tabs for different analytics
+                        tab1, tab2, tab3, tab4 = st.tabs([
+                            "Object Detection",
+                            "Motion Analysis",
+                            "Scene Changes",
+                            "Text & Labels"
+                        ])
+
+                        with tab1:
                             st.markdown("#### 📦 Objects Detected")
 
+                            if detection_stats["objects"]:
+                                # Sort objects by frequency
+                                sorted_objects = dict(sorted(detection_stats["objects"].items(),
+                                                             key=lambda x: x[1], reverse=True))
+
+                                # Create bar chart for objects
+                                if sorted_objects:
+                                    fig = px.bar(
+                                        x=list(sorted_objects.keys()),
+                                        y=list(sorted_objects.values()),
+                                        labels={"x": "Object Type", "y": "Frequency"},
+                                        title="Objects Detected in Video",
+                                        color=list(sorted_objects.values()),
+                                        color_continuous_scale="Viridis"
+                                    )
+                                    st.plotly_chart(fig, use_container_width=True)
-                                objects = list(sorted_objects.keys())
-                                counts = list(sorted_objects.values())
-                                ax.barh(objects, counts, color='skyblue')
-                                ax.set_xlabel('Number of Detections')
-                                ax.set_title('Objects Detected in Video')
-                                st.pyplot(fig)
 
+                                # Object statistics
+                                st.markdown("##### Object Detection Statistics")
+                                total_objects = sum(sorted_objects.values())
+                                unique_objects = len(sorted_objects)
+
+                                col1, col2, col3 = st.columns(3)
+                                with col1:
+                                    st.metric("Total Detections", total_objects)
+                                with col2:
+                                    st.metric("Unique Objects", unique_objects)
+                                with col3:
+                                    if "faces" in detection_stats:
+                                        st.metric("Faces Detected", detection_stats["faces"])
+
+                                # List with counts
+                                st.markdown("##### Top Objects")
+                                for obj, count in list(sorted_objects.items())[:10]:
+                                    st.markdown(f"- **{obj}**: {count} occurrences")
+                            else:
+                                st.info("No objects detected in the video.")
+
+                        with tab2:
+                            st.markdown("#### 🔄 Motion Analysis")
 
+                            if detection_stats["motion_data"]:
+                                # Create a DataFrame for the motion data
+                                motion_df = pd.DataFrame(detection_stats["motion_data"])
 
+                                # Plot motion level over time
+                                st.markdown("##### Motion Intensity Over Time")
+                                fig = px.line(
+                                    motion_df,
+                                    x="time",
+                                    y="motion_level",
+                                    labels={"time": "Time (seconds)", "motion_level": "Motion Intensity"},
+                                    title="Motion Intensity Throughout Video"
+                                )
+                                # Add a horizontal line for scene change threshold
+                                fig.add_hline(
+                                    y=40.0,
+                                    line_dash="dash",
+                                    line_color="red",
+                                    annotation_text="Scene Change Threshold"
+                                )
+                                st.plotly_chart(fig, use_container_width=True)
+
+                                # Motion area percentage
+                                st.markdown("##### Motion Area Percentage")
+                                fig = px.area(
+                                    motion_df,
+                                    x="time",
+                                    y="motion_area",
+                                    labels={"time": "Time (seconds)", "motion_area": "% of Frame with Motion"},
+                                    title="Percentage of Frame with Detected Motion"
+                                )
+                                st.plotly_chart(fig, use_container_width=True)
+
+                                # Motion statistics
+                                st.markdown("##### Motion Statistics")
+                                col1, col2, col3 = st.columns(3)
                                 with col1:
-                                    st.
+                                    st.metric(
+                                        "Average Motion",
+                                        f"{detection_stats['avg_motion_level']:.2f}"
+                                    )
+                                with col2:
+                                    st.metric(
+                                        "Peak Motion",
+                                        f"{max(item['motion_level'] for item in detection_stats['motion_data']):.2f}"
+                                    )
+                                with col3:
+                                    st.metric(
+                                        "Motion Variability",
+                                        f"{np.std([item['motion_level'] for item in detection_stats['motion_data']]):.2f}"
+                                    )
+                            else:
+                                st.info("No motion data collected for this video.")
 
+                        with tab3:
+                            st.markdown("#### 🎬 Scene Changes")
+
+                            if detection_stats["scene_changes"]:
+                                # Create a timeline of scene changes
+                                st.markdown("##### Timeline of Detected Scene Changes")
+
+                                # Create a DataFrame with scene change markers
+                                timeline_df = pd.DataFrame({
+                                    "time": detection_stats["scene_changes"],
+                                    "event": ["Scene Change"] * len(detection_stats["scene_changes"])
+                                })
+
+                                # Plot the timeline
+                                fig = px.scatter(
+                                    timeline_df,
+                                    x="time",
+                                    y="event",
+                                    labels={"time": "Time (seconds)"},
+                                    title="Scene Change Timeline",
+                                    size=[10] * len(timeline_df),
+                                    color_discrete_sequence=["red"]
+                                )
+                                # Add vertical lines for each scene change
+                                for time in detection_stats["scene_changes"]:
+                                    fig.add_vline(x=time, line_dash="solid", line_color="rgba(255,0,0,0.3)")
+
+                                # Adjust the y-axis
+                                fig.update_yaxes(showticklabels=False)
+
+                                # Show the plot
+                                st.plotly_chart(fig, use_container_width=True)
+
+                                # List scene changes
+                                st.markdown("##### Scene Changes Detected At:")
+                                for i, time in enumerate(sorted(detection_stats["scene_changes"])):
+                                    st.markdown(f"**Scene {i+1}**: {time:.2f} seconds")
+
+                                # Scene statistics
+                                st.markdown("##### Scene Statistics")
+                                col1, col2 = st.columns(2)
+                                with col1:
+                                    st.metric("Number of Scenes", len(detection_stats["scene_changes"]) + 1)
+                                with col2:
+                                    if len(detection_stats["scene_changes"]) > 0:
+                                        avg_scene_duration = 10.0 / (len(detection_stats["scene_changes"]) + 1)
+                                        st.metric("Average Scene Duration", f"{avg_scene_duration:.2f}s")
-                                top_labels["Other"] = other_count
 
                     except Exception as e:
                         st.error(f"Error processing video: {str(e)}")

@@ -2294,3 +2833,456 @@ def extract_video_frames(video_bytes, num_frames=5):
         os.unlink(temp_video_path)
 
     return frames
|
2335 |
+
else:
|
2336 |
+
st.info("No scene changes detected in this video.")
|
2337 |
|
2338 |
+
with tab4:
|
2339 |
+
st.markdown("#### 📝 Text & Labels")
|
2340 |
+
|
2341 |
+
col1, col2 = st.columns(2)
|
2342 |
|
2343 |
+
with col1:
|
2344 |
+
st.markdown("##### Text Detection")
|
2345 |
+
if detection_stats["text_blocks"] > 0:
|
2346 |
+
st.metric("Text Blocks Detected", detection_stats["text_blocks"])
|
2347 |
+
st.info("Text recognition powered by Google Cloud Vision AI")
|
2348 |
+
else:
|
2349 |
+
st.info("No text detected in the video.")
|
2350 |
|
2351 |
+
with col2:
|
2352 |
+
st.markdown("##### Scene Labels")
|
2353 |
+
if detection_stats["labels"]:
|
2354 |
+
# Sort labels by frequency
|
2355 |
+
sorted_labels = dict(sorted(detection_stats["labels"].items(),
|
2356 |
+
key=lambda x: x[1], reverse=True))
|
|
|
2357 |
|
2358 |
+
# Create pie chart for top labels
|
2359 |
+
fig = px.pie(
|
2360 |
+
names=list(sorted_labels.keys())[:7],
|
2361 |
+
values=list(sorted_labels.values())[:7],
|
2362 |
+
title="Distribution of Scene Labels",
|
2363 |
+
hole=0.3
|
2364 |
+
)
|
2365 |
+
st.plotly_chart(fig, use_container_width=True)
|
2366 |
+
|
2367 |
+
# List labels
|
2368 |
+
st.markdown("**Top Labels:**")
|
2369 |
+
for label, count in list(sorted_labels.items())[:7]:
|
2370 |
+
st.markdown(f"- {label}: {count} occurrences")
|
2371 |
+
else:
|
2372 |
+
st.info("No labels detected in the video.")
|
2373 |
+
|
2374 |
except Exception as e:
|
2375 |
st.error(f"Error processing video: {str(e)}")
|
2376 |
|
# ... (unchanged code elided between diff hunks) ...

    os.unlink(temp_video_path)

    return frames
def load_yolo_model():
    """Load the YOLOv4-tiny model for object detection."""
    import urllib.request

    # Create directory for models if it doesn't exist
    models_dir = Path("models")
    models_dir.mkdir(exist_ok=True)

    # Paths for YOLO files
    weights_path = models_dir / "yolov4-tiny.weights"
    cfg_path = models_dir / "yolov4-tiny.cfg"
    names_path = models_dir / "coco.names"

    # Download YOLO files if they don't exist
    if not weights_path.exists():
        st.info("Downloading YOLOv4-tiny weights (first time only)...")
        urllib.request.urlretrieve(
            "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights",
            str(weights_path)
        )

    if not cfg_path.exists():
        st.info("Downloading YOLOv4-tiny configuration (first time only)...")
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg",
            str(cfg_path)
        )

    if not names_path.exists():
        st.info("Downloading COCO class names (first time only)...")
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names",
            str(names_path)
        )

    # Load the network
    net = cv2.dnn.readNet(str(weights_path), str(cfg_path))

    # Load class names
    with open(str(names_path), "r") as f:
        classes = [line.strip() for line in f.readlines()]

    # Get output layer names; the return shape of getUnconnectedOutLayers
    # changed in OpenCV 4.5.4, so handle both variants
    layer_names = net.getLayerNames()
    try:
        # OpenCV 4.5.4+: returns a flat array of indices
        output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    except (TypeError, IndexError):
        # Older OpenCV versions: returns an Nx1 array
        output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]

    return net, classes, output_layers
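
# Note (illustrative sketch, not part of the original diff): Streamlit reruns the
# whole script on every interaction, so caching the loaded network avoids
# re-reading the weights each time. Assuming the installed Streamlit version
# provides st.cache_resource, a wrapper like this would work:
#
#   @st.cache_resource
#   def get_yolo():
#       return load_yolo_model()
#
#   net, classes, output_layers = get_yolo()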

def load_haar_cascades():
    """Load the Haar cascade classifier for face detection."""
    # Create directory for models if it doesn't exist
    models_dir = Path("models")
    models_dir.mkdir(exist_ok=True)

    # Path for the Haar cascade file
    face_cascade_path = models_dir / "haarcascade_frontalface_default.xml"

    # Download the cascade file if it doesn't exist
    if not face_cascade_path.exists():
        st.info("Downloading Haar cascade face detector (first time only)...")
        import urllib.request
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml",
            str(face_cascade_path)
        )

    # Load the face cascade
    face_cascade = cv2.CascadeClassifier(str(face_cascade_path))

    return face_cascade

def detect_objects_yolo(frame, net, classes, output_layers, confidence_threshold=0.5):
    """Detect objects in a frame using YOLOv4-tiny."""
    height, width, _ = frame.shape

    # Prepare the image for YOLO (normalized 416x416 blob, BGR -> RGB)
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass
    layer_outputs = net.forward(output_layers)

    # Collect raw detections
    boxes = []
    confidences = []
    class_ids = []

    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > confidence_threshold:
                # Scale box coordinates back to frame size
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # Top-left corner of the rectangle
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply non-maximum suppression to remove overlapping boxes
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4)

    # Prepare results
    results = []

    if len(indexes) > 0:
        # NMSBoxes returns a flat array in OpenCV 4.5.4+ and an Nx1 array before
        try:
            flat_indexes = indexes.flatten()
        except AttributeError:
            flat_indexes = indexes

        for i in flat_indexes:
            x, y, w, h = boxes[i]
            results.append({
                "box": (x, y, w, h),
                "label": str(classes[class_ids[i]]),
                "confidence": confidences[i]
            })

    return results
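
# Illustrative sketch of drawing these detections on a frame (the box format
# returned above is (x, y, w, h) in pixel coordinates; `frame` is any BGR image):
#
#   net, classes, output_layers = load_yolo_model()
#   for det in detect_objects_yolo(frame, net, classes, output_layers):
#       x, y, w, h = det["box"]
#       cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
#       cv2.putText(frame, f"{det['label']} {det['confidence']:.2f}",
#                   (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)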

def detect_faces_haar(frame, face_cascade):
    """Detect faces using a Haar cascade classifier."""
    # Haar cascades operate on grayscale images
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect faces
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30)
    )

    # Prepare results in the same shape as the YOLO detector
    results = []
    for (x, y, w, h) in faces:
        results.append({"box": (x, y, w, h)})

    return results
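
# Usage sketch (illustrative): the cascade is loaded once and reused per frame.
#
#   face_cascade = load_haar_cascades()
#   for face in detect_faces_haar(frame, face_cascade):
#       x, y, w, h = face["box"]
#       cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)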

def calculate_optical_flow(prev_gray, current_gray):
    """Calculate dense optical flow between frames for motion detection."""
    # Calculate flow using Farneback's dense optical flow
    # (not Lucas-Kanade, which tracks sparse feature points)
    flow = cv2.calcOpticalFlowFarneback(
        prev_gray, current_gray,
        None, 0.5, 3, 15, 3, 5, 1.2, 0
    )

    # Per-pixel magnitude and angle of the flow vectors
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # Normalize magnitude into a 0-255 mask for visualization
    norm_magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    motion_mask = norm_magnitude.astype(np.uint8)

    # Motion metrics: mean flow magnitude and fraction of pixels in motion
    motion_level = np.mean(magnitude)
    motion_area = np.sum(magnitude > 0.5) / (magnitude.shape[0] * magnitude.shape[1])

    return motion_level, motion_area, motion_mask, flow
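
# Minimal sketch of feeding consecutive frames through this helper (assumes a
# cv2.VideoCapture source; the variable names here are hypothetical):
#
#   cap = cv2.VideoCapture("input.mp4")
#   ok, prev = cap.read()
#   prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
#   while True:
#       ok, frame = cap.read()
#       if not ok:
#           break
#       gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#       level, area, mask, flow = calculate_optical_flow(prev_gray, gray)
#       prev_gray = gray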

def stabilize_frame(frame, prev_frame_gray, current_frame_gray):
    """Stabilize a video frame using dense optical flow."""
    # Calculate optical flow between the previous and current frames
    flow = cv2.calcOpticalFlowFarneback(
        prev_frame_gray, current_frame_gray,
        None, 0.5, 3, 15, 3, 5, 1.2, 0
    )

    # The median flow vector approximates the global (camera) motion
    h, w = flow.shape[:2]
    flow_median_x = np.median(flow[..., 0])
    flow_median_y = np.median(flow[..., 1])

    # Build an affine transform that shifts the frame against the camera motion
    transform = np.array([[1, 0, -flow_median_x], [0, 1, -flow_median_y]], dtype=np.float32)

    # Apply the translation to stabilize the frame
    stabilized_frame = cv2.warpAffine(frame, transform, (w, h))

    return stabilized_frame
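
# Usage sketch: stabilization needs the previous frame's grayscale image, so it
# slots into the same loop as calculate_optical_flow above:
#
#   gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#   stabilized = stabilize_frame(frame, prev_gray, gray)
#   prev_gray = gray
#
# Note this corrects translation (jitter) only; rotation and zoom are untouched.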

def create_tracker(tracker_type="CSRT"):
    """Create an OpenCV tracker of the specified type.

    Requires the opencv-contrib-python package, which provides cv2.legacy.
    """
    if tracker_type == 'BOOSTING':
        return cv2.legacy.TrackerBoosting_create()
    elif tracker_type == 'MIL':
        return cv2.legacy.TrackerMIL_create()
    elif tracker_type == 'KCF':
        return cv2.legacy.TrackerKCF_create()
    elif tracker_type == 'TLD':
        return cv2.legacy.TrackerTLD_create()
    elif tracker_type == 'MEDIANFLOW':
        return cv2.legacy.TrackerMedianFlow_create()
    elif tracker_type == 'CSRT':
        return cv2.legacy.TrackerCSRT_create()
    elif tracker_type == 'MOSSE':
        return cv2.legacy.TrackerMOSSE_create()
    else:
        return cv2.legacy.TrackerCSRT_create()  # Default to CSRT
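
# A dict dispatch would be a more compact alternative to the if/elif chain
# (sketch, same behavior):
#
#   _TRACKERS = {
#       'BOOSTING': cv2.legacy.TrackerBoosting_create,
#       'MIL': cv2.legacy.TrackerMIL_create,
#       'KCF': cv2.legacy.TrackerKCF_create,
#       'TLD': cv2.legacy.TrackerTLD_create,
#       'MEDIANFLOW': cv2.legacy.TrackerMedianFlow_create,
#       'CSRT': cv2.legacy.TrackerCSRT_create,
#       'MOSSE': cv2.legacy.TrackerMOSSE_create,
#   }
#   def create_tracker(tracker_type="CSRT"):
#       return _TRACKERS.get(tracker_type, cv2.legacy.TrackerCSRT_create)()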

class ObjectTracker:
    """Manages object tracking across video frames."""

    def __init__(self, tracker_type="CSRT", max_disappeared=30):
        self.tracker_type = tracker_type
        self.trackers = {}           # Active trackers, keyed by object ID
        self.disappeared = {}        # Frames since each object was last seen
        self.max_disappeared = max_disappeared  # Max frames to keep a lost object
        self.next_object_id = 0      # Counter for assigning object IDs
        self.objects = {}            # Tracked positions {ID: (x, y, w, h, label)}

    def register(self, frame, bbox, label="Object"):
        """Register a new object to track."""
        # Create and initialize a new tracker on the given bounding box
        tracker = create_tracker(self.tracker_type)
        tracker.init(frame, bbox)

        # Record the object
        object_id = self.next_object_id
        self.trackers[object_id] = tracker
        self.objects[object_id] = (*bbox, label)
        self.disappeared[object_id] = 0

        self.next_object_id += 1
        return object_id

    def deregister(self, object_id):
        """Stop tracking an object."""
        self.trackers.pop(object_id, None)
        self.objects.pop(object_id, None)
        self.disappeared.pop(object_id, None)

    def update(self, frame):
        """Update all trackers with a new frame."""
        if len(self.trackers) == 0:
            return self.objects

        updated_objects = {}

        # Iterate over a copy of the keys, since deregister() mutates the dict
        for object_id in list(self.trackers.keys()):
            tracker = self.trackers[object_id]
            success, bbox = tracker.update(frame)

            if success:
                # Successfully tracked: reset the disappeared counter
                self.disappeared[object_id] = 0

                # Update the position, keeping the same label
                _, _, _, _, label = self.objects[object_id]
                self.objects[object_id] = (*bbox, label)
                updated_objects[object_id] = self.objects[object_id]
            else:
                # Tracking failed: increment the disappeared counter
                self.disappeared[object_id] += 1

                if self.disappeared[object_id] > self.max_disappeared:
                    # Lost for too long; stop tracking it
                    self.deregister(object_id)
                else:
                    # Keep the last known position for now
                    updated_objects[object_id] = self.objects[object_id]

        return updated_objects

    def draw_tracked_objects(self, frame, objects):
        """Draw bounding boxes and IDs for tracked objects."""
        for object_id, (x, y, w, h, label) in objects.items():
            x, y, w, h = int(x), int(y), int(w), int(h)

            # Bounding box plus "ID:n label" caption
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            text = f"ID:{object_id} {label}"
            cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        return frame
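
# Usage sketch tying the tracker to the YOLO detector (illustrative only; a real
# pipeline would match fresh detections to existing tracks before registering new
# ones, to avoid duplicate IDs for the same object): detect occasionally, track
# every frame.
#
#   tracker = ObjectTracker(tracker_type="CSRT")
#   if frame_idx % 30 == 0:                      # re-detect every 30 frames
#       for det in detect_objects_yolo(frame, net, classes, output_layers):
#           tracker.register(frame, det["box"], det["label"])
#   objects = tracker.update(frame)
#   frame = tracker.draw_tracked_objects(frame, objects)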

def segment_image(frame, method="watershed", rect=None):
    """Segment an image into foreground and background regions."""
    if method == "watershed":
        # --- Watershed segmentation ---

        # Convert to grayscale if needed
        if len(frame.shape) == 3:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        else:
            gray = frame.copy()
            frame = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

        # Otsu threshold to get a rough foreground mask
        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

        # Noise removal with morphological opening
        kernel = np.ones((3, 3), np.uint8)
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

        # Sure background area (dilated mask)
        sure_bg = cv2.dilate(opening, kernel, iterations=3)

        # Sure foreground area (peaks of the distance transform)
        dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
        _, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)

        # Unknown region: background minus foreground
        sure_fg = np.uint8(sure_fg)
        unknown = cv2.subtract(sure_bg, sure_fg)

        # Marker labeling
        _, markers = cv2.connectedComponents(sure_fg)

        # Add 1 to all labels so that sure background is 1 instead of 0
        markers = markers + 1

        # Mark the unknown region with 0 (watershed will resolve it)
        markers[unknown == 255] = 0

        # Apply watershed; boundary pixels come back labeled -1
        markers = cv2.watershed(frame, markers)

        # Visualization: mark boundaries in red
        segmented = frame.copy()
        segmented[markers == -1] = [0, 0, 255]

        # Colored mask, one random color per segment
        mask = np.zeros_like(frame)
        for label in np.unique(markers):
            if label > 1:  # Skip background (1) and boundaries (-1)
                color = np.random.randint(0, 255, size=3, dtype=np.uint8)
                mask[markers == label] = color

        # Blend the boundary image with the segmentation mask
        # (blending `segmented` rather than `frame` keeps the red boundaries visible)
        result = cv2.addWeighted(segmented, 0.7, mask, 0.3, 0)

        return result, markers

    elif method == "grabcut":
        # --- GrabCut segmentation ---

        # Mask and model arrays required by cv2.grabCut
        mask = np.zeros(frame.shape[:2], np.uint8)
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)

        # If no rectangle is provided, use the center portion of the image
        if rect is None:
            h, w = frame.shape[:2]
            rect = (w // 4, h // 4, w // 2, h // 2)

        # Run GrabCut for 5 iterations, initialized from the rectangle
        cv2.grabCut(frame, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

        # Collapse to binary: sure/probable background -> 0, foreground -> 1
        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')

        # Highlight the foreground in green
        highlight_mask = np.zeros_like(frame)
        highlight_mask[mask2 == 1] = [0, 255, 0]
        result = cv2.addWeighted(frame.copy(), 0.7, highlight_mask, 0.3, 0)

        return result, mask

    else:
        return frame, None  # Unrecognized method: return the original frame
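
# Usage sketch (illustrative): watershed needs no seed, while GrabCut can take an
# optional (x, y, w, h) rectangle around the subject:
#
#   ws_result, ws_markers = segment_image(frame, method="watershed")
#   gc_result, gc_mask = segment_image(frame, method="grabcut", rect=(50, 50, 200, 200))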

def detect_edges(frame, method="canny", low_threshold=100, high_threshold=200):
    """Detect edges in an image using various methods."""
    # Convert to grayscale if needed
    if len(frame.shape) == 3:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        gray = frame

    # Apply Gaussian blur to reduce noise before edge detection
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    if method == "canny":
        # Canny edge detector with hysteresis thresholds
        edges = cv2.Canny(blurred, low_threshold, high_threshold)
        # Convert back to 3-channel for visualization
        return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR)

    elif method == "sobel":
        # Sobel gradients in x and y
        sobel_x = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=3)
        sobel_y = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=3)

        # Gradient magnitude, normalized to uint8
        magnitude = cv2.magnitude(sobel_x, sobel_y)
        magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

        # Convert back to 3-channel for visualization
        return cv2.cvtColor(magnitude, cv2.COLOR_GRAY2BGR)

    elif method == "laplacian":
        # Laplacian (second derivative) edge detector
        laplacian = cv2.Laplacian(blurred, cv2.CV_64F)
        laplacian = np.uint8(np.absolute(laplacian))
        laplacian = cv2.normalize(laplacian, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

        # Convert back to 3-channel for visualization
        return cv2.cvtColor(laplacian, cv2.COLOR_GRAY2BGR)

    else:
        return frame  # Unrecognized method: return the original frame
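
# Usage sketch (illustrative): all three methods return a 3-channel BGR image,
# so the output can be displayed or written to video directly:
#
#   edges = detect_edges(frame, method="canny", low_threshold=50, high_threshold=150)
#   combined = np.hstack([frame, edges])   # side-by-side comparison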