Update app.py
app.py
CHANGED
@@ -433,10 +433,13 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None):
433   class VideoProcessor(VideoProcessorBase):
434   """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
436 - def __init__(self, analysis_types: List[str]):
437   self.analysis_types = analysis_types
438   self.frame_counter = 0
439 - self.process_every_n_frames =
440   self.vision_client = client # Store client reference
441   self.last_results = {} # Cache results between processed frames
442   self.last_processed_time = time.time()
@@ -453,6 +456,31 @@ class VideoProcessor(VideoProcessorBase):
453   self.max_time_delta = 0.5
454   self.min_time_delta = 0.05
455
456   def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
457   img = frame.to_ndarray(format="bgr24")
458   self.frame_counter += 1
@@ -463,14 +491,14 @@ class VideoProcessor(VideoProcessorBase):
463
464   # Add status display on all frames
465   cv2.putText(img,
466 - f"Vision AI: {'Active' if self.processing_active else 'Paused'}",
467   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
468
469   # Convert to grayscale for motion detection
470   gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
471
472 - # Apply motion detection for all frames
473 - if self.prev_gray is not None:
474   # Calculate frame difference for smoother motion detection
475   frame_diff = cv2.absdiff(gray, self.prev_gray)
476   _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
@@ -482,85 +510,274 @@ class VideoProcessor(VideoProcessorBase):
482   mg_mask = cv2.motempl.calcMotionGradient(
483   self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
484
485 - # Visualize motion segments
486-502 - (content not captured)
503 - # Process at regular intervals
504   current_time = time.time()
505 - if
506   self.last_processed_time = current_time
507
508 - #
509-511 - (content not captured)
512 - if "objects" in self.last_results and "Objects" in self.analysis_types:
513 - # Use OpenCV's built-in object trackers for smoother tracking between API calls
514 - for obj in self.last_results["objects"]:
515 - obj_id = obj.name + str(hash(str(obj.bounding_poly.normalized_vertices)))
516
517-518 - (content not captured)
519 - tracker = cv2.TrackerKCF_create() # or other trackers like CSRT, MIL, etc.
520 -
521 - # Get bounding box coordinates
522 - box_points = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
523 - for vertex in obj.bounding_poly.normalized_vertices]
524 - x_min = min([p[0] for p in box_points])
525 - y_min = min([p[1] for p in box_points])
526 - x_max = max([p[0] for p in box_points])
527 - y_max = max([p[1] for p in box_points])
528 -
529 - # Initialize tracker
530 - bbox = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
531 - tracker.init(img, bbox)
532 - self.object_trackers[obj_id] = {
533 - "tracker": tracker,
534 - "name": obj.name,
535 - "score": obj.score,
536 - "last_update": self.frame_counter
537 - }
538
539-545 - (content not captured)
546
547-551 - (content not captured)
552
553 - #
554-556 - (content not captured)
557
558-560 - (content not captured)
561
562   # Save current frame for next iteration
563   self.prev_gray = gray
564
565   return av.VideoFrame.from_ndarray(img, format="bgr24")
566
@@ -786,7 +1003,10 @@ def list_bigquery_resources():
786
787   return resources
788
789 - def process_video_file(video_file, analysis_types):
790   """Process an uploaded video file with enhanced Vision AI detection and analytics"""
791   # Create a temporary file to save the uploaded video
792   with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
@@ -801,7 +1021,7 @@ def process_video_file(video_file, analysis_types):
801   if not cap.isOpened():
802   st.error("Error opening video file")
803   os.unlink(temp_video_path)
804 - return None, None
805
806   # Get video properties
807   width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -817,7 +1037,15 @@ def process_video_file(video_file, analysis_types):
817   # Scene change detection threshold
818   scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
819   # Process every Nth frame to reduce API calls
820 - process_every_n_frames =
821
822   # Check OpenCV version for compatibility with advanced features
823   opencv_version = cv2.__version__
@@ -839,6 +1067,38 @@ def process_video_file(video_file, analysis_types):
839   use_advanced_tracking = False
840   # ----------------- End Parameters -----------------
841
842   # Inform user if video is being truncated
843   if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
844   st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
@@ -881,128 +1141,10 @@ def process_video_file(video_file, analysis_types):
881   previous_frame_gray = None
882   prev_points = None
883
884-887 - (content not captured)
888 - if not ret:
889 - break
890 -
891 - frame_count += 1
892 -
893 - # Update progress
894 - progress = int(frame_count / total_frames * 100)
895 - progress_bar.progress(progress)
896 - status_text.text(f"Processing frame {frame_count}/{total_frames} ({progress}%) - {frame_count/fps:.1f}s of 10s")
897 -
898 - # Add timestamp to frame
899 - cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
900 - (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
901 -
902 - # Activity detection and scene change detection
903 - current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
904 - current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
905 -
906 - if previous_frame_gray is not None:
907 - # Calculate frame difference for activity detection
908 - frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
909 - activity_level = np.mean(frame_diff)
910 - detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
911 -
912 - # Scene change detection
913 - if activity_level > scene_change_threshold:
914 - detection_stats["scene_changes"].append(frame_count/fps)
915 - # Mark scene change on frame
916 - cv2.putText(frame, "SCENE CHANGE",
917 - (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
918 -
919 - # Add optical flow tracking if enabled
920 - if use_advanced_tracking and prev_points is not None:
921 - try:
922 - # Calculate optical flow
923 - next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
924 - current_frame_gray,
925 - prev_points,
926 - None,
927 - **lk_params)
928 -
929 - # Select good points
930 - if next_points is not None:
931 - good_new = next_points[status==1]
932 - good_old = prev_points[status==1]
933 -
934 - # Draw motion tracks
935 - for i, (new, old) in enumerate(zip(good_new, good_old)):
936 - a, b = new.ravel()
937 - c, d = old.ravel()
938 - # Draw motion lines
939 - cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
940 - cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
941 - except Exception as e:
942 - # If optical flow fails, just continue without it
943 - pass
944 -
945 - # Update tracking points periodically if enabled
946 - if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
947 - try:
948 - prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
949 - except Exception:
950 - # If feature tracking fails, just continue without it
951 - prev_points = None
952 -
953 - previous_frame_gray = current_frame_gray
954 -
955 - # Process frames with Vision API - keep this part of the code unchanged
956 - if frame_count % process_every_n_frames == 0:
957 - # ... existing API processing code ...
958 - pass
959 -
960 - # Add hint about slowed down speed
961 - cv2.putText(frame, "Playback: 60% speed for better visualization",
962 - (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
963 -
964 - # Write the frame to output video
965 - out.write(frame)
966 -
967 - # Release resources
968 - cap.release()
969 - out.release()
970 -
971 - # Clear progress indicators
972 - progress_bar.empty()
973 - status_text.empty()
974 -
975 - # Read the processed video as bytes for download
976 - with open(output_path, 'rb') as file:
977 - processed_video_bytes = file.read()
978 -
979 - # Clean up temporary files
980 - os.unlink(temp_video_path)
981 - os.unlink(output_path)
982 -
983 - # Return results
984 - results = {"detection_stats": detection_stats}
985 -
986 - # Store results in session state for chatbot context
987 - st.session_state.analysis_results = results
988 -
989 - # Update vectorstore with new results
990 - update_vectorstore_with_results(results)
991 -
992 - return processed_video_bytes, results
993 -
994 - except Exception as e:
995 - # Clean up on error
996 - cap.release()
997 - if 'out' in locals():
998 - out.release()
999 - os.unlink(temp_video_path)
1000 - if os.path.exists(output_path):
1001 - os.unlink(output_path)
1002 -
1003 - # Return None values as a tuple instead of raising the exception
1004 - st.error(f"Error processing video: {str(e)}")
1005 - return None, None # Return a tuple with None values
1006
1007   def load_bigquery_table(dataset_id, table_id, limit=1000):
1008   """Load data directly from an existing BigQuery table"""
@@ -1649,6 +1791,16 @@ def main():
1649
1650   # Analysis settings
1651   st.sidebar.markdown("### Video Analysis Settings")
1652   analysis_types = []
1653   if st.sidebar.checkbox("Object Detection", value=True):
1654   analysis_types.append("Objects")
@@ -1657,17 +1809,124 @@ def main():
1657   if st.sidebar.checkbox("Text Recognition"):
1658   analysis_types.append("Text")
1659
1660   st.sidebar.markdown("---")
1661 - st.sidebar.
1662
1663   # Upload Video mode only - removed real-time camera option
1664   st.markdown("""
1665   #### 📤 Video Analysis
1666
1667 - Upload a video file to analyze it
1668
1669   **Instructions:**
1670 - 1. Select the
1671   2. Upload a video file (MP4, MOV, AVI)
1672   3. Click "Process Video" to begin analysis
1673   4. Download the processed video when complete
@@ -1690,10 +1949,24 @@ def main():
1690   if not analysis_types:
1691   st.warning("Please select at least one analysis type.")
1692   else:
1693 - with st.spinner("Processing video (max 10 seconds)..."):
1694   try:
1695 - #
1696 -
1697
1698   if processed_video:
1699   # Offer download of processed video
@@ -2000,277 +2273,4 @@ def main():
2000   st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2001   st.write(f"Rows: {result['num_rows']}")
2002   st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2003 -
2004 -
2005 - # Store table info in session state
2006 - st.session_state["table_info"] = {
2007 - "dataset_id": dataset_id,
2008 - "table_id": table_id,
2009 - "schema": result["schema"]
2010 - }
2011 - except Exception as e:
2012 - st.error(f"Error uploading to BigQuery: {str(e)}")
2013 - except Exception as e:
2014 - st.error(f"Error reading CSV file: {str(e)}")
2015 - else:
2016 - st.info("Upload a CSV file to load data into BigQuery")
2017 -
2018 - with query_tab:
2019 - st.markdown("### Query BigQuery Data")
2020 -
2021 - if "query_results" in st.session_state and "table_info" in st.session_state:
2022 - # Display info about the loaded data
2023 - table_info = st.session_state["table_info"]
2024 - st.write(f"Working with table: **{table_info['dataset_id']}.{table_info['table_id']}**")
2025 -
2026 - # Query input
2027 - default_query = f"SELECT * FROM `{credentials.project_id}.{table_info['dataset_id']}.{table_info['table_id']}` LIMIT 100"
2028 - query = st.text_area("SQL Query", default_query, height=100)
2029 -
2030 - # Execute query button
2031 - if st.button("Run Query"):
2032 - with st.spinner("Executing query..."):
2033 - try:
2034 - # Run the query
2035 - results = run_bigquery(query)
2036 -
2037 - # Store results in session state
2038 - st.session_state["query_results"] = results
2039 -
2040 - # Display results
2041 - st.write("### Query Results")
2042 - st.dataframe(results)
2043 -
2044 - # Download button for results
2045 - csv = results.to_csv(index=False)
2046 - st.download_button(
2047 - label="Download Results as CSV",
2048 - data=csv,
2049 - file_name="query_results.csv",
2050 - mime="text/csv"
2051 - )
2052 - except Exception as e:
2053 - st.error(f"Error executing query: {str(e)}")
2054 - else:
2055 - st.info("Load a table from BigQuery or upload a CSV file first")
2056 -
2057 - with visualization_tab:
2058 - st.markdown("### Visualize BigQuery Data")
2059 -
2060 - if "query_results" in st.session_state and not st.session_state["query_results"].empty:
2061 - df = st.session_state["query_results"]
2062 -
2063 - # Chart type selection
2064 - chart_type = st.selectbox(
2065 - "Select Chart Type",
2066 - ["Bar Chart", "Line Chart", "Scatter Plot", "Histogram", "Pie Chart"]
2067 - )
2068 -
2069 - # Column selection based on data types
2070 - numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
2071 - all_cols = df.columns.tolist()
2072 -
2073 - if len(numeric_cols) < 1:
2074 - st.warning("No numeric columns available for visualization")
2075 - else:
2076 - if chart_type in ["Bar Chart", "Line Chart", "Scatter Plot"]:
2077 - col1, col2 = st.columns(2)
2078 -
2079 - with col1:
2080 - x_axis = st.selectbox("X-axis", all_cols)
2081 -
2082 - with col2:
2083 - y_axis = st.selectbox("Y-axis", numeric_cols)
2084 -
2085 - # Optional: Grouping/color dimension
2086 - color_dim = st.selectbox("Color Dimension (Optional)", ["None"] + all_cols)
2087 -
2088 - # Generate the visualization based on selection
2089 - if st.button("Generate Visualization"):
2090 - st.write(f"### {chart_type}: {y_axis} by {x_axis}")
2091 -
2092 - if chart_type == "Bar Chart":
2093 - if color_dim != "None":
2094 - fig = px.bar(df, x=x_axis, y=y_axis, color=color_dim,
2095 - title=f"{y_axis} by {x_axis}")
2096 - else:
2097 - fig = px.bar(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2098 - st.plotly_chart(fig)
2099 -
2100 - elif chart_type == "Line Chart":
2101 - if color_dim != "None":
2102 - fig = px.line(df, x=x_axis, y=y_axis, color=color_dim,
2103 - title=f"{y_axis} by {x_axis}")
2104 - else:
2105 - fig = px.line(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2106 - st.plotly_chart(fig)
2107 -
2108 - elif chart_type == "Scatter Plot":
2109 - if color_dim != "None":
2110 - fig = px.scatter(df, x=x_axis, y=y_axis, color=color_dim,
2111 - title=f"{y_axis} vs {x_axis}")
2112 - else:
2113 - fig = px.scatter(df, x=x_axis, y=y_axis, title=f"{y_axis} vs {x_axis}")
2114 - st.plotly_chart(fig)
2115 -
2116 - elif chart_type == "Histogram":
2117 - column = st.selectbox("Select Column", numeric_cols)
2118 - bins = st.slider("Number of Bins", min_value=5, max_value=100, value=20)
2119 -
2120 - if st.button("Generate Visualization"):
2121 - st.write(f"### Histogram of {column}")
2122 - fig = px.histogram(df, x=column, nbins=bins, title=f"Distribution of {column}")
2123 - st.plotly_chart(fig)
2124 -
2125 - elif chart_type == "Pie Chart":
2126 - column = st.selectbox("Category Column", all_cols)
2127 - value_col = st.selectbox("Value Column", numeric_cols)
2128 -
2129 - if st.button("Generate Visualization"):
2130 - # Aggregate the data if needed
2131 - pie_data = df.groupby(column)[value_col].sum().reset_index()
2132 - st.write(f"### Pie Chart: {value_col} by {column}")
2133 - fig = px.pie(pie_data, names=column, values=value_col,
2134 - title=f"{value_col} by {column}")
2135 - st.plotly_chart(fig)
2136 - else:
2137 - st.info("Load a table from BigQuery or upload a CSV file first")
2138 -
2139 - elif selected == "About":
2140 - st.markdown("## About This App")
2141 - st.write("""
2142 - This application uses Google Cloud Vision AI to analyze images and video streams. It can:
2143 -
2144 - - **Detect labels** in images
2145 - - **Identify objects** and their locations
2146 - - **Extract text** from images
2147 - - **Detect faces** and facial landmarks
2148 - - **Analyze real-time video** from your camera
2149 -
2150 - To use this app, you need to:
2151 - 1. Set up Google Cloud Vision API credentials
2152 - 2. Upload an image or use your camera
2153 - 3. Select the types of analysis you want to perform
2154 - 4. Click "Analyze Image" or start the video stream
2155 -
2156 - The app is built with Streamlit and Google Cloud Vision API.
2157 - """)
2158 -
2159 - st.info("Note: Make sure your Google Cloud credentials are properly set up to use this application.")
2160 -
2161 - # Add the chatbot interface at the bottom of the page
2162 - chatbot_interface()
2163 -
2164 - if __name__ == "__main__":
2165 - # Use GOOGLE_CREDENTIALS directly - no need for file or GOOGLE_APPLICATION_CREDENTIALS
2166 - try:
2167 - if 'GOOGLE_CREDENTIALS' in os.environ:
2168 - # Create credentials object directly from JSON string
2169 - credentials_info = json.loads(os.environ['GOOGLE_CREDENTIALS'])
2170 - credentials = service_account.Credentials.from_service_account_info(credentials_info)
2171 -
2172 - # Initialize client with these credentials directly
2173 - client = vision.ImageAnnotatorClient(credentials=credentials)
2174 - else:
2175 - st.sidebar.error("GOOGLE_CREDENTIALS environment variable not found")
2176 - client = None
2177 - except Exception as e:
2178 - st.sidebar.error(f"Error with credentials: {str(e)}")
2179 - client = None
2180 -
2181 - main()
2182 -
2183 - # Add this function to your app
2184 - def extract_video_frames(video_bytes, num_frames=5):
2185 - """Extract frames from video bytes for thumbnail display with improved key frame selection"""
2186 - import cv2
2187 - import numpy as np
2188 - import tempfile
2189 - from PIL import Image
2190 - import io
2191 -
2192 - # Save video bytes to a temporary file
2193 - with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
2194 - temp_file.write(video_bytes)
2195 - temp_video_path = temp_file.name
2196 -
2197 - # Open the video file
2198 - cap = cv2.VideoCapture(temp_video_path)
2199 -
2200 - # Get video properties
2201 - frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
2202 - fps = cap.get(cv2.CAP_PROP_FPS)
2203 -
2204 - # Use more sophisticated frame selection based on content analysis
2205 - frames = []
2206 - frame_scores = []
2207 - sample_interval = max(1, frame_count // (num_frames * 3)) # Sample more frames than needed
2208 -
2209 - # First pass: collect frame scores
2210 - prev_frame = None
2211 - frame_index = 0
2212 -
2213 - while len(frame_scores) < num_frames * 3 and frame_index < frame_count:
2214 - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
2215 - ret, frame = cap.read()
2216 - if not ret:
2217 - break
2218 -
2219 - # Convert to grayscale for analysis
2220 - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
2221 - gray = cv2.GaussianBlur(gray, (21, 21), 0)
2222 -
2223 - # Calculate frame score based on Laplacian variance (focus measure)
2224 - focus_score = cv2.Laplacian(gray, cv2.CV_64F).var()
2225 -
2226 - # Calculate frame difference if we have a previous frame
2227 - diff_score = 0
2228 - if prev_frame is not None:
2229 - frame_diff = cv2.absdiff(gray, prev_frame)
2230 - diff_score = np.mean(frame_diff)
2231 -
2232 - # Combined score: favor sharp frames with significant changes
2233 - combined_score = focus_score * 0.6 + diff_score * 0.4
2234 - frame_scores.append((frame_index, combined_score))
2235 -
2236 - # Store frame for next comparison
2237 - prev_frame = gray
2238 - frame_index += sample_interval
2239 -
2240 - # Second pass: select the best frames based on scores
2241 - # Sort by score and get top N frames
2242 - sorted_frames = sorted(frame_scores, key=lambda x: x[1], reverse=True)
2243 - best_frames = sorted_frames[:num_frames]
2244 - # Sort back by frame index to maintain chronological order
2245 - selected_frames = sorted(best_frames, key=lambda x: x[0])
2246 -
2247 - # Extract the selected frames
2248 - for idx, _ in selected_frames:
2249 - cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
2250 - ret, frame = cap.read()
2251 - if ret:
2252 - # Apply subtle enhancement to frames
2253 - enhanced_frame = frame.copy()
2254 - # Auto color balance
2255 - lab = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2LAB)
2256 - l, a, b = cv2.split(lab)
2257 - clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
2258 - cl = clahe.apply(l)
2259 - enhanced_lab = cv2.merge((cl, a, b))
2260 - enhanced_frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
2261 -
2262 - # Convert to RGB (from BGR)
2263 - frame_rgb = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB)
2264 - # Convert to PIL Image
2265 - pil_img = Image.fromarray(frame_rgb)
2266 - # Save to bytes
2267 - img_byte_arr = io.BytesIO()
2268 - pil_img.save(img_byte_arr, format='JPEG', quality=90)
2269 - frames.append(img_byte_arr.getvalue())
2270 -
2271 - # Clean up
2272 - cap.release()
2273 - import os
2274 - os.unlink(temp_video_path)
2275 -
2276 - return frames
433   class VideoProcessor(VideoProcessorBase):
434   """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
436 + def __init__(self, analysis_types: List[str], processing_mode: str = "Hybrid (Google Vision + OpenCV)",
437 + track_update_frames: int = 5, confidence_threshold: float = 0.5):
438   self.analysis_types = analysis_types
439 + self.processing_mode = processing_mode
440   self.frame_counter = 0
441 + self.process_every_n_frames = track_update_frames # Process every N frames
442 + self.confidence_threshold = confidence_threshold
443   self.vision_client = client # Store client reference
444   self.last_results = {} # Cache results between processed frames
445   self.last_processed_time = time.time()
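Since the constructor now takes three extra keyword arguments, whoever creates the processor has to supply them. A minimal sketch of how that could be wired up, assuming the app uses streamlit-webrtc's webrtc_streamer with a video_processor_factory (the widget key and the argument values here are assumptions, not part of the commit):

# Sketch only: instantiating the widened VideoProcessor via streamlit-webrtc.
from streamlit_webrtc import webrtc_streamer

webrtc_streamer(
    key="vision-ai",  # hypothetical widget key
    video_processor_factory=lambda: VideoProcessor(
        analysis_types=["Objects", "Text", "Motion"],
        processing_mode="Hybrid (Google Vision + OpenCV)",
        track_update_frames=5,
        confidence_threshold=0.5,
    ),
)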
456   self.max_time_delta = 0.5
457   self.min_time_delta = 0.05
458
459 + # For OpenCV-only detection mode
460 + self.opencv_detector = None
461 + self.init_opencv_detector()
462 +
463 + def init_opencv_detector(self):
464 + """Initialize OpenCV-based object detector if needed"""
465 + if self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)":
466 + try:
467 + # Initialize YOLO or other available models
468 + # This is a placeholder - you might need to adjust based on available OpenCV DNN models
469 + weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
470 + config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
471 +
472 + # Check if files exist, otherwise use a simpler fallback detector
473 + if os.path.exists(weights_path) and os.path.exists(config_path):
474 + self.opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
475 + else:
476 + # Fallback to HOG detector for people
477 + self.opencv_detector = cv2.HOGDescriptor()
478 + self.opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
479 + st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
480 + except Exception as e:
481 + st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
482 + self.opencv_detector = None
483 +
484   def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
485   img = frame.to_ndarray(format="bgr24")
486   self.frame_counter += 1
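The detector above looks for models/yolov3.weights and models/yolov3.cfg next to app.py and silently falls back to the HOG person detector when they are missing. A small hedged sketch of fetching the files once at startup; the URLs are the commonly used upstream locations and should be treated as assumptions:

# Sketch only: download YOLOv3 files into ./models if absent (URLs are assumptions).
import os
import urllib.request

MODEL_DIR = "models"
FILES = {
    "yolov3.cfg": "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg",
    "yolov3.weights": "https://pjreddie.com/media/files/yolov3.weights",  # large file (~240 MB)
}

os.makedirs(MODEL_DIR, exist_ok=True)
for name, url in FILES.items():
    path = os.path.join(MODEL_DIR, name)
    if not os.path.exists(path):
        urllib.request.urlretrieve(url, path)  # one-time download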
491
492   # Add status display on all frames
493   cv2.putText(img,
494 + f"Vision AI: {'Active' if self.processing_active else 'Paused'} - Mode: {self.processing_mode}",
495   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
496
497   # Convert to grayscale for motion detection
498   gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
499
500 + # Apply motion detection for all frames if enabled
501 + if "Motion" in self.analysis_types and self.prev_gray is not None:
502   # Calculate frame difference for smoother motion detection
503   frame_diff = cv2.absdiff(gray, self.prev_gray)
504   _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
510   mg_mask = cv2.motempl.calcMotionGradient(
511   self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
512
513 + # Visualize motion segments
514 + seg_mask, segments = cv2.motempl.segmentMotion(
515 + self.motion_history, timestamp, self.max_time_delta)
516 +
517 + # Visualize motion segments
518 + motion_img = np.zeros_like(img)
519 + for i, segment in enumerate(segments):
520 + if segment[1] < 50: # Filter out small segments
521 + continue
522 + # Draw motion regions with random colors
523 + color = np.random.randint(0, 255, 3).tolist()
524 + motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1)
525 +
526 + # Overlay motion visualization
527 + alpha = 0.3
528 + cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img)
529 +
530 + # Process with Vision API at regular intervals if using Google Vision
531   current_time = time.time()
532 + if (self.processing_mode == "Google Vision API Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
533 + (current_time - self.last_processed_time > 1.0) and self.processing_active and \
534 + self.vision_client is not None:
535 +
536   self.last_processed_time = current_time
537
538 + # Convert frame to JPEG for Vision API
539 + success, jpeg_frame = cv2.imencode('.jpg', img)
540 + if success:
541 + image_content = jpeg_frame.tobytes()
542
543 + # Create vision image
544 + vision_image = vision.Image(content=image_content)
545
546 + try:
547 + # Perform detection based on selected types
548 + if "Objects" in self.analysis_types:
549 + objects = self.vision_client.object_localization(image=vision_image)
550 + # Filter objects by confidence threshold
551 + filtered_objects = [obj for obj in objects.localized_object_annotations
552 + if obj.score >= self.confidence_threshold]
553 + self.last_results["objects"] = filtered_objects
554 +
555 + # Log detection for tracking
556 + for obj in filtered_objects:
557 + # Draw object boundaries
558 + box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
559 + for vertex in obj.bounding_poly.normalized_vertices]
560 + points = np.array([[int(p[0]), int(p[1])] for p in box])
561 + cv2.polylines(img, [points], True, (0, 255, 0), 2)
562 +
563 + # Add label with confidence
564 + cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
565 + (int(box[0][0]), int(box[0][1] - 10)),
566 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
567 +
568 + # Create unique object ID for tracking
569 + obj_id = f"{obj.name}_{self.frame_counter}"
570 +
571 + # Calculate bounding box for tracker
572 + x_values = [p[0] for p in box]
573 + y_values = [p[1] for p in box]
574 + x_min, x_max = min(x_values), max(x_values)
575 + y_min, y_max = min(y_values), max(y_values)
576 +
577 + # Create or update tracker
578 + if obj.name not in self.object_trackers:
579 + self.object_trackers[obj.name] = {
580 + "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
581 + "last_seen": self.frame_counter,
582 + "score": obj.score
583 + }
584 + else:
585 + # Update existing tracker
586 + self.object_trackers[obj.name] = {
587 + "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
588 + "last_seen": self.frame_counter,
589 + "score": obj.score
590 + }
591
592 + # Face detection if selected
593 + if "Face Detection" in self.analysis_types:
594 + faces = self.vision_client.face_detection(image=vision_image)
595 + self.last_results["faces"] = faces.face_annotations
596 +
597 + # Draw face boundaries
598 + for face in faces.face_annotations:
599 + if face.detection_confidence >= self.confidence_threshold:
600 + vertices = face.bounding_poly.vertices
601 + points = [(vertex.x, vertex.y) for vertex in vertices]
602 + points = np.array([[p[0], p[1]] for p in points])
603 + cv2.polylines(img, [points], True, (0, 0, 255), 2)
604 +
605 + # Add confidence score
606 + cv2.putText(img, f"Face: {int(face.detection_confidence * 100)}%",
607 + (points[0][0], points[0][1] - 10),
608 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
609 +
610 + # Draw facial landmarks
611 + for landmark in face.landmarks:
612 + px = landmark.position.x
613 + py = landmark.position.y
614 + cv2.circle(img, (int(px), int(py)), 2, (255, 255, 0), -1)
615
616 + # Text detection if selected
617 + if "Text" in self.analysis_types:
618 + text = self.vision_client.text_detection(image=vision_image)
619 + if text.text_annotations:
620 + self.last_results["text"] = text.text_annotations
621 +
622 + # Draw text bounding boxes
623 + for text_annot in text.text_annotations[1:]: # Skip the first one (full text)
624 + box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
625 + points = np.array([[int(p[0]), int(p[1])] for p in box])
626 + cv2.polylines(img, [points], True, (255, 0, 0), 2)
627 +
628 + # Add recognized text
629 + cv2.putText(img, text_annot.description,
630 + (points[0][0], points[0][1] - 10),
631 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
632 + except Exception as e:
633 + # Handle API errors gracefully
634 + error_msg = f"API Error: {str(e)}"
635 + cv2.putText(img, error_msg, (10, 70),
636 + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
637 +
638 + # Process with OpenCV object detection if enabled
639 + if (self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
640 + self.opencv_detector is not None and \
641 + (self.frame_counter % self.process_every_n_frames == 0 or not self.object_trackers):
642
643 + try:
644 + # If using HOG detector (the fallback)
645 + if isinstance(self.opencv_detector, cv2.HOGDescriptor):
646 + # Detect people
647 + boxes, weights = self.opencv_detector.detectMultiScale(
648 + img, winStride=(8, 8), padding=(4, 4), scale=1.05
649 + )
650 +
651 + # Draw bounding boxes
652 + for i, (x, y, w, h) in enumerate(boxes):
653 + if weights[i] > 0.3: # Confidence threshold
654 + cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
655 + cv2.putText(img, f"Person: {int(weights[i] * 100)}%",
656 + (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
657 +
658 + # Add to trackers
659 + self.object_trackers[f"person_{i}"] = {
660 + "bbox": (x, y, w, h),
661 + "last_seen": self.frame_counter,
662 + "score": weights[i]
663 + }
664 + else:
665 + # Using YOLO or another DNN-based detector
666 + blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
667 + self.opencv_detector.setInput(blob)
668 + layer_names = self.opencv_detector.getLayerNames()
669 + output_layers = [layer_names[i - 1] for i in self.opencv_detector.getUnconnectedOutLayers()]
670 + outputs = self.opencv_detector.forward(output_layers)
671 +
672 + # Process detections
673 + class_ids = []
674 + confidences = []
675 + boxes = []
676 +
677 + for output in outputs:
678 + for detection in output:
679 + scores = detection[5:]
680 + class_id = np.argmax(scores)
681 + confidence = scores[class_id]
682 +
683 + if confidence > self.confidence_threshold:
684 + # Object detected
685 + center_x = int(detection[0] * img.shape[1])
686 + center_y = int(detection[1] * img.shape[0])
687 + w = int(detection[2] * img.shape[1])
688 + h = int(detection[3] * img.shape[0])
689 +
690 + # Rectangle coordinates
691 + x = int(center_x - w / 2)
692 + y = int(center_y - h / 2)
693 +
694 + boxes.append([x, y, w, h])
695 + confidences.append(float(confidence))
696 + class_ids.append(class_id)
697 +
698 + # Apply non-maximum suppression
699 + indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confidence_threshold, 0.4)
700 +
701 + # Define COCO class names
702 + class_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
703 + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
704 + "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
705 + "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
706 + "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
707 + "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
708 + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
709 + "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
710 + "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
711 + "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
712 +
713 + for i in indices:
714 + if isinstance(i, (list, tuple)): # Handle different OpenCV versions
715 + i = i[0]
716 +
717 + box = boxes[i]
718 + x, y, w, h = box
719 +
720 + # Get class label and draw bounding box
721 + class_id = class_ids[i]
722 + label = f"{class_names[class_id]}: {int(confidences[i] * 100)}%"
723 + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
724 + cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
725 +
726 + # Add to trackers
727 + object_name = class_names[class_id]
728 + self.object_trackers[f"{object_name}_{i}"] = {
729 + "bbox": (x, y, w, h),
730 + "last_seen": self.frame_counter,
731 + "score": confidences[i],
732 + "class": object_name
733 + }
734 + except Exception as e:
735 + cv2.putText(img, f"OpenCV Error: {str(e)}", (10, 110),
736 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
737 +
738 + # Update object tracking for existing objects (every frame)
739 + objects_to_remove = []
740 + for obj_id, tracker_info in self.object_trackers.items():
741 + # Remove old trackers
742 + if self.frame_counter - tracker_info["last_seen"] > 30: # Remove after 30 frames
743 + objects_to_remove.append(obj_id)
744 + continue
745 +
746 + # Draw tracking box (for objects not updated this frame)
747 + if self.frame_counter - tracker_info["last_seen"] <= 5: # Only show recent tracked objects
748 + x, y, w, h = tracker_info["bbox"]
749 +
750 + # Use different color for tracked vs detected objects
751 + if self.frame_counter == tracker_info["last_seen"]:
752 + color = (0, 255, 0) # Green for newly detected
753 + else:
754 + color = (255, 165, 0) # Orange for tracked
755 +
756 + cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
757 +
758 + # Add label with confidence and tracking status
759 + tracking_age = self.frame_counter - tracker_info["last_seen"]
760 + label = f"{obj_id.split('_')[0]}: {int(tracker_info['score'] * 100)}%"
761 + if tracking_age > 0:
762 + label += f" (tracked {tracking_age}f)"
763 +
764 + cv2.putText(img, label, (x, y - 10),
765 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
766 +
767 + # Remove expired trackers
768 + for obj_id in objects_to_remove:
769 + del self.object_trackers[obj_id]
770
771   # Save current frame for next iteration
772   self.prev_gray = gray
773 +
774 + # Add processing mode indicator
775 + cv2.putText(img, f"Mode: {self.processing_mode}",
776 + (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
777 +
778 + # Add frame counter
779 + cv2.putText(img, f"Frame: {self.frame_counter}",
780 + (img.shape[1] - 150, img.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
781
782   return av.VideoFrame.from_ndarray(img, format="bgr24")
783
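For reference, the Vision call made once per interval inside transform() follows the standard google-cloud-vision pattern. A minimal offline sketch of the same call against a single still image; the file name and the credential setup are assumptions:

# Sketch only: the object_localization pattern used in transform(), applied to one JPEG.
from google.cloud import vision

client = vision.ImageAnnotatorClient()  # assumes credentials are already configured
with open("sample.jpg", "rb") as f:     # hypothetical input file
    image = vision.Image(content=f.read())
response = client.object_localization(image=image)
for obj in response.localized_object_annotations:
    print(obj.name, round(obj.score, 2))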
1003
1004   return resources
1005
1006 + def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Google Vision + OpenCV)",
1007 + track_update_frames=5, confidence_threshold=0.5, vision_update_interval=1.0,
1008 + max_results=10, enable_face_landmarks=True, tracking_algorithm="KCF",
1009 + motion_sensitivity=32, prioritize_vision=True, blend_results=True):
1010   """Process an uploaded video file with enhanced Vision AI detection and analytics"""
1011   # Create a temporary file to save the uploaded video
1012   with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
1021   if not cap.isOpened():
1022   st.error("Error opening video file")
1023   os.unlink(temp_video_path)
1024 + return None, None
1025
1026   # Get video properties
1027   width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
1037   # Scene change detection threshold
1038   scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
1039   # Process every Nth frame to reduce API calls
1040 + process_every_n_frames = track_update_frames
1041 +
1042 + # Initialize object trackers dictionary for continuous tracking
1043 + object_trackers = {}
1044 +
1045 + # Motion history parameters
1046 + motion_threshold = motion_sensitivity
1047 + max_time_delta = 0.5
1048 + min_time_delta = 0.05
1049
1050   # Check OpenCV version for compatibility with advanced features
1051   opencv_version = cv2.__version__
1067   use_advanced_tracking = False
1068   # ----------------- End Parameters -----------------
1069
1070 + # Initialize OpenCV detector if needed
1071 + opencv_detector = None
1072 + if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1073 + try:
1074 + # Check if YOLO model files exist
1075 + weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
1076 + config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
1077 +
1078 + if os.path.exists(weights_path) and os.path.exists(config_path):
1079 + opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
1080 + st.info("Using YOLO model for OpenCV detection")
1081 + else:
1082 + # Fallback to HOG detector for people
1083 + opencv_detector = cv2.HOGDescriptor()
1084 + opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
1085 + st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
1086 + except Exception as e:
1087 + st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
1088 +
1089 + # Initialize the selected tracking algorithm
1090 + if tracking_algorithm == "CSRT":
1091 + tracker_create_func = cv2.legacy.TrackerCSRT_create
1092 + elif tracking_algorithm == "KCF":
1093 + tracker_create_func = cv2.legacy.TrackerKCF_create
1094 + elif tracking_algorithm == "MOSSE":
1095 + tracker_create_func = cv2.legacy.TrackerMOSSE_create
1096 + elif tracking_algorithm == "MedianFlow":
1097 + tracker_create_func = cv2.legacy.TrackerMedianFlow_create
1098 + else:
1099 + # Default to KCF if specified algorithm not available
1100 + tracker_create_func = cv2.legacy.TrackerKCF_create
1101 +
1102   # Inform user if video is being truncated
1103   if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
1104   st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
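The tracker factories above live under cv2.legacy, which is only present in contrib builds of OpenCV; on stock opencv-python or older 4.x builds the constructors may sit directly on cv2 or be missing entirely. A hedged fallback sketch (the helper name is an assumption, not part of the commit):

# Sketch only: pick a tracker constructor while tolerating builds without cv2.legacy.
def get_tracker_factory(name="KCF"):
    """Return a Tracker*_create callable, or None if this OpenCV build has no tracker support."""
    legacy = getattr(cv2, "legacy", None)
    for namespace in (legacy, cv2):
        if namespace is not None:
            factory = getattr(namespace, f"Tracker{name}_create", None)
            if factory is not None:
                return factory
    return None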
1141   previous_frame_gray = None
1142   prev_points = None
1143
1144 + # Display mode being used
1145 + st.info(f"Processing with {processing_mode} mode")
1146 +
1147 + # The rest of the video processing code would follow...
1148
1149   def load_bigquery_table(dataset_id, table_id, limit=1000):
1150   """Load data directly from an existing BigQuery table"""
1791
1792   # Analysis settings
1793   st.sidebar.markdown("### Video Analysis Settings")
1794 +
1795 + # Add processing mode selection
1796 + processing_mode = st.sidebar.radio(
1797 + "Processing Engine",
1798 + ["Hybrid (Google Vision + OpenCV)", "Google Vision API Only", "OpenCV Only"],
1799 + help="Select which technology to use for video analysis"
1800 + )
1801 +
1802 + # Common analysis types selection
1803 + st.sidebar.markdown("### Detection Types")
1804   analysis_types = []
1805   if st.sidebar.checkbox("Object Detection", value=True):
1806   analysis_types.append("Objects")
1809   if st.sidebar.checkbox("Text Recognition"):
1810   analysis_types.append("Text")
1811
1812 + # Add motion tracking option
1813 + if st.sidebar.checkbox("Motion Tracking", value=True):
1814 + analysis_types.append("Motion")
1815 +
1816 + # Settings specific to the selected processing mode
1817   st.sidebar.markdown("---")
1818 + st.sidebar.markdown(f"### {processing_mode} Settings")
1819 +
1820 + # Parameters for all modes
1821 + track_update_frames = 5
1822 + confidence_threshold = 0.5
1823 +
1824 + # Mode-specific parameters
1825 + if processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1826 + # Google Vision parameters
1827 + st.sidebar.markdown("#### Google Vision Parameters")
1828 + vision_update_interval = st.sidebar.slider(
1829 + "Vision API update interval (seconds)",
1830 + min_value=0.5,
1831 + max_value=5.0,
1832 + value=1.0,
1833 + step=0.5,
1834 + help="How often to call the Vision API (longer intervals save API quota)"
1835 + )
1836 +
1837 + confidence_threshold = st.sidebar.slider(
1838 + "Google Vision Confidence Threshold",
1839 + min_value=0.0,
1840 + max_value=1.0,
1841 + value=0.5,
1842 + help="Minimum confidence score for Google Vision detections"
1843 + )
1844 +
1845 + # Detailed API options (using an expander for advanced settings)
1846 + with st.sidebar.expander("Advanced Vision API Settings"):
1847 + max_results = st.slider(
1848 + "Max objects per frame",
1849 + min_value=1,
1850 + max_value=20,
1851 + value=10,
1852 + help="Maximum number of objects to detect per frame"
1853 + )
1854 +
1855 + enable_face_landmarks = st.checkbox(
1856 + "Enable Face Landmarks",
1857 + value=True,
1858 + help="Detect facial features (eyes, nose, etc.)"
1859 + )
1860 +
1861 + if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1862 + # OpenCV parameters
1863 + st.sidebar.markdown("#### OpenCV Parameters")
1864 +
1865 + track_update_frames = st.sidebar.slider(
1866 + "Update OpenCV tracking every N frames",
1867 + min_value=1,
1868 + max_value=15,
1869 + value=5,
1870 + help="Lower values = more accurate tracking but higher processing load"
1871 + )
1872 +
1873 + if processing_mode == "OpenCV Only":
1874 + # Only show this in OpenCV-only mode
1875 + confidence_threshold = st.sidebar.slider(
1876 + "OpenCV Detector Confidence Threshold",
1877 + min_value=0.0,
1878 + max_value=1.0,
1879 + value=0.4,
1880 + help="Minimum confidence score for OpenCV detections"
1881 + )
1882 +
1883 + # OpenCV tracking options
1884 + with st.sidebar.expander("OpenCV Tracking Options"):
1885 + tracking_algorithm = st.selectbox(
1886 + "Tracking Algorithm",
1887 + ["KCF", "CSRT", "MOSSE", "MedianFlow"],
1888 + index=0,
1889 + help="Different algorithms have different speed/accuracy tradeoffs"
1890 + )
1891 +
1892 + motion_sensitivity = st.slider(
1893 + "Motion Sensitivity",
1894 + min_value=10,
1895 + max_value=100,
1896 + value=32,
1897 + help="Lower values detect more subtle motion"
1898 + )
1899 +
1900 + # Hybrid-specific settings
1901 + if processing_mode == "Hybrid (Google Vision + OpenCV)":
1902 + # Hybrid specific parameters
1903 + st.sidebar.markdown("#### Hybrid Mode Settings")
1904 + prioritize_vision = st.sidebar.radio(
1905 + "When results conflict, prioritize:",
1906 + ["Google Vision (more accurate)", "OpenCV (faster)"],
1907 + index=0,
1908 + help="Which detection source to prioritize when there are conflicting results"
1909 + )
1910 +
1911 + blend_results = st.sidebar.checkbox(
1912 + "Blend detection results",
1913 + value=True,
1914 + help="Combine detections from both systems for better accuracy"
1915 + )
1916 +
1917 + # Display warning about API usage
1918 + st.sidebar.markdown("---")
1919 + if processing_mode != "OpenCV Only":
1920 + st.sidebar.warning("⚠️ Google Vision API usage may incur costs. Use responsibly.")
1921
1922   # Upload Video mode only - removed real-time camera option
1923   st.markdown("""
1924   #### 📤 Video Analysis
1925
1926 + Upload a video file to analyze it using the selected processing engine.
1927
1928   **Instructions:**
1929 + 1. Select the processing mode and parameters in the sidebar
1930   2. Upload a video file (MP4, MOV, AVI)
1931   3. Click "Process Video" to begin analysis
1932   4. Download the processed video when complete
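Several of the names collected above (vision_update_interval, max_results, enable_face_landmarks, tracking_algorithm, motion_sensitivity, prioritize_vision, blend_results) are only assigned inside the mode-specific branches, while the next hunk reads all of them unconditionally when building processing_params. A small hedged sketch of defaults that could be set before those branches so every mode stays safe; the values mirror the widget defaults shown above and are otherwise assumptions:

# Sketch only: fallback values so processing_params can always be built, whatever mode is chosen.
vision_update_interval = 1.0
max_results = 10
enable_face_landmarks = True
tracking_algorithm = "KCF"
motion_sensitivity = 32
prioritize_vision = "Google Vision (more accurate)"
blend_results = True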
1949   if not analysis_types:
1950   st.warning("Please select at least one analysis type.")
1951   else:
1952 + with st.spinner(f"Processing video with {processing_mode} mode (max 10 seconds)..."):
1953   try:
1954 + # Create a dict of processing parameters to pass to the processing function
1955 + processing_params = {
1956 + "processing_mode": processing_mode,
1957 + "track_update_frames": track_update_frames,
1958 + "confidence_threshold": confidence_threshold,
1959 + "vision_update_interval": vision_update_interval,
1960 + "max_results": max_results,
1961 + "enable_face_landmarks": enable_face_landmarks,
1962 + "tracking_algorithm": tracking_algorithm,
1963 + "motion_sensitivity": motion_sensitivity,
1964 + "prioritize_vision": prioritize_vision,
1965 + "blend_results": blend_results
1966 + }
1967 +
1968 + # Process the video with the parameters
1969 + processed_video, results = process_video_file(uploaded_file, analysis_types, **processing_params)
1970
1971   if processed_video:
1972   # Offer download of processed video
2273   st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2274   st.write(f"Rows: {result['num_rows']}")
2275   st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2276 +