CosmickVisions committed on
Commit 8f167cd · verified · 1 Parent(s): f0f9239

Update app.py

Files changed (1)
  1. app.py +478 -478
app.py CHANGED
@@ -433,10 +433,13 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None):
433
  class VideoProcessor(VideoProcessorBase):
434
  """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
 
436
- def __init__(self, analysis_types: List[str]):
 
437
  self.analysis_types = analysis_types
 
438
  self.frame_counter = 0
439
- self.process_every_n_frames = 5 # Process every 5th frame
 
440
  self.vision_client = client # Store client reference
441
  self.last_results = {} # Cache results between processed frames
442
  self.last_processed_time = time.time()
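Taken together, `process_every_n_frames` and `last_processed_time` implement a two-gate throttle: expensive work runs only on every Nth frame and at most once per wall-clock interval. A minimal standalone sketch of that pattern (the class and parameter names here are illustrative, not the app's exact fields):

import time

class Throttle:
    """Gate expensive work on both a frame cadence and a wall-clock interval."""

    def __init__(self, every_n_frames: int = 5, min_interval: float = 1.0):
        self.every_n_frames = every_n_frames
        self.min_interval = min_interval
        self.frame_counter = 0
        self.last_processed = 0.0

    def should_process(self) -> bool:
        self.frame_counter += 1
        if self.frame_counter % self.every_n_frames != 0:
            return False  # skip off-cadence frames
        now = time.time()
        if now - self.last_processed < self.min_interval:
            return False  # too soon since the last run
        self.last_processed = now
        return True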
@@ -453,6 +456,31 @@ class VideoProcessor(VideoProcessorBase):
453
  self.max_time_delta = 0.5
454
  self.min_time_delta = 0.05
455
 
456
  def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
457
  img = frame.to_ndarray(format="bgr24")
458
  self.frame_counter += 1
@@ -463,14 +491,14 @@ class VideoProcessor(VideoProcessorBase):
463
 
464
  # Add status display on all frames
465
  cv2.putText(img,
466
- f"Vision AI: {'Active' if self.processing_active else 'Paused'}",
467
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
468
 
469
  # Convert to grayscale for motion detection
470
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
471
 
472
- # Apply motion detection for all frames
473
- if self.prev_gray is not None:
474
  # Calculate frame difference for smoother motion detection
475
  frame_diff = cv2.absdiff(gray, self.prev_gray)
476
  _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
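The differencing step above reduces to a few lines. A standalone version, assuming 8-bit grayscale inputs; note the app thresholds to 1 because its mask feeds cv2.motempl.updateMotionHistory, whereas thresholding to 255 yields a directly viewable mask:

import cv2
import numpy as np

def motion_mask(prev_gray: np.ndarray, gray: np.ndarray, threshold: int = 32) -> np.ndarray:
    """Binary mask of pixels whose intensity changed by more than `threshold`."""
    diff = cv2.absdiff(gray, prev_gray)
    _, mask = cv2.threshold(diff, threshold, 255, cv2.THRESH_BINARY)
    return mask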
@@ -482,85 +510,274 @@ class VideoProcessor(VideoProcessorBase):
482
  mg_mask = cv2.motempl.calcMotionGradient(
483
  self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
484
 
485
- # Visualize motion segments (optional)
486
- if "Motion" in self.analysis_types:
487
- seg_mask, segments = cv2.motempl.segmentMotion(
488
- self.motion_history, timestamp, self.max_time_delta)
489
-
490
- # Visualize motion segments
491
- motion_img = np.zeros_like(img)
492
- for i, segment in enumerate(segments):
493
- if segment[1] < 50: # Filter out small segments
494
- continue
495
- # Draw motion regions with random colors
496
- color = np.random.randint(0, 255, 3).tolist()
497
- motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1)
498
-
499
- # Overlay motion visualization
500
- alpha = 0.3
501
- cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img)
502
-
503
- # Process at regular intervals
504
  current_time = time.time()
505
- if current_time - self.last_processed_time > 1.0 and self.processing_active: # Process max once per second
506
  self.last_processed_time = current_time
507
 
508
- # Process with Vision API as in original code
509
- # ... existing API processing code ...
510
-
511
- # Update tracking between API calls for smoother object tracking
512
- if "objects" in self.last_results and "Objects" in self.analysis_types:
513
- # Use OpenCV's built-in object trackers for smoother tracking between API calls
514
- for obj in self.last_results["objects"]:
515
- obj_id = obj.name + str(hash(str(obj.bounding_poly.normalized_vertices)))
516
 
517
- if obj_id not in self.object_trackers:
518
- # Initialize a new tracker
519
- tracker = cv2.TrackerKCF_create() # or other trackers like CSRT, MIL, etc.
520
-
521
- # Get bounding box coordinates
522
- box_points = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
523
- for vertex in obj.bounding_poly.normalized_vertices]
524
- x_min = min([p[0] for p in box_points])
525
- y_min = min([p[1] for p in box_points])
526
- x_max = max([p[0] for p in box_points])
527
- y_max = max([p[1] for p in box_points])
528
-
529
- # Initialize tracker
530
- bbox = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
531
- tracker.init(img, bbox)
532
- self.object_trackers[obj_id] = {
533
- "tracker": tracker,
534
- "name": obj.name,
535
- "score": obj.score,
536
- "last_update": self.frame_counter
537
- }
538
 
539
- # Update all trackers
540
- trackers_to_remove = []
541
- for obj_id, tracker_info in self.object_trackers.items():
542
- # Only keep trackers for a limited number of frames
543
- if self.frame_counter - tracker_info["last_update"] > 30: # Remove after 30 frames
544
- trackers_to_remove.append(obj_id)
545
- continue
546
 
547
- success, bbox = tracker_info["tracker"].update(img)
548
- if success:
549
- # Draw tracking box
550
- x, y, w, h = [int(v) for v in bbox]
551
- cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
552
 
553
- # Add label with confidence
554
- label = f"{tracker_info['name']}: {int(tracker_info['score'] * 100)}%"
555
- cv2.putText(img, label, (x, y - 10),
556
- cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
557
 
558
- # Remove expired trackers
559
- for obj_id in trackers_to_remove:
560
- del self.object_trackers[obj_id]
561
 
562
  # Save current frame for next iteration
563
  self.prev_gray = gray
564
 
565
  return av.VideoFrame.from_ndarray(img, format="bgr24")
566
 
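The removed block seeds a KCF tracker from each Vision API box, advances it on every frame, and expires it after 30 frames without a refresh. A condensed sketch of the create/init/update cycle; the cv2.legacy fallback is an assumption about opencv-contrib-python 4.5+ builds:

import cv2

def make_kcf_tracker():
    """KCF factory that works across OpenCV versions."""
    if hasattr(cv2, "TrackerKCF_create"):      # OpenCV 3.x / early 4.x
        return cv2.TrackerKCF_create()
    return cv2.legacy.TrackerKCF_create()      # opencv-contrib-python 4.5+

def track_once(frame, next_frame, bbox):
    """Init a tracker on `frame` at (x, y, w, h) and advance it one frame."""
    tracker = make_kcf_tracker()
    tracker.init(frame, tuple(int(v) for v in bbox))
    ok, new_bbox = tracker.update(next_frame)  # ok is False once the track is lost
    return ok, new_bbox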
@@ -786,7 +1003,10 @@ def list_bigquery_resources():
786
 
787
  return resources
788
 
789
- def process_video_file(video_file, analysis_types):
790
  """Process an uploaded video file with enhanced Vision AI detection and analytics"""
791
  # Create a temporary file to save the uploaded video
792
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
@@ -801,7 +1021,7 @@ def process_video_file(video_file, analysis_types):
801
  if not cap.isOpened():
802
  st.error("Error opening video file")
803
  os.unlink(temp_video_path)
804
- return None, None # Return a tuple with None values instead of just None
805
 
806
  # Get video properties
807
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
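Opening the capture and reading its properties is the standard preamble; a sketch, where the fps fallback is an assumption and the 10-second cap mirrors the truncation notice elsewhere in this diff:

import cv2

cap = cv2.VideoCapture("input.mp4")             # path is illustrative
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0         # some containers report 0
max_frames = int(fps * 10)                      # the app truncates at 10 seconds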
@@ -817,7 +1037,15 @@ def process_video_file(video_file, analysis_types):
817
  # Scene change detection threshold
818
  scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
819
  # Process every Nth frame to reduce API calls
820
- process_every_n_frames = 5
821
 
822
  # Check OpenCV version for compatibility with advanced features
823
  opencv_version = cv2.__version__
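The hunk stores cv2.__version__, but the comparison itself sits outside the context shown. One way such a gate might look (the (4, 5) threshold and the cv2.legacy probe are assumptions, not the app's confirmed logic):

import cv2

major, minor = (int(p) for p in cv2.__version__.split(".")[:2])
use_advanced_tracking = (major, minor) >= (4, 5) and hasattr(cv2, "legacy")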
@@ -839,6 +1067,38 @@ def process_video_file(video_file, analysis_types):
839
  use_advanced_tracking = False
840
  # ----------------- End Parameters -----------------
841
 
 
842
  # Inform user if video is being truncated
843
  if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
844
  st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
@@ -881,128 +1141,10 @@ def process_video_file(video_file, analysis_types):
881
  previous_frame_gray = None
882
  prev_points = None
883
 
884
- try:
885
- frame_count = 0
886
- while frame_count < max_frames: # Limit to 10 seconds
887
- ret, frame = cap.read()
888
- if not ret:
889
- break
890
-
891
- frame_count += 1
892
-
893
- # Update progress
894
- progress = int(frame_count / total_frames * 100)
895
- progress_bar.progress(progress)
896
- status_text.text(f"Processing frame {frame_count}/{total_frames} ({progress}%) - {frame_count/fps:.1f}s of 10s")
897
-
898
- # Add timestamp to frame
899
- cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
900
- (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
901
-
902
- # Activity detection and scene change detection
903
- current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
904
- current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
905
-
906
- if previous_frame_gray is not None:
907
- # Calculate frame difference for activity detection
908
- frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
909
- activity_level = np.mean(frame_diff)
910
- detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
911
-
912
- # Scene change detection
913
- if activity_level > scene_change_threshold:
914
- detection_stats["scene_changes"].append(frame_count/fps)
915
- # Mark scene change on frame
916
- cv2.putText(frame, "SCENE CHANGE",
917
- (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
918
-
919
- # Add optical flow tracking if enabled
920
- if use_advanced_tracking and prev_points is not None:
921
- try:
922
- # Calculate optical flow
923
- next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
924
- current_frame_gray,
925
- prev_points,
926
- None,
927
- **lk_params)
928
-
929
- # Select good points
930
- if next_points is not None:
931
- good_new = next_points[status==1]
932
- good_old = prev_points[status==1]
933
-
934
- # Draw motion tracks
935
- for i, (new, old) in enumerate(zip(good_new, good_old)):
936
- a, b = new.ravel()
937
- c, d = old.ravel()
938
- # Draw motion lines
939
- cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
940
- cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
941
- except Exception as e:
942
- # If optical flow fails, just continue without it
943
- pass
944
-
945
- # Update tracking points periodically if enabled
946
- if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
947
- try:
948
- prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
949
- except Exception:
950
- # If feature tracking fails, just continue without it
951
- prev_points = None
952
-
953
- previous_frame_gray = current_frame_gray
954
-
955
- # Process frames with Vision API - keep this part of the code unchanged
956
- if frame_count % process_every_n_frames == 0:
957
- # ... existing API processing code ...
958
- pass
959
-
960
- # Add hint about slowed down speed
961
- cv2.putText(frame, "Playback: 60% speed for better visualization",
962
- (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
963
-
964
- # Write the frame to output video
965
- out.write(frame)
966
-
967
- # Release resources
968
- cap.release()
969
- out.release()
970
-
971
- # Clear progress indicators
972
- progress_bar.empty()
973
- status_text.empty()
974
-
975
- # Read the processed video as bytes for download
976
- with open(output_path, 'rb') as file:
977
- processed_video_bytes = file.read()
978
-
979
- # Clean up temporary files
980
- os.unlink(temp_video_path)
981
- os.unlink(output_path)
982
-
983
- # Return results
984
- results = {"detection_stats": detection_stats}
985
-
986
- # Store results in session state for chatbot context
987
- st.session_state.analysis_results = results
988
-
989
- # Update vectorstore with new results
990
- update_vectorstore_with_results(results)
991
-
992
- return processed_video_bytes, results
993
-
994
- except Exception as e:
995
- # Clean up on error
996
- cap.release()
997
- if 'out' in locals():
998
- out.release()
999
- os.unlink(temp_video_path)
1000
- if os.path.exists(output_path):
1001
- os.unlink(output_path)
1002
-
1003
- # Return None values as a tuple instead of raising the exception
1004
- st.error(f"Error processing video: {str(e)}")
1005
- return None, None # Return a tuple with None values
1006
 
1007
  def load_bigquery_table(dataset_id, table_id, limit=1000):
1008
  """Load data directly from an existing BigQuery table"""
@@ -1649,6 +1791,16 @@ def main():
1649
 
1650
  # Analysis settings
1651
  st.sidebar.markdown("### Video Analysis Settings")
1652
  analysis_types = []
1653
  if st.sidebar.checkbox("Object Detection", value=True):
1654
  analysis_types.append("Objects")
@@ -1657,17 +1809,124 @@ def main():
1657
  if st.sidebar.checkbox("Text Recognition"):
1658
  analysis_types.append("Text")
1659
 
1660
  st.sidebar.markdown("---")
1661
- st.sidebar.warning("⚠️ Video analysis may use a significant amount of API calls. Use responsibly.")
1662
 
1663
  # Upload Video mode only - removed real-time camera option
1664
  st.markdown("""
1665
  #### 📤 Video Analysis
1666
 
1667
- Upload a video file to analyze it with Google Cloud Vision AI.
1668
 
1669
  **Instructions:**
1670
- 1. Select the analysis types in the sidebar
1671
  2. Upload a video file (MP4, MOV, AVI)
1672
  3. Click "Process Video" to begin analysis
1673
  4. Download the processed video when complete
@@ -1690,10 +1949,24 @@ def main():
1690
  if not analysis_types:
1691
  st.warning("Please select at least one analysis type.")
1692
  else:
1693
- with st.spinner("Processing video (max 10 seconds)..."):
1694
  try:
1695
- # Process the video with enhanced detail
1696
- processed_video, results = process_video_file(uploaded_file, analysis_types)
1697
 
1698
  if processed_video:
1699
  # Offer download of processed video
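Offering the processed bytes for download is a one-call affair in Streamlit; a sketch assuming processed_video holds the MP4 bytes returned by process_video_file:

import streamlit as st

def offer_download(processed_video: bytes) -> None:
    """Render a download button for the processed MP4 bytes."""
    st.download_button(
        label="Download Processed Video",
        data=processed_video,
        file_name="processed_video.mp4",
        mime="video/mp4",
    )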
@@ -2000,277 +2273,4 @@ def main():
2000
  st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2001
  st.write(f"Rows: {result['num_rows']}")
2002
  st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2003
- st.write(f"Schema: {', '.join(result['schema'])}")
2004
-
2005
- # Store table info in session state
2006
- st.session_state["table_info"] = {
2007
- "dataset_id": dataset_id,
2008
- "table_id": table_id,
2009
- "schema": result["schema"]
2010
- }
2011
- except Exception as e:
2012
- st.error(f"Error uploading to BigQuery: {str(e)}")
2013
- except Exception as e:
2014
- st.error(f"Error reading CSV file: {str(e)}")
2015
- else:
2016
- st.info("Upload a CSV file to load data into BigQuery")
2017
-
2018
- with query_tab:
2019
- st.markdown("### Query BigQuery Data")
2020
-
2021
- if "query_results" in st.session_state and "table_info" in st.session_state:
2022
- # Display info about the loaded data
2023
- table_info = st.session_state["table_info"]
2024
- st.write(f"Working with table: **{table_info['dataset_id']}.{table_info['table_id']}**")
2025
-
2026
- # Query input
2027
- default_query = f"SELECT * FROM `{credentials.project_id}.{table_info['dataset_id']}.{table_info['table_id']}` LIMIT 100"
2028
- query = st.text_area("SQL Query", default_query, height=100)
2029
-
2030
- # Execute query button
2031
- if st.button("Run Query"):
2032
- with st.spinner("Executing query..."):
2033
- try:
2034
- # Run the query
2035
- results = run_bigquery(query)
2036
-
2037
- # Store results in session state
2038
- st.session_state["query_results"] = results
2039
-
2040
- # Display results
2041
- st.write("### Query Results")
2042
- st.dataframe(results)
2043
-
2044
- # Download button for results
2045
- csv = results.to_csv(index=False)
2046
- st.download_button(
2047
- label="Download Results as CSV",
2048
- data=csv,
2049
- file_name="query_results.csv",
2050
- mime="text/csv"
2051
- )
2052
- except Exception as e:
2053
- st.error(f"Error executing query: {str(e)}")
2054
- else:
2055
- st.info("Load a table from BigQuery or upload a CSV file first")
2056
-
2057
- with visualization_tab:
2058
- st.markdown("### Visualize BigQuery Data")
2059
-
2060
- if "query_results" in st.session_state and not st.session_state["query_results"].empty:
2061
- df = st.session_state["query_results"]
2062
-
2063
- # Chart type selection
2064
- chart_type = st.selectbox(
2065
- "Select Chart Type",
2066
- ["Bar Chart", "Line Chart", "Scatter Plot", "Histogram", "Pie Chart"]
2067
- )
2068
-
2069
- # Column selection based on data types
2070
- numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
2071
- all_cols = df.columns.tolist()
2072
-
2073
- if len(numeric_cols) < 1:
2074
- st.warning("No numeric columns available for visualization")
2075
- else:
2076
- if chart_type in ["Bar Chart", "Line Chart", "Scatter Plot"]:
2077
- col1, col2 = st.columns(2)
2078
-
2079
- with col1:
2080
- x_axis = st.selectbox("X-axis", all_cols)
2081
-
2082
- with col2:
2083
- y_axis = st.selectbox("Y-axis", numeric_cols)
2084
-
2085
- # Optional: Grouping/color dimension
2086
- color_dim = st.selectbox("Color Dimension (Optional)", ["None"] + all_cols)
2087
-
2088
- # Generate the visualization based on selection
2089
- if st.button("Generate Visualization"):
2090
- st.write(f"### {chart_type}: {y_axis} by {x_axis}")
2091
-
2092
- if chart_type == "Bar Chart":
2093
- if color_dim != "None":
2094
- fig = px.bar(df, x=x_axis, y=y_axis, color=color_dim,
2095
- title=f"{y_axis} by {x_axis}")
2096
- else:
2097
- fig = px.bar(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2098
- st.plotly_chart(fig)
2099
-
2100
- elif chart_type == "Line Chart":
2101
- if color_dim != "None":
2102
- fig = px.line(df, x=x_axis, y=y_axis, color=color_dim,
2103
- title=f"{y_axis} by {x_axis}")
2104
- else:
2105
- fig = px.line(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2106
- st.plotly_chart(fig)
2107
-
2108
- elif chart_type == "Scatter Plot":
2109
- if color_dim != "None":
2110
- fig = px.scatter(df, x=x_axis, y=y_axis, color=color_dim,
2111
- title=f"{y_axis} vs {x_axis}")
2112
- else:
2113
- fig = px.scatter(df, x=x_axis, y=y_axis, title=f"{y_axis} vs {x_axis}")
2114
- st.plotly_chart(fig)
2115
-
2116
- elif chart_type == "Histogram":
2117
- column = st.selectbox("Select Column", numeric_cols)
2118
- bins = st.slider("Number of Bins", min_value=5, max_value=100, value=20)
2119
-
2120
- if st.button("Generate Visualization"):
2121
- st.write(f"### Histogram of {column}")
2122
- fig = px.histogram(df, x=column, nbins=bins, title=f"Distribution of {column}")
2123
- st.plotly_chart(fig)
2124
-
2125
- elif chart_type == "Pie Chart":
2126
- column = st.selectbox("Category Column", all_cols)
2127
- value_col = st.selectbox("Value Column", numeric_cols)
2128
-
2129
- if st.button("Generate Visualization"):
2130
- # Aggregate the data if needed
2131
- pie_data = df.groupby(column)[value_col].sum().reset_index()
2132
- st.write(f"### Pie Chart: {value_col} by {column}")
2133
- fig = px.pie(pie_data, names=column, values=value_col,
2134
- title=f"{value_col} by {column}")
2135
- st.plotly_chart(fig)
2136
- else:
2137
- st.info("Load a table from BigQuery or upload a CSV file first")
2138
-
2139
- elif selected == "About":
2140
- st.markdown("## About This App")
2141
- st.write("""
2142
- This application uses Google Cloud Vision AI to analyze images and video streams. It can:
2143
-
2144
- - **Detect labels** in images
2145
- - **Identify objects** and their locations
2146
- - **Extract text** from images
2147
- - **Detect faces** and facial landmarks
2148
- - **Analyze real-time video** from your camera
2149
-
2150
- To use this app, you need to:
2151
- 1. Set up Google Cloud Vision API credentials
2152
- 2. Upload an image or use your camera
2153
- 3. Select the types of analysis you want to perform
2154
- 4. Click "Analyze Image" or start the video stream
2155
-
2156
- The app is built with Streamlit and Google Cloud Vision API.
2157
- """)
2158
-
2159
- st.info("Note: Make sure your Google Cloud credentials are properly set up to use this application.")
2160
-
2161
- # Add the chatbot interface at the bottom of the page
2162
- chatbot_interface()
2163
-
2164
- if __name__ == "__main__":
2165
- # Use GOOGLE_CREDENTIALS directly - no need for file or GOOGLE_APPLICATION_CREDENTIALS
2166
- try:
2167
- if 'GOOGLE_CREDENTIALS' in os.environ:
2168
- # Create credentials object directly from JSON string
2169
- credentials_info = json.loads(os.environ['GOOGLE_CREDENTIALS'])
2170
- credentials = service_account.Credentials.from_service_account_info(credentials_info)
2171
-
2172
- # Initialize client with these credentials directly
2173
- client = vision.ImageAnnotatorClient(credentials=credentials)
2174
- else:
2175
- st.sidebar.error("GOOGLE_CREDENTIALS environment variable not found")
2176
- client = None
2177
- except Exception as e:
2178
- st.sidebar.error(f"Error with credentials: {str(e)}")
2179
- client = None
2180
-
2181
- main()
2182
-
2183
- # Add this function to your app
2184
- def extract_video_frames(video_bytes, num_frames=5):
2185
- """Extract frames from video bytes for thumbnail display with improved key frame selection"""
2186
- import cv2
2187
- import numpy as np
2188
- import tempfile
2189
- from PIL import Image
2190
- import io
2191
-
2192
- # Save video bytes to a temporary file
2193
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
2194
- temp_file.write(video_bytes)
2195
- temp_video_path = temp_file.name
2196
-
2197
- # Open the video file
2198
- cap = cv2.VideoCapture(temp_video_path)
2199
-
2200
- # Get video properties
2201
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
2202
- fps = cap.get(cv2.CAP_PROP_FPS)
2203
-
2204
- # Use more sophisticated frame selection based on content analysis
2205
- frames = []
2206
- frame_scores = []
2207
- sample_interval = max(1, frame_count // (num_frames * 3)) # Sample more frames than needed
2208
-
2209
- # First pass: collect frame scores
2210
- prev_frame = None
2211
- frame_index = 0
2212
-
2213
- while len(frame_scores) < num_frames * 3 and frame_index < frame_count:
2214
- cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
2215
- ret, frame = cap.read()
2216
- if not ret:
2217
- break
2218
-
2219
- # Convert to grayscale for analysis
2220
- gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
2221
- gray = cv2.GaussianBlur(gray, (21, 21), 0)
2222
-
2223
- # Calculate frame score based on Laplacian variance (focus measure)
2224
- focus_score = cv2.Laplacian(gray, cv2.CV_64F).var()
2225
-
2226
- # Calculate frame difference if we have a previous frame
2227
- diff_score = 0
2228
- if prev_frame is not None:
2229
- frame_diff = cv2.absdiff(gray, prev_frame)
2230
- diff_score = np.mean(frame_diff)
2231
-
2232
- # Combined score: favor sharp frames with significant changes
2233
- combined_score = focus_score * 0.6 + diff_score * 0.4
2234
- frame_scores.append((frame_index, combined_score))
2235
-
2236
- # Store frame for next comparison
2237
- prev_frame = gray
2238
- frame_index += sample_interval
2239
-
2240
- # Second pass: select the best frames based on scores
2241
- # Sort by score and get top N frames
2242
- sorted_frames = sorted(frame_scores, key=lambda x: x[1], reverse=True)
2243
- best_frames = sorted_frames[:num_frames]
2244
- # Sort back by frame index to maintain chronological order
2245
- selected_frames = sorted(best_frames, key=lambda x: x[0])
2246
-
2247
- # Extract the selected frames
2248
- for idx, _ in selected_frames:
2249
- cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
2250
- ret, frame = cap.read()
2251
- if ret:
2252
- # Apply subtle enhancement to frames
2253
- enhanced_frame = frame.copy()
2254
- # Auto color balance
2255
- lab = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2LAB)
2256
- l, a, b = cv2.split(lab)
2257
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
2258
- cl = clahe.apply(l)
2259
- enhanced_lab = cv2.merge((cl, a, b))
2260
- enhanced_frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
2261
-
2262
- # Convert to RGB (from BGR)
2263
- frame_rgb = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB)
2264
- # Convert to PIL Image
2265
- pil_img = Image.fromarray(frame_rgb)
2266
- # Save to bytes
2267
- img_byte_arr = io.BytesIO()
2268
- pil_img.save(img_byte_arr, format='JPEG', quality=90)
2269
- frames.append(img_byte_arr.getvalue())
2270
-
2271
- # Clean up
2272
- cap.release()
2273
- import os
2274
- os.unlink(temp_video_path)
2275
-
2276
- return frames
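The removed extract_video_frames helper ranks candidate thumbnails by a weighted blend of sharpness and inter-frame change. Its sharpness term is the classic variance-of-Laplacian focus measure, shown standalone here:

import cv2
import numpy as np

def focus_score(gray: np.ndarray) -> float:
    """Variance of the Laplacian: higher means a sharper, better-focused frame."""
    return float(cv2.Laplacian(gray, cv2.CV_64F).var())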
 
433
  class VideoProcessor(VideoProcessorBase):
434
  """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
 
436
+ def __init__(self, analysis_types: List[str], processing_mode: str = "Hybrid (Google Vision + OpenCV)",
437
+ track_update_frames: int = 5, confidence_threshold: float = 0.5):
438
  self.analysis_types = analysis_types
439
+ self.processing_mode = processing_mode
440
  self.frame_counter = 0
441
+ self.process_every_n_frames = track_update_frames # Process every N frames
442
+ self.confidence_threshold = confidence_threshold
443
  self.vision_client = client # Store client reference
444
  self.last_results = {} # Cache results between processed frames
445
  self.last_processed_time = time.time()
 
456
  self.max_time_delta = 0.5
457
  self.min_time_delta = 0.05
458
 
459
+ # For OpenCV-only detection mode
460
+ self.opencv_detector = None
461
+ self.init_opencv_detector()
462
+
463
+ def init_opencv_detector(self):
464
+ """Initialize OpenCV-based object detector if needed"""
465
+ if self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)":
466
+ try:
467
+ # Initialize YOLO or other available models
468
+ # This is a placeholder - you might need to adjust based on available OpenCV DNN models
469
+ weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
470
+ config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
471
+
472
+ # Check if files exist, otherwise use a simpler fallback detector
473
+ if os.path.exists(weights_path) and os.path.exists(config_path):
474
+ self.opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
475
+ else:
476
+ # Fallback to HOG detector for people
477
+ self.opencv_detector = cv2.HOGDescriptor()
478
+ self.opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
479
+ st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
480
+ except Exception as e:
481
+ st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
482
+ self.opencv_detector = None
483
+
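For reference, the HOG fallback above boils down to a short standalone detector; the 0.3 weight cutoff matches the threshold used later in transform:

import cv2
import numpy as np

hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

def detect_people(frame):
    """Return (x, y, w, h) boxes for people scoring above the weight cutoff."""
    rects, weights = hog.detectMultiScale(
        frame, winStride=(8, 8), padding=(4, 4), scale=1.05)
    return [tuple(r) for r, w in zip(rects, np.ravel(weights)) if w > 0.3]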
484
  def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
485
  img = frame.to_ndarray(format="bgr24")
486
  self.frame_counter += 1
 
491
 
492
  # Add status display on all frames
493
  cv2.putText(img,
494
+ f"Vision AI: {'Active' if self.processing_active else 'Paused'} - Mode: {self.processing_mode}",
495
  (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
496
 
497
  # Convert to grayscale for motion detection
498
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
499
 
500
+ # Apply motion detection for all frames if enabled
501
+ if "Motion" in self.analysis_types and self.prev_gray is not None:
502
  # Calculate frame difference for smoother motion detection
503
  frame_diff = cv2.absdiff(gray, self.prev_gray)
504
  _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
 
510
  mg_mask = cv2.motempl.calcMotionGradient(
511
  self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
512
 
513
+ # Visualize motion segments
514
+ seg_mask, segments = cv2.motempl.segmentMotion(
515
+ self.motion_history, timestamp, self.max_time_delta)
516
+
517
+ # Visualize motion segments
518
+ motion_img = np.zeros_like(img)
519
+ for i, segment in enumerate(segments):
520
+ if segment[1] < 50: # Filter out small segments
521
+ continue
522
+ # Draw motion regions with random colors
523
+ color = np.random.randint(0, 255, 3).tolist()
524
+ motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1)
525
+
526
+ # Overlay motion visualization
527
+ alpha = 0.3
528
+ cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img)
529
+
530
+ # Process with Vision API at regular intervals if using Google Vision
 
531
  current_time = time.time()
532
+ if (self.processing_mode == "Google Vision API Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
533
+ (current_time - self.last_processed_time > 1.0) and self.processing_active and \
534
+ self.vision_client is not None:
535
+
536
  self.last_processed_time = current_time
537
 
538
+ # Convert frame to JPEG for Vision API
539
+ success, jpeg_frame = cv2.imencode('.jpg', img)
540
+ if success:
541
+ image_content = jpeg_frame.tobytes()
542
 
543
+ # Create vision image
544
+ vision_image = vision.Image(content=image_content)
545
 
546
+ try:
547
+ # Perform detection based on selected types
548
+ if "Objects" in self.analysis_types:
549
+ objects = self.vision_client.object_localization(image=vision_image)
550
+ # Filter objects by confidence threshold
551
+ filtered_objects = [obj for obj in objects.localized_object_annotations
552
+ if obj.score >= self.confidence_threshold]
553
+ self.last_results["objects"] = filtered_objects
554
+
555
+ # Log detection for tracking
556
+ for obj in filtered_objects:
557
+ # Draw object boundaries
558
+ box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
559
+ for vertex in obj.bounding_poly.normalized_vertices]
560
+ points = np.array([[int(p[0]), int(p[1])] for p in box])
561
+ cv2.polylines(img, [points], True, (0, 255, 0), 2)
562
+
563
+ # Add label with confidence
564
+ cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
565
+ (int(box[0][0]), int(box[0][1] - 10)),
566
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
567
+
568
+ # Create unique object ID for tracking
569
+ obj_id = f"{obj.name}_{self.frame_counter}"
570
+
571
+ # Calculate bounding box for tracker
572
+ x_values = [p[0] for p in box]
573
+ y_values = [p[1] for p in box]
574
+ x_min, x_max = min(x_values), max(x_values)
575
+ y_min, y_max = min(y_values), max(y_values)
576
+
577
+ # Create or update tracker
578
+ if obj.name not in self.object_trackers:
579
+ self.object_trackers[obj.name] = {
580
+ "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
581
+ "last_seen": self.frame_counter,
582
+ "score": obj.score
583
+ }
584
+ else:
585
+ # Update existing tracker
586
+ self.object_trackers[obj.name] = {
587
+ "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
588
+ "last_seen": self.frame_counter,
589
+ "score": obj.score
590
+ }
591
 
592
+ # Face detection if selected
593
+ if "Face Detection" in self.analysis_types:
594
+ faces = self.vision_client.face_detection(image=vision_image)
595
+ self.last_results["faces"] = faces.face_annotations
596
+
597
+ # Draw face boundaries
598
+ for face in faces.face_annotations:
599
+ if face.detection_confidence >= self.confidence_threshold:
600
+ vertices = face.bounding_poly.vertices
601
+ points = [(vertex.x, vertex.y) for vertex in vertices]
602
+ points = np.array([[p[0], p[1]] for p in points])
603
+ cv2.polylines(img, [points], True, (0, 0, 255), 2)
604
+
605
+ # Add confidence score
606
+ cv2.putText(img, f"Face: {int(face.detection_confidence * 100)}%",
607
+ (points[0][0], points[0][1] - 10),
608
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
609
+
610
+ # Draw facial landmarks
611
+ for landmark in face.landmarks:
612
+ px = landmark.position.x
613
+ py = landmark.position.y
614
+ cv2.circle(img, (int(px), int(py)), 2, (255, 255, 0), -1)
615
 
616
+ # Text detection if selected
617
+ if "Text" in self.analysis_types:
618
+ text = self.vision_client.text_detection(image=vision_image)
619
+ if text.text_annotations:
620
+ self.last_results["text"] = text.text_annotations
621
+
622
+ # Draw text bounding boxes
623
+ for text_annot in text.text_annotations[1:]: # Skip the first one (full text)
624
+ box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
625
+ points = np.array([[int(p[0]), int(p[1])] for p in box])
626
+ cv2.polylines(img, [points], True, (255, 0, 0), 2)
627
+
628
+ # Add recognized text
629
+ cv2.putText(img, text_annot.description,
630
+ (points[0][0], points[0][1] - 10),
631
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
632
+ except Exception as e:
633
+ # Handle API errors gracefully
634
+ error_msg = f"API Error: {str(e)}"
635
+ cv2.putText(img, error_msg, (10, 70),
636
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
637
+
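The encode-and-annotate pattern above condenses to one helper; this restates the calls already used in the hunk (imencode, vision.Image, object_localization) rather than adding new API surface:

import cv2
from google.cloud import vision

def detect_objects(client: vision.ImageAnnotatorClient, bgr_frame, min_score: float = 0.5):
    """One Vision API round trip on a BGR frame, filtered by confidence."""
    ok, jpeg = cv2.imencode(".jpg", bgr_frame)
    if not ok:
        return []
    response = client.object_localization(image=vision.Image(content=jpeg.tobytes()))
    return [o for o in response.localized_object_annotations if o.score >= min_score]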
638
+ # Process with OpenCV object detection if enabled
639
+ if (self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
640
+ self.opencv_detector is not None and \
641
+ (self.frame_counter % self.process_every_n_frames == 0 or not self.object_trackers):
642
 
643
+ try:
644
+ # If using HOG detector (the fallback)
645
+ if isinstance(self.opencv_detector, cv2.HOGDescriptor):
646
+ # Detect people
647
+ boxes, weights = self.opencv_detector.detectMultiScale(
648
+ img, winStride=(8, 8), padding=(4, 4), scale=1.05
649
+ )
650
+
651
+ # Draw bounding boxes
652
+ for i, (x, y, w, h) in enumerate(boxes):
653
+ if weights[i] > 0.3: # Confidence threshold
654
+ cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
655
+ cv2.putText(img, f"Person: {int(weights[i] * 100)}%",
656
+ (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
657
+
658
+ # Add to trackers
659
+ self.object_trackers[f"person_{i}"] = {
660
+ "bbox": (x, y, w, h),
661
+ "last_seen": self.frame_counter,
662
+ "score": weights[i]
663
+ }
664
+ else:
665
+ # Using YOLO or another DNN-based detector
666
+ blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
667
+ self.opencv_detector.setInput(blob)
668
+ layer_names = self.opencv_detector.getLayerNames()
669
+ output_layers = [layer_names[i - 1] for i in self.opencv_detector.getUnconnectedOutLayers()]
670
+ outputs = self.opencv_detector.forward(output_layers)
671
+
672
+ # Process detections
673
+ class_ids = []
674
+ confidences = []
675
+ boxes = []
676
+
677
+ for output in outputs:
678
+ for detection in output:
679
+ scores = detection[5:]
680
+ class_id = np.argmax(scores)
681
+ confidence = scores[class_id]
682
+
683
+ if confidence > self.confidence_threshold:
684
+ # Object detected
685
+ center_x = int(detection[0] * img.shape[1])
686
+ center_y = int(detection[1] * img.shape[0])
687
+ w = int(detection[2] * img.shape[1])
688
+ h = int(detection[3] * img.shape[0])
689
+
690
+ # Rectangle coordinates
691
+ x = int(center_x - w / 2)
692
+ y = int(center_y - h / 2)
693
+
694
+ boxes.append([x, y, w, h])
695
+ confidences.append(float(confidence))
696
+ class_ids.append(class_id)
697
+
698
+ # Apply non-maximum suppression
699
+ indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confidence_threshold, 0.4)
700
+
701
+ # Define COCO class names
702
+ class_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
703
+ "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
704
+ "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
705
+ "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
706
+ "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
707
+ "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
708
+ "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
709
+ "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
710
+ "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
711
+ "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
712
+
713
+ for i in indices:
714
+ if isinstance(i, (list, tuple)): # Handle different OpenCV versions
715
+ i = i[0]
716
+
717
+ box = boxes[i]
718
+ x, y, w, h = box
719
+
720
+ # Get class label and draw bounding box
721
+ class_id = class_ids[i]
722
+ label = f"{class_names[class_id]}: {int(confidences[i] * 100)}%"
723
+ cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
724
+ cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
725
+
726
+ # Add to trackers
727
+ object_name = class_names[class_id]
728
+ self.object_trackers[f"{object_name}_{i}"] = {
729
+ "bbox": (x, y, w, h),
730
+ "last_seen": self.frame_counter,
731
+ "score": confidences[i],
732
+ "class": object_name
733
+ }
734
+ except Exception as e:
735
+ cv2.putText(img, f"OpenCV Error: {str(e)}", (10, 110),
736
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
737
+
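The isinstance unwrap in the indices loop above exists because cv2.dnn.NMSBoxes changed its return shape across OpenCV releases; a tiny demonstration:

import cv2

boxes = [[10, 10, 50, 80], [12, 12, 50, 80], [200, 40, 60, 60]]  # [x, y, w, h]
scores = [0.9, 0.6, 0.8]
keep = cv2.dnn.NMSBoxes(boxes, scores, 0.5, 0.4)
# OpenCV >= 4.5.4 returns a flat int array such as [0 2]; older builds return
# nested rows like [[0], [2]] - hence the isinstance unwrap above.
print(keep)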
738
+ # Update object tracking for existing objects (every frame)
739
+ objects_to_remove = []
740
+ for obj_id, tracker_info in self.object_trackers.items():
741
+ # Remove old trackers
742
+ if self.frame_counter - tracker_info["last_seen"] > 30: # Remove after 30 frames
743
+ objects_to_remove.append(obj_id)
744
+ continue
745
+
746
+ # Draw tracking box (for objects not updated this frame)
747
+ if self.frame_counter - tracker_info["last_seen"] <= 5: # Only show recent tracked objects
748
+ x, y, w, h = tracker_info["bbox"]
749
+
750
+ # Use different color for tracked vs detected objects
751
+ if self.frame_counter == tracker_info["last_seen"]:
752
+ color = (0, 255, 0) # Green for newly detected
753
+ else:
754
+ color = (255, 165, 0) # Orange for tracked
755
+
756
+ cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
757
+
758
+ # Add label with confidence and tracking status
759
+ tracking_age = self.frame_counter - tracker_info["last_seen"]
760
+ label = f"{obj_id.split('_')[0]}: {int(tracker_info['score'] * 100)}%"
761
+ if tracking_age > 0:
762
+ label += f" (tracked {tracking_age}f)"
763
+
764
+ cv2.putText(img, label, (x, y - 10),
765
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
766
+
767
+ # Remove expired trackers
768
+ for obj_id in objects_to_remove:
769
+ del self.object_trackers[obj_id]
770
 
771
  # Save current frame for next iteration
772
  self.prev_gray = gray
773
+
774
+ # Add processing mode indicator
775
+ cv2.putText(img, f"Mode: {self.processing_mode}",
776
+ (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
777
+
778
+ # Add frame counter
779
+ cv2.putText(img, f"Frame: {self.frame_counter}",
780
+ (img.shape[1] - 150, img.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
781
 
782
  return av.VideoFrame.from_ndarray(img, format="bgr24")
783
 
 
1003
 
1004
  return resources
1005
 
1006
+ def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Google Vision + OpenCV)",
1007
+ track_update_frames=5, confidence_threshold=0.5, vision_update_interval=1.0,
1008
+ max_results=10, enable_face_landmarks=True, tracking_algorithm="KCF",
1009
+ motion_sensitivity=32, prioritize_vision=True, blend_results=True):
1010
  """Process an uploaded video file with enhanced Vision AI detection and analytics"""
1011
  # Create a temporary file to save the uploaded video
1012
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
 
1021
  if not cap.isOpened():
1022
  st.error("Error opening video file")
1023
  os.unlink(temp_video_path)
1024
+ return None, None
1025
 
1026
  # Get video properties
1027
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
 
1037
  # Scene change detection threshold
1038
  scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
1039
  # Process every Nth frame to reduce API calls
1040
+ process_every_n_frames = track_update_frames
1041
+
1042
+ # Initialize object trackers dictionary for continuous tracking
1043
+ object_trackers = {}
1044
+
1045
+ # Motion history parameters
1046
+ motion_threshold = motion_sensitivity
1047
+ max_time_delta = 0.5
1048
+ min_time_delta = 0.05
1049
 
1050
  # Check OpenCV version for compatibility with advanced features
1051
  opencv_version = cv2.__version__
 
1067
  use_advanced_tracking = False
1068
  # ----------------- End Parameters -----------------
1069
 
1070
+ # Initialize OpenCV detector if needed
1071
+ opencv_detector = None
1072
+ if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1073
+ try:
1074
+ # Check if YOLO model files exist
1075
+ weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
1076
+ config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
1077
+
1078
+ if os.path.exists(weights_path) and os.path.exists(config_path):
1079
+ opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
1080
+ st.info("Using YOLO model for OpenCV detection")
1081
+ else:
1082
+ # Fallback to HOG detector for people
1083
+ opencv_detector = cv2.HOGDescriptor()
1084
+ opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
1085
+ st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
1086
+ except Exception as e:
1087
+ st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
1088
+
1089
+ # Initialize the selected tracking algorithm
1090
+ if tracking_algorithm == "CSRT":
1091
+ tracker_create_func = cv2.legacy.TrackerCSRT_create
1092
+ elif tracking_algorithm == "KCF":
1093
+ tracker_create_func = cv2.legacy.TrackerKCF_create
1094
+ elif tracking_algorithm == "MOSSE":
1095
+ tracker_create_func = cv2.legacy.TrackerMOSSE_create
1096
+ elif tracking_algorithm == "MedianFlow":
1097
+ tracker_create_func = cv2.legacy.TrackerMedianFlow_create
1098
+ else:
1099
+ # Default to KCF if specified algorithm not available
1100
+ tracker_create_func = cv2.legacy.TrackerKCF_create
1101
+
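The if/elif chain above can also be written as a lookup table; cv2.legacy ships with opencv-contrib-python, so this variant probes for it and degrades to the top-level factories when absent (an assumption about the installed build):

import cv2

TRACKER_FACTORIES = {
    "CSRT": "TrackerCSRT_create",
    "KCF": "TrackerKCF_create",
    "MOSSE": "TrackerMOSSE_create",
    "MedianFlow": "TrackerMedianFlow_create",
}

def make_tracker(name: str):
    """Create the named tracker, defaulting to KCF for unknown names."""
    attr = TRACKER_FACTORIES.get(name, "TrackerKCF_create")
    namespace = getattr(cv2, "legacy", cv2)
    return getattr(namespace, attr)()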
1102
  # Inform user if video is being truncated
1103
  if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
1104
  st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
 
1141
  previous_frame_gray = None
1142
  prev_points = None
1143
 
1144
+ # Display mode being used
1145
+ st.info(f"Processing with {processing_mode} mode")
1146
+
1147
+ # The rest of the video processing code would follow...
1148
 
1149
  def load_bigquery_table(dataset_id, table_id, limit=1000):
1150
  """Load data directly from an existing BigQuery table"""
 
1791
 
1792
  # Analysis settings
1793
  st.sidebar.markdown("### Video Analysis Settings")
1794
+
1795
+ # Add processing mode selection
1796
+ processing_mode = st.sidebar.radio(
1797
+ "Processing Engine",
1798
+ ["Hybrid (Google Vision + OpenCV)", "Google Vision API Only", "OpenCV Only"],
1799
+ help="Select which technology to use for video analysis"
1800
+ )
1801
+
1802
+ # Common analysis types selection
1803
+ st.sidebar.markdown("### Detection Types")
1804
  analysis_types = []
1805
  if st.sidebar.checkbox("Object Detection", value=True):
1806
  analysis_types.append("Objects")
 
1809
  if st.sidebar.checkbox("Text Recognition"):
1810
  analysis_types.append("Text")
1811
 
1812
+ # Add motion tracking option
1813
+ if st.sidebar.checkbox("Motion Tracking", value=True):
1814
+ analysis_types.append("Motion")
1815
+
1816
+ # Settings specific to the selected processing mode
1817
  st.sidebar.markdown("---")
1818
+ st.sidebar.markdown(f"### {processing_mode} Settings")
1819
+
1820
+ # Parameters for all modes
1821
+ track_update_frames = 5
1822
+ confidence_threshold = 0.5
1823
+
1824
+ # Mode-specific parameters
1825
+ if processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1826
+ # Google Vision parameters
1827
+ st.sidebar.markdown("#### Google Vision Parameters")
1828
+ vision_update_interval = st.sidebar.slider(
1829
+ "Vision API update interval (seconds)",
1830
+ min_value=0.5,
1831
+ max_value=5.0,
1832
+ value=1.0,
1833
+ step=0.5,
1834
+ help="How often to call the Vision API (longer intervals save API quota)"
1835
+ )
1836
+
1837
+ confidence_threshold = st.sidebar.slider(
1838
+ "Google Vision Confidence Threshold",
1839
+ min_value=0.0,
1840
+ max_value=1.0,
1841
+ value=0.5,
1842
+ help="Minimum confidence score for Google Vision detections"
1843
+ )
1844
+
1845
+ # Detailed API options (using an expander for advanced settings)
1846
+ with st.sidebar.expander("Advanced Vision API Settings"):
1847
+ max_results = st.slider(
1848
+ "Max objects per frame",
1849
+ min_value=1,
1850
+ max_value=20,
1851
+ value=10,
1852
+ help="Maximum number of objects to detect per frame"
1853
+ )
1854
+
1855
+ enable_face_landmarks = st.checkbox(
1856
+ "Enable Face Landmarks",
1857
+ value=True,
1858
+ help="Detect facial features (eyes, nose, etc.)"
1859
+ )
1860
+
1861
+ if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1862
+ # OpenCV parameters
1863
+ st.sidebar.markdown("#### OpenCV Parameters")
1864
+
1865
+ track_update_frames = st.sidebar.slider(
1866
+ "Update OpenCV tracking every N frames",
1867
+ min_value=1,
1868
+ max_value=15,
1869
+ value=5,
1870
+ help="Lower values = more accurate tracking but higher processing load"
1871
+ )
1872
+
1873
+ if processing_mode == "OpenCV Only":
1874
+ # Only show this in OpenCV-only mode
1875
+ confidence_threshold = st.sidebar.slider(
1876
+ "OpenCV Detector Confidence Threshold",
1877
+ min_value=0.0,
1878
+ max_value=1.0,
1879
+ value=0.4,
1880
+ help="Minimum confidence score for OpenCV detections"
1881
+ )
1882
+
1883
+ # OpenCV tracking options
1884
+ with st.sidebar.expander("OpenCV Tracking Options"):
1885
+ tracking_algorithm = st.selectbox(
1886
+ "Tracking Algorithm",
1887
+ ["KCF", "CSRT", "MOSSE", "MedianFlow"],
1888
+ index=0,
1889
+ help="Different algorithms have different speed/accuracy tradeoffs"
1890
+ )
1891
+
1892
+ motion_sensitivity = st.slider(
1893
+ "Motion Sensitivity",
1894
+ min_value=10,
1895
+ max_value=100,
1896
+ value=32,
1897
+ help="Lower values detect more subtle motion"
1898
+ )
1899
+
1900
+ # Hybrid-specific settings
1901
+ if processing_mode == "Hybrid (Google Vision + OpenCV)":
1902
+ # Hybrid specific parameters
1903
+ st.sidebar.markdown("#### Hybrid Mode Settings")
1904
+ prioritize_vision = st.sidebar.radio(
1905
+ "When results conflict, prioritize:",
1906
+ ["Google Vision (more accurate)", "OpenCV (faster)"],
1907
+ index=0,
1908
+ help="Which detection source to prioritize when there are conflicting results"
1909
+ )
1910
+
1911
+ blend_results = st.sidebar.checkbox(
1912
+ "Blend detection results",
1913
+ value=True,
1914
+ help="Combine detections from both systems for better accuracy"
1915
+ )
1916
+
1917
+ # Display warning about API usage
1918
+ st.sidebar.markdown("---")
1919
+ if processing_mode != "OpenCV Only":
1920
+ st.sidebar.warning("⚠️ Google Vision API usage may incur costs. Use responsibly.")
1921
 
1922
  # Upload Video mode only - removed real-time camera option
1923
  st.markdown("""
1924
  #### 📤 Video Analysis
1925
 
1926
+ Upload a video file to analyze it using the selected processing engine.
1927
 
1928
  **Instructions:**
1929
+ 1. Select the processing mode and parameters in the sidebar
1930
  2. Upload a video file (MP4, MOV, AVI)
1931
  3. Click "Process Video" to begin analysis
1932
  4. Download the processed video when complete
 
1949
  if not analysis_types:
1950
  st.warning("Please select at least one analysis type.")
1951
  else:
1952
+ with st.spinner(f"Processing video with {processing_mode} mode (max 10 seconds)..."):
1953
  try:
1954
+ # Create a dict of processing parameters to pass to the processing function
1955
+ processing_params = {
1956
+ "processing_mode": processing_mode,
1957
+ "track_update_frames": track_update_frames,
1958
+ "confidence_threshold": confidence_threshold,
1959
+ "vision_update_interval": vision_update_interval,
1960
+ "max_results": max_results,
1961
+ "enable_face_landmarks": enable_face_landmarks,
1962
+ "tracking_algorithm": tracking_algorithm,
1963
+ "motion_sensitivity": motion_sensitivity,
1964
+ "prioritize_vision": prioritize_vision,
1965
+ "blend_results": blend_results
1966
+ }
1967
+
1968
+ # Process the video with the parameters
1969
+ processed_video, results = process_video_file(uploaded_file, analysis_types, **processing_params)
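Since processing_params is splatted with **, every key must match a keyword parameter of process_video_file or the call raises TypeError. A defensive variant that drops unknown keys (an optional pattern, not something the app does):

import inspect

def call_with_known_kwargs(func, *args, **kwargs):
    """Invoke func, silently dropping kwargs it does not accept."""
    allowed = set(inspect.signature(func).parameters)
    return func(*args, **{k: v for k, v in kwargs.items() if k in allowed})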
1970
 
1971
  if processed_video:
1972
  # Offer download of processed video
 
2273
  st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2274
  st.write(f"Rows: {result['num_rows']}")
2275
  st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2276
+