Update app.py
app.py
CHANGED
@@ -433,10 +433,13 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None):
433   class VideoProcessor(VideoProcessorBase):
434   """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
436 - def __init__(self, analysis_types: List[str]):
437   self.analysis_types = analysis_types
438   self.frame_counter = 0
439 - self.process_every_n_frames =
440   self.vision_client = client # Store client reference
441   self.last_results = {} # Cache results between processed frames
442   self.last_processed_time = time.time()
@@ -453,6 +456,31 @@ class VideoProcessor(VideoProcessorBase):
453   self.max_time_delta = 0.5
454   self.min_time_delta = 0.05
455
456   def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
457   img = frame.to_ndarray(format="bgr24")
458   self.frame_counter += 1
@@ -463,14 +491,14 @@ class VideoProcessor(VideoProcessorBase):
463
464   # Add status display on all frames
465   cv2.putText(img,
466 - f"Vision AI: {'Active' if self.processing_active else 'Paused'}",
467   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
468
469   # Convert to grayscale for motion detection
470   gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
471
472 - # Apply motion detection for all frames
473 - if self.prev_gray is not None:
474   # Calculate frame difference for smoother motion detection
475   frame_diff = cv2.absdiff(gray, self.prev_gray)
476   _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
@@ -482,85 +510,274 @@ class VideoProcessor(VideoProcessorBase):
482   mg_mask = cv2.motempl.calcMotionGradient(
483   self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
484
485 - # Visualize motion segments
486-502 - (content not captured)
503 - # Process at regular intervals
504   current_time = time.time()
505 - if
506   self.last_processed_time = current_time
507
508 - #
509-511 - (content not captured)
512 - if "objects" in self.last_results and "Objects" in self.analysis_types:
513 - # Use OpenCV's built-in object trackers for smoother tracking between API calls
514 - for obj in self.last_results["objects"]:
515 - obj_id = obj.name + str(hash(str(obj.bounding_poly.normalized_vertices)))
516
517-518 - (content not captured)
519 - tracker = cv2.TrackerKCF_create() # or other trackers like CSRT, MIL, etc.
520 -
521 - # Get bounding box coordinates
522 - box_points = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
523 - for vertex in obj.bounding_poly.normalized_vertices]
524 - x_min = min([p[0] for p in box_points])
525 - y_min = min([p[1] for p in box_points])
526 - x_max = max([p[0] for p in box_points])
527 - y_max = max([p[1] for p in box_points])
528 -
529 - # Initialize tracker
530 - bbox = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
531 - tracker.init(img, bbox)
532 - self.object_trackers[obj_id] = {
533 - "tracker": tracker,
534 - "name": obj.name,
535 - "score": obj.score,
536 - "last_update": self.frame_counter
537 - }
538
539-545 - (content not captured)
546
547-551 - (content not captured)
552
553 - #
554-556 - (content not captured)
557
558-560 - (content not captured)
561
562   # Save current frame for next iteration
563   self.prev_gray = gray
564
565   return av.VideoFrame.from_ndarray(img, format="bgr24")
566
@@ -786,7 +1003,10 @@ def list_bigquery_resources():
786
787   return resources
788
789 - def process_video_file(video_file, analysis_types):
790   """Process an uploaded video file with enhanced Vision AI detection and analytics"""
791   # Create a temporary file to save the uploaded video
792   with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
@@ -801,7 +1021,7 @@ def process_video_file(video_file, analysis_types):
801   if not cap.isOpened():
802   st.error("Error opening video file")
803   os.unlink(temp_video_path)
804 - return None, None
805
806   # Get video properties
807   width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -817,7 +1037,15 @@ def process_video_file(video_file, analysis_types):
817   # Scene change detection threshold
818   scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
819   # Process every Nth frame to reduce API calls
820 - process_every_n_frames =
821
822   # Check OpenCV version for compatibility with advanced features
823   opencv_version = cv2.__version__
@@ -839,6 +1067,38 @@ def process_video_file(video_file, analysis_types):
839   use_advanced_tracking = False
840   # ----------------- End Parameters -----------------
841
842   # Inform user if video is being truncated
843   if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
844   st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
@@ -881,128 +1141,10 @@ def process_video_file(video_file, analysis_types):
881   previous_frame_gray = None
882   prev_points = None
883
884-887 - (content not captured)
888 - if not ret:
889 - break
890 -
891 - frame_count += 1
892 -
893 - # Update progress
894 - progress = int(frame_count / total_frames * 100)
895 - progress_bar.progress(progress)
896 - status_text.text(f"Processing frame {frame_count}/{total_frames} ({progress}%) - {frame_count/fps:.1f}s of 10s")
897 -
898 - # Add timestamp to frame
899 - cv2.putText(frame, f"Time: {frame_count/fps:.2f}s",
900 - (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
901 -
902 - # Activity detection and scene change detection
903 - current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
904 - current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0)
905 -
906 - if previous_frame_gray is not None:
907 - # Calculate frame difference for activity detection
908 - frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray)
909 - activity_level = np.mean(frame_diff)
910 - detection_stats["activity_metrics"].append((frame_count/fps, activity_level))
911 -
912 - # Scene change detection
913 - if activity_level > scene_change_threshold:
914 - detection_stats["scene_changes"].append(frame_count/fps)
915 - # Mark scene change on frame
916 - cv2.putText(frame, "SCENE CHANGE",
917 - (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)
918 -
919 - # Add optical flow tracking if enabled
920 - if use_advanced_tracking and prev_points is not None:
921 - try:
922 - # Calculate optical flow
923 - next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray,
924 - current_frame_gray,
925 - prev_points,
926 - None,
927 - **lk_params)
928 -
929 - # Select good points
930 - if next_points is not None:
931 - good_new = next_points[status==1]
932 - good_old = prev_points[status==1]
933 -
934 - # Draw motion tracks
935 - for i, (new, old) in enumerate(zip(good_new, good_old)):
936 - a, b = new.ravel()
937 - c, d = old.ravel()
938 - # Draw motion lines
939 - cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2)
940 - cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1)
941 - except Exception as e:
942 - # If optical flow fails, just continue without it
943 - pass
944 -
945 - # Update tracking points periodically if enabled
946 - if use_advanced_tracking and (frame_count % 5 == 0 or prev_points is None or (prev_points is not None and len(prev_points) < 10)):
947 - try:
948 - prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params)
949 - except Exception:
950 - # If feature tracking fails, just continue without it
951 - prev_points = None
952 -
953 - previous_frame_gray = current_frame_gray
954 -
955 - # Process frames with Vision API - keep this part of the code unchanged
956 - if frame_count % process_every_n_frames == 0:
957 - # ... existing API processing code ...
958 - pass
959 -
960 - # Add hint about slowed down speed
961 - cv2.putText(frame, "Playback: 60% speed for better visualization",
962 - (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2)
963 -
964 - # Write the frame to output video
965 - out.write(frame)
966 -
967 - # Release resources
968 - cap.release()
969 - out.release()
970 -
971 - # Clear progress indicators
972 - progress_bar.empty()
973 - status_text.empty()
974 -
975 - # Read the processed video as bytes for download
976 - with open(output_path, 'rb') as file:
977 - processed_video_bytes = file.read()
978 -
979 - # Clean up temporary files
980 - os.unlink(temp_video_path)
981 - os.unlink(output_path)
982 -
983 - # Return results
984 - results = {"detection_stats": detection_stats}
985 -
986 - # Store results in session state for chatbot context
987 - st.session_state.analysis_results = results
988 -
989 - # Update vectorstore with new results
990 - update_vectorstore_with_results(results)
991 -
992 - return processed_video_bytes, results
993 -
994 - except Exception as e:
995 - # Clean up on error
996 - cap.release()
997 - if 'out' in locals():
998 - out.release()
999 - os.unlink(temp_video_path)
1000 - if os.path.exists(output_path):
1001 - os.unlink(output_path)
1002 -
1003 - # Return None values as a tuple instead of raising the exception
1004 - st.error(f"Error processing video: {str(e)}")
1005 - return None, None # Return a tuple with None values
1006
1007   def load_bigquery_table(dataset_id, table_id, limit=1000):
1008   """Load data directly from an existing BigQuery table"""
@@ -1649,6 +1791,16 @@ def main():
1649
1650   # Analysis settings
1651   st.sidebar.markdown("### Video Analysis Settings")
1652   analysis_types = []
1653   if st.sidebar.checkbox("Object Detection", value=True):
1654   analysis_types.append("Objects")
@@ -1657,17 +1809,124 @@ def main():
1657   if st.sidebar.checkbox("Text Recognition"):
1658   analysis_types.append("Text")
1659
1660   st.sidebar.markdown("---")
1661 - st.sidebar.
1662
1663   # Upload Video mode only - removed real-time camera option
1664   st.markdown("""
1665   #### 📤 Video Analysis
1666
1667 - Upload a video file to analyze it
1668
1669   **Instructions:**
1670 - 1. Select the
1671   2. Upload a video file (MP4, MOV, AVI)
1672   3. Click "Process Video" to begin analysis
1673   4. Download the processed video when complete
@@ -1690,10 +1949,24 @@ def main():
1690   if not analysis_types:
1691   st.warning("Please select at least one analysis type.")
1692   else:
1693 - with st.spinner("Processing video (max 10 seconds)..."):
1694   try:
1695 - #
1696 -
1697
1698   if processed_video:
1699   # Offer download of processed video
@@ -2000,277 +2273,4 @@ def main():
2000   st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2001   st.write(f"Rows: {result['num_rows']}")
2002   st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2003 -
2004 -
2005 - # Store table info in session state
2006 - st.session_state["table_info"] = {
2007 - "dataset_id": dataset_id,
2008 - "table_id": table_id,
2009 - "schema": result["schema"]
2010 - }
2011 - except Exception as e:
2012 - st.error(f"Error uploading to BigQuery: {str(e)}")
2013 - except Exception as e:
2014 - st.error(f"Error reading CSV file: {str(e)}")
2015 - else:
2016 - st.info("Upload a CSV file to load data into BigQuery")
2017 -
2018 - with query_tab:
2019 - st.markdown("### Query BigQuery Data")
2020 -
2021 - if "query_results" in st.session_state and "table_info" in st.session_state:
2022 - # Display info about the loaded data
2023 - table_info = st.session_state["table_info"]
2024 - st.write(f"Working with table: **{table_info['dataset_id']}.{table_info['table_id']}**")
2025 -
2026 - # Query input
2027 - default_query = f"SELECT * FROM `{credentials.project_id}.{table_info['dataset_id']}.{table_info['table_id']}` LIMIT 100"
2028 - query = st.text_area("SQL Query", default_query, height=100)
2029 -
2030 - # Execute query button
2031 - if st.button("Run Query"):
2032 - with st.spinner("Executing query..."):
2033 - try:
2034 - # Run the query
2035 - results = run_bigquery(query)
2036 -
2037 - # Store results in session state
2038 - st.session_state["query_results"] = results
2039 -
2040 - # Display results
2041 - st.write("### Query Results")
2042 - st.dataframe(results)
2043 -
2044 - # Download button for results
2045 - csv = results.to_csv(index=False)
2046 - st.download_button(
2047 - label="Download Results as CSV",
2048 - data=csv,
2049 - file_name="query_results.csv",
2050 - mime="text/csv"
2051 - )
2052 - except Exception as e:
2053 - st.error(f"Error executing query: {str(e)}")
2054 - else:
2055 - st.info("Load a table from BigQuery or upload a CSV file first")
2056 -
2057 - with visualization_tab:
2058 - st.markdown("### Visualize BigQuery Data")
2059 -
2060 - if "query_results" in st.session_state and not st.session_state["query_results"].empty:
2061 - df = st.session_state["query_results"]
2062 -
2063 - # Chart type selection
2064 - chart_type = st.selectbox(
2065 - "Select Chart Type",
2066 - ["Bar Chart", "Line Chart", "Scatter Plot", "Histogram", "Pie Chart"]
2067 - )
2068 -
2069 - # Column selection based on data types
2070 - numeric_cols = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
2071 - all_cols = df.columns.tolist()
2072 -
2073 - if len(numeric_cols) < 1:
2074 - st.warning("No numeric columns available for visualization")
2075 - else:
2076 - if chart_type in ["Bar Chart", "Line Chart", "Scatter Plot"]:
2077 - col1, col2 = st.columns(2)
2078 -
2079 - with col1:
2080 - x_axis = st.selectbox("X-axis", all_cols)
2081 -
2082 - with col2:
2083 - y_axis = st.selectbox("Y-axis", numeric_cols)
2084 -
2085 - # Optional: Grouping/color dimension
2086 - color_dim = st.selectbox("Color Dimension (Optional)", ["None"] + all_cols)
2087 -
2088 - # Generate the visualization based on selection
2089 - if st.button("Generate Visualization"):
2090 - st.write(f"### {chart_type}: {y_axis} by {x_axis}")
2091 -
2092 - if chart_type == "Bar Chart":
2093 - if color_dim != "None":
2094 - fig = px.bar(df, x=x_axis, y=y_axis, color=color_dim,
2095 - title=f"{y_axis} by {x_axis}")
2096 - else:
2097 - fig = px.bar(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2098 - st.plotly_chart(fig)
2099 -
2100 - elif chart_type == "Line Chart":
2101 - if color_dim != "None":
2102 - fig = px.line(df, x=x_axis, y=y_axis, color=color_dim,
2103 - title=f"{y_axis} by {x_axis}")
2104 - else:
2105 - fig = px.line(df, x=x_axis, y=y_axis, title=f"{y_axis} by {x_axis}")
2106 - st.plotly_chart(fig)
2107 -
2108 - elif chart_type == "Scatter Plot":
2109 - if color_dim != "None":
2110 - fig = px.scatter(df, x=x_axis, y=y_axis, color=color_dim,
2111 - title=f"{y_axis} vs {x_axis}")
2112 - else:
2113 - fig = px.scatter(df, x=x_axis, y=y_axis, title=f"{y_axis} vs {x_axis}")
2114 - st.plotly_chart(fig)
2115 -
2116 - elif chart_type == "Histogram":
2117 - column = st.selectbox("Select Column", numeric_cols)
2118 - bins = st.slider("Number of Bins", min_value=5, max_value=100, value=20)
2119 -
2120 - if st.button("Generate Visualization"):
2121 - st.write(f"### Histogram of {column}")
2122 - fig = px.histogram(df, x=column, nbins=bins, title=f"Distribution of {column}")
2123 - st.plotly_chart(fig)
2124 -
2125 - elif chart_type == "Pie Chart":
2126 - column = st.selectbox("Category Column", all_cols)
2127 - value_col = st.selectbox("Value Column", numeric_cols)
2128 -
2129 - if st.button("Generate Visualization"):
2130 - # Aggregate the data if needed
2131 - pie_data = df.groupby(column)[value_col].sum().reset_index()
2132 - st.write(f"### Pie Chart: {value_col} by {column}")
2133 - fig = px.pie(pie_data, names=column, values=value_col,
2134 - title=f"{value_col} by {column}")
2135 - st.plotly_chart(fig)
2136 - else:
2137 - st.info("Load a table from BigQuery or upload a CSV file first")
2138 -
2139 - elif selected == "About":
2140 - st.markdown("## About This App")
2141 - st.write("""
2142 - This application uses Google Cloud Vision AI to analyze images and video streams. It can:
2143 -
2144 - - **Detect labels** in images
2145 - - **Identify objects** and their locations
2146 - - **Extract text** from images
2147 - - **Detect faces** and facial landmarks
2148 - - **Analyze real-time video** from your camera
2149 -
2150 - To use this app, you need to:
2151 - 1. Set up Google Cloud Vision API credentials
2152 - 2. Upload an image or use your camera
2153 - 3. Select the types of analysis you want to perform
2154 - 4. Click "Analyze Image" or start the video stream
2155 -
2156 - The app is built with Streamlit and Google Cloud Vision API.
2157 - """)
2158 -
2159 - st.info("Note: Make sure your Google Cloud credentials are properly set up to use this application.")
2160 -
2161 - # Add the chatbot interface at the bottom of the page
2162 - chatbot_interface()
2163 -
2164 - if __name__ == "__main__":
2165 - # Use GOOGLE_CREDENTIALS directly - no need for file or GOOGLE_APPLICATION_CREDENTIALS
2166 - try:
2167 - if 'GOOGLE_CREDENTIALS' in os.environ:
2168 - # Create credentials object directly from JSON string
2169 - credentials_info = json.loads(os.environ['GOOGLE_CREDENTIALS'])
2170 - credentials = service_account.Credentials.from_service_account_info(credentials_info)
2171 -
2172 - # Initialize client with these credentials directly
2173 - client = vision.ImageAnnotatorClient(credentials=credentials)
2174 - else:
2175 - st.sidebar.error("GOOGLE_CREDENTIALS environment variable not found")
2176 - client = None
2177 - except Exception as e:
2178 - st.sidebar.error(f"Error with credentials: {str(e)}")
2179 - client = None
2180 -
2181 - main()
2182 -
2183 - # Add this function to your app
2184 - def extract_video_frames(video_bytes, num_frames=5):
2185 - """Extract frames from video bytes for thumbnail display with improved key frame selection"""
2186 - import cv2
2187 - import numpy as np
2188 - import tempfile
2189 - from PIL import Image
2190 - import io
2191 -
2192 - # Save video bytes to a temporary file
2193 - with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
2194 - temp_file.write(video_bytes)
2195 - temp_video_path = temp_file.name
2196 -
2197 - # Open the video file
2198 - cap = cv2.VideoCapture(temp_video_path)
2199 -
2200 - # Get video properties
2201 - frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
2202 - fps = cap.get(cv2.CAP_PROP_FPS)
2203 -
2204 - # Use more sophisticated frame selection based on content analysis
2205 - frames = []
2206 - frame_scores = []
2207 - sample_interval = max(1, frame_count // (num_frames * 3)) # Sample more frames than needed
2208 -
2209 - # First pass: collect frame scores
2210 - prev_frame = None
2211 - frame_index = 0
2212 -
2213 - while len(frame_scores) < num_frames * 3 and frame_index < frame_count:
2214 - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
2215 - ret, frame = cap.read()
2216 - if not ret:
2217 - break
2218 -
2219 - # Convert to grayscale for analysis
2220 - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
2221 - gray = cv2.GaussianBlur(gray, (21, 21), 0)
2222 -
2223 - # Calculate frame score based on Laplacian variance (focus measure)
2224 - focus_score = cv2.Laplacian(gray, cv2.CV_64F).var()
2225 -
2226 - # Calculate frame difference if we have a previous frame
2227 - diff_score = 0
2228 - if prev_frame is not None:
2229 - frame_diff = cv2.absdiff(gray, prev_frame)
2230 - diff_score = np.mean(frame_diff)
2231 -
2232 - # Combined score: favor sharp frames with significant changes
2233 - combined_score = focus_score * 0.6 + diff_score * 0.4
2234 - frame_scores.append((frame_index, combined_score))
2235 -
2236 - # Store frame for next comparison
2237 - prev_frame = gray
2238 - frame_index += sample_interval
2239 -
2240 - # Second pass: select the best frames based on scores
2241 - # Sort by score and get top N frames
2242 - sorted_frames = sorted(frame_scores, key=lambda x: x[1], reverse=True)
2243 - best_frames = sorted_frames[:num_frames]
2244 - # Sort back by frame index to maintain chronological order
2245 - selected_frames = sorted(best_frames, key=lambda x: x[0])
2246 -
2247 - # Extract the selected frames
2248 - for idx, _ in selected_frames:
2249 - cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
2250 - ret, frame = cap.read()
2251 - if ret:
2252 - # Apply subtle enhancement to frames
2253 - enhanced_frame = frame.copy()
2254 - # Auto color balance
2255 - lab = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2LAB)
2256 - l, a, b = cv2.split(lab)
2257 - clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
2258 - cl = clahe.apply(l)
2259 - enhanced_lab = cv2.merge((cl, a, b))
2260 - enhanced_frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR)
2261 -
2262 - # Convert to RGB (from BGR)
2263 - frame_rgb = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB)
2264 - # Convert to PIL Image
2265 - pil_img = Image.fromarray(frame_rgb)
2266 - # Save to bytes
2267 - img_byte_arr = io.BytesIO()
2268 - pil_img.save(img_byte_arr, format='JPEG', quality=90)
2269 - frames.append(img_byte_arr.getvalue())
2270 -
2271 - # Clean up
2272 - cap.release()
2273 - import os
2274 - os.unlink(temp_video_path)
2275 -
2276 - return frames
433   class VideoProcessor(VideoProcessorBase):
434   """Process video frames for real-time analysis with enhanced OpenCV processing"""
435
436 + def __init__(self, analysis_types: List[str], processing_mode: str = "Hybrid (Google Vision + OpenCV)",
437 + track_update_frames: int = 5, confidence_threshold: float = 0.5):
438   self.analysis_types = analysis_types
439 + self.processing_mode = processing_mode
440   self.frame_counter = 0
441 + self.process_every_n_frames = track_update_frames # Process every N frames
442 + self.confidence_threshold = confidence_threshold
443   self.vision_client = client # Store client reference
444   self.last_results = {} # Cache results between processed frames
445   self.last_processed_time = time.time()
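Since the constructor now takes three extra keyword arguments, whoever creates the processor has to supply them. A minimal sketch of how that could be wired up, assuming the app uses streamlit-webrtc's webrtc_streamer with a video_processor_factory (the widget key and the argument values here are assumptions, not part of the commit):

# Sketch only: instantiating the widened VideoProcessor via streamlit-webrtc.
from streamlit_webrtc import webrtc_streamer

webrtc_streamer(
    key="vision-ai",  # hypothetical widget key
    video_processor_factory=lambda: VideoProcessor(
        analysis_types=["Objects", "Text", "Motion"],
        processing_mode="Hybrid (Google Vision + OpenCV)",
        track_update_frames=5,
        confidence_threshold=0.5,
    ),
)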
456   self.max_time_delta = 0.5
457   self.min_time_delta = 0.05
458
459 + # For OpenCV-only detection mode
460 + self.opencv_detector = None
461 + self.init_opencv_detector()
462 +
463 + def init_opencv_detector(self):
464 + """Initialize OpenCV-based object detector if needed"""
465 + if self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)":
466 + try:
467 + # Initialize YOLO or other available models
468 + # This is a placeholder - you might need to adjust based on available OpenCV DNN models
469 + weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
470 + config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
471 +
472 + # Check if files exist, otherwise use a simpler fallback detector
473 + if os.path.exists(weights_path) and os.path.exists(config_path):
474 + self.opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
475 + else:
476 + # Fallback to HOG detector for people
477 + self.opencv_detector = cv2.HOGDescriptor()
478 + self.opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
479 + st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
480 + except Exception as e:
481 + st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
482 + self.opencv_detector = None
483 +
484   def transform(self, frame: av.VideoFrame) -> av.VideoFrame:
485   img = frame.to_ndarray(format="bgr24")
486   self.frame_counter += 1
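The detector above looks for models/yolov3.weights and models/yolov3.cfg next to app.py and silently falls back to the HOG person detector when they are missing. A small hedged sketch of fetching the files once at startup; the URLs are the commonly used upstream locations and should be treated as assumptions:

# Sketch only: download YOLOv3 files into ./models if absent (URLs are assumptions).
import os
import urllib.request

MODEL_DIR = "models"
FILES = {
    "yolov3.cfg": "https://raw.githubusercontent.com/pjreddie/darknet/master/cfg/yolov3.cfg",
    "yolov3.weights": "https://pjreddie.com/media/files/yolov3.weights",  # large file (~240 MB)
}

os.makedirs(MODEL_DIR, exist_ok=True)
for name, url in FILES.items():
    path = os.path.join(MODEL_DIR, name)
    if not os.path.exists(path):
        urllib.request.urlretrieve(url, path)  # one-time download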
491
492   # Add status display on all frames
493   cv2.putText(img,
494 + f"Vision AI: {'Active' if self.processing_active else 'Paused'} - Mode: {self.processing_mode}",
495   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
496
497   # Convert to grayscale for motion detection
498   gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
499
500 + # Apply motion detection for all frames if enabled
501 + if "Motion" in self.analysis_types and self.prev_gray is not None:
502   # Calculate frame difference for smoother motion detection
503   frame_diff = cv2.absdiff(gray, self.prev_gray)
504   _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY)
510   mg_mask = cv2.motempl.calcMotionGradient(
511   self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5)
512
513 + # Visualize motion segments
514 + seg_mask, segments = cv2.motempl.segmentMotion(
515 + self.motion_history, timestamp, self.max_time_delta)
516 +
517 + # Visualize motion segments
518 + motion_img = np.zeros_like(img)
519 + for i, segment in enumerate(segments):
520 + if segment[1] < 50: # Filter out small segments
521 + continue
522 + # Draw motion regions with random colors
523 + color = np.random.randint(0, 255, 3).tolist()
524 + motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1)
525 +
526 + # Overlay motion visualization
527 + alpha = 0.3
528 + cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img)
529 +
530 + # Process with Vision API at regular intervals if using Google Vision
531   current_time = time.time()
532 + if (self.processing_mode == "Google Vision API Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
533 + (current_time - self.last_processed_time > 1.0) and self.processing_active and \
534 + self.vision_client is not None:
535 +
536   self.last_processed_time = current_time
537
538 + # Convert frame to JPEG for Vision API
539 + success, jpeg_frame = cv2.imencode('.jpg', img)
540 + if success:
541 + image_content = jpeg_frame.tobytes()
542
543 + # Create vision image
544 + vision_image = vision.Image(content=image_content)
545
546 + try:
547 + # Perform detection based on selected types
548 + if "Objects" in self.analysis_types:
549 + objects = self.vision_client.object_localization(image=vision_image)
550 + # Filter objects by confidence threshold
551 + filtered_objects = [obj for obj in objects.localized_object_annotations
552 + if obj.score >= self.confidence_threshold]
553 + self.last_results["objects"] = filtered_objects
554 +
555 + # Log detection for tracking
556 + for obj in filtered_objects:
557 + # Draw object boundaries
558 + box = [(vertex.x * img.shape[1], vertex.y * img.shape[0])
559 + for vertex in obj.bounding_poly.normalized_vertices]
560 + points = np.array([[int(p[0]), int(p[1])] for p in box])
561 + cv2.polylines(img, [points], True, (0, 255, 0), 2)
562 +
563 + # Add label with confidence
564 + cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%",
565 + (int(box[0][0]), int(box[0][1] - 10)),
566 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
567 +
568 + # Create unique object ID for tracking
569 + obj_id = f"{obj.name}_{self.frame_counter}"
570 +
571 + # Calculate bounding box for tracker
572 + x_values = [p[0] for p in box]
573 + y_values = [p[1] for p in box]
574 + x_min, x_max = min(x_values), max(x_values)
575 + y_min, y_max = min(y_values), max(y_values)
576 +
577 + # Create or update tracker
578 + if obj.name not in self.object_trackers:
579 + self.object_trackers[obj.name] = {
580 + "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
581 + "last_seen": self.frame_counter,
582 + "score": obj.score
583 + }
584 + else:
585 + # Update existing tracker
586 + self.object_trackers[obj.name] = {
587 + "bbox": (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)),
588 + "last_seen": self.frame_counter,
589 + "score": obj.score
590 + }
591
592 + # Face detection if selected
593 + if "Face Detection" in self.analysis_types:
594 + faces = self.vision_client.face_detection(image=vision_image)
595 + self.last_results["faces"] = faces.face_annotations
596 +
597 + # Draw face boundaries
598 + for face in faces.face_annotations:
599 + if face.detection_confidence >= self.confidence_threshold:
600 + vertices = face.bounding_poly.vertices
601 + points = [(vertex.x, vertex.y) for vertex in vertices]
602 + points = np.array([[p[0], p[1]] for p in points])
603 + cv2.polylines(img, [points], True, (0, 0, 255), 2)
604 +
605 + # Add confidence score
606 + cv2.putText(img, f"Face: {int(face.detection_confidence * 100)}%",
607 + (points[0][0], points[0][1] - 10),
608 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
609 +
610 + # Draw facial landmarks
611 + for landmark in face.landmarks:
612 + px = landmark.position.x
613 + py = landmark.position.y
614 + cv2.circle(img, (int(px), int(py)), 2, (255, 255, 0), -1)
615
616 + # Text detection if selected
617 + if "Text" in self.analysis_types:
618 + text = self.vision_client.text_detection(image=vision_image)
619 + if text.text_annotations:
620 + self.last_results["text"] = text.text_annotations
621 +
622 + # Draw text bounding boxes
623 + for text_annot in text.text_annotations[1:]: # Skip the first one (full text)
624 + box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices]
625 + points = np.array([[int(p[0]), int(p[1])] for p in box])
626 + cv2.polylines(img, [points], True, (255, 0, 0), 2)
627 +
628 + # Add recognized text
629 + cv2.putText(img, text_annot.description,
630 + (points[0][0], points[0][1] - 10),
631 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
632 + except Exception as e:
633 + # Handle API errors gracefully
634 + error_msg = f"API Error: {str(e)}"
635 + cv2.putText(img, error_msg, (10, 70),
636 + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
637 +
638 + # Process with OpenCV object detection if enabled
639 + if (self.processing_mode == "OpenCV Only" or self.processing_mode == "Hybrid (Google Vision + OpenCV)") and \
640 + self.opencv_detector is not None and \
641 + (self.frame_counter % self.process_every_n_frames == 0 or not self.object_trackers):
642
643 + try:
644 + # If using HOG detector (the fallback)
645 + if isinstance(self.opencv_detector, cv2.HOGDescriptor):
646 + # Detect people
647 + boxes, weights = self.opencv_detector.detectMultiScale(
648 + img, winStride=(8, 8), padding=(4, 4), scale=1.05
649 + )
650 +
651 + # Draw bounding boxes
652 + for i, (x, y, w, h) in enumerate(boxes):
653 + if weights[i] > 0.3: # Confidence threshold
654 + cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
655 + cv2.putText(img, f"Person: {int(weights[i] * 100)}%",
656 + (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
657 +
658 + # Add to trackers
659 + self.object_trackers[f"person_{i}"] = {
660 + "bbox": (x, y, w, h),
661 + "last_seen": self.frame_counter,
662 + "score": weights[i]
663 + }
664 + else:
665 + # Using YOLO or another DNN-based detector
666 + blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), swapRB=True, crop=False)
667 + self.opencv_detector.setInput(blob)
668 + layer_names = self.opencv_detector.getLayerNames()
669 + output_layers = [layer_names[i - 1] for i in self.opencv_detector.getUnconnectedOutLayers()]
670 + outputs = self.opencv_detector.forward(output_layers)
671 +
672 + # Process detections
673 + class_ids = []
674 + confidences = []
675 + boxes = []
676 +
677 + for output in outputs:
678 + for detection in output:
679 + scores = detection[5:]
680 + class_id = np.argmax(scores)
681 + confidence = scores[class_id]
682 +
683 + if confidence > self.confidence_threshold:
684 + # Object detected
685 + center_x = int(detection[0] * img.shape[1])
686 + center_y = int(detection[1] * img.shape[0])
687 + w = int(detection[2] * img.shape[1])
688 + h = int(detection[3] * img.shape[0])
689 +
690 + # Rectangle coordinates
691 + x = int(center_x - w / 2)
692 + y = int(center_y - h / 2)
693 +
694 + boxes.append([x, y, w, h])
695 + confidences.append(float(confidence))
696 + class_ids.append(class_id)
697 +
698 + # Apply non-maximum suppression
699 + indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confidence_threshold, 0.4)
700 +
701 + # Define COCO class names
702 + class_names = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
703 + "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
704 + "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
705 + "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
706 + "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
707 + "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
708 + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
709 + "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
710 + "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
711 + "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]
712 +
713 + for i in indices:
714 + if isinstance(i, (list, tuple)): # Handle different OpenCV versions
715 + i = i[0]
716 +
717 + box = boxes[i]
718 + x, y, w, h = box
719 +
720 + # Get class label and draw bounding box
721 + class_id = class_ids[i]
722 + label = f"{class_names[class_id]}: {int(confidences[i] * 100)}%"
723 + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
724 + cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
725 +
726 + # Add to trackers
727 + object_name = class_names[class_id]
728 + self.object_trackers[f"{object_name}_{i}"] = {
729 + "bbox": (x, y, w, h),
730 + "last_seen": self.frame_counter,
731 + "score": confidences[i],
732 + "class": object_name
733 + }
734 + except Exception as e:
735 + cv2.putText(img, f"OpenCV Error: {str(e)}", (10, 110),
736 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
737 +
738 + # Update object tracking for existing objects (every frame)
739 + objects_to_remove = []
740 + for obj_id, tracker_info in self.object_trackers.items():
741 + # Remove old trackers
742 + if self.frame_counter - tracker_info["last_seen"] > 30: # Remove after 30 frames
743 + objects_to_remove.append(obj_id)
744 + continue
745 +
746 + # Draw tracking box (for objects not updated this frame)
747 + if self.frame_counter - tracker_info["last_seen"] <= 5: # Only show recent tracked objects
748 + x, y, w, h = tracker_info["bbox"]
749 +
750 + # Use different color for tracked vs detected objects
751 + if self.frame_counter == tracker_info["last_seen"]:
752 + color = (0, 255, 0) # Green for newly detected
753 + else:
754 + color = (255, 165, 0) # Orange for tracked
755 +
756 + cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
757 +
758 + # Add label with confidence and tracking status
759 + tracking_age = self.frame_counter - tracker_info["last_seen"]
760 + label = f"{obj_id.split('_')[0]}: {int(tracker_info['score'] * 100)}%"
761 + if tracking_age > 0:
762 + label += f" (tracked {tracking_age}f)"
763 +
764 + cv2.putText(img, label, (x, y - 10),
765 + cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
766 +
767 + # Remove expired trackers
768 + for obj_id in objects_to_remove:
769 + del self.object_trackers[obj_id]
770
771   # Save current frame for next iteration
772   self.prev_gray = gray
773 +
774 + # Add processing mode indicator
775 + cv2.putText(img, f"Mode: {self.processing_mode}",
776 + (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
777 +
778 + # Add frame counter
779 + cv2.putText(img, f"Frame: {self.frame_counter}",
780 + (img.shape[1] - 150, img.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
781
782   return av.VideoFrame.from_ndarray(img, format="bgr24")
783
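For reference, the Vision call made once per interval inside transform() follows the standard google-cloud-vision pattern. A minimal offline sketch of the same call against a single still image; the file name and the credential setup are assumptions:

# Sketch only: the object_localization pattern used in transform(), applied to one JPEG.
from google.cloud import vision

client = vision.ImageAnnotatorClient()  # assumes credentials are already configured
with open("sample.jpg", "rb") as f:     # hypothetical input file
    image = vision.Image(content=f.read())
response = client.object_localization(image=image)
for obj in response.localized_object_annotations:
    print(obj.name, round(obj.score, 2))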
1003
1004   return resources
1005
1006 + def process_video_file(video_file, analysis_types, processing_mode="Hybrid (Google Vision + OpenCV)",
1007 + track_update_frames=5, confidence_threshold=0.5, vision_update_interval=1.0,
1008 + max_results=10, enable_face_landmarks=True, tracking_algorithm="KCF",
1009 + motion_sensitivity=32, prioritize_vision=True, blend_results=True):
1010   """Process an uploaded video file with enhanced Vision AI detection and analytics"""
1011   # Create a temporary file to save the uploaded video
1012   with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
1021   if not cap.isOpened():
1022   st.error("Error opening video file")
1023   os.unlink(temp_video_path)
1024 + return None, None
1025
1026   # Get video properties
1027   width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
1037   # Scene change detection threshold
1038   scene_change_threshold = 40.0 # Adjust as needed: lower = more sensitive
1039   # Process every Nth frame to reduce API calls
1040 + process_every_n_frames = track_update_frames
1041 +
1042 + # Initialize object trackers dictionary for continuous tracking
1043 + object_trackers = {}
1044 +
1045 + # Motion history parameters
1046 + motion_threshold = motion_sensitivity
1047 + max_time_delta = 0.5
1048 + min_time_delta = 0.05
1049
1050   # Check OpenCV version for compatibility with advanced features
1051   opencv_version = cv2.__version__
1067   use_advanced_tracking = False
1068   # ----------------- End Parameters -----------------
1069
1070 + # Initialize OpenCV detector if needed
1071 + opencv_detector = None
1072 + if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1073 + try:
1074 + # Check if YOLO model files exist
1075 + weights_path = os.path.join(os.path.dirname(__file__), "models/yolov3.weights")
1076 + config_path = os.path.join(os.path.dirname(__file__), "models/yolov3.cfg")
1077 +
1078 + if os.path.exists(weights_path) and os.path.exists(config_path):
1079 + opencv_detector = cv2.dnn.readNetFromDarknet(config_path, weights_path)
1080 + st.info("Using YOLO model for OpenCV detection")
1081 + else:
1082 + # Fallback to HOG detector for people
1083 + opencv_detector = cv2.HOGDescriptor()
1084 + opencv_detector.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
1085 + st.info("Using basic OpenCV HOG detector. For better results, install YOLO model files.")
1086 + except Exception as e:
1087 + st.warning(f"Could not initialize OpenCV detector: {str(e)}. Falling back to basic detection.")
1088 +
1089 + # Initialize the selected tracking algorithm
1090 + if tracking_algorithm == "CSRT":
1091 + tracker_create_func = cv2.legacy.TrackerCSRT_create
1092 + elif tracking_algorithm == "KCF":
1093 + tracker_create_func = cv2.legacy.TrackerKCF_create
1094 + elif tracking_algorithm == "MOSSE":
1095 + tracker_create_func = cv2.legacy.TrackerMOSSE_create
1096 + elif tracking_algorithm == "MedianFlow":
1097 + tracker_create_func = cv2.legacy.TrackerMedianFlow_create
1098 + else:
1099 + # Default to KCF if specified algorithm not available
1100 + tracker_create_func = cv2.legacy.TrackerKCF_create
1101 +
1102   # Inform user if video is being truncated
1103   if int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) > max_frames:
1104   st.info("⚠️ Video is longer than 10 seconds. Only the first 10 seconds will be processed.")
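The tracker factories above live under cv2.legacy, which is only present in contrib builds of OpenCV; on stock opencv-python or older 4.x builds the constructors may sit directly on cv2 or be missing entirely. A hedged fallback sketch (the helper name is an assumption, not part of the commit):

# Sketch only: pick a tracker constructor while tolerating builds without cv2.legacy.
def get_tracker_factory(name="KCF"):
    """Return a Tracker*_create callable, or None if this OpenCV build has no tracker support."""
    legacy = getattr(cv2, "legacy", None)
    for namespace in (legacy, cv2):
        if namespace is not None:
            factory = getattr(namespace, f"Tracker{name}_create", None)
            if factory is not None:
                return factory
    return None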
1141   previous_frame_gray = None
1142   prev_points = None
1143
1144 + # Display mode being used
1145 + st.info(f"Processing with {processing_mode} mode")
1146 +
1147 + # The rest of the video processing code would follow...
1148
1149   def load_bigquery_table(dataset_id, table_id, limit=1000):
1150   """Load data directly from an existing BigQuery table"""
1791
1792   # Analysis settings
1793   st.sidebar.markdown("### Video Analysis Settings")
1794 +
1795 + # Add processing mode selection
1796 + processing_mode = st.sidebar.radio(
1797 + "Processing Engine",
1798 + ["Hybrid (Google Vision + OpenCV)", "Google Vision API Only", "OpenCV Only"],
1799 + help="Select which technology to use for video analysis"
1800 + )
1801 +
1802 + # Common analysis types selection
1803 + st.sidebar.markdown("### Detection Types")
1804   analysis_types = []
1805   if st.sidebar.checkbox("Object Detection", value=True):
1806   analysis_types.append("Objects")
1809   if st.sidebar.checkbox("Text Recognition"):
1810   analysis_types.append("Text")
1811
1812 + # Add motion tracking option
1813 + if st.sidebar.checkbox("Motion Tracking", value=True):
1814 + analysis_types.append("Motion")
1815 +
1816 + # Settings specific to the selected processing mode
1817   st.sidebar.markdown("---")
1818 + st.sidebar.markdown(f"### {processing_mode} Settings")
1819 +
1820 + # Parameters for all modes
1821 + track_update_frames = 5
1822 + confidence_threshold = 0.5
1823 +
1824 + # Mode-specific parameters
1825 + if processing_mode == "Google Vision API Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1826 + # Google Vision parameters
1827 + st.sidebar.markdown("#### Google Vision Parameters")
1828 + vision_update_interval = st.sidebar.slider(
1829 + "Vision API update interval (seconds)",
1830 + min_value=0.5,
1831 + max_value=5.0,
1832 + value=1.0,
1833 + step=0.5,
1834 + help="How often to call the Vision API (longer intervals save API quota)"
1835 + )
1836 +
1837 + confidence_threshold = st.sidebar.slider(
1838 + "Google Vision Confidence Threshold",
1839 + min_value=0.0,
1840 + max_value=1.0,
1841 + value=0.5,
1842 + help="Minimum confidence score for Google Vision detections"
1843 + )
1844 +
1845 + # Detailed API options (using an expander for advanced settings)
1846 + with st.sidebar.expander("Advanced Vision API Settings"):
1847 + max_results = st.slider(
1848 + "Max objects per frame",
1849 + min_value=1,
1850 + max_value=20,
1851 + value=10,
1852 + help="Maximum number of objects to detect per frame"
1853 + )
1854 +
1855 + enable_face_landmarks = st.checkbox(
1856 + "Enable Face Landmarks",
1857 + value=True,
1858 + help="Detect facial features (eyes, nose, etc.)"
1859 + )
1860 +
1861 + if processing_mode == "OpenCV Only" or processing_mode == "Hybrid (Google Vision + OpenCV)":
1862 + # OpenCV parameters
1863 + st.sidebar.markdown("#### OpenCV Parameters")
1864 +
1865 + track_update_frames = st.sidebar.slider(
1866 + "Update OpenCV tracking every N frames",
1867 + min_value=1,
1868 + max_value=15,
1869 + value=5,
1870 + help="Lower values = more accurate tracking but higher processing load"
1871 + )
1872 +
1873 + if processing_mode == "OpenCV Only":
1874 + # Only show this in OpenCV-only mode
1875 + confidence_threshold = st.sidebar.slider(
1876 + "OpenCV Detector Confidence Threshold",
1877 + min_value=0.0,
1878 + max_value=1.0,
1879 + value=0.4,
1880 + help="Minimum confidence score for OpenCV detections"
1881 + )
1882 +
1883 + # OpenCV tracking options
1884 + with st.sidebar.expander("OpenCV Tracking Options"):
1885 + tracking_algorithm = st.selectbox(
1886 + "Tracking Algorithm",
1887 + ["KCF", "CSRT", "MOSSE", "MedianFlow"],
1888 + index=0,
1889 + help="Different algorithms have different speed/accuracy tradeoffs"
1890 + )
1891 +
1892 + motion_sensitivity = st.slider(
1893 + "Motion Sensitivity",
1894 + min_value=10,
1895 + max_value=100,
1896 + value=32,
1897 + help="Lower values detect more subtle motion"
1898 + )
1899 +
1900 + # Hybrid-specific settings
1901 + if processing_mode == "Hybrid (Google Vision + OpenCV)":
1902 + # Hybrid specific parameters
1903 + st.sidebar.markdown("#### Hybrid Mode Settings")
1904 + prioritize_vision = st.sidebar.radio(
1905 + "When results conflict, prioritize:",
1906 + ["Google Vision (more accurate)", "OpenCV (faster)"],
1907 + index=0,
1908 + help="Which detection source to prioritize when there are conflicting results"
1909 + )
1910 +
1911 + blend_results = st.sidebar.checkbox(
1912 + "Blend detection results",
1913 + value=True,
1914 + help="Combine detections from both systems for better accuracy"
1915 + )
1916 +
1917 + # Display warning about API usage
1918 + st.sidebar.markdown("---")
1919 + if processing_mode != "OpenCV Only":
1920 + st.sidebar.warning("⚠️ Google Vision API usage may incur costs. Use responsibly.")
1921
1922   # Upload Video mode only - removed real-time camera option
1923   st.markdown("""
1924   #### 📤 Video Analysis
1925
1926 + Upload a video file to analyze it using the selected processing engine.
1927
1928   **Instructions:**
1929 + 1. Select the processing mode and parameters in the sidebar
1930   2. Upload a video file (MP4, MOV, AVI)
1931   3. Click "Process Video" to begin analysis
1932   4. Download the processed video when complete
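Several of the names collected above (vision_update_interval, max_results, enable_face_landmarks, tracking_algorithm, motion_sensitivity, prioritize_vision, blend_results) are only assigned inside the mode-specific branches, while the next hunk reads all of them unconditionally when building processing_params. A small hedged sketch of defaults that could be set before those branches so every mode stays safe; the values mirror the widget defaults shown above and are otherwise assumptions:

# Sketch only: fallback values so processing_params can always be built, whatever mode is chosen.
vision_update_interval = 1.0
max_results = 10
enable_face_landmarks = True
tracking_algorithm = "KCF"
motion_sensitivity = 32
prioritize_vision = "Google Vision (more accurate)"
blend_results = True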
1949   if not analysis_types:
1950   st.warning("Please select at least one analysis type.")
1951   else:
1952 + with st.spinner(f"Processing video with {processing_mode} mode (max 10 seconds)..."):
1953   try:
1954 + # Create a dict of processing parameters to pass to the processing function
1955 + processing_params = {
1956 + "processing_mode": processing_mode,
1957 + "track_update_frames": track_update_frames,
1958 + "confidence_threshold": confidence_threshold,
1959 + "vision_update_interval": vision_update_interval,
1960 + "max_results": max_results,
1961 + "enable_face_landmarks": enable_face_landmarks,
1962 + "tracking_algorithm": tracking_algorithm,
1963 + "motion_sensitivity": motion_sensitivity,
1964 + "prioritize_vision": prioritize_vision,
1965 + "blend_results": blend_results
1966 + }
1967 +
1968 + # Process the video with the parameters
1969 + processed_video, results = process_video_file(uploaded_file, analysis_types, **processing_params)
1970
1971   if processed_video:
1972   # Offer download of processed video
2273   st.success(f"Successfully uploaded to {dataset_id}.{table_id}")
2274   st.write(f"Rows: {result['num_rows']}")
2275   st.write(f"Size: {result['size_bytes'] / 1024:.2f} KB")
2276 +