diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -36,9 +36,6 @@ from langchain.memory import ConversationBufferMemory from langchain_community.document_loaders import TextLoader import re import base64 -from numpy.lib.stride_tricks import as_strided -import tarfile -import urllib.request # Set page config st.set_page_config( @@ -208,21 +205,21 @@ def analyze_image(image, analysis_types, confidence_threshold=0.5): if label.score >= confidence_threshold} if "Objects" in analysis_types: - objects = [] - if "objects" in analysis_types or "all" in analysis_types: - # Use SSD model for object detection - net, classes, output_layer_names = load_ssd_model() - objects = detect_objects_ssd(np.array(image), net, classes, output_layer_names, confidence_threshold) + objects = client.object_localization(image=vision_image) + # Apply confidence threshold + filtered_objects = [obj for obj in objects.localized_object_annotations + if obj.score >= confidence_threshold] - objects_data = {obj['label']: round(obj['confidence'] * 100) - for obj in objects} + objects_data = {obj.name: round(obj.score * 100) + for obj in filtered_objects} # Draw object boundaries - for obj in objects: - x, y, w, h = obj['bbox'] - draw.rectangle([x, y, x+w, y+h], outline='red', width=2) - draw.text((x, y - 10), - f"{obj['label']}: {int(obj['confidence'] * 100)}%", + for obj in filtered_objects: + box = [(vertex.x * image.width, vertex.y * image.height) + for vertex in obj.bounding_poly.normalized_vertices] + draw.polygon(box, outline='red', width=2) + draw.text((box[0][0], box[0][1] - 10), + f"{obj.name}: {int(obj.score * 100)}%", fill='red') if "Text" in analysis_types: @@ -434,359 +431,137 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None): return summary_img class VideoProcessor(VideoProcessorBase): - """Process video frames with hybrid local/cloud processing""" + """Process video frames for real-time analysis with enhanced OpenCV processing""" - def __init__(self, analysis_types: List[str], processing_mode="hybrid", stabilize=False, - edge_detection=None, segmentation=None, enable_tracking=False): + def __init__(self, analysis_types: List[str]): self.analysis_types = analysis_types - self.processing_mode = processing_mode # "local", "cloud", or "hybrid" - self.stabilize = stabilize - self.edge_detection = edge_detection # None, "canny", "sobel", or "laplacian" - self.segmentation = segmentation # None, "watershed", or "grabcut" - self.enable_tracking = enable_tracking - self.frame_counter = 0 - self.cloud_process_interval = 10 # Process with Google Vision every 10 frames + self.process_every_n_frames = 5 # Process every 5th frame self.vision_client = client # Store client reference self.last_results = {} # Cache results between processed frames self.last_processed_time = time.time() self.processing_active = True - # Initialize motion tracking + # Enhanced tracking + self.object_trackers = {} + self.tracking_points = None self.prev_gray = None - self.motion_history = [] - self.motion_threshold = 40.0 # Threshold for scene change detection - self.scene_changes = [] - - # Initialize local models if needed - self.net = None - self.classes = None - self.output_layer_names = None - self.face_cascade = None - - if processing_mode in ["local", "hybrid"]: - # Initialize SSD model - self.net, self.classes, self.output_layer_names = load_ssd_model() - self.face_cascade = load_haar_cascades() - - # Initialize object tracker if enabled - if self.enable_tracking: - self.object_tracker = 
ObjectTracker(tracker_type="CSRT") - self.tracking_initialized = False - self.tracked_objects = {} - # How often to reinitialize tracking with new detections (in frames) - self.detection_interval = 15 - + + # Motion history for better activity detection + self.motion_history = np.zeros((480, 640), np.float32) + self.motion_threshold = 32 + self.max_time_delta = 0.5 + self.min_time_delta = 0.05 + def transform(self, frame: av.VideoFrame) -> av.VideoFrame: img = frame.to_ndarray(format="bgr24") self.frame_counter += 1 + # Resize for consistent processing if needed + if img.shape[0] != 480 or img.shape[1] != 640: + img = cv2.resize(img, (640, 480)) + # Add status display on all frames cv2.putText(img, - f"Vision AI: {self.processing_mode.title()} Mode", + f"Vision AI: {'Active' if self.processing_active else 'Paused'}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) - # Make a copy for processing that won't affect the original - processed_img = img.copy() - - # Prepare grayscale image for motion tracking - current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0) - - # Stabilize frame if enabled - if self.stabilize and self.prev_gray is not None: - img = stabilize_frame(img, self.prev_gray, current_gray) - processed_img = img.copy() - # Update current_gray after stabilization - current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0) + # Convert to grayscale for motion detection + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # Process motion if we have a previous frame + # Apply motion detection for all frames if self.prev_gray is not None: - # Calculate optical flow for motion detection - motion_level, motion_area, motion_mask, flow = calculate_optical_flow( - self.prev_gray, current_gray - ) - - # Store motion metrics + # Calculate frame difference for smoother motion detection + frame_diff = cv2.absdiff(gray, self.prev_gray) + _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY) timestamp = time.time() - self.motion_history.append({ - "timestamp": timestamp, - "frame": self.frame_counter, - "motion_level": motion_level, - "motion_area": motion_area * 100 # Convert to percentage - }) - - # Detect scene changes - if motion_level > self.motion_threshold: - self.scene_changes.append(self.frame_counter) - # Mark scene change on frame - cv2.putText(img, "SCENE CHANGE", - (img.shape[1] // 2 - 100, 50), - cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2) - - # Visualize motion - motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET) - motion_overlay = cv2.resize(motion_overlay, (img.shape[1] // 4, img.shape[0] // 4)) - - # Add motion overlay to corner of frame - h, w = motion_overlay.shape[:2] - img[10:10+h, img.shape[1]-10-w:img.shape[1]-10] = motion_overlay - - # Add motion level indicator - cv2.putText(img, f"Motion: {motion_level:.1f}", - (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) - - # Store current frame as previous for next iteration - self.prev_gray = current_gray - - # Process with local models if in local or hybrid mode - detected_objects = [] - - if self.processing_mode in ["local", "hybrid"]: - # Object detection with SSD - if "Objects" in self.analysis_types: - try: - # Use SSD model for object detection - objects = detect_objects_ssd( - processed_img, self.net, self.classes, - self.output_layer_names, confidence_threshold=0.4 - ) - - # Update results cache - self.last_results["objects"] = objects - 
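The rewritten `VideoProcessor` drops per-frame SSD inference in favor of a motion-history image: each frame difference is thresholded into a binary silhouette and folded into a float32 history buffer. Below is a minimal, self-contained sketch of that pattern; it assumes opencv-contrib-python (the `cv2.motempl` functions ship with the contrib build) and reuses the 0.5 s window and threshold of 32 configured in `__init__` above, but it is an illustration rather than the exact code path in `transform`.

```python
# Sketch of the motion-history update used by the new VideoProcessor (assumes
# opencv-contrib-python; constants mirror max_time_delta=0.5 and motion_threshold=32).
import time
import cv2
import numpy as np

MHI_DURATION = 0.5     # seconds of motion kept in the history image
MOTION_THRESHOLD = 32  # pixel-difference threshold for the silhouette

def update_motion_history(prev_gray, gray, mhi):
    """Fold the thresholded frame difference into the motion-history image (MHI)."""
    frame_diff = cv2.absdiff(gray, prev_gray)
    _, silhouette = cv2.threshold(frame_diff, MOTION_THRESHOLD, 1, cv2.THRESH_BINARY)
    timestamp = time.time()
    cv2.motempl.updateMotionHistory(silhouette, mhi, timestamp, MHI_DURATION)
    # Scale the float32 MHI into a 0-255 image for a corner overlay or debugging view.
    vis = np.uint8(np.clip((mhi - (timestamp - MHI_DURATION)) / MHI_DURATION, 0, 1) * 255)
    return vis

# Usage: allocate mhi = np.zeros((480, 640), np.float32) once, then per frame:
# vis = update_motion_history(prev_gray, gray, mhi)
```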
- # Draw detected objects - for obj in objects: - x, y, w, h = obj["bbox"] - label = obj["label"] - confidence = obj["confidence"] - - # Add to detected objects list for tracking - detected_objects.append((x, y, w, h, label)) - - # Draw box (skip if tracking is enabled, as tracker will draw boxes) - if not self.enable_tracking: - cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) - - # Add label with confidence - label_text = f"{label}: {int(confidence * 100)}%" - cv2.putText(img, label_text, - (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - except Exception as e: - cv2.putText(img, f"SSD Error: {str(e)[:30]}", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + # Update motion history + cv2.motempl.updateMotionHistory(motion_mask, self.motion_history, timestamp, self.max_time_delta) - # Face detection with Haar cascades - if "Face Detection" in self.analysis_types: - try: - faces = detect_faces_haar(processed_img, self.face_cascade) - - # Update results cache - self.last_results["faces"] = faces - - # Add to detected objects list for tracking - for face in faces: - x, y, w, h = face["box"] - detected_objects.append((x, y, w, h, "Face")) - - # Draw detected faces (skip if tracking is enabled) - if not self.enable_tracking: - for face in faces: - x, y, w, h = face["box"] - - # Draw box - cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2) - except Exception as e: - cv2.putText(img, f"Face Detection Error: {str(e)[:30]}", - (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Handle object tracking if enabled - if self.enable_tracking: - try: - # Initialize tracking on first frame or periodically with new detections - if not self.tracking_initialized or self.frame_counter % self.detection_interval == 0: - # Reset if tracking is already initialized - if self.tracking_initialized: - self.object_tracker = ObjectTracker(tracker_type="CSRT") - - # Register each detected object with the tracker - for x, y, w, h, label in detected_objects: - self.object_tracker.register(processed_img, (x, y, w, h), label) - - self.tracking_initialized = True + # Calculate motion gradient + mg_mask = cv2.motempl.calcMotionGradient( + self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5) + + # Visualize motion segments (optional) + if "Motion" in self.analysis_types: + seg_mask, segments = cv2.motempl.segmentMotion( + self.motion_history, timestamp, self.max_time_delta) - # Update tracking on every frame - self.tracked_objects = self.object_tracker.update(processed_img) + # Visualize motion segments + motion_img = np.zeros_like(img) + for i, segment in enumerate(segments): + if segment[1] < 50: # Filter out small segments + continue + # Draw motion regions with random colors + color = np.random.randint(0, 255, 3).tolist() + motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1) - # Draw tracked objects - img = self.object_tracker.draw_tracked_objects(img, self.tracked_objects) + # Overlay motion visualization + alpha = 0.3 + cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img) - # Add tracking status - cv2.putText(img, f"Tracking {len(self.tracked_objects)} objects", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2) - except Exception as e: - cv2.putText(img, f"Tracking Error: {str(e)[:30]}", - (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Process with Google Vision API periodically if in cloud or hybrid mode + # Process at regular intervals current_time = time.time() - 
should_process_cloud = ( - self.processing_mode in ["cloud", "hybrid"] and - (self.frame_counter % self.cloud_process_interval == 0) and - (current_time - self.last_processed_time > 1.0) and # Max once per second - self.processing_active - ) - - if should_process_cloud: + if current_time - self.last_processed_time > 1.0 and self.processing_active: # Process max once per second self.last_processed_time = current_time - try: - # Convert to PIL Image for Vision API - pil_img = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)) - - # Process with Vision API - img_byte_arr = io.BytesIO() - pil_img.save(img_byte_arr, format='PNG') - content = img_byte_arr.getvalue() - vision_image = vision.Image(content=content) - - # Update status text - cv2.putText(img, "Cloud Processing...", (10, 180), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) - - # Process according to selected analysis types - if "Text" in self.analysis_types: - text = self.vision_client.text_detection(image=vision_image) - self.last_results["text"] = text.text_annotations - - if "Labels" in self.analysis_types: - labels = self.vision_client.label_detection(image=vision_image, max_results=5) - self.last_results["labels"] = labels.label_annotations - - # Only use Vision API for objects/faces if in cloud-only mode - if self.processing_mode == "cloud": - if "Objects" in self.analysis_types: - objects = self.vision_client.object_localization(image=vision_image) - self.last_results["objects"] = objects.localized_object_annotations - - if "Face Detection" in self.analysis_types: - faces = self.vision_client.face_detection(image=vision_image) - self.last_results["faces"] = faces.face_annotations - - except Exception as e: - # Show error on frame - cv2.putText(img, f"API Error: {str(e)[:30]}", - (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + # Process with Vision API as in original code + # ... existing API processing code ... - # Always draw the cached cloud results for smooth display - try: - # Draw text detections from cloud - if "text" in self.last_results and "Text" in self.analysis_types: - if len(self.last_results["text"]) > 1: # Skip the first one (full text) - for text_annot in self.last_results["text"][1:]: - box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] - pts = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [pts], True, (255, 0, 0), 1) + # Update tracking between API calls for smoother object tracking + if "objects" in self.last_results and "Objects" in self.analysis_types: + # Use OpenCV's built-in object trackers for smoother tracking between API calls + for obj in self.last_results["objects"]: + obj_id = obj.name + str(hash(str(obj.bounding_poly.normalized_vertices))) - # Show full text summary - if self.last_results["text"]: - full_text = self.last_results["text"][0].description - words = full_text.split() - short_text = " ".join(words[:3]) - if len(words) > 3: - short_text += "..." + if obj_id not in self.object_trackers: + # Initialize a new tracker + tracker = cv2.TrackerKCF_create() # or other trackers like CSRT, MIL, etc. 
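Between the once-per-second Vision API calls, the new code keeps boxes moving by seeding an OpenCV tracker from each `LocalizedObjectAnnotation`. The sketch below shows that hand-off in isolation: converting `normalized_vertices` (0 to 1 range) into a pixel `(x, y, w, h)` box and driving a KCF tracker with it. It assumes opencv-contrib-python for `cv2.TrackerKCF_create` and uses hypothetical helper names; it is not the literal code added here.

```python
# Illustrative bridge from a Vision API detection to an OpenCV tracker
# (assumes opencv-contrib-python; `obj` is a LocalizedObjectAnnotation,
# `frame` is a BGR numpy array such as the one handled in transform()).
import cv2

def bbox_from_normalized_vertices(obj, frame_w, frame_h):
    """Convert normalized_vertices to an integer (x, y, w, h) pixel box."""
    xs = [v.x * frame_w for v in obj.bounding_poly.normalized_vertices]
    ys = [v.y * frame_h for v in obj.bounding_poly.normalized_vertices]
    x_min, y_min = int(min(xs)), int(min(ys))
    return (x_min, y_min, int(max(xs)) - x_min, int(max(ys)) - y_min)

def start_tracker(frame, obj):
    """Seed a KCF tracker from a single detection."""
    tracker = cv2.TrackerKCF_create()
    tracker.init(frame, bbox_from_normalized_vertices(obj, frame.shape[1], frame.shape[0]))
    return tracker

# Per-frame update between API calls:
#   ok, (x, y, w, h) = tracker.update(frame)
#   if ok:
#       cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
```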
- # Display text at top of frame - cv2.putText(img, f"Text: {short_text}", - (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) - - # Draw labels from cloud - if "labels" in self.last_results and "Labels" in self.analysis_types: - y_pos = img.shape[0] - 50 - for i, label in enumerate(self.last_results["labels"][:3]): # Show top 3 labels - label_text = f"Label: {label.description} ({int(label.score*100)}%)" - cv2.putText(img, label_text, - (img.shape[1] - 300, y_pos - i*20), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2) - - # Draw cloud-detected objects and faces only if in cloud-only mode - if self.processing_mode == "cloud" and not self.enable_tracking: - # Draw objects - if "objects" in self.last_results and "Objects" in self.analysis_types: - for obj in self.last_results["objects"]: - box = [(vertex.x * img.shape[1], vertex.y * img.shape[0]) - for vertex in obj.bounding_poly.normalized_vertices] - box = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [box], True, (0, 255, 0), 2) - # Add label - cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%", - (int(box[0][0][0]), int(box[0][0][1]) - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + # Get bounding box coordinates + box_points = [(vertex.x * img.shape[1], vertex.y * img.shape[0]) + for vertex in obj.bounding_poly.normalized_vertices] + x_min = min([p[0] for p in box_points]) + y_min = min([p[1] for p in box_points]) + x_max = max([p[0] for p in box_points]) + y_max = max([p[1] for p in box_points]) + + # Initialize tracker + bbox = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)) + tracker.init(img, bbox) + self.object_trackers[obj_id] = { + "tracker": tracker, + "name": obj.name, + "score": obj.score, + "last_update": self.frame_counter + } - # Draw faces - if "faces" in self.last_results and "Face Detection" in self.analysis_types: - for face in self.last_results["faces"]: - vertices = face.bounding_poly.vertices - points = [(vertex.x, vertex.y) for vertex in vertices] - pts = np.array(points, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [pts], True, (0, 0, 255), 2) - - # Draw landmarks - for landmark in face.landmarks: - px = int(landmark.position.x) - py = int(landmark.position.y) - cv2.circle(img, (px, py), 2, (255, 255, 0), -1) - - except Exception as e: - cv2.putText(img, f"Display Error: {str(e)[:30]}", - (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Apply edge detection if enabled - if self.edge_detection: - # Create edge detection visualization - edge_img = detect_edges(processed_img, method=self.edge_detection) - - # Display edge detection mode - cv2.putText(img, f"Edge: {self.edge_detection.title()}", - (10, img.shape[0] - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Update all trackers + trackers_to_remove = [] + for obj_id, tracker_info in self.object_trackers.items(): + # Only keep trackers for a limited number of frames + if self.frame_counter - tracker_info["last_update"] > 30: # Remove after 30 frames + trackers_to_remove.append(obj_id) + continue + + success, bbox = tracker_info["tracker"].update(img) + if success: + # Draw tracking box + x, y, w, h = [int(v) for v in bbox] + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) + + # Add label with confidence + label = f"{tracker_info['name']}: {int(tracker_info['score'] * 100)}%" + cv2.putText(img, label, (x, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - # Show edge detection in a corner (similar to motion overlay) - edge_small = 
cv2.resize(edge_img, (img.shape[1] // 4, img.shape[0] // 4)) - h, w = edge_small.shape[:2] - img[10:10+h, 10:10+w] = edge_small + # Remove expired trackers + for obj_id in trackers_to_remove: + del self.object_trackers[obj_id] - # Apply segmentation if enabled - if self.segmentation: - try: - # Create segmentation visualization - segmented_img, _ = segment_image(processed_img, method=self.segmentation) - - # Display segmentation mode - cv2.putText(img, f"Segment: {self.segmentation.title()}", - (10, img.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Save current frame for next iteration + self.prev_gray = gray - # Show segmentation in a corner opposite to edge detection or motion - seg_small = cv2.resize(segmented_img, (img.shape[1] // 4, img.shape[0] // 4)) - h, w = seg_small.shape[:2] - img[10+h+10:10+h+10+h, 10:10+w] = seg_small - except Exception as e: - cv2.putText(img, f"Segmentation Error: {str(e)[:30]}", - (10, img.shape[0] - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Add processing mode and stabilization status - mode_text = f"Mode: {self.processing_mode.title()}" - features = [] - if self.stabilize: - features.append("Stabilized") - if self.enable_tracking: - features.append("Tracking") - if features: - mode_text += f" | {', '.join(features)}" - - cv2.putText(img, mode_text, - (10, img.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - return av.VideoFrame.from_ndarray(img, format="bgr24") def analyze_document(file_content, processor_id, location="us"): @@ -1011,20 +786,24 @@ def list_bigquery_resources(): return resources -def process_video_file(video_file, analysis_types, processing_mode="hybrid", stabilize=False, - edge_detection=None, segmentation=None, enable_tracking=False): - """Process video file with computer vision techniques""" - # Create output directory if it doesn't exist - output_dir = "processed_videos" - os.makedirs(output_dir, exist_ok=True) - - # Generate a unique output filename - timestamp = int(time.time()) - output_filename = os.path.join(output_dir, f"processed_{timestamp}.mp4") - stats_filename = os.path.join(output_dir, f"stats_{timestamp}.json") +def process_video_file(video_file, analysis_types): + """Process an uploaded video file with enhanced Vision AI detection and analytics""" + # Create a temporary file to save the uploaded video + with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file: + temp_file.write(video_file.read()) + temp_video_path = temp_file.name + + # Create a temp file for the output video + output_path = f"{temp_video_path}_processed.mp4" + + # Open the video file + cap = cv2.VideoCapture(temp_video_path) + if not cap.isOpened(): + st.error("Error opening video file") + os.unlink(temp_video_path) + return None # Get video properties - cap = cv2.VideoCapture(video_file) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) @@ -1054,8 +833,8 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta fourcc = cv2.VideoWriter_fourcc(*'DIB ') # Uncompressed RGB out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True) - # Process every Nth frame to reduce API calls - cloud_process_interval = 10 # How often to use Google Vision API + # Process every Nth frame to reduce API calls but increase from 10 to 5 for more detail + process_every_n_frames = 5 # Create a progress bar progress_bar = st.progress(0) @@ -1067,28 +846,24 @@ def 
process_video_file(video_file, analysis_types, processing_mode="hybrid", sta "faces": 0, "text_blocks": 0, "labels": {}, - # Motion tracking - "motion_data": [], - "scene_changes": [], - "avg_motion_level": 0, - "processing_mode": processing_mode, - "stabilized": stabilize + # New advanced tracking + "object_tracking": {}, # Track object appearances by frame + "activity_metrics": [], # Track frame-to-frame differences + "scene_changes": [] # Track major scene transitions } - # Initialize object tracker if enabled - if enable_tracking: - object_tracker = ObjectTracker(tracker_type="CSRT") - tracked_objects = {} - detection_interval = 15 # How often to reinitialize tracking - - # Load models based on processing mode - if processing_mode in ["local", "hybrid"]: - yolo_net, yolo_classes, yolo_output_layers = load_yolo_model() - face_cascade = load_haar_cascades() - # For scene change detection and motion tracking previous_frame_gray = None - scene_change_threshold = 40.0 # Threshold for scene change detection + prev_points = None + lk_params = dict(winSize=(15, 15), + maxLevel=2, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)) + + # Feature detection params for tracking + feature_params = dict(maxCorners=100, + qualityLevel=0.3, + minDistance=7, + blockSize=7) try: frame_count = 0 @@ -1108,274 +883,255 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta cv2.putText(frame, f"Time: {frame_count/fps:.2f}s", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) - # Prepare grayscale image for motion analysis + # Activity detection and scene change detection current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0) - - # Stabilize frame if enabled - if stabilize and previous_frame_gray is not None: - frame = stabilize_frame(frame, previous_frame_gray, current_frame_gray) - # Update grayscale after stabilization - current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0) - - # Motion detection and scene change detection + if previous_frame_gray is not None: - # Calculate optical flow for motion detection - motion_level, motion_area, motion_mask, flow = calculate_optical_flow( - previous_frame_gray, current_frame_gray - ) + # Calculate frame difference for activity detection + frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray) + _, thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY) + thresh = cv2.dilate(thresh, None, iterations=2) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - # Store motion metrics - detection_stats["motion_data"].append({ - "time": frame_count/fps, - "motion_level": motion_level, - "motion_area": motion_area * 100 # Convert to percentage - }) + # Better activity metric using contour area + activity_level = sum(cv2.contourArea(c) for c in contours) / (frame.shape[0] * frame.shape[1]) + activity_level *= 100 # Convert to percentage + detection_stats["activity_metrics"].append((frame_count/fps, activity_level)) - # Scene change detection - if motion_level > scene_change_threshold: + # Add optical flow for better motion tracking + if "Objects" in analysis_types and prev_points is not None: + # Calculate optical flow + next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray, + current_frame_gray, + prev_points, + None, + **lk_params) + + # Select good points + if next_points is not None: + good_new 
= next_points[status==1] + good_old = prev_points[status==1] + + # Draw motion tracks + for i, (new, old) in enumerate(zip(good_new, good_old)): + a, b = new.ravel() + c, d = old.ravel() + # Draw motion lines + cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2) + cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1) + + # Scene change detection using contour analysis for more robust results + if activity_level > scene_change_threshold: detection_stats["scene_changes"].append(frame_count/fps) # Mark scene change on frame cv2.putText(frame, "SCENE CHANGE", (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2) - - # Visualize motion - motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET) - motion_overlay = cv2.resize(motion_overlay, (width // 4, height // 4)) - - # Add motion overlay to corner of frame - h, w = motion_overlay.shape[:2] - frame[10:10+h, width-10-w:width-10] = motion_overlay - - # Add motion indicator - cv2.putText(frame, f"Motion: {motion_level:.1f}", - (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) - - previous_frame_gray = current_frame_gray + + # Reset tracking points on scene change + prev_points = None - # Apply edge detection if enabled - if edge_detection: - # Create edge detection visualization in a corner - edge_img = detect_edges(frame, method=edge_detection) - - # Display edge detection mode - cv2.putText(frame, f"Edge: {edge_detection.title()}", - (10, height - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Update tracking points periodically + if frame_count % 5 == 0 or prev_points is None or len(prev_points) < 10: + prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params) - # Show edge detection in a corner - edge_small = cv2.resize(edge_img, (width // 4, height // 4)) - h, w = edge_small.shape[:2] - frame[10:10+h, 10:10+w] = edge_small + previous_frame_gray = current_frame_gray - # Apply segmentation if enabled - if segmentation: + # Process frames with Vision API + if frame_count % process_every_n_frames == 0: try: - # Create segmentation visualization - segmented_img, _ = segment_image(frame, method=segmentation) - - # Display segmentation mode - cv2.putText(frame, f"Segment: {segmentation.title()}", - (10, height - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Convert OpenCV frame to PIL Image for Vision API + pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - # Show segmentation in another corner - seg_small = cv2.resize(segmented_img, (width // 4, height // 4)) - h, w = seg_small.shape[:2] - frame[10+h+10:10+h+10+h, 10:10+w] = seg_small - except Exception as e: - cv2.putText(frame, f"Segmentation Error: {str(e)[:30]}", - (10, height - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Add processing mode indicator - mode_text = f"Mode: {processing_mode.title()}" - if stabilize: - mode_text += " | Stabilized" - cv2.putText(frame, mode_text, - (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - - # Handle object tracking if enabled - detected_objects = [] - - # Local processing (SSD and Haar cascades) - if processing_mode in ["local", "hybrid"]: - # Object detection with SSD - if "Objects" in analysis_types: - objects = detect_objects_ssd( - frame, yolo_net, yolo_classes, yolo_output_layers, confidence_threshold=0.5 - ) + # Create vision image + img_byte_arr = io.BytesIO() + pil_img.save(img_byte_arr, format='PNG') + content = img_byte_arr.getvalue() + vision_image = vision.Image(content=content) - # Collect 
objects for tracking - for obj in objects: - x, y, w, h = obj["bbox"] - label = obj["label"] - confidence = obj["confidence"] - - # Add to detected objects list for tracking - detected_objects.append((x, y, w, h, label)) - - # Update statistics and draw boxes (if tracking disabled) - if not enable_tracking: - if label in detection_stats["objects"]: - detection_stats["objects"][label] += 1 + # Apply analysis based on selected types with enhanced detail + if "Objects" in analysis_types: + objects = client.object_localization(image=vision_image) + # Draw boxes around detected objects with enhanced info + for obj in objects.localized_object_annotations: + obj_name = obj.name + # Update basic stats + if obj_name in detection_stats["objects"]: + detection_stats["objects"][obj_name] += 1 + else: + detection_stats["objects"][obj_name] = 1 + + # Enhanced object tracking + timestamp = frame_count/fps + if obj_name not in detection_stats["object_tracking"]: + detection_stats["object_tracking"][obj_name] = { + "first_seen": timestamp, + "last_seen": timestamp, + "frames_present": 1, + "timestamps": [timestamp] + } else: - detection_stats["objects"][label] = 1 + tracking = detection_stats["object_tracking"][obj_name] + tracking["frames_present"] += 1 + tracking["last_seen"] = timestamp + tracking["timestamps"].append(timestamp) + + # Calculate box coordinates + box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0]) + for vertex in obj.bounding_poly.normalized_vertices] + box = np.array(box, np.int32).reshape((-1, 1, 2)) + + # Draw more noticeable box with thicker lines + cv2.polylines(frame, [box], True, (0, 255, 0), 3) + + # Calculate box size for better placement of labels + x_min = min([p[0][0] for p in box]) + y_min = min([p[0][1] for p in box]) - # Draw box - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + # Draw filled box with opacity for better label visibility + overlay = frame.copy() + box_np = np.array(box) + hull = cv2.convexHull(box_np) + cv2.fillConvexPoly(overlay, hull, (0, 255, 0, 64)) + # Apply overlay with transparency + alpha = 0.3 + cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame) - # Add label with confidence - label_text = f"{label}: {int(confidence * 100)}%" + # Enhanced label with confidence and border + confidence = int(obj.score * 100) + label_text = f"{obj.name}: {confidence}%" + text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0] + + # Create better text background with rounded rectangle + text_bg_pts = np.array([ + [x_min, y_min - text_size[1] - 10], + [x_min + text_size[0] + 10, y_min - text_size[1] - 10], + [x_min + text_size[0] + 10, y_min], + [x_min, y_min] + ], np.int32) + + cv2.fillPoly(frame, [text_bg_pts], (0, 0, 0)) cv2.putText(frame, label_text, - (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - # Face detection with Haar cascades - if "Face Detection" in analysis_types: - faces = detect_faces_haar(frame, face_cascade) + (int(x_min) + 5, int(y_min) - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) - # Update faces count and add to detected objects for tracking - if not enable_tracking: - detection_stats["faces"] += len(faces) - - for face in faces: - x, y, w, h = face["box"] - detected_objects.append((x, y, w, h, "Face")) - - # Draw boxes only if tracking is disabled - if not enable_tracking: - # Draw box - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2) - - # Add tracking code - if enable_tracking: - try: - # Initialize tracking on first frame or periodically - if 
frame_count == 1 or frame_count % self.detection_interval == 0: - # Reset tracker periodically - if frame_count > 1: - object_tracker = ObjectTracker(tracker_type="CSRT") + if "Face Detection" in analysis_types: + faces = client.face_detection(image=vision_image) + # Track statistics + detection_stats["faces"] += len(faces.face_annotations) - # Register each detected object - for x, y, w, h, label in detected_objects: - object_tracker.register(frame, (x, y, w, h), label) - - # Update tracking on every frame - tracked_objects = object_tracker.update(frame) - - # Draw tracked objects - frame = object_tracker.draw_tracked_objects(frame, tracked_objects) - - # Add tracking status - cv2.putText(frame, f"Tracking {len(tracked_objects)} objects", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2) - - # Count object types in tracking - for _, (_, _, _, _, label) in tracked_objects.items(): - if label in detection_stats["objects"]: - detection_stats["objects"][label] += 1 - else: - detection_stats["objects"][label] = 1 + for face in faces.face_annotations: + vertices = face.bounding_poly.vertices + points = [(vertex.x, vertex.y) for vertex in vertices] + # Draw face box with thicker lines + pts = np.array(points, np.int32).reshape((-1, 1, 2)) + cv2.polylines(frame, [pts], True, (0, 0, 255), 3) - # Update faces count if any faces are being tracked - face_count = sum(1 for _, (_, _, _, _, label) in tracked_objects.items() if label == "Face") - detection_stats["faces"] += face_count - except Exception as e: - cv2.putText(frame, f"Tracking Error: {str(e)[:30]}", - (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Cloud processing with Google Vision API (less frequent) - if processing_mode in ["cloud", "hybrid"] and frame_count % cloud_process_interval == 0: - try: - # Convert to PIL Image for Vision API - pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - - # Create vision image - img_byte_arr = io.BytesIO() - pil_img.save(img_byte_arr, format='PNG') - content = img_byte_arr.getvalue() - vision_image = vision.Image(content=content) - - # Add cloud processing indicator - cv2.putText(frame, "Cloud Processing", (width - 200, 30), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) + # Enhanced face info visualization + emotions = [] + if face.joy_likelihood >= 3: + emotions.append("Joy") + if face.anger_likelihood >= 3: + emotions.append("Anger") + if face.surprise_likelihood >= 3: + emotions.append("Surprise") + if face.sorrow_likelihood >= 3: + emotions.append("Sorrow") + + emotion_text = ", ".join(emotions) if emotions else "Neutral" + x_min = min([p[0] for p in points]) + y_min = min([p[1] for p in points]) + + # Add emotion gauge bar for better visualization + emotions_map = { + "Joy": (0, 255, 0), # Green + "Anger": (0, 0, 255), # Red + "Surprise": (255, 255, 0), # Yellow + "Sorrow": (255, 0, 0) # Blue + } + + # Add detailed emotion text with colored background + text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0] + cv2.rectangle(frame, + (int(x_min), int(y_min) - text_size[1] - 8), + (int(x_min) + text_size[0] + 8, int(y_min)), + (0, 0, 0), -1) + + cv2.putText(frame, emotion_text, + (int(x_min) + 4, int(y_min) - 4), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + # Draw enhanced landmarks with connections + if len(face.landmarks) > 0: + landmarks = [(int(landmark.position.x), int(landmark.position.y)) + for landmark in face.landmarks] + + # Draw each landmark + for landmark in landmarks: + cv2.circle(frame, landmark, 3, 
(255, 255, 0), -1) + + # Connect landmarks for eyes, nose, mouth if there are enough points + if len(landmarks) >= 8: + # These indices are approximate - adjust based on your actual data + eye_indices = [0, 1, 2, 3] + for i in range(len(eye_indices)-1): + cv2.line(frame, landmarks[eye_indices[i]], + landmarks[eye_indices[i+1]], (255, 255, 0), 1) - # Text detection if "Text" in analysis_types: - text = self.vision_client.text_detection(image=vision_image) - + text = client.text_detection(image=vision_image) # Update stats - if text.text_annotations: + if len(text.text_annotations) > 1: detection_stats["text_blocks"] += len(text.text_annotations) - 1 - # Draw text boxes - for text_annot in text.text_annotations[1:]: - box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] - pts = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [pts], True, (255, 0, 0), 2) - - # Show text summary + # Add overall text summary to the frame + if text.text_annotations: full_text = text.text_annotations[0].description words = full_text.split() short_text = " ".join(words[:5]) if len(words) > 5: short_text += "..." - + + # Add text summary to top of frame with better visibility + cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1) cv2.putText(frame, f"Text: {short_text}", - (10, height - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) + (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + # Draw text boxes with improved visibility + for text_annot in text.text_annotations[1:]: + box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] + pts = np.array(box, np.int32).reshape((-1, 1, 2)) + cv2.polylines(frame, [pts], True, (255, 0, 0), 2) # Thicker lines - # Label detection + # Add Labels analysis for more detail if "Labels" in analysis_types: - labels = self.vision_client.label_detection(image=vision_image, max_results=5) + labels = client.label_detection(image=vision_image, max_results=5) + + # Add labels to the frame with better visibility + y_pos = 120 + cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1) + cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - # Update stats and show labels + # Track stats and show labels for i, label in enumerate(labels.label_annotations): + # Update stats if label.description in detection_stats["labels"]: detection_stats["labels"][label.description] += 1 else: detection_stats["labels"][label.description] = 1 - - # Display on frame - cv2.putText(frame, f"Label: {label.description}", - (width - 200, 60 + i*30), - cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2) - - # Only do object/face detection with Vision API in cloud-only mode - if self.processing_mode == "cloud" and not enable_tracking: - if "Objects" in analysis_types: - objects = self.vision_client.object_localization(image=vision_image) - - for obj in objects.localized_object_annotations: - # Update stats - if obj.name in detection_stats["objects"]: - detection_stats["objects"][obj.name] += 1 - else: - detection_stats["objects"][obj.name] = 1 - - # Draw box - box = [(vertex.x * width, vertex.y * height) - for vertex in obj.bounding_poly.normalized_vertices] - box = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [box], True, (0, 255, 0), 2) - # Add label - x_min = min([p[0][0] for p in box]) - y_min = min([p[0][1] for p in box]) - cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%", - (int(x_min), 
int(y_min) - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - if "Face Detection" in analysis_types: - faces = self.vision_client.face_detection(image=vision_image) - detection_stats["faces"] += len(faces.face_annotations) - - for face in faces.face_annotations: - vertices = face.bounding_poly.vertices - points = [(vertex.x, vertex.y) for vertex in vertices] - pts = np.array(points, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [pts], True, (0, 0, 255), 2) + # Display on frame with larger text + cv2.putText(frame, f"- {label.description}: {int(label.score*100)}%", + (15, y_pos + 20*(i+1)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + except Exception as e: # Show error on frame cv2.putText(frame, f"API Error: {str(e)[:30]}", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + + # Add hint about slowed down speed + cv2.putText(frame, "Playback: 60% speed for better visualization", + (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2) # Write the frame to output video out.write(frame) @@ -1388,21 +1144,6 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta progress_bar.empty() status_text.empty() - # Calculate additional statistics - if detection_stats["motion_data"]: - detection_stats["avg_motion_level"] = sum(item["motion_level"] for item in detection_stats["motion_data"]) / len(detection_stats["motion_data"]) - - # Update the detection_stats to include the new features - detection_stats.update({ - "edge_detection": edge_detection, - "segmentation": segmentation, - "tracking": { - "enabled": enable_tracking, - "method": "CSRT" if enable_tracking else None, - "objects_tracked": len(tracked_objects) if enable_tracking else 0 - } - }) - # Read the processed video as bytes for download with open(output_path, 'rb') as file: processed_video_bytes = file.read() @@ -1411,13 +1152,24 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta os.unlink(temp_video_path) os.unlink(output_path) + # Calculate additional statistics + for obj_name, tracking in detection_stats["object_tracking"].items(): + # Calculate total screen time + tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2) + # Calculate average confidence if available + if "confidences" in tracking and tracking["confidences"]: + tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"]) + + # Return enhanced results + results = {"detection_stats": detection_stats} + # Store results in session state for chatbot context - st.session_state.analysis_results = {"detection_stats": detection_stats} + st.session_state.analysis_results = results # Update vectorstore with new results - update_vectorstore_with_results({"detection_stats": detection_stats}) + update_vectorstore_with_results(results) - return processed_video_bytes, {"detection_stats": detection_stats} + return processed_video_bytes, results except Exception as e: # Clean up on error @@ -1889,17 +1641,9 @@ def chatbot_interface(): st.markdown('', unsafe_allow_html=True) def main(): - # Header + # Header - Updated title st.markdown('