diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -36,9 +36,6 @@ from langchain.memory import ConversationBufferMemory from langchain_community.document_loaders import TextLoader import re import base64 -from numpy.lib.stride_tricks import as_strided -import tarfile -import urllib.request # Set page config st.set_page_config( @@ -208,21 +205,21 @@ def analyze_image(image, analysis_types, confidence_threshold=0.5): if label.score >= confidence_threshold} if "Objects" in analysis_types: - objects = [] - if "objects" in analysis_types or "all" in analysis_types: - # Use SSD model for object detection - net, classes, output_layer_names = load_ssd_model() - objects = detect_objects_ssd(np.array(image), net, classes, output_layer_names, confidence_threshold) + objects = client.object_localization(image=vision_image) + # Apply confidence threshold + filtered_objects = [obj for obj in objects.localized_object_annotations + if obj.score >= confidence_threshold] - objects_data = {obj['label']: round(obj['confidence'] * 100) - for obj in objects} + objects_data = {obj.name: round(obj.score * 100) + for obj in filtered_objects} # Draw object boundaries - for obj in objects: - x, y, w, h = obj['bbox'] - draw.rectangle([x, y, x+w, y+h], outline='red', width=2) - draw.text((x, y - 10), - f"{obj['label']}: {int(obj['confidence'] * 100)}%", + for obj in filtered_objects: + box = [(vertex.x * image.width, vertex.y * image.height) + for vertex in obj.bounding_poly.normalized_vertices] + draw.polygon(box, outline='red', width=2) + draw.text((box[0][0], box[0][1] - 10), + f"{obj.name}: {int(obj.score * 100)}%", fill='red') if "Text" in analysis_types: @@ -434,359 +431,137 @@ def create_summary_image(annotated_img, labels, objects, text, colors=None): return summary_img class VideoProcessor(VideoProcessorBase): - """Process video frames with hybrid local/cloud processing""" + """Process video frames for real-time analysis with enhanced OpenCV processing""" - def __init__(self, analysis_types: List[str], processing_mode="hybrid", stabilize=False, - edge_detection=None, segmentation=None, enable_tracking=False): + def __init__(self, analysis_types: List[str]): self.analysis_types = analysis_types - self.processing_mode = processing_mode # "local", "cloud", or "hybrid" - self.stabilize = stabilize - self.edge_detection = edge_detection # None, "canny", "sobel", or "laplacian" - self.segmentation = segmentation # None, "watershed", or "grabcut" - self.enable_tracking = enable_tracking - self.frame_counter = 0 - self.cloud_process_interval = 10 # Process with Google Vision every 10 frames + self.process_every_n_frames = 5 # Process every 5th frame self.vision_client = client # Store client reference self.last_results = {} # Cache results between processed frames self.last_processed_time = time.time() self.processing_active = True - # Initialize motion tracking + # Enhanced tracking + self.object_trackers = {} + self.tracking_points = None self.prev_gray = None - self.motion_history = [] - self.motion_threshold = 40.0 # Threshold for scene change detection - self.scene_changes = [] - - # Initialize local models if needed - self.net = None - self.classes = None - self.output_layer_names = None - self.face_cascade = None - - if processing_mode in ["local", "hybrid"]: - # Initialize SSD model - self.net, self.classes, self.output_layer_names = load_ssd_model() - self.face_cascade = load_haar_cascades() - - # Initialize object tracker if enabled - if self.enable_tracking: - self.object_tracker = 
ObjectTracker(tracker_type="CSRT") - self.tracking_initialized = False - self.tracked_objects = {} - # How often to reinitialize tracking with new detections (in frames) - self.detection_interval = 15 - + + # Motion history for better activity detection + self.motion_history = np.zeros((480, 640), np.float32) + self.motion_threshold = 32 + self.max_time_delta = 0.5 + self.min_time_delta = 0.05 + def transform(self, frame: av.VideoFrame) -> av.VideoFrame: img = frame.to_ndarray(format="bgr24") self.frame_counter += 1 + # Resize for consistent processing if needed + if img.shape[0] != 480 or img.shape[1] != 640: + img = cv2.resize(img, (640, 480)) + # Add status display on all frames cv2.putText(img, - f"Vision AI: {self.processing_mode.title()} Mode", + f"Vision AI: {'Active' if self.processing_active else 'Paused'}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) - # Make a copy for processing that won't affect the original - processed_img = img.copy() - - # Prepare grayscale image for motion tracking - current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0) - - # Stabilize frame if enabled - if self.stabilize and self.prev_gray is not None: - img = stabilize_frame(img, self.prev_gray, current_gray) - processed_img = img.copy() - # Update current_gray after stabilization - current_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - current_gray = cv2.GaussianBlur(current_gray, (21, 21), 0) + # Convert to grayscale for motion detection + gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # Process motion if we have a previous frame + # Apply motion detection for all frames if self.prev_gray is not None: - # Calculate optical flow for motion detection - motion_level, motion_area, motion_mask, flow = calculate_optical_flow( - self.prev_gray, current_gray - ) - - # Store motion metrics + # Calculate frame difference for smoother motion detection + frame_diff = cv2.absdiff(gray, self.prev_gray) + _, motion_mask = cv2.threshold(frame_diff, self.motion_threshold, 1, cv2.THRESH_BINARY) timestamp = time.time() - self.motion_history.append({ - "timestamp": timestamp, - "frame": self.frame_counter, - "motion_level": motion_level, - "motion_area": motion_area * 100 # Convert to percentage - }) - - # Detect scene changes - if motion_level > self.motion_threshold: - self.scene_changes.append(self.frame_counter) - # Mark scene change on frame - cv2.putText(img, "SCENE CHANGE", - (img.shape[1] // 2 - 100, 50), - cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2) - - # Visualize motion - motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET) - motion_overlay = cv2.resize(motion_overlay, (img.shape[1] // 4, img.shape[0] // 4)) - - # Add motion overlay to corner of frame - h, w = motion_overlay.shape[:2] - img[10:10+h, img.shape[1]-10-w:img.shape[1]-10] = motion_overlay - - # Add motion level indicator - cv2.putText(img, f"Motion: {motion_level:.1f}", - (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) - - # Store current frame as previous for next iteration - self.prev_gray = current_gray - - # Process with local models if in local or hybrid mode - detected_objects = [] - - if self.processing_mode in ["local", "hybrid"]: - # Object detection with SSD - if "Objects" in self.analysis_types: - try: - # Use SSD model for object detection - objects = detect_objects_ssd( - processed_img, self.net, self.classes, - self.output_layer_names, confidence_threshold=0.4 - ) - - # Update results cache - self.last_results["objects"] = objects - 
- # Draw detected objects - for obj in objects: - x, y, w, h = obj["bbox"] - label = obj["label"] - confidence = obj["confidence"] - - # Add to detected objects list for tracking - detected_objects.append((x, y, w, h, label)) - - # Draw box (skip if tracking is enabled, as tracker will draw boxes) - if not self.enable_tracking: - cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) - - # Add label with confidence - label_text = f"{label}: {int(confidence * 100)}%" - cv2.putText(img, label_text, - (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - except Exception as e: - cv2.putText(img, f"SSD Error: {str(e)[:30]}", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + # Update motion history + cv2.motempl.updateMotionHistory(motion_mask, self.motion_history, timestamp, self.max_time_delta) - # Face detection with Haar cascades - if "Face Detection" in self.analysis_types: - try: - faces = detect_faces_haar(processed_img, self.face_cascade) - - # Update results cache - self.last_results["faces"] = faces - - # Add to detected objects list for tracking - for face in faces: - x, y, w, h = face["box"] - detected_objects.append((x, y, w, h, "Face")) - - # Draw detected faces (skip if tracking is enabled) - if not self.enable_tracking: - for face in faces: - x, y, w, h = face["box"] - - # Draw box - cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2) - except Exception as e: - cv2.putText(img, f"Face Detection Error: {str(e)[:30]}", - (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Handle object tracking if enabled - if self.enable_tracking: - try: - # Initialize tracking on first frame or periodically with new detections - if not self.tracking_initialized or self.frame_counter % self.detection_interval == 0: - # Reset if tracking is already initialized - if self.tracking_initialized: - self.object_tracker = ObjectTracker(tracker_type="CSRT") - - # Register each detected object with the tracker - for x, y, w, h, label in detected_objects: - self.object_tracker.register(processed_img, (x, y, w, h), label) - - self.tracking_initialized = True + # Calculate motion gradient + mg_mask = cv2.motempl.calcMotionGradient( + self.motion_history, self.min_time_delta, self.max_time_delta, apertureSize=5) + + # Visualize motion segments (optional) + if "Motion" in self.analysis_types: + seg_mask, segments = cv2.motempl.segmentMotion( + self.motion_history, timestamp, self.max_time_delta) - # Update tracking on every frame - self.tracked_objects = self.object_tracker.update(processed_img) + # Visualize motion segments + motion_img = np.zeros_like(img) + for i, segment in enumerate(segments): + if segment[1] < 50: # Filter out small segments + continue + # Draw motion regions with random colors + color = np.random.randint(0, 255, 3).tolist() + motion_img = cv2.drawContours(motion_img, [np.array(segment[2])], -1, color, -1) - # Draw tracked objects - img = self.object_tracker.draw_tracked_objects(img, self.tracked_objects) + # Overlay motion visualization + alpha = 0.3 + cv2.addWeighted(motion_img, alpha, img, 1 - alpha, 0, img) - # Add tracking status - cv2.putText(img, f"Tracking {len(self.tracked_objects)} objects", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2) - except Exception as e: - cv2.putText(img, f"Tracking Error: {str(e)[:30]}", - (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Process with Google Vision API periodically if in cloud or hybrid mode + # Process at regular intervals current_time = time.time() - 
should_process_cloud = ( - self.processing_mode in ["cloud", "hybrid"] and - (self.frame_counter % self.cloud_process_interval == 0) and - (current_time - self.last_processed_time > 1.0) and # Max once per second - self.processing_active - ) - - if should_process_cloud: + if current_time - self.last_processed_time > 1.0 and self.processing_active: # Process max once per second self.last_processed_time = current_time - try: - # Convert to PIL Image for Vision API - pil_img = Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)) - - # Process with Vision API - img_byte_arr = io.BytesIO() - pil_img.save(img_byte_arr, format='PNG') - content = img_byte_arr.getvalue() - vision_image = vision.Image(content=content) - - # Update status text - cv2.putText(img, "Cloud Processing...", (10, 180), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) - - # Process according to selected analysis types - if "Text" in self.analysis_types: - text = self.vision_client.text_detection(image=vision_image) - self.last_results["text"] = text.text_annotations - - if "Labels" in self.analysis_types: - labels = self.vision_client.label_detection(image=vision_image, max_results=5) - self.last_results["labels"] = labels.label_annotations - - # Only use Vision API for objects/faces if in cloud-only mode - if self.processing_mode == "cloud": - if "Objects" in self.analysis_types: - objects = self.vision_client.object_localization(image=vision_image) - self.last_results["objects"] = objects.localized_object_annotations - - if "Face Detection" in self.analysis_types: - faces = self.vision_client.face_detection(image=vision_image) - self.last_results["faces"] = faces.face_annotations - - except Exception as e: - # Show error on frame - cv2.putText(img, f"API Error: {str(e)[:30]}", - (10, 180), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + # Process with Vision API as in original code + # ... existing API processing code ... - # Always draw the cached cloud results for smooth display - try: - # Draw text detections from cloud - if "text" in self.last_results and "Text" in self.analysis_types: - if len(self.last_results["text"]) > 1: # Skip the first one (full text) - for text_annot in self.last_results["text"][1:]: - box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] - pts = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [pts], True, (255, 0, 0), 1) + # Update tracking between API calls for smoother object tracking + if "objects" in self.last_results and "Objects" in self.analysis_types: + # Use OpenCV's built-in object trackers for smoother tracking between API calls + for obj in self.last_results["objects"]: + obj_id = obj.name + str(hash(str(obj.bounding_poly.normalized_vertices))) - # Show full text summary - if self.last_results["text"]: - full_text = self.last_results["text"][0].description - words = full_text.split() - short_text = " ".join(words[:3]) - if len(words) > 3: - short_text += "..." + if obj_id not in self.object_trackers: + # Initialize a new tracker + tracker = cv2.TrackerKCF_create() # or other trackers like CSRT, MIL, etc. 
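+                            # NOTE: KCF/CSRT trackers require an opencv-contrib build; on OpenCV 4.5.x and
+                            # newer the factory may live under cv2.legacy (e.g. cv2.legacy.TrackerKCF_create()),
+                            # so guard this call with a try/except or a version check for the installed build.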
- # Display text at top of frame - cv2.putText(img, f"Text: {short_text}", - (img.shape[1] - 300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) - - # Draw labels from cloud - if "labels" in self.last_results and "Labels" in self.analysis_types: - y_pos = img.shape[0] - 50 - for i, label in enumerate(self.last_results["labels"][:3]): # Show top 3 labels - label_text = f"Label: {label.description} ({int(label.score*100)}%)" - cv2.putText(img, label_text, - (img.shape[1] - 300, y_pos - i*20), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 0), 2) - - # Draw cloud-detected objects and faces only if in cloud-only mode - if self.processing_mode == "cloud" and not self.enable_tracking: - # Draw objects - if "objects" in self.last_results and "Objects" in self.analysis_types: - for obj in self.last_results["objects"]: - box = [(vertex.x * img.shape[1], vertex.y * img.shape[0]) - for vertex in obj.bounding_poly.normalized_vertices] - box = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [box], True, (0, 255, 0), 2) - # Add label - cv2.putText(img, f"{obj.name}: {int(obj.score * 100)}%", - (int(box[0][0][0]), int(box[0][0][1]) - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) + # Get bounding box coordinates + box_points = [(vertex.x * img.shape[1], vertex.y * img.shape[0]) + for vertex in obj.bounding_poly.normalized_vertices] + x_min = min([p[0] for p in box_points]) + y_min = min([p[1] for p in box_points]) + x_max = max([p[0] for p in box_points]) + y_max = max([p[1] for p in box_points]) + + # Initialize tracker + bbox = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)) + tracker.init(img, bbox) + self.object_trackers[obj_id] = { + "tracker": tracker, + "name": obj.name, + "score": obj.score, + "last_update": self.frame_counter + } - # Draw faces - if "faces" in self.last_results and "Face Detection" in self.analysis_types: - for face in self.last_results["faces"]: - vertices = face.bounding_poly.vertices - points = [(vertex.x, vertex.y) for vertex in vertices] - pts = np.array(points, np.int32).reshape((-1, 1, 2)) - cv2.polylines(img, [pts], True, (0, 0, 255), 2) - - # Draw landmarks - for landmark in face.landmarks: - px = int(landmark.position.x) - py = int(landmark.position.y) - cv2.circle(img, (px, py), 2, (255, 255, 0), -1) - - except Exception as e: - cv2.putText(img, f"Display Error: {str(e)[:30]}", - (10, 210), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Apply edge detection if enabled - if self.edge_detection: - # Create edge detection visualization - edge_img = detect_edges(processed_img, method=self.edge_detection) - - # Display edge detection mode - cv2.putText(img, f"Edge: {self.edge_detection.title()}", - (10, img.shape[0] - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Update all trackers + trackers_to_remove = [] + for obj_id, tracker_info in self.object_trackers.items(): + # Only keep trackers for a limited number of frames + if self.frame_counter - tracker_info["last_update"] > 30: # Remove after 30 frames + trackers_to_remove.append(obj_id) + continue + + success, bbox = tracker_info["tracker"].update(img) + if success: + # Draw tracking box + x, y, w, h = [int(v) for v in bbox] + cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2) + + # Add label with confidence + label = f"{tracker_info['name']}: {int(tracker_info['score'] * 100)}%" + cv2.putText(img, label, (x, y - 10), + cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - # Show edge detection in a corner (similar to motion overlay) - edge_small = 
cv2.resize(edge_img, (img.shape[1] // 4, img.shape[0] // 4)) - h, w = edge_small.shape[:2] - img[10:10+h, 10:10+w] = edge_small + # Remove expired trackers + for obj_id in trackers_to_remove: + del self.object_trackers[obj_id] - # Apply segmentation if enabled - if self.segmentation: - try: - # Create segmentation visualization - segmented_img, _ = segment_image(processed_img, method=self.segmentation) - - # Display segmentation mode - cv2.putText(img, f"Segment: {self.segmentation.title()}", - (10, img.shape[0] - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Save current frame for next iteration + self.prev_gray = gray - # Show segmentation in a corner opposite to edge detection or motion - seg_small = cv2.resize(segmented_img, (img.shape[1] // 4, img.shape[0] // 4)) - h, w = seg_small.shape[:2] - img[10+h+10:10+h+10+h, 10:10+w] = seg_small - except Exception as e: - cv2.putText(img, f"Segmentation Error: {str(e)[:30]}", - (10, img.shape[0] - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Add processing mode and stabilization status - mode_text = f"Mode: {self.processing_mode.title()}" - features = [] - if self.stabilize: - features.append("Stabilized") - if self.enable_tracking: - features.append("Tracking") - if features: - mode_text += f" | {', '.join(features)}" - - cv2.putText(img, mode_text, - (10, img.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - return av.VideoFrame.from_ndarray(img, format="bgr24") def analyze_document(file_content, processor_id, location="us"): @@ -1011,20 +786,24 @@ def list_bigquery_resources(): return resources -def process_video_file(video_file, analysis_types, processing_mode="hybrid", stabilize=False, - edge_detection=None, segmentation=None, enable_tracking=False): - """Process video file with computer vision techniques""" - # Create output directory if it doesn't exist - output_dir = "processed_videos" - os.makedirs(output_dir, exist_ok=True) - - # Generate a unique output filename - timestamp = int(time.time()) - output_filename = os.path.join(output_dir, f"processed_{timestamp}.mp4") - stats_filename = os.path.join(output_dir, f"stats_{timestamp}.json") +def process_video_file(video_file, analysis_types): + """Process an uploaded video file with enhanced Vision AI detection and analytics""" + # Create a temporary file to save the uploaded video + with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file: + temp_file.write(video_file.read()) + temp_video_path = temp_file.name + + # Create a temp file for the output video + output_path = f"{temp_video_path}_processed.mp4" + + # Open the video file + cap = cv2.VideoCapture(temp_video_path) + if not cap.isOpened(): + st.error("Error opening video file") + os.unlink(temp_video_path) + return None # Get video properties - cap = cv2.VideoCapture(video_file) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) @@ -1054,8 +833,8 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta fourcc = cv2.VideoWriter_fourcc(*'DIB ') # Uncompressed RGB out = cv2.VideoWriter(output_path, fourcc, output_fps, (width, height), isColor=True) - # Process every Nth frame to reduce API calls - cloud_process_interval = 10 # How often to use Google Vision API + # Process every Nth frame to reduce API calls but increase from 10 to 5 for more detail + process_every_n_frames = 5 # Create a progress bar progress_bar = st.progress(0) @@ -1067,28 +846,24 @@ def 
process_video_file(video_file, analysis_types, processing_mode="hybrid", sta "faces": 0, "text_blocks": 0, "labels": {}, - # Motion tracking - "motion_data": [], - "scene_changes": [], - "avg_motion_level": 0, - "processing_mode": processing_mode, - "stabilized": stabilize + # New advanced tracking + "object_tracking": {}, # Track object appearances by frame + "activity_metrics": [], # Track frame-to-frame differences + "scene_changes": [] # Track major scene transitions } - # Initialize object tracker if enabled - if enable_tracking: - object_tracker = ObjectTracker(tracker_type="CSRT") - tracked_objects = {} - detection_interval = 15 # How often to reinitialize tracking - - # Load models based on processing mode - if processing_mode in ["local", "hybrid"]: - yolo_net, yolo_classes, yolo_output_layers = load_yolo_model() - face_cascade = load_haar_cascades() - # For scene change detection and motion tracking previous_frame_gray = None - scene_change_threshold = 40.0 # Threshold for scene change detection + prev_points = None + lk_params = dict(winSize=(15, 15), + maxLevel=2, + criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)) + + # Feature detection params for tracking + feature_params = dict(maxCorners=100, + qualityLevel=0.3, + minDistance=7, + blockSize=7) try: frame_count = 0 @@ -1108,274 +883,255 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta cv2.putText(frame, f"Time: {frame_count/fps:.2f}s", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2) - # Prepare grayscale image for motion analysis + # Activity detection and scene change detection current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0) - - # Stabilize frame if enabled - if stabilize and previous_frame_gray is not None: - frame = stabilize_frame(frame, previous_frame_gray, current_frame_gray) - # Update grayscale after stabilization - current_frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - current_frame_gray = cv2.GaussianBlur(current_frame_gray, (21, 21), 0) - - # Motion detection and scene change detection + if previous_frame_gray is not None: - # Calculate optical flow for motion detection - motion_level, motion_area, motion_mask, flow = calculate_optical_flow( - previous_frame_gray, current_frame_gray - ) + # Calculate frame difference for activity detection + frame_diff = cv2.absdiff(current_frame_gray, previous_frame_gray) + _, thresh = cv2.threshold(frame_diff, 25, 255, cv2.THRESH_BINARY) + thresh = cv2.dilate(thresh, None, iterations=2) + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) - # Store motion metrics - detection_stats["motion_data"].append({ - "time": frame_count/fps, - "motion_level": motion_level, - "motion_area": motion_area * 100 # Convert to percentage - }) + # Better activity metric using contour area + activity_level = sum(cv2.contourArea(c) for c in contours) / (frame.shape[0] * frame.shape[1]) + activity_level *= 100 # Convert to percentage + detection_stats["activity_metrics"].append((frame_count/fps, activity_level)) - # Scene change detection - if motion_level > scene_change_threshold: + # Add optical flow for better motion tracking + if "Objects" in analysis_types and prev_points is not None: + # Calculate optical flow + next_points, status, _ = cv2.calcOpticalFlowPyrLK(previous_frame_gray, + current_frame_gray, + prev_points, + None, + **lk_params) + + # Select good points + if next_points is not None: + good_new 
= next_points[status==1] + good_old = prev_points[status==1] + + # Draw motion tracks + for i, (new, old) in enumerate(zip(good_new, good_old)): + a, b = new.ravel() + c, d = old.ravel() + # Draw motion lines + cv2.line(frame, (int(c), int(d)), (int(a), int(b)), (0, 255, 255), 2) + cv2.circle(frame, (int(a), int(b)), 3, (0, 255, 0), -1) + + # Scene change detection using contour analysis for more robust results + if activity_level > scene_change_threshold: detection_stats["scene_changes"].append(frame_count/fps) # Mark scene change on frame cv2.putText(frame, "SCENE CHANGE", (width // 2 - 100, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2) - - # Visualize motion - motion_overlay = cv2.applyColorMap(motion_mask, cv2.COLORMAP_JET) - motion_overlay = cv2.resize(motion_overlay, (width // 4, height // 4)) - - # Add motion overlay to corner of frame - h, w = motion_overlay.shape[:2] - frame[10:10+h, width-10-w:width-10] = motion_overlay - - # Add motion indicator - cv2.putText(frame, f"Motion: {motion_level:.1f}", - (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2) - - previous_frame_gray = current_frame_gray + + # Reset tracking points on scene change + prev_points = None - # Apply edge detection if enabled - if edge_detection: - # Create edge detection visualization in a corner - edge_img = detect_edges(frame, method=edge_detection) - - # Display edge detection mode - cv2.putText(frame, f"Edge: {edge_detection.title()}", - (10, height - 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Update tracking points periodically + if frame_count % 5 == 0 or prev_points is None or len(prev_points) < 10: + prev_points = cv2.goodFeaturesToTrack(current_frame_gray, **feature_params) - # Show edge detection in a corner - edge_small = cv2.resize(edge_img, (width // 4, height // 4)) - h, w = edge_small.shape[:2] - frame[10:10+h, 10:10+w] = edge_small + previous_frame_gray = current_frame_gray - # Apply segmentation if enabled - if segmentation: + # Process frames with Vision API + if frame_count % process_every_n_frames == 0: try: - # Create segmentation visualization - segmented_img, _ = segment_image(frame, method=segmentation) - - # Display segmentation mode - cv2.putText(frame, f"Segment: {segmentation.title()}", - (10, height - 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + # Convert OpenCV frame to PIL Image for Vision API + pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - # Show segmentation in another corner - seg_small = cv2.resize(segmented_img, (width // 4, height // 4)) - h, w = seg_small.shape[:2] - frame[10+h+10:10+h+10+h, 10:10+w] = seg_small - except Exception as e: - cv2.putText(frame, f"Segmentation Error: {str(e)[:30]}", - (10, height - 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Add processing mode indicator - mode_text = f"Mode: {processing_mode.title()}" - if stabilize: - mode_text += " | Stabilized" - cv2.putText(frame, mode_text, - (10, height - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - - # Handle object tracking if enabled - detected_objects = [] - - # Local processing (SSD and Haar cascades) - if processing_mode in ["local", "hybrid"]: - # Object detection with SSD - if "Objects" in analysis_types: - objects = detect_objects_ssd( - frame, yolo_net, yolo_classes, yolo_output_layers, confidence_threshold=0.5 - ) + # Create vision image + img_byte_arr = io.BytesIO() + pil_img.save(img_byte_arr, format='PNG') + content = img_byte_arr.getvalue() + vision_image = vision.Image(content=content) - # Collect 
objects for tracking - for obj in objects: - x, y, w, h = obj["bbox"] - label = obj["label"] - confidence = obj["confidence"] - - # Add to detected objects list for tracking - detected_objects.append((x, y, w, h, label)) - - # Update statistics and draw boxes (if tracking disabled) - if not enable_tracking: - if label in detection_stats["objects"]: - detection_stats["objects"][label] += 1 + # Apply analysis based on selected types with enhanced detail + if "Objects" in analysis_types: + objects = client.object_localization(image=vision_image) + # Draw boxes around detected objects with enhanced info + for obj in objects.localized_object_annotations: + obj_name = obj.name + # Update basic stats + if obj_name in detection_stats["objects"]: + detection_stats["objects"][obj_name] += 1 + else: + detection_stats["objects"][obj_name] = 1 + + # Enhanced object tracking + timestamp = frame_count/fps + if obj_name not in detection_stats["object_tracking"]: + detection_stats["object_tracking"][obj_name] = { + "first_seen": timestamp, + "last_seen": timestamp, + "frames_present": 1, + "timestamps": [timestamp] + } else: - detection_stats["objects"][label] = 1 + tracking = detection_stats["object_tracking"][obj_name] + tracking["frames_present"] += 1 + tracking["last_seen"] = timestamp + tracking["timestamps"].append(timestamp) + + # Calculate box coordinates + box = [(vertex.x * frame.shape[1], vertex.y * frame.shape[0]) + for vertex in obj.bounding_poly.normalized_vertices] + box = np.array(box, np.int32).reshape((-1, 1, 2)) + + # Draw more noticeable box with thicker lines + cv2.polylines(frame, [box], True, (0, 255, 0), 3) + + # Calculate box size for better placement of labels + x_min = min([p[0][0] for p in box]) + y_min = min([p[0][1] for p in box]) - # Draw box - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) + # Draw filled box with opacity for better label visibility + overlay = frame.copy() + box_np = np.array(box) + hull = cv2.convexHull(box_np) + cv2.fillConvexPoly(overlay, hull, (0, 255, 0, 64)) + # Apply overlay with transparency + alpha = 0.3 + cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame) - # Add label with confidence - label_text = f"{label}: {int(confidence * 100)}%" + # Enhanced label with confidence and border + confidence = int(obj.score * 100) + label_text = f"{obj.name}: {confidence}%" + text_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)[0] + + # Create better text background with rounded rectangle + text_bg_pts = np.array([ + [x_min, y_min - text_size[1] - 10], + [x_min + text_size[0] + 10, y_min - text_size[1] - 10], + [x_min + text_size[0] + 10, y_min], + [x_min, y_min] + ], np.int32) + + cv2.fillPoly(frame, [text_bg_pts], (0, 0, 0)) cv2.putText(frame, label_text, - (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - # Face detection with Haar cascades - if "Face Detection" in analysis_types: - faces = detect_faces_haar(frame, face_cascade) + (int(x_min) + 5, int(y_min) - 5), + cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) - # Update faces count and add to detected objects for tracking - if not enable_tracking: - detection_stats["faces"] += len(faces) - - for face in faces: - x, y, w, h = face["box"] - detected_objects.append((x, y, w, h, "Face")) - - # Draw boxes only if tracking is disabled - if not enable_tracking: - # Draw box - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 2) - - # Add tracking code - if enable_tracking: - try: - # Initialize tracking on first frame or periodically - if 
frame_count == 1 or frame_count % self.detection_interval == 0: - # Reset tracker periodically - if frame_count > 1: - object_tracker = ObjectTracker(tracker_type="CSRT") + if "Face Detection" in analysis_types: + faces = client.face_detection(image=vision_image) + # Track statistics + detection_stats["faces"] += len(faces.face_annotations) - # Register each detected object - for x, y, w, h, label in detected_objects: - object_tracker.register(frame, (x, y, w, h), label) - - # Update tracking on every frame - tracked_objects = object_tracker.update(frame) - - # Draw tracked objects - frame = object_tracker.draw_tracked_objects(frame, tracked_objects) - - # Add tracking status - cv2.putText(frame, f"Tracking {len(tracked_objects)} objects", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 165, 0), 2) - - # Count object types in tracking - for _, (_, _, _, _, label) in tracked_objects.items(): - if label in detection_stats["objects"]: - detection_stats["objects"][label] += 1 - else: - detection_stats["objects"][label] = 1 + for face in faces.face_annotations: + vertices = face.bounding_poly.vertices + points = [(vertex.x, vertex.y) for vertex in vertices] + # Draw face box with thicker lines + pts = np.array(points, np.int32).reshape((-1, 1, 2)) + cv2.polylines(frame, [pts], True, (0, 0, 255), 3) - # Update faces count if any faces are being tracked - face_count = sum(1 for _, (_, _, _, _, label) in tracked_objects.items() if label == "Face") - detection_stats["faces"] += face_count - except Exception as e: - cv2.putText(frame, f"Tracking Error: {str(e)[:30]}", - (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) - - # Cloud processing with Google Vision API (less frequent) - if processing_mode in ["cloud", "hybrid"] and frame_count % cloud_process_interval == 0: - try: - # Convert to PIL Image for Vision API - pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) - - # Create vision image - img_byte_arr = io.BytesIO() - pil_img.save(img_byte_arr, format='PNG') - content = img_byte_arr.getvalue() - vision_image = vision.Image(content=content) - - # Add cloud processing indicator - cv2.putText(frame, "Cloud Processing", (width - 200, 30), - cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2) + # Enhanced face info visualization + emotions = [] + if face.joy_likelihood >= 3: + emotions.append("Joy") + if face.anger_likelihood >= 3: + emotions.append("Anger") + if face.surprise_likelihood >= 3: + emotions.append("Surprise") + if face.sorrow_likelihood >= 3: + emotions.append("Sorrow") + + emotion_text = ", ".join(emotions) if emotions else "Neutral" + x_min = min([p[0] for p in points]) + y_min = min([p[1] for p in points]) + + # Add emotion gauge bar for better visualization + emotions_map = { + "Joy": (0, 255, 0), # Green + "Anger": (0, 0, 255), # Red + "Surprise": (255, 255, 0), # Yellow + "Sorrow": (255, 0, 0) # Blue + } + + # Add detailed emotion text with colored background + text_size = cv2.getTextSize(emotion_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0] + cv2.rectangle(frame, + (int(x_min), int(y_min) - text_size[1] - 8), + (int(x_min) + text_size[0] + 8, int(y_min)), + (0, 0, 0), -1) + + cv2.putText(frame, emotion_text, + (int(x_min) + 4, int(y_min) - 4), + cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + # Draw enhanced landmarks with connections + if len(face.landmarks) > 0: + landmarks = [(int(landmark.position.x), int(landmark.position.y)) + for landmark in face.landmarks] + + # Draw each landmark + for landmark in landmarks: + cv2.circle(frame, landmark, 3, 
(255, 255, 0), -1) + + # Connect landmarks for eyes, nose, mouth if there are enough points + if len(landmarks) >= 8: + # These indices are approximate - adjust based on your actual data + eye_indices = [0, 1, 2, 3] + for i in range(len(eye_indices)-1): + cv2.line(frame, landmarks[eye_indices[i]], + landmarks[eye_indices[i+1]], (255, 255, 0), 1) - # Text detection if "Text" in analysis_types: - text = self.vision_client.text_detection(image=vision_image) - + text = client.text_detection(image=vision_image) # Update stats - if text.text_annotations: + if len(text.text_annotations) > 1: detection_stats["text_blocks"] += len(text.text_annotations) - 1 - # Draw text boxes - for text_annot in text.text_annotations[1:]: - box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] - pts = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [pts], True, (255, 0, 0), 2) - - # Show text summary + # Add overall text summary to the frame + if text.text_annotations: full_text = text.text_annotations[0].description words = full_text.split() short_text = " ".join(words[:5]) if len(words) > 5: short_text += "..." - + + # Add text summary to top of frame with better visibility + cv2.rectangle(frame, (10, 60), (10 + len(short_text)*10, 90), (0, 0, 0), -1) cv2.putText(frame, f"Text: {short_text}", - (10, height - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) + (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + + # Draw text boxes with improved visibility + for text_annot in text.text_annotations[1:]: + box = [(vertex.x, vertex.y) for vertex in text_annot.bounding_poly.vertices] + pts = np.array(box, np.int32).reshape((-1, 1, 2)) + cv2.polylines(frame, [pts], True, (255, 0, 0), 2) # Thicker lines - # Label detection + # Add Labels analysis for more detail if "Labels" in analysis_types: - labels = self.vision_client.label_detection(image=vision_image, max_results=5) + labels = client.label_detection(image=vision_image, max_results=5) + + # Add labels to the frame with better visibility + y_pos = 120 + cv2.rectangle(frame, (10, y_pos-20), (250, y_pos+20*len(labels.label_annotations)), (0, 0, 0), -1) + cv2.putText(frame, "Scene labels:", (15, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) - # Update stats and show labels + # Track stats and show labels for i, label in enumerate(labels.label_annotations): + # Update stats if label.description in detection_stats["labels"]: detection_stats["labels"][label.description] += 1 else: detection_stats["labels"][label.description] = 1 - - # Display on frame - cv2.putText(frame, f"Label: {label.description}", - (width - 200, 60 + i*30), - cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2) - - # Only do object/face detection with Vision API in cloud-only mode - if self.processing_mode == "cloud" and not enable_tracking: - if "Objects" in analysis_types: - objects = self.vision_client.object_localization(image=vision_image) - - for obj in objects.localized_object_annotations: - # Update stats - if obj.name in detection_stats["objects"]: - detection_stats["objects"][obj.name] += 1 - else: - detection_stats["objects"][obj.name] = 1 - - # Draw box - box = [(vertex.x * width, vertex.y * height) - for vertex in obj.bounding_poly.normalized_vertices] - box = np.array(box, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [box], True, (0, 255, 0), 2) - # Add label - x_min = min([p[0][0] for p in box]) - y_min = min([p[0][1] for p in box]) - cv2.putText(frame, f"{obj.name}: {int(obj.score * 100)}%", - (int(x_min), 
int(y_min) - 10), - cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - if "Face Detection" in analysis_types: - faces = self.vision_client.face_detection(image=vision_image) - detection_stats["faces"] += len(faces.face_annotations) - - for face in faces.face_annotations: - vertices = face.bounding_poly.vertices - points = [(vertex.x, vertex.y) for vertex in vertices] - pts = np.array(points, np.int32).reshape((-1, 1, 2)) - cv2.polylines(frame, [pts], True, (0, 0, 255), 2) + # Display on frame with larger text + cv2.putText(frame, f"- {label.description}: {int(label.score*100)}%", + (15, y_pos + 20*(i+1)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + except Exception as e: # Show error on frame cv2.putText(frame, f"API Error: {str(e)[:30]}", - (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) + + # Add hint about slowed down speed + cv2.putText(frame, "Playback: 60% speed for better visualization", + (width - 400, height - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 200, 0), 2) # Write the frame to output video out.write(frame) @@ -1388,21 +1144,6 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta progress_bar.empty() status_text.empty() - # Calculate additional statistics - if detection_stats["motion_data"]: - detection_stats["avg_motion_level"] = sum(item["motion_level"] for item in detection_stats["motion_data"]) / len(detection_stats["motion_data"]) - - # Update the detection_stats to include the new features - detection_stats.update({ - "edge_detection": edge_detection, - "segmentation": segmentation, - "tracking": { - "enabled": enable_tracking, - "method": "CSRT" if enable_tracking else None, - "objects_tracked": len(tracked_objects) if enable_tracking else 0 - } - }) - # Read the processed video as bytes for download with open(output_path, 'rb') as file: processed_video_bytes = file.read() @@ -1411,13 +1152,24 @@ def process_video_file(video_file, analysis_types, processing_mode="hybrid", sta os.unlink(temp_video_path) os.unlink(output_path) + # Calculate additional statistics + for obj_name, tracking in detection_stats["object_tracking"].items(): + # Calculate total screen time + tracking["screen_time"] = round(tracking["frames_present"] * (1/fps) * process_every_n_frames, 2) + # Calculate average confidence if available + if "confidences" in tracking and tracking["confidences"]: + tracking["avg_confidence"] = sum(tracking["confidences"]) / len(tracking["confidences"]) + + # Return enhanced results + results = {"detection_stats": detection_stats} + # Store results in session state for chatbot context - st.session_state.analysis_results = {"detection_stats": detection_stats} + st.session_state.analysis_results = results # Update vectorstore with new results - update_vectorstore_with_results({"detection_stats": detection_stats}) + update_vectorstore_with_results(results) - return processed_video_bytes, {"detection_stats": detection_stats} + return processed_video_bytes, results except Exception as e: # Clean up on error @@ -1889,17 +1641,9 @@ def chatbot_interface(): st.markdown('', unsafe_allow_html=True) def main(): - # Header + # Header - Updated title st.markdown('
Cosmick Cloud AI Analyzer
', unsafe_allow_html=True) - # Call the function here after all definitions have been loaded - try: - with st.spinner("Downloading object detection models..."): - download_ssd_model_files() - st.success("Object detection models loaded successfully") - except Exception as e: - st.error(f"Error downloading models: {str(e)}") - # Navigation selected = option_menu( menu_title=None, @@ -2078,36 +1822,10 @@ def main(): st.error(f"Error processing {uploaded_file.name}: {str(e)}") elif selected == "Video Analysis": - st.markdown('
Video Analysis with Hybrid Processing
', unsafe_allow_html=True) + st.markdown('
Video Analysis
', unsafe_allow_html=True) - # Enhanced analysis settings + # Analysis settings st.sidebar.markdown("### Video Analysis Settings") - - # Add processing mode selection - processing_mode = st.sidebar.radio( - "Processing Mode", - ["hybrid", "local", "cloud"], - format_func=lambda x: { - "hybrid": "Hybrid (Local + Cloud) - Recommended", - "local": "Local Only (Faster, Less Accurate)", - "cloud": "Cloud Only (Slower, More Accurate)" - }[x], - index=0 # Default to hybrid - ) - - # Show appropriate explanation based on selected mode - if processing_mode == "hybrid": - st.sidebar.info("Hybrid mode uses local processing for real-time tasks and Google Vision for detailed analysis.") - elif processing_mode == "local": - st.sidebar.info("Local mode runs entirely on your device using YOLOv4-tiny for object detection and Haar cascades for faces.") - else: # cloud - st.sidebar.info("Cloud mode sends all frames to Google Vision API for high-accuracy analysis.") - - # Add stabilization toggle - stabilize = st.sidebar.checkbox("Enable Video Stabilization", value=False, - help="Reduces camera shake using optical flow") - - # Analysis type selection analysis_types = [] if st.sidebar.checkbox("Object Detection", value=True): analysis_types.append("Objects") @@ -2115,33 +1833,21 @@ def main(): analysis_types.append("Face Detection") if st.sidebar.checkbox("Text Recognition"): analysis_types.append("Text") - if st.sidebar.checkbox("Label Detection"): - analysis_types.append("Labels") st.sidebar.markdown("---") + st.sidebar.warning("⚠️ Video analysis may use a significant amount of API calls. Use responsibly.") - # Add info about processing limits and usage - if processing_mode in ["cloud", "hybrid"]: - st.sidebar.warning("⚠️ Cloud analysis may use a significant amount of API calls. Use responsibly.") - - # Main content + # Upload Video mode only - removed real-time camera option st.markdown(""" - #### 📤 Enhanced Video Analysis - - Upload a video file to analyze it with hybrid AI processing. + #### 📤 Video Analysis - **Features:** - - **Local Processing**: Fast object & face detection using YOLOv4-tiny and Haar cascades - - **Cloud Processing**: High-accuracy text recognition and labels with Google Vision AI - - **Motion Analysis**: Track movement patterns with optical flow - - **Video Stabilization**: Reduce camera shake (optional) - - **Scene Changes**: Automatically detect major scene transitions + Upload a video file to analyze it with Google Cloud Vision AI. **Instructions:** - 1. Select processing mode and analysis types in the sidebar + 1. Select the analysis types in the sidebar 2. Upload a video file (MP4, MOV, AVI) 3. Click "Process Video" to begin analysis - 4. Explore the enhanced analytics and download the processed video + 4. Download the processed video when complete **Note:** Videos are limited to 10 seconds of processing to manage API usage. 
""") @@ -2161,15 +1867,10 @@ def main(): if not analysis_types: st.warning("Please select at least one analysis type.") else: - with st.spinner(f"Processing video in {processing_mode} mode (max 10 seconds)..."): + with st.spinner("Processing video (max 10 seconds)..."): try: - # Process the video with hybrid processing - processed_video, results = process_video_file( - uploaded_file, - analysis_types, - processing_mode=processing_mode, - stabilize=stabilize - ) + # Process the video with enhanced detail + processed_video, results = process_video_file(uploaded_file, analysis_types) if processed_video: # Offer download of processed video @@ -2181,206 +1882,64 @@ def main(): mime="video/mp4" ) - # Enhanced analytics display - detection_stats = results["detection_stats"] - - st.markdown("### Enhanced Video Analytics") - - # Display processing mode info - st.info(f"Processing mode: **{detection_stats['processing_mode'].title()}**" + - (", with video stabilization" if detection_stats['stabilized'] else "")) + # Show detailed analysis results + st.markdown("### Detailed Analysis Results") - # Create tabs for different analytics - tab1, tab2, tab3, tab4 = st.tabs([ - "Object Detection", - "Motion Analysis", - "Scene Changes", - "Text & Labels" - ]) - - with tab1: + # Display object detection summary + if "Objects" in analysis_types and results["detection_stats"]["objects"]: st.markdown("#### 📦 Objects Detected") - if detection_stats["objects"]: - # Sort objects by frequency - sorted_objects = dict(sorted(detection_stats["objects"].items(), - key=lambda x: x[1], reverse=True)) - - # Create bar chart for objects - if sorted_objects: - fig = px.bar( - x=list(sorted_objects.keys()), - y=list(sorted_objects.values()), - labels={"x": "Object Type", "y": "Frequency"}, - title="Objects Detected in Video", - color=list(sorted_objects.values()), - color_continuous_scale="Viridis" - ) - st.plotly_chart(fig, use_container_width=True) - - # Object statistics - st.markdown("##### Object Detection Statistics") - total_objects = sum(sorted_objects.values()) - unique_objects = len(sorted_objects) - - col1, col2, col3 = st.columns(3) - with col1: - st.metric("Total Detections", total_objects) - with col2: - st.metric("Unique Objects", unique_objects) - with col3: - if "faces" in detection_stats: - st.metric("Faces Detected", detection_stats["faces"]) - - st.markdown("##### Top Objects") - if sorted_objects: # Check if there are any sorted objects - for obj, count in list(sorted_objects.items())[:10]: - st.markdown(f"- **{obj}**: {count} occurrences") - else: - st.info("No objects detected in the video.") - with tab2: - st.markdown("#### 🔄 Motion Analysis") - - if detection_stats["motion_data"]: - # Create a DataFrame for the motion data - motion_df = pd.DataFrame(detection_stats["motion_data"]) - - # Plot motion level over time - st.markdown("##### Motion Intensity Over Time") - fig = px.line( - motion_df, - x="time", - y="motion_level", - labels={"time": "Time (seconds)", "motion_level": "Motion Intensity"}, - title="Motion Intensity Throughout Video" - ) - # Add a horizontal line for scene change threshold - fig.add_hline( - y=40.0, - line_dash="dash", - line_color="red", - annotation_text="Scene Change Threshold" - ) - st.plotly_chart(fig, use_container_width=True) - - # Motion area percentage - st.markdown("##### Motion Area Percentage") - fig = px.area( - motion_df, - x="time", - y="motion_area", - labels={"time": "Time (seconds)", "motion_area": "% of Frame with Motion"}, - title="Percentage of Frame with 
Detected Motion" - ) - st.plotly_chart(fig, use_container_width=True) - - # Motion statistics - st.markdown("##### Motion Statistics") - col1, col2, col3 = st.columns(3) - with col1: - st.metric( - "Average Motion", - f"{detection_stats['avg_motion_level']:.2f}" - ) - with col2: - st.metric( - "Peak Motion", - f"{max(item['motion_level'] for item in detection_stats['motion_data']):.2f}" - ) - with col3: - st.metric( - "Motion Variability", - f"{np.std([item['motion_level'] for item in detection_stats['motion_data']]):.2f}" - ) - else: - st.info("No motion data collected for this video.") - - with tab3: - st.markdown("#### 🎬 Scene Changes") + # Sort objects by frequency + sorted_objects = dict(sorted(results["detection_stats"]["objects"].items(), + key=lambda x: x[1], reverse=True)) - if detection_stats["scene_changes"]: - # Create a timeline of scene changes - st.markdown("##### Timeline of Detected Scene Changes") - - # Create a DataFrame with scene change markers - timeline_df = pd.DataFrame({ - "time": detection_stats["scene_changes"], - "event": ["Scene Change"] * len(detection_stats["scene_changes"]) - }) + # Create bar chart for objects + if sorted_objects: + fig, ax = plt.subplots(figsize=(10, 5)) + objects = list(sorted_objects.keys()) + counts = list(sorted_objects.values()) + ax.barh(objects, counts, color='skyblue') + ax.set_xlabel('Number of Detections') + ax.set_title('Objects Detected in Video') + st.pyplot(fig) - # Plot the timeline - fig = px.scatter( - timeline_df, - x="time", - y="event", - labels={"time": "Time (seconds)"}, - title="Scene Change Timeline", - size=[10] * len(timeline_df), - color_discrete_sequence=["red"] - ) - # Add vertical lines for each scene change - for time in detection_stats["scene_changes"]: - fig.add_vline(x=time, line_dash="solid", line_color="rgba(255,0,0,0.3)") - - # Adjust the y-axis - fig.update_yaxes(showticklabels=False) - - # Show the plot - st.plotly_chart(fig, use_container_width=True) - - # List scene changes - st.markdown("##### Scene Changes Detected At:") - for i, time in enumerate(sorted(detection_stats["scene_changes"])): - st.markdown(f"**Scene {i+1}**: {time:.2f} seconds") - - # Scene statistics - st.markdown("##### Scene Statistics") + # List with counts col1, col2 = st.columns(2) with col1: - st.metric("Number of Scenes", len(detection_stats["scene_changes"]) + 1) - with col2: - if len(detection_stats["scene_changes"]) > 0: - avg_scene_duration = 10.0 / (len(detection_stats["scene_changes"]) + 1) - st.metric("Average Scene Duration", f"{avg_scene_duration:.2f}s") - else: - st.info("No scene changes detected in this video.") + st.markdown("**Top Objects:**") + for obj, count in list(sorted_objects.items())[:10]: + st.markdown(f"- {obj}: {count} occurrences") - with tab4: - st.markdown("#### 📝 Text & Labels") - - col1, col2 = st.columns(2) + # Display face detection summary + if "Face Detection" in analysis_types and results["detection_stats"]["faces"] > 0: + st.markdown("#### 👤 Face Analysis") + st.markdown(f"Total faces detected: {results['detection_stats']['faces']}") + + # Display text detection summary + if "Text" in analysis_types and results["detection_stats"]["text_blocks"] > 0: + st.markdown("#### 📝 Text Analysis") + st.markdown(f"Total text blocks detected: {results['detection_stats']['text_blocks']}") + + # Display label detection summary + if "Labels" in analysis_types and results["detection_stats"]["labels"]: + st.markdown("#### 🏷️ Scene Labels") - with col1: - st.markdown("##### Text Detection") - if 
detection_stats["text_blocks"] > 0: - st.metric("Text Blocks Detected", detection_stats["text_blocks"]) - st.info("Text recognition powered by Google Cloud Vision AI") - else: - st.info("No text detected in the video.") + # Sort labels by frequency + sorted_labels = dict(sorted(results["detection_stats"]["labels"].items(), + key=lambda x: x[1], reverse=True)) - with col2: - st.markdown("##### Scene Labels") - if detection_stats["labels"]: - # Sort labels by frequency - sorted_labels = dict(sorted(detection_stats["labels"].items(), - key=lambda x: x[1], reverse=True)) - - # Create pie chart for top labels - fig = px.pie( - names=list(sorted_labels.keys())[:7], - values=list(sorted_labels.values())[:7], - title="Distribution of Scene Labels", - hole=0.3 - ) - st.plotly_chart(fig, use_container_width=True) + # Create pie chart for top labels + if sorted_labels: + fig, ax = plt.subplots(figsize=(8, 8)) + top_labels = dict(list(sorted_labels.items())[:7]) + if len(sorted_labels) > 7: + other_count = sum(list(sorted_labels.values())[7:]) + top_labels["Other"] = other_count - # List labels - st.markdown("**Top Labels:**") - for label, count in list(sorted_labels.items())[:7]: - st.markdown(f"- {label}: {count} occurrences") - else: - st.info("No labels detected in the video.") - + ax.pie(top_labels.values(), labels=top_labels.keys(), autopct='%1.1f%%') + ax.set_title('Distribution of Scene Labels') + st.pyplot(fig) + except Exception as e: st.error(f"Error processing video: {str(e)}") @@ -2800,7 +2359,7 @@ if __name__ == "__main__": # Add this function to your app def extract_video_frames(video_bytes, num_frames=5): - """Extract frames from video bytes for thumbnail display""" + """Extract frames from video bytes for thumbnail display with improved key frame selection""" import cv2 import numpy as np import tempfile @@ -2817,24 +2376,73 @@ def extract_video_frames(video_bytes, num_frames=5): # Get video properties frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + fps = cap.get(cv2.CAP_PROP_FPS) - # Calculate frame intervals - interval = max(1, frame_count // (num_frames + 1)) - - # Extract frames at intervals + # Use more sophisticated frame selection based on content analysis frames = [] - for i in range(1, num_frames + 1): - frame_position = i * interval - cap.set(cv2.CAP_PROP_POS_FRAMES, frame_position) + frame_scores = [] + sample_interval = max(1, frame_count // (num_frames * 3)) # Sample more frames than needed + + # First pass: collect frame scores + prev_frame = None + frame_index = 0 + + while len(frame_scores) < num_frames * 3 and frame_index < frame_count: + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index) + ret, frame = cap.read() + if not ret: + break + + # Convert to grayscale for analysis + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + gray = cv2.GaussianBlur(gray, (21, 21), 0) + + # Calculate frame score based on Laplacian variance (focus measure) + focus_score = cv2.Laplacian(gray, cv2.CV_64F).var() + + # Calculate frame difference if we have a previous frame + diff_score = 0 + if prev_frame is not None: + frame_diff = cv2.absdiff(gray, prev_frame) + diff_score = np.mean(frame_diff) + + # Combined score: favor sharp frames with significant changes + combined_score = focus_score * 0.6 + diff_score * 0.4 + frame_scores.append((frame_index, combined_score)) + + # Store frame for next comparison + prev_frame = gray + frame_index += sample_interval + + # Second pass: select the best frames based on scores + # Sort by score and get top N frames + sorted_frames = 
sorted(frame_scores, key=lambda x: x[1], reverse=True) + best_frames = sorted_frames[:num_frames] + # Sort back by frame index to maintain chronological order + selected_frames = sorted(best_frames, key=lambda x: x[0]) + + # Extract the selected frames + for idx, _ in selected_frames: + cap.set(cv2.CAP_PROP_POS_FRAMES, idx) ret, frame = cap.read() if ret: + # Apply subtle enhancement to frames + enhanced_frame = frame.copy() + # Auto color balance + lab = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2LAB) + l, a, b = cv2.split(lab) + clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) + cl = clahe.apply(l) + enhanced_lab = cv2.merge((cl, a, b)) + enhanced_frame = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2BGR) + # Convert to RGB (from BGR) - frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + frame_rgb = cv2.cvtColor(enhanced_frame, cv2.COLOR_BGR2RGB) # Convert to PIL Image pil_img = Image.fromarray(frame_rgb) # Save to bytes img_byte_arr = io.BytesIO() - pil_img.save(img_byte_arr, format='JPEG') + pil_img.save(img_byte_arr, format='JPEG', quality=90) frames.append(img_byte_arr.getvalue()) # Clean up @@ -2842,607 +2450,4 @@ def extract_video_frames(video_bytes, num_frames=5): import os os.unlink(temp_video_path) - return frames - -def load_yolo_model(): - """Load YOLOv4-tiny model for object detection""" - # Create directory for models if it doesn't exist - models_dir = Path("models") - models_dir.mkdir(exist_ok=True) - - # Paths for YOLO files - weights_path = models_dir / "yolov4-tiny.weights" - cfg_path = models_dir / "yolov4-tiny.cfg" - names_path = models_dir / "coco.names" - - # Download YOLO files if they don't exist - if not weights_path.exists(): - st.info("Downloading YOLOv4-tiny weights (first time only)...") - import urllib.request - urllib.request.urlretrieve( - "https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights", - str(weights_path) - ) - - if not cfg_path.exists(): - st.info("Downloading YOLOv4-tiny configuration (first time only)...") - import urllib.request - urllib.request.urlretrieve( - "https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4-tiny.cfg", - str(cfg_path) - ) - - if not names_path.exists(): - st.info("Downloading COCO class names (first time only)...") - import urllib.request - urllib.request.urlretrieve( - "https://raw.githubusercontent.com/AlexeyAB/darknet/master/data/coco.names", - str(names_path) - ) - - # Load YOLO model - net = cv2.dnn.readNet(str(weights_path), str(cfg_path)) - - # Load class names - with open(str(names_path), "r") as f: - classes = [line.strip() for line in f.readlines()] - - # Get output layer names - layer_names = net.getLayerNames() - try: - # OpenCV 4.5.4+ - output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()] - except: - # Older OpenCV versions - output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] - - return net, classes, output_layers - -def load_haar_cascades(): - """Load Haar cascade classifiers for face detection""" - # Create directory for models if it doesn't exist - models_dir = Path("models") - models_dir.mkdir(exist_ok=True) - - # Paths for Haar cascade files - face_cascade_path = models_dir / "haarcascade_frontalface_default.xml" - - # Download Haar cascade files if they don't exist - if not face_cascade_path.exists(): - st.info("Downloading Haar cascade face detector (first time only)...") - import urllib.request - urllib.request.urlretrieve( - 
"https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml", - str(face_cascade_path) - ) - - # Load face cascade - face_cascade = cv2.CascadeClassifier(str(face_cascade_path)) - - return face_cascade - -# Add these functions before process_video_file - -def detect_objects_yolo(frame, net, classes, output_layers, confidence_threshold=0.5): - """Detect objects in frame using YOLOv4-tiny""" - height, width, _ = frame.shape - - # Prepare image for YOLO - blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False) - net.setInput(blob) - - # Forward pass - layer_outputs = net.forward(output_layers) - - # Initialize lists for detected objects - boxes = [] - confidences = [] - class_ids = [] - - # Process each output layer - for output in layer_outputs: - for detection in output: - scores = detection[5:] - class_id = np.argmax(scores) - confidence = scores[class_id] - - if confidence > confidence_threshold: - # Scale box coordinates to frame size - center_x = int(detection[0] * width) - center_y = int(detection[1] * height) - w = int(detection[2] * width) - h = int(detection[3] * height) - - # Rectangle coordinates - x = int(center_x - w / 2) - y = int(center_y - h / 2) - - # Add to lists - boxes.append([x, y, w, h]) - confidences.append(float(confidence)) - class_ids.append(class_id) - - # Apply non-maximum suppression - indexes = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, 0.4) - - # Prepare results - results = [] - - if len(indexes) > 0: - # Ensure indexes is properly flattened (OpenCV 4.5.4+ vs older versions) - try: - flat_indexes = indexes.flatten() - except: - flat_indexes = indexes - - for i in flat_indexes: - box = boxes[i] - x, y, w, h = box - label = str(classes[class_ids[i]]) - confidence = confidences[i] - - results.append({ - "box": (x, y, w, h), - "label": label, - "confidence": confidence - }) - - return results - -def detect_faces_haar(frame, face_cascade): - """Detect faces using Haar cascades""" - # Convert to grayscale for Haar cascade - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - - # Detect faces - faces = face_cascade.detectMultiScale( - gray, - scaleFactor=1.1, - minNeighbors=5, - minSize=(30, 30) - ) - - # Prepare results - results = [] - - for (x, y, w, h) in faces: - results.append({ - "box": (x, y, w, h) - }) - - return results - -def calculate_optical_flow(prev_gray, current_gray): - """Calculate optical flow between frames for motion detection""" - # Calculate flow using Lucas-Kanade method - flow = cv2.calcOpticalFlowFarneback( - prev_gray, current_gray, - None, 0.5, 3, 15, 3, 5, 1.2, 0 - ) - - # Calculate magnitude and angle - magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1]) - - # Create visualization - motion_mask = np.zeros_like(prev_gray) - - # Normalize magnitude for visualization - norm_magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX) - motion_mask = norm_magnitude.astype(np.uint8) - - # Calculate motion metrics - motion_level = np.mean(magnitude) - motion_area = np.sum(magnitude > 0.5) / (magnitude.shape[0] * magnitude.shape[1]) - - return motion_level, motion_area, motion_mask, flow - -def stabilize_frame(frame, prev_frame_gray, current_frame_gray): - """Stabilize video frame using optical flow""" - # Calculate optical flow - flow = cv2.calcOpticalFlowFarneback( - prev_frame_gray, current_frame_gray, - None, 0.5, 3, 15, 3, 5, 1.2, 0 - ) - - # Calculate the median flow vectors - h, w = flow.shape[:2] - flow_median_x = 
np.median(flow[..., 0]) - flow_median_y = np.median(flow[..., 1]) - - # Create transformation matrix for affine transform - transform = np.array([[1, 0, -flow_median_x], [0, 1, -flow_median_y]], dtype=np.float32) - - # Apply affine transformation to stabilize the frame - stabilized_frame = cv2.warpAffine(frame, transform, (w, h)) - - return stabilized_frame - -def create_tracker(tracker_type="CSRT"): - (major_ver, minor_ver, subminor_ver) = cv2.__version__.split('.') - - # For OpenCV 4.5.x and newer - if int(major_ver) >= 4 and int(minor_ver) >= 5: - if tracker_type == "CSRT": - return cv2.legacy.TrackerCSRT_create() - elif tracker_type == "KCF": - return cv2.legacy.TrackerKCF_create() - # Add other tracker types as needed - else: - # For older OpenCV versions - if tracker_type == "CSRT": - return cv2.TrackerCSRT_create() - elif tracker_type == "KCF": - return cv2.TrackerKCF_create() - # Add other tracker types as needed - -class ObjectTracker: - """Manages object tracking across video frames""" - - def __init__(self, tracker_type="CSRT", max_disappeared=30): - self.tracker_type = tracker_type - self.trackers = {} # Dict of active trackers - self.disappeared = {} # Count of frames where object disappeared - self.max_disappeared = max_disappeared # Max frames to keep tracking after disappearance - self.next_object_id = 0 # Counter for object IDs - self.objects = {} # Dict of tracked object positions {ID: (x, y, w, h, label)} - - def register(self, frame, bbox, label="Object"): - """Register a new object to track""" - # Create a new tracker - tracker = create_tracker(self.tracker_type) - tracker.init(frame, bbox) - - # Register the object - object_id = self.next_object_id - self.trackers[object_id] = tracker - self.objects[object_id] = (*bbox, label) - self.disappeared[object_id] = 0 - - # Increment the counter - self.next_object_id += 1 - - return object_id - - def deregister(self, object_id): - """Stop tracking an object""" - # Remove from dictionaries - self.trackers.pop(object_id, None) - self.objects.pop(object_id, None) - self.disappeared.pop(object_id, None) - - def update(self, frame): - """Update all trackers with new frame""" - # Check if we have no objects - if len(self.trackers) == 0: - return self.objects - - # Initialize a list of updated objects - updated_objects = {} - - # Loop through tracked objects - for object_id in list(self.trackers.keys()): - # Get the tracker - tracker = self.trackers[object_id] - - # Update the tracker - success, bbox = tracker.update(frame) - - if success: - # Successfully tracked, reset disappeared counter - self.disappeared[object_id] = 0 - - # Update object position, keeping the same label - _, _, _, _, label = self.objects[object_id] - self.objects[object_id] = (*bbox, label) - updated_objects[object_id] = self.objects[object_id] - else: - # Tracking failed, increment disappeared counter - self.disappeared[object_id] += 1 - - # If object has disappeared for too long, deregister it - if self.disappeared[object_id] > self.max_disappeared: - self.deregister(object_id) - else: - # Keep the last known position - updated_objects[object_id] = self.objects[object_id] - - return updated_objects - - def draw_tracked_objects(self, frame, objects): - """Draw bounding boxes and IDs for tracked objects""" - for object_id, (x, y, w, h, label) in objects.items(): - # Convert to integer coordinates - x, y, w, h = int(x), int(y), int(w), int(h) - - # Draw bounding box - cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) - - # Draw ID and label - text = 
f"ID:{object_id} {label}" - cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) - - return frame - -def segment_image(frame, method="watershed", rect=None): - """Segment an image into foreground and background regions""" - if method == "watershed": - # Watershed segmentation - - # Convert to grayscale if needed - if len(frame.shape) == 3: - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - else: - gray = frame.copy() - frame = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) - - # Apply threshold - _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) - - # Noise removal with morphological operations - kernel = np.ones((3, 3), np.uint8) - opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2) - - # Sure background area - sure_bg = cv2.dilate(opening, kernel, iterations=3) - - # Finding sure foreground area - dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5) - _, sure_fg = cv2.threshold(dist_transform, 0.7*dist_transform.max(), 255, 0) - - # Finding unknown region - sure_fg = np.uint8(sure_fg) - unknown = cv2.subtract(sure_bg, sure_fg) - - # Marker labeling - _, markers = cv2.connectedComponents(sure_fg) - - # Add 1 to all labels so that background is 1 instead of 0 - markers = markers + 1 - - # Mark the unknown region with 0 - markers[unknown == 255] = 0 - - # Apply watershed - markers = cv2.watershed(frame, markers) - - # Create visualization with boundaries - segmented = frame.copy() - segmented[markers == -1] = [0, 0, 255] # Mark boundaries in red - - # Create a colored mask for visualization - mask = np.zeros_like(frame) - for label in np.unique(markers): - if label > 1: # Skip background (1) and boundaries (-1) - # Create a random color for this segment - color = np.random.randint(0, 255, size=3, dtype=np.uint8) - mask[markers == label] = color - - # Blend the original image with the segmentation mask - result = cv2.addWeighted(frame, 0.7, mask, 0.3, 0) - - return result, markers - - elif method == "grabcut": - # GrabCut segmentation - - # Create mask and temporary arrays - mask = np.zeros(frame.shape[:2], np.uint8) - bgd_model = np.zeros((1, 65), np.float64) - fgd_model = np.zeros((1, 65), np.float64) - - # If no rectangle provided, use center portion of image - if rect is None: - h, w = frame.shape[:2] - rect = (w//4, h//4, w//2, h//2) - - # Apply GrabCut - cv2.grabCut(frame, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT) - - # Create mask where certain == background (0) and probable == background (2) - mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8') - - # Create segmented image - segmented = frame * mask2[:, :, np.newaxis] - - # Create visualization that highlights foreground - highlight = frame.copy() - highlight_mask = np.zeros_like(frame) - highlight_mask[mask2 == 1] = [0, 255, 0] # Green for foreground - result = cv2.addWeighted(highlight, 0.7, highlight_mask, 0.3, 0) - - return result, mask - - else: - return frame, None # Return original frame if method not recognized - -def detect_edges(frame, method="canny", low_threshold=100, high_threshold=200): - """Detect edges in an image using various methods""" - # Convert to grayscale if needed - if len(frame.shape) == 3: - gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) - else: - gray = frame - - # Apply Gaussian blur to reduce noise - blurred = cv2.GaussianBlur(gray, (5, 5), 0) - - if method == "canny": - # Canny edge detector - edges = cv2.Canny(blurred, low_threshold, high_threshold) - # Convert back to 3-channel for 
visualization - return cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR) - - elif method == "sobel": - # Sobel edge detector - sobel_x = cv2.Sobel(blurred, cv2.CV_64F, 1, 0, ksize=3) - sobel_y = cv2.Sobel(blurred, cv2.CV_64F, 0, 1, ksize=3) - - # Calculate magnitude and convert to uint8 - magnitude = cv2.magnitude(sobel_x, sobel_y) - magnitude = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8) - - # Convert back to 3-channel for visualization - return cv2.cvtColor(magnitude, cv2.COLOR_GRAY2BGR) - - elif method == "laplacian": - # Laplacian edge detector - laplacian = cv2.Laplacian(blurred, cv2.CV_64F) - laplacian = np.uint8(np.absolute(laplacian)) - laplacian = cv2.normalize(laplacian, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8) - - # Convert back to 3-channel for visualization - return cv2.cvtColor(laplacian, cv2.COLOR_GRAY2BGR) - - else: - return frame # Return original frame if method not recognized - -def load_ssd_model(): - """ - Load SSD MobileNet model for object detection using OpenCV DNN - - Returns: - tuple: (net, classes, output_layer_names) - """ - # Model paths - you'll need to download these files - model_path = "models/ssd_mobilenet_v2_coco_2018_03_29.pb" - config_path = "models/ssd_mobilenet_v2_coco_2018_03_29.pbtxt" - classes_path = "models/coco_classes.txt" - - # Load the network - net = cv2.dnn.readNetFromTensorflow(model_path, config_path) - - # Load class names - with open(classes_path, 'r') as f: - classes = [line.strip() for line in f.readlines()] - - # Get output layer names - output_layer_names = net.getUnconnectedOutLayersNames() - - return net, classes, output_layer_names - -def detect_objects_ssd(frame, net, classes, output_layer_names, confidence_threshold=0.5): - """ - Detect objects in a frame using SSD model - - Args: - frame: Input image frame - net: Loaded DNN model - classes: List of class names - output_layer_names: Names of output layers - confidence_threshold: Minimum confidence to consider a detection - - Returns: - list: List of detected objects with label, confidence, and bounding box - """ - height, width = frame.shape[:2] - - # Create a blob from the image - blob = cv2.dnn.blobFromImage(frame, size=(300, 300), swapRB=True) - - # Set the input and perform forward pass - net.setInput(blob) - detections = net.forward() - - objects = [] - - # Process detection results - for i in range(detections.shape[2]): - confidence = detections[0, 0, i, 2] - - if confidence > confidence_threshold: - # Get the class ID - class_id = int(detections[0, 0, i, 1]) - - # Calculate bounding box coordinates - box = detections[0, 0, i, 3:7] * np.array([width, height, width, height]) - (x_min, y_min, x_max, y_max) = box.astype("int") - - # Ensure box is within image boundaries - x_min = max(0, x_min) - y_min = max(0, y_min) - x_max = min(width, x_max) - y_max = min(height, y_max) - - # Get width and height - w = x_max - x_min - h = y_max - y_min - - # Add detection to results - if 0 <= class_id < len(classes): - objects.append({ - 'label': classes[class_id], - 'confidence': float(confidence), - 'bbox': (x_min, y_min, w, h) - }) - - return objects - -def download_ssd_model_files(): - """Download required model files for SSD MobileNet""" - import os - import urllib.request - import tarfile - - # Create models directory if it doesn't exist - if not os.path.exists("models"): - os.makedirs("models") - - # Download SSD MobileNet model - model_url = "http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz" - tar_file = 
"models/ssd_mobilenet_v2_coco_2018_03_29.tar.gz" - - if not os.path.exists("models/ssd_mobilenet_v2_coco_2018_03_29.pb"): - print("Downloading SSD MobileNet model...") - urllib.request.urlretrieve(model_url, tar_file) - - # Extract the tar file - with tarfile.open(tar_file, 'r:gz') as tar: - tar.extractall(path="models") - - # Move the .pb file to the models directory - os.rename("models/ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb", - "models/ssd_mobilenet_v2_coco_2018_03_29.pb") - - # Create config file if it doesn't exist - if not os.path.exists("models/ssd_mobilenet_v2_coco_2018_03_29.pbtxt"): - config_content = """ - # SSD MobileNet v2 COCO configuration - model { - ssd { - num_classes: 90 - box_coder { - faster_rcnn_box_coder { - y_scale: 10.0 - x_scale: 10.0 - height_scale: 5.0 - width_scale: 5.0 - } - } - # ... more configuration ... - } - } - """ - with open("models/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", 'w') as f: - f.write(config_content) - - # Create class names file if it doesn't exist - if not os.path.exists("models/coco_classes.txt"): - coco_classes = [ - "background", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", - "truck", "boat", "traffic light", "fire hydrant", "street sign", "stop sign", - "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", - "bear", "zebra", "giraffe", "hat", "backpack", "umbrella", "shoe", "eye glasses", - "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", - "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", - "plate", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", - "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", - "chair", "couch", "potted plant", "bed", "mirror", "dining table", "window", "desk", - "toilet", "door", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", - "microwave", "oven", "toaster", "sink", "refrigerator", "blender", "book", "clock", - "vase", "scissors", "teddy bear", "hair drier", "toothbrush" - ] - - with open("models/coco_classes.txt", 'w') as f: - f.write("\n".join(coco_classes)) - - print("Model files ready!") + return frames \ No newline at end of file