"""Gradio app exposing several YOLOv8-based video analytics features.

Each detector class loads a YOLO model, annotates every frame of an input
video, and writes the annotated frames to an MP4 file whose path is returned.
"""

import gradio as gr
import torch
import cv2
import numpy as np
import time
from ultralytics import YOLO
import spaces
import os

# Class index treated as "person" by every detector in this file.
PERSON_CLASS_ID = 0

# Frame rate used when the container does not report one (cap.get returns 0).
_FALLBACK_FPS = 30

_RED = (0, 0, 255)
_GREEN = (0, 255, 0)
_BLUE = (255, 0, 0)


def _load_yolo(model_path, fallback_weights):
    """Load a YOLO model and move it to CUDA when available, else CPU.

    Args:
        model_path: Path of the weights file to load.
        fallback_weights: Weights name handed to ``YOLO`` when ``model_path``
            does not exist; the loaded model is then saved to ``model_path``.

    Returns:
        The loaded ``YOLO`` model.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if os.path.exists(model_path):
        model = YOLO(model_path)
    else:
        model = YOLO(fallback_weights)
        model.save(model_path)
    model.to(device)
    return model


def _draw_labeled_box(frame, box, label, color):
    """Draw a rectangle for ``box`` (x1, y1, x2, y2) with ``label`` above it."""
    x1, y1, x2, y2 = map(int, box)
    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
    cv2.putText(frame, label, (x1, y1 - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def _process_frames(video_path, output_path, annotate):
    """Run ``annotate`` on every frame of a video and write the result.

    Args:
        video_path: Path of the input video.
        output_path: Path of the MP4 file to write.
        annotate: Callable ``annotate(frame, fps)`` that draws on ``frame``
            in place.

    Returns:
        The number of frames read and written.

    Raises:
        ValueError: If the input cannot be opened or the writer cannot be
            initialised.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"❌ Failed to open video: {video_path}")

    out = None
    frame_count = 0
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS)) or _FALLBACK_FPS
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"),
                              fps, (width, height))
        if not out.isOpened():
            raise ValueError("❌ Failed to initialize video writer")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            annotate(frame, fps)
            out.write(frame)
    finally:
        # Release both handles even when annotation raises mid-video.
        cap.release()
        if out is not None:
            out.release()
    return frame_count


def _checked_output(frame_count, output_path):
    """Return ``output_path`` after verifying that processing produced output.

    Raises:
        ValueError: If no frame was processed or the file does not exist.
    """
    if frame_count == 0 or not os.path.exists(output_path):
        raise ValueError(
            "❌ Processing failed: No frames processed or output not created")
    return output_path


class CrowdDetection:
    """Count people per frame and flag frames above a crowd threshold."""

    def __init__(self, model_path="yolov8n.pt", crowd_threshold=10):
        self.model_path = model_path
        # A frame is flagged when it contains MORE than this many people.
        self.crowd_threshold = crowd_threshold

    @spaces.GPU
    def crowd_detection(self, video_path):
        """Annotate people and a crowd banner; return the output video path.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = _load_yolo(self.model_path, "yolov8n.pt")

            def annotate(frame, _fps):
                person_count = 0
                for result in model(frame):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) != PERSON_CLASS_ID:
                            continue
                        person_count += 1
                        _draw_labeled_box(frame, box, "Person", _GREEN)

                crowded = person_count > self.crowd_threshold
                banner = "Crowd Alert!" if crowded else f"People: {person_count}"
                cv2.putText(frame, banner, (50, 50), cv2.FONT_HERSHEY_SIMPLEX,
                            1, _RED if crowded else _GREEN, 2)

            output_path = "output_crowd.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in crowd_detection: {e}") from e


class PeopleTracking:
    """Track people across frames and label each box with its track id."""

    def __init__(self, yolo_model_path="yolov8n.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def people_tracking(self, video_path):
        """Annotate people with track ids; return the output video path.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = _load_yolo(self.model_path, "yolov8n.pt")

            def annotate(frame, _fps):
                for result in model.track(frame, persist=True):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    if result.boxes.id is not None:
                        ids = result.boxes.id.cpu().numpy()
                    else:
                        # No track ids for this frame: fall back to
                        # positional indices so boxes are still labelled.
                        ids = np.arange(len(boxes))
                    for box, cls, obj_id in zip(boxes, classes, ids):
                        if int(cls) == PERSON_CLASS_ID:
                            _draw_labeled_box(frame, box,
                                              f"ID {int(obj_id)}", _BLUE)

            output_path = "output_tracking.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in people_tracking: {e}") from e


class FallDetection:
    """Flag people whose bounding box is wider than a standing posture."""

    # Boxes with width / height above this ratio are labelled as a fall.
    FALL_ASPECT_RATIO = 0.55

    def __init__(self, yolo_model_path="yolov8l.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def fall_detection(self, video_path):
        """Annotate people as fallen or standing; return the output path.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = _load_yolo(self.model_path, "yolov8l.pt")

            def annotate(frame, _fps):
                for result in model(frame):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) != PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        box_w = x2 - x1
                        box_h = y2 - y1
                        # A degenerate zero-height box counts as fallen.
                        ratio = box_w / box_h if box_h > 0 else float("inf")
                        if ratio > self.FALL_ASPECT_RATIO:
                            _draw_labeled_box(frame, box, "FALL DETECTED", _RED)
                        else:
                            _draw_labeled_box(frame, box, "Standing", _GREEN)

            output_path = "output_fall.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in fall_detection: {e}") from e


class FightDetection:
    """Flag raised-arm poses using a pose model as a fight heuristic."""

    def __init__(self, yolo_model_path="yolov8n-pose.pt"):
        self.model_path = yolo_model_path

    @staticmethod
    def _arm_raised(kp, y1, y2):
        """Return True when keypoint 5 or 7 lies in the top 30% of the box.

        Indices 5 and 7 are presumably the left shoulder and left elbow of
        the COCO 17-keypoint layout -- confirm against the pose model used.
        NOTE(review): keypoints the model could not localise may be reported
        as (0, 0), which would satisfy this test -- confirm and filter if so.
        """
        if len(kp) <= 7:
            return False
        upper_limit = y1 + (y2 - y1) * 0.3
        return bool(kp[5][1] < upper_limit or kp[7][1] < upper_limit)

    @spaces.GPU
    def fight_detection(self, video_path):
        """Annotate suspected fights; return the output video path.

        Each person is labelled according to their own pose. The frame-level
        "FIGHT ALERT!" banner is shown when at least one person matches the
        heuristic and more than one person is present.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = _load_yolo(self.model_path, "yolov8n-pose.pt")

            def annotate(frame, _fps):
                any_fight = False
                person_count = 0
                for result in model.track(frame, persist=True):
                    keypoints = (result.keypoints.xy.cpu().numpy()
                                 if result.keypoints else [])
                    boxes = (result.boxes.xyxy.cpu().numpy()
                             if result.boxes else [])
                    classes = (result.boxes.cls.cpu().numpy()
                               if result.boxes else [])
                    for box, kp, cls in zip(boxes, keypoints, classes):
                        if int(cls) != PERSON_CLASS_ID:
                            continue
                        person_count += 1
                        _x1, y1, _x2, y2 = map(int, box)
                        # Per-person flag: one person's pose must not change
                        # the label of people drawn after them.
                        fighting = self._arm_raised(kp, y1, y2)
                        any_fight = any_fight or fighting
                        if fighting:
                            _draw_labeled_box(frame, box, "FIGHT DETECTED", _RED)
                        else:
                            _draw_labeled_box(frame, box, "Person", _GREEN)

                if any_fight and person_count > 1:
                    cv2.putText(frame, "FIGHT ALERT!", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, _RED, 2)

            output_path = "output_fight.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in fight_detection: {e}") from e


class IntrusionDetection:
    """Label every sufficiently confident person detection as an intruder."""

    def __init__(self, model_path="yolov8n.pt", max_intrusion_time=300,
                 iou_threshold=0.5, conf_threshold=0.5):
        self.model_path = model_path
        # max_intrusion_time and iou_threshold are stored but not read by
        # detect_intrusion below.
        self.max_intrusion_time = max_intrusion_time
        self.iou_threshold = iou_threshold
        self.conf_threshold = conf_threshold

    @spaces.GPU
    def detect_intrusion(self, video_path):
        """Annotate confident person detections; return the output path.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = _load_yolo(self.model_path, "yolov8n.pt")

            def annotate(frame, _fps):
                for result in model(frame):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    confidences = result.boxes.conf.cpu().numpy()
                    for box, cls, conf in zip(boxes, classes, confidences):
                        # Person class with confidence filter
                        if int(cls) == PERSON_CLASS_ID and conf > self.conf_threshold:
                            _draw_labeled_box(frame, box, "Intruder", _RED)

            output_path = "output_intrusion.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in detect_intrusion: {e}") from e


class LoiteringDetection:
    """Measure how long tracked people stay inside a fixed polygon."""

    def __init__(self, model_path="yolov8n.pt", loitering_threshold=10,
                 conf_threshold=0.5):
        self.model_path = model_path
        # Seconds inside the area after which a track is marked as loitering.
        self.loitering_threshold = loitering_threshold
        self.conf_threshold = conf_threshold
        # track id -> {'duration': seconds in area, 'loitering': bool}
        self.entry_time = {}
        # Polygon vertices of the monitored area, in pixel coordinates.
        self.area = [(153, 850), (139, 535), (239, 497), (291, 857)]

    # NOTE(review): every other detector applies @spaces.GPU to the method
    # that runs inference, while here only model loading is decorated.
    # Confirm that inference in detect_loitering gets a GPU on ZeroGPU.
    @spaces.GPU
    def load_model(self):
        """Load the YOLO model on CUDA when available, else on CPU."""
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = YOLO(self.model_path).to(device)
        return model

    def calculate_center(self, box):
        """Return the integer (x, y) centre of an (x1, y1, x2, y2) box."""
        x1, y1, x2, y2 = box
        return int((x1 + x2) / 2), int((y1 + y2) / 2)

    def track_time(self, id, frame_duration):
        """Accumulate time in the area for track ``id``.

        The first sighting only registers the track; later sightings add
        ``frame_duration`` seconds and set the loitering flag once the total
        exceeds ``loitering_threshold``.
        """
        if id not in self.entry_time:
            self.entry_time[id] = {'duration': 0, 'loitering': False}
        else:
            self.entry_time[id]['duration'] += frame_duration
        record = self.entry_time[id]
        if record['duration'] > self.loitering_threshold:
            record['loitering'] = True

    def detect_loitering(self, video_path):
        """Annotate dwell times and the monitored area; return output path.

        Raises:
            ValueError: On any failure, wrapping the original exception.
        """
        try:
            model = self.load_model()
            # Loop-invariant polygon arrays, built once.
            zone = np.array(self.area, np.int32)
            zone_outline = zone.reshape((-1, 1, 2))

            def annotate(frame, fps):
                frame_duration = 1 / fps
                results = model.track(frame, conf=self.conf_threshold,
                                      iou=0.1, classes=[0], persist=True)
                detections = results[0].boxes
                boxes = detections.xyxy.cpu().tolist()
                # boxes.id is None when nothing is tracked in this frame.
                if detections.id is not None:
                    raw_ids = detections.id.cpu().tolist()
                else:
                    raw_ids = []

                ids_in_area = []
                for box, raw_id in zip(boxes, raw_ids):
                    # Ids arrive as floats; OpenCV text positions and the
                    # overlay label both need integers.
                    track_id = int(raw_id)
                    center = self.calculate_center(box)
                    if cv2.pointPolygonTest(zone, center, False) >= 0:
                        ids_in_area.append(track_id)
                        self.track_time(track_id, frame_duration)

                any_loitering = False
                for track_id in ids_in_area:
                    record = self.entry_time[track_id]
                    any_loitering = any_loitering or record['loitering']
                    cv2.putText(
                        frame,
                        f"ID {track_id}, Time: {record['duration']:.1f}s",
                        (15, 30 + track_id * 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        _RED if record['loitering'] else _GREEN, 2)

                outline_color = _RED if any_loitering else (152, 251, 152)
                cv2.polylines(frame, [zone_outline], isClosed=True,
                              color=outline_color, thickness=3)

            output_path = "output_loitering.mp4"
            frame_count = _process_frames(video_path, output_path, annotate)
            return _checked_output(frame_count, output_path)
        except Exception as e:
            raise ValueError(f"Error in detect_loitering: {e}") from e


# Feature label -> (detector class, name of the method that processes a video).
# Method names are listed explicitly because they do not follow one pattern
# (e.g. crowd_detection vs. detect_intrusion).
_FEATURE_HANDLERS = {
    "Crowd Detection": (CrowdDetection, "crowd_detection"),
    "People Tracking": (PeopleTracking, "people_tracking"),
    "Fall Detection": (FallDetection, "fall_detection"),
    "Fight Detection": (FightDetection, "fight_detection"),
    "Intrusion Detection": (IntrusionDetection, "detect_intrusion"),
    "Loitering Detection": (LoiteringDetection, "detect_loitering"),
}


# Unified processing function with status output
def process_video(feature, video):
    """Run the selected feature on a video.

    Args:
        feature: One of the keys of ``_FEATURE_HANDLERS``.
        video: Path of the input video.

    Returns:
        A ``(status_message, output_path)`` tuple; ``output_path`` is ``None``
        when processing failed.
    """
    if feature not in _FEATURE_HANDLERS:
        return f"Error: Unknown feature: {feature}", None
    if not video:
        return "Error: No video provided", None

    detector_cls, method_name = _FEATURE_HANDLERS[feature]
    try:
        output_path = getattr(detector_cls(), method_name)(video)
        return f"{feature} completed successfully", output_path
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return f"Error: {str(e)}", None


# Gradio Interface with dual outputs
interface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(choices=list(_FEATURE_HANDLERS), label="Select Feature"),
        gr.Video(label="Upload Video"),
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Video(label="Processed Video"),
    ],
    title="YOLOv8 Multitask Video Processing",
    description=(
        "Select a feature to process your video: Crowd Detection, "
        "People Tracking, Fall Detection, Fight Detection, "
        "Intrusion Detection, or Loitering Detection."
    ),
)

if __name__ == "__main__":
    interface.launch(debug=True)