import os
import time
import tempfile

import cv2
import numpy as np
import torch
import gradio as gr
import spaces
from ultralytics import YOLO
from shapely.geometry import Point, Polygon
import moviepy.editor as mpy


class CrowdDetection:
    def __init__(self, model_path="yolov8n.pt"):
        self.model_path = model_path

    @spaces.GPU
    def crowd_detect(self, video_path):
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

            output_path = "output_crowd.mp4"
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            CROWD_THRESHOLD = 10
            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, (width, height))
                frame_count += 1

                results = model(frame)
                person_count = sum(
                    1
                    for result in results
                    for cls in result.boxes.cls.cpu().numpy()
                    if int(cls) == 0
                )

                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) == 0:
                            x1, y1, x2, y2 = map(int, box)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                            cv2.putText(frame, "Person", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                alert_text = ("Crowd Alert!" if person_count > CROWD_THRESHOLD
                              else f"People: {person_count}")
                cv2.putText(frame, alert_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (0, 0, 255) if person_count > CROWD_THRESHOLD else (0, 255, 0), 2)
                out.write(frame)

            cap.release()
            out.release()
            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in crowd_detection: {str(e)}")


class PeopleTracking:
    def __init__(self, yolo_model_path="yolov8n.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def people_tracking(self, video_path):
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

            output_path = "output_tracking.mp4"
            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                  fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, (width, height))

                results = model.track(frame, persist=True)
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    # Fall back to positional indices when the tracker assigns no IDs
                    ids = (result.boxes.id.cpu().numpy()
                           if result.boxes.id is not None
                           else np.arange(len(boxes)))
                    for box, cls, obj_id in zip(boxes, classes, ids):
                        if int(cls) == 0:
                            x1, y1, x2, y2 = map(int, box)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                            cv2.putText(frame, f"ID {int(obj_id)}", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                out.write(frame)

            cap.release()
            out.release()
            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in people_tracking: {str(e)}")
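
# FallDetection below uses a bounding-box aspect-ratio heuristic: a person
# whose box is wider relative to its height (width/height > 0.55) is labeled
# as fallen. Illustrative numbers: a 120x60 box gives 2.0 -> "FALL DETECTED",
# while a 60x160 box gives 0.375 -> "Standing". Crouching or seated people
# can exceed the threshold, so expect some false positives.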
class FallDetection:
    def __init__(self, yolo_model_path="yolov8l.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def fall_detect(self, video_path):
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            # Load YOLOv8 model
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8l.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

            output_path = "output_fall.mp4"
            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                  fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            # Process every 3rd frame to reduce compute. Skipped frames are not
            # written, so the output plays back faster than real time.
            frame_skip = 3
            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    print("⚠️ No more frames to read. Exiting loop.")
                    break
                frame_count += 1
                if frame_count % frame_skip != 0:
                    continue
                frame = cv2.resize(frame, (width, height))

                # Run inference without gradient tracking
                with torch.no_grad():
                    results = model.predict(frame, imgsz=640, device=device)

                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) == 0:
                            x1, y1, x2, y2 = map(int, box)
                            obj_width = x2 - x1
                            obj_height = y2 - y1
                            aspect_ratio = (obj_width / obj_height
                                            if obj_height > 0 else float("inf"))
                            # A wide, short box suggests a lying posture
                            if aspect_ratio > 0.55:
                                color = (0, 0, 255)
                                label = "FALL DETECTED"
                            else:
                                color = (0, 255, 0)
                                label = "Standing"
                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, label, (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                out.write(frame)

            # ✅ Release resources after processing
            cap.release()
            out.release()
            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in fall_detection: {str(e)}")
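
# FightDetection combines two cues from the pose model: proximity (the nose
# keypoints of two people within PROXIMITY_THRESHOLD pixels) and mutual fast
# arm motion (elbow keypoints moving more than FIGHT_THRESHOLD pixels between
# processed frames). Both values are in resized-frame pixels and will likely
# need retuning for other resolutions or frame rates.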
class FightDetection:
    def __init__(self, yolo_model_path="yolov8n-pose.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def fight_detect(self, video_path):
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            # Load YOLO pose model
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n-pose.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            # Halve the output FPS to compensate for processing every 2nd frame
            fps = int(cap.get(cv2.CAP_PROP_FPS)) // 2
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

            output_path = "output_fight.mp4"
            out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                  fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            # Fight detection parameters
            FIGHT_THRESHOLD = 2.0      # Min arm displacement per processed frame (px)
            PROXIMITY_THRESHOLD = 100  # Max distance between two people (px)
            frame_skip = 2
            frame_count = 0
            person_movements = {}

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break  # End of video
                frame_count += 1
                if frame_count % frame_skip != 0:
                    continue  # Skip frames for performance
                frame = cv2.resize(frame, (width, height))

                results = model.track(frame, persist=True)
                current_time = time.time()
                persons = []

                for result in results:
                    keypoints = (result.keypoints.xy.cpu().numpy()
                                 if result.keypoints is not None else [])
                    classes = (result.boxes.cls.cpu().numpy()
                               if result.boxes is not None else [])
                    ids = (result.boxes.id.cpu().numpy()
                           if result.boxes.id is not None else [])
                    for i, (kp, cls) in enumerate(zip(keypoints, classes)):
                        if int(cls) == 0:  # Person class
                            # Fall back to a position-based key if no track ID exists
                            person_id = (int(ids[i]) if len(ids) > i
                                         else f"{int(kp[0][0])}-{int(kp[0][1])}")
                            persons.append((person_id, kp))
                            if person_id not in person_movements:
                                person_movements[person_id] = []
                            person_movements[person_id].append((current_time, kp))
                            # Draw keypoints
                            for point in kp:
                                x, y = int(point[0]), int(point[1])
                                cv2.circle(frame, (x, y), 5, (255, 255, 0), -1)

                # Check each pair of nearby people for fast mutual arm movement
                fight_detected = False
                for i in range(len(persons)):
                    for j in range(i + 1, len(persons)):
                        person1, kp1 = persons[i]
                        person2, kp2 = persons[j]
                        distance = np.linalg.norm(kp1[0] - kp2[0])
                        if distance > PROXIMITY_THRESHOLD:
                            continue  # Ignore if too far apart
                        if (len(person_movements[person1]) > 1
                                and len(person_movements[person2]) > 1):
                            # Keypoints 7 and 8 are the elbows in the COCO layout,
                            # used here as a proxy for hand/arm motion
                            hands1 = np.mean(kp1[[7, 8]], axis=0)
                            hands2 = np.mean(kp2[[7, 8]], axis=0)
                            prev_hands1 = person_movements[person1][-2][1][[7, 8]].mean(axis=0)
                            prev_hands2 = person_movements[person2][-2][1][[7, 8]].mean(axis=0)
                            speed1 = np.linalg.norm(hands1 - prev_hands1)
                            speed2 = np.linalg.norm(hands2 - prev_hands2)
                            if speed1 > FIGHT_THRESHOLD and speed2 > FIGHT_THRESHOLD:
                                fight_detected = True
                                x1, y1 = int(kp1[0][0]), int(kp1[0][1])
                                x2, y2 = int(kp2[0][0]), int(kp2[0][1])
                                cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 255), 3)
                                cv2.putText(frame, "FIGHT DETECTED", (x1, y1 - 10),
                                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                if fight_detected:
                    cv2.putText(frame, "FIGHT ALERT!", (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                out.write(frame)

            cap.release()
            out.release()
            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in fight_detection: {str(e)}")
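
# IntrusionDetection marks every confident person detection as an intruder.
# IntrusionDetectionEn below refines this by skipping people whose average
# clothing color matches a predefined staff-uniform palette.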
class IntrusionDetection:
    def __init__(self, model_path="yolov8n.pt", max_intrusion_time=300,
                 iou_threshold=0.5, conf_threshold=0.5):
        self.model_path = model_path
        self.max_intrusion_time = max_intrusion_time  # Currently unused
        self.iou_threshold = iou_threshold            # Currently unused
        self.conf_threshold = conf_threshold

    @spaces.GPU
    def intrusion_detect(self, video_path):
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

            output_path = "output_intrusion.mp4"
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1
                frame = cv2.resize(frame, (width, height))

                results = model(frame)
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    confidences = result.boxes.conf.cpu().numpy()
                    for box, cls, conf in zip(boxes, classes, confidences):
                        # Person class with confidence filter
                        if int(cls) == 0 and conf > self.conf_threshold:
                            x1, y1, x2, y2 = map(int, box)
                            label = "Intruder"
                            color = (0, 0, 255)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, label, (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                out.write(frame)

            cap.release()
            out.release()
            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in intrusion_detect: {str(e)}")


class IntrusionDetectionEn:
    def __init__(self, model_path="yolov8n.pt", max_intrusion_time=300,
                 iou_threshold=0.5, conf_threshold=0.7):
        self.model_path = model_path
        self.max_intrusion_time = max_intrusion_time  # Currently unused
        self.iou_threshold = iou_threshold            # Currently unused
        self.conf_threshold = conf_threshold
        # Predefined staff uniform colors (RGB format)
        self.staff_colors = [
            (139, 143, 133),  # Grayish tone
            (146, 150, 140),  # Light grayish tone
            (146, 152, 141),  # Muted gray-green
            (143, 147, 136),  # Gray-green
            (48, 59, 71),     # Dark blue/gray
        ]
        # 🔹 Load the model once
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = YOLO(self.model_path).to(self.device)

    def is_staff(self, person_crop):
        """Checks whether a detected person is a staff member based on clothing color."""
        if person_crop.size == 0:
            return False  # Degenerate crop; treat as non-staff
        avg_color = np.mean(person_crop, axis=(0, 1))  # Compute average color (BGR)
        avg_color = avg_color[::-1]  # Convert BGR to RGB
        # Compute Euclidean distance to each known staff color
        for color in self.staff_colors:
            dist = np.linalg.norm(np.array(avg_color) - np.array(color))
            if dist < 30:  # Threshold to consider it a match
                return True
        return False

    @spaces.GPU
    def intrusion_detect_en(self, video_path):
        try:
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"❌ Failed to open video: {video_path}")

            fps = int(cap.get(cv2.CAP_PROP_FPS))
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            output_path = "output_intrusion.mp4"
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                cap.release()
                raise ValueError("❌ Failed to initialize video writer")

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame_count += 1

                results = self.model(frame)  # 🔹 Use preloaded model
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    confidences = result.boxes.conf.cpu().numpy()
                    for box, cls, conf in zip(boxes, classes, confidences):
                        if int(cls) == 0 and conf > self.conf_threshold:  # Person class
                            x1, y1, x2, y2 = map(int, box)
                            x1, y1 = max(x1, 0), max(y1, 0)  # Clamp to frame bounds
                            person_crop = frame[y1:y2, x1:x2]
                            if self.is_staff(person_crop):
                                continue  # Ignore staff members
                            label = "Intruder"
                            color = (0, 0, 255)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, label, (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                out.write(frame)

            cap.release()
            out.release()
            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in intrusion_detect_en: {str(e)}")
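
# FireAndSmokeDetection expects custom-trained weights ("fire_model.pt",
# assumed to be present alongside this script) and renders each frame's
# detections with Ultralytics' built-in result.plot() before re-encoding
# the clip with MoviePy.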
class FireAndSmokeDetection:
    def __init__(self, model_path="fire_model.pt"):
        self.model_path = model_path

    @spaces.GPU
    def fire_and_smoke_detect(self, video_path):
        model = YOLO(self.model_path, task="detect")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)

        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps == 0:
            fps = 30
        fps = int(fps)

        # Read the whole clip into memory (only suitable for short clips)
        frames = []
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(frame)
        cap.release()
        if not frames:
            return None

        # Process frames one by one and keep the annotated output
        processed_frames = []
        for frame in frames:
            result = model(frame)
            processed_frames.append(result[0].plot())

        # Convert frames from BGR (OpenCV) to RGB (MoviePy expects RGB)
        processed_frames_rgb = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
                                for f in processed_frames]

        # Use MoviePy to assemble the video file with H.264 encoding
        output_video_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
        clip = mpy.ImageSequenceClip(processed_frames_rgb, fps=fps)
        clip.write_videofile(output_video_path, codec="libx264", audio=False,
                             verbose=False, logger=None)
        return output_video_path


class LoiteringDetection:
    def __init__(self, model_path="loitering_model.pt"):
        self.model_path = model_path

    @spaces.GPU
    def loitering_detect(self, video_path, area):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = YOLO(self.model_path)
        model.to(device)

        person_info = {}
        time_threshold = 5        # Seconds in zone before flagging loitering
        detection_threshold = 0.6

        # Create the polygon zone (coordinates scaled to the fixed output size)
        if area == "131":
            zone_points = [(842 // 1.5, 514 // 1.7), (686 // 1.5, 290 // 1.7),
                           (775 // 1.5, 279 // 1.7), (961 // 1.5, 488 // 1.7)]
        elif area == "145":
            zone_points = [(153 // 1.8, 850 // 1.7), (139 // 1.8, 535 // 1.7),
                           (239 // 1.8, 497 // 1.7), (291 // 1.8, 857 // 1.7)]
        else:
            raise ValueError(f"❌ Unknown loitering area: {area} (expected '131' or '145')")
        zone = Polygon(zone_points)

        # Open video; output uses a fixed 1152x648 resolution
        cap = cv2.VideoCapture(video_path)
        width = 1152
        height = 648
        fps = int(cap.get(cv2.CAP_PROP_FPS))

        # Create video writer
        output_path = os.path.join(tempfile.gettempdir(), "loitering_video.mp4")
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, (width, height))

            # Perform detection and tracking; 0 is the class ID for person
            results = model.track(frame, persist=True, classes=[0],
                                  conf=detection_threshold)

            # List to store time information for display
            time_display = []
            if results[0].boxes.id is not None:
                boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
                ids = results[0].boxes.id.cpu().numpy().astype(int)
                for box, id in zip(boxes, ids):
                    x1, y1, x2, y2 = box
                    center = Point((x1 + x2) / 2, (y1 + y2) / 2)
                    if id not in person_info:
                        person_info[id] = {"in_zone": False, "start_time": None,
                                           "duration": 0}
                    if zone.contains(center):
                        if not person_info[id]["in_zone"]:
                            person_info[id]["in_zone"] = True
                            person_info[id]["start_time"] = time.time()
                        # Note: durations measure wall-clock processing time,
                        # not video time
                        person_info[id]["duration"] = (time.time()
                                                       - person_info[id]["start_time"])
                        if person_info[id]["duration"] > time_threshold:
                            color = (0, 0, 255)  # Red for loitering
                        else:
                            color = (0, 255, 0)  # Green for in zone
                        time_display.append(
                            f"ID: {id}, Time: {person_info[id]['duration']:.2f}s")
                    else:
                        person_info[id]["in_zone"] = False
                        person_info[id]["start_time"] = None
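
# The dispatcher below derives each detector's method name from its UI label,
# e.g.:
#   "Crowd Detection"          -> crowd_detect
#   "People Tracking"          -> people_tracking
#   "Fire And Smoke Detection" -> fire_and_smoke_detect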
                        person_info[id]["duration"] = 0
                        color = (255, 0, 0)  # Blue for outside zone
                    cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

            # Draw the polygon zone
            cv2.polylines(frame, [np.array(zone_points, np.int32)], True,
                          (255, 255, 0), 2)

            # Display time information in the top left
            for i, text in enumerate(time_display):
                cv2.putText(frame, text, (10, 30 + i * 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

            out.write(frame)

        cap.release()
        out.release()
        return output_path


def process_video(feature, video, area=None):
    detectors = {
        "Crowd Detection": CrowdDetection,
        "People Tracking": PeopleTracking,
        "Fall Detection": FallDetection,
        "Fight Detection": FightDetection,
        "Intrusion Detection": IntrusionDetection,
        "Intrusion Detection En": IntrusionDetectionEn,
        "Loitering Detection": LoiteringDetection,
        "Fire And Smoke Detection": FireAndSmokeDetection,
    }
    try:
        detector = detectors[feature]()
        # Derive the method name from the feature label (see mapping above)
        method_name = feature.lower().replace(" ", "_").replace("detection", "detect")
        if feature == "Loitering Detection":
            output_path = detector.loitering_detect(video, area)  # Needs the zone area
        else:
            output_path = getattr(detector, method_name)(video)
        return f"{feature} completed successfully", output_path
    except Exception as e:
        return f"Error: {str(e)}", None


# Gradio interface with an additional input for Loitering Detection
interface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Dropdown(choices=[
            "Crowd Detection", "People Tracking", "Fall Detection",
            "Fight Detection", "Intrusion Detection", "Intrusion Detection En",
            "Loitering Detection", "Fire And Smoke Detection"
        ], label="Select Feature"),
        gr.Video(label="Upload Video"),
        gr.Textbox(label="Loitering Area (131 or 145)"),
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Video(label="Processed Video"),
    ],
    title="City Stars Features Demo",
    description="Select a feature to process your video input.",
)

if __name__ == "__main__":
    interface.launch(debug=True)
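
# Direct (non-Gradio) usage sketch; "sample.mp4" is a placeholder path, and
# this assumes the @spaces.GPU decorator permits plain local calls:
#
#   detector = CrowdDetection()
#   print(detector.crowd_detect("sample.mp4"))  # -> "output_crowd.mp4"
#   print(LoiteringDetection().loitering_detect("sample.mp4", area="131"))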