Spaces:
Sleeping
Sleeping
import os
import time

import cv2
import gradio as gr
import numpy as np
import spaces
import torch
from ultralytics import YOLO
class CrowdDetection:
    """Count people per video frame with YOLOv8 and flag crowded frames.

    Every frame of the input video is annotated with a box per detected
    person plus either a running people count or a crowd alert, and the
    result is written to ``output_crowd.mp4``.
    """

    # Class id 0 is treated as "person" (the label used by the pretrained
    # YOLOv8 weights this class loads).
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, model_path="yolov8n.pt"):
        """Load the YOLO weights and move the model to CUDA when available.

        Args:
            model_path: Local weights file. When it does not exist, the stock
                ``yolov8n.pt`` weights are loaded instead and saved to this
                path for the next run.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if os.path.exists(model_path):
            self.model = YOLO(model_path)
        else:
            self.model = YOLO("yolov8n.pt")
            self.model.save(model_path)
        self.model.to(self.device)

    @staticmethod
    def _status_overlay(person_count, crowd_threshold):
        """Return the ``(text, BGR colour)`` banner for one frame."""
        if person_count > crowd_threshold:
            return "Crowd Alert!", (0, 0, 255)
        return f"People: {person_count}", (0, 255, 0)

    def detect_crowd(self, video_path, crowd_threshold=10):
        """Annotate ``video_path`` and return the path of the written video.

        Args:
            video_path: Path of the video to analyse.
            crowd_threshold: A frame showing more than this many people is
                labelled "Crowd Alert!" instead of the plain count.

        Returns:
            The output file path, ``"output_crowd.mp4"``.

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, or the video contains no frames.
            FileNotFoundError: If the output file is missing after writing.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_crowd.mp4"
        out = None
        frame_count = 0
        try:
            # Keep the fractional rate (e.g. 29.97) so playback speed is not
            # altered, and fall back when the container reports 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break  # End of video
                frame_count += 1

                # Count and draw in a single pass over the detections.
                person_count = 0
                for result in self.model(frame):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        person_count += 1
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        cv2.putText(frame, "Person", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                text, colour = self._status_overlay(person_count, crowd_threshold)
                cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            colour, 2)
                out.write(frame)
        finally:
            # Release the handles even when inference or writing fails.
            cap.release()
            if out is not None:
                out.release()

        if frame_count == 0:
            raise ValueError("❌ No frames were processed!")
        if not os.path.exists(output_path):
            raise FileNotFoundError(f"❌ Output video not found: {output_path}")
        return output_path
# Gradio callback for the crowd-detection UI
def process_video(video):
    """Run crowd detection on an uploaded video for the Gradio UI.

    Args:
        video: Path of the uploaded video, or ``None``/empty when nothing
            was uploaded.

    Returns:
        A ``(status message, output video path)`` tuple. On any failure the
        message starts with ``"Error: "`` and the path is ``None``, so the UI
        shows the problem instead of crashing.
    """
    if not video:
        # Report a missing upload clearly instead of an obscure downstream error.
        return "Error: No video was provided.", None
    try:
        detector = CrowdDetection()  # Instantiate inside to avoid pickling
        output_path = detector.detect_crowd(video)
    except Exception as e:  # UI boundary: report every failure as text
        return f"Error: {e}", None
    return "Crowd detection complete!", output_path
# Single-feature UI: upload on the left, status and annotated video on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Crowd Detection with YOLOv8")
    gr.Markdown("Upload a video to detect people and get crowd alerts (threshold: 10 people)")

    with gr.Row():
        # Input column
        with gr.Column():
            video_input = gr.Video(label="Upload Video")
            submit_btn = gr.Button("Detect Crowd")
        # Result column
        with gr.Column():
            status_output = gr.Textbox(label="Status")
            video_output = gr.Video(label="Result")

    # The callback returns (status text, output path), matching the two outputs.
    submit_btn.click(
        process_video,
        inputs=[video_input],
        outputs=[status_output, video_output],
    )

# NOTE(review): this launch runs at import time and, with debug=True, appears
# to block until the server stops, so code below it would not run while this
# UI is being served. Confirm that is intended.
demo.launch(debug=True)
class PeopleTracking:
    """Track people across video frames with YOLOv8 and label each track id.

    The annotated video is written to ``output_tracking.mp4``.
    """

    # Class id 0 is treated as "person".
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8n.pt"):
        """Load the YOLO weights and move the model to CUDA when available.

        Args:
            yolo_model_path: Weights file passed to ``YOLO``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Assign first, then move: the attribute then holds the YOLO wrapper
        # regardless of what ``.to()`` returns.
        self.model = YOLO(yolo_model_path)
        self.model.to(self.device)

    @staticmethod
    def _resolve_track_ids(raw_ids, box_count):
        """Return one id per box, using positional ids when none were assigned.

        ``raw_ids`` is ``result.boxes.id``; it is ``None`` when no track ids
        are available for the frame, in which case calling ``.cpu()`` on it
        would fail.
        """
        if raw_ids is None:
            return np.arange(box_count)
        return raw_ids.cpu().numpy()

    def track_people(self, video_path):
        """Annotate ``video_path`` with per-person track ids.

        Args:
            video_path: Path of the video to analyse.

        Returns:
            The output file path, ``"output_tracking.mp4"``.

        Raises:
            ValueError: If the video cannot be opened or the writer cannot be
                created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_tracking.mp4"
        out = None
        try:
            # Keep the fractional rate; fall back when it is 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                # persist=True keeps tracker state between consecutive frames.
                for result in self.model.track(frame, persist=True):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    ids = self._resolve_track_ids(result.boxes.id, len(boxes))
                    for box, cls, obj_id in zip(boxes, classes, ids):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                        cv2.putText(frame, f"ID {int(obj_id)}", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                out.write(frame)
        finally:
            # Release the handles even when tracking or writing fails.
            cap.release()
            if out is not None:
                out.release()
        return output_path
# Define Fall Detection
class FallDetection:
    """Label each detected person as fallen or standing from box proportions.

    A person whose bounding box has a width/height ratio above
    ``FALL_ASPECT_RATIO`` is labelled "FALL DETECTED"; everyone else is
    labelled "Standing". The annotated video is written to
    ``output_fall.mp4``.
    """

    # Class id 0 is treated as "person".
    PERSON_CLASS_ID = 0
    # Width/height ratio above which a box counts as a fall.
    FALL_ASPECT_RATIO = 0.55
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8l.pt"):
        """Load the YOLO weights.

        Args:
            yolo_model_path: Weights file passed to ``YOLO``.
        """
        self.model = YOLO(yolo_model_path)

    @classmethod
    def _is_fall(cls, box_width, box_height):
        """Return True when the box is wide enough, relative to its height, to count as a fall.

        Boxes with non-positive height (possible after integer truncation of
        very small detections) cannot be assessed and are reported as not
        fallen rather than dividing by zero.
        """
        if box_height <= 0:
            return False
        return box_width / box_height > cls.FALL_ASPECT_RATIO

    def detect_fall(self, video_path):
        """Annotate ``video_path`` with a fall/standing label per person.

        Args:
            video_path: Path of the video to analyse.

        Returns:
            The output file path, ``"output_fall.mp4"``.

        Raises:
            ValueError: If the video cannot be opened or the writer cannot be
                created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_fall.mp4"
        out = None
        try:
            # Keep the fractional rate; fall back when it is 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                for result in self.model(frame):
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    for box, cls in zip(boxes, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        if self._is_fall(x2 - x1, y2 - y1):
                            color = (0, 0, 255)
                            label = "FALL DETECTED"
                        else:
                            color = (0, 255, 0)
                            label = "Standing"
                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                out.write(frame)
        finally:
            # Release the handles even when inference or writing fails.
            cap.release()
            if out is not None:
                out.release()
        return output_path
# Define Fight Detection
class FightDetection:
    """Mark detected people in a video using a YOLOv8 pose model.

    The annotated video is written to ``output_fight.mp4``.

    NOTE(review): every detected person is labelled "FIGHT DETECTED"; no
    pose-based heuristic decides whether a fight is actually happening.
    Confirm whether this is a placeholder.
    """

    # Class id 0 is treated as "person".
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8n-pose.pt"):
        """Load the pose weights and move the model to CUDA when available.

        Args:
            yolo_model_path: Weights file passed to ``YOLO``.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Assign first, then move: the attribute then holds the YOLO wrapper
        # regardless of what ``.to()`` returns.
        self.model = YOLO(yolo_model_path)
        self.model.to(device)

    @staticmethod
    def _keypoint_corners(person_keypoints):
        """Return ``(x1, y1, x2, y2)`` from the first and last keypoint of one person.

        NOTE(review): the rectangle spans two individual keypoints, not the
        detection's bounding box; confirm this is the intended region.
        """
        first, last = person_keypoints[0], person_keypoints[-1]
        return int(first[0]), int(first[1]), int(last[0]), int(last[1])

    def detect_fight(self, video_path):
        """Annotate ``video_path`` and return the path of the written video.

        Args:
            video_path: Path of the video to analyse.

        Returns:
            The output file path, ``"output_fight.mp4"``.

        Raises:
            ValueError: If the video cannot be opened or the writer cannot be
                created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_fight.mp4"
        out = None
        try:
            # Keep the fractional rate; fall back when it is 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                for result in self.model.track(frame, persist=True):
                    keypoints = result.keypoints.xy.cpu().numpy() if result.keypoints else []
                    classes = result.boxes.cls.cpu().numpy() if result.boxes else []
                    for kp, cls in zip(keypoints, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = self._keypoint_corners(kp)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                        cv2.putText(frame, "FIGHT DETECTED", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
                out.write(frame)
        finally:
            # Release the handles even when inference or writing fails.
            cap.release()
            if out is not None:
                out.release()
        return output_path
# Function to process video based on selected feature
def process_video(feature, video):
    """Run the detector selected in the UI on ``video``.

    Args:
        feature: One of "Crowd Detection", "People Tracking",
            "Fall Detection" or "Fight Detection".
        video: Path of the uploaded video.

    Returns:
        The path of the annotated output video.

    Raises:
        ValueError: If ``feature`` is not a known feature or no video was
            provided.
    """
    # Each feature maps to its real entry point. The method names are not
    # uniform (detect_crowd, track_people, ...), so they cannot be derived
    # from the feature label. Lambdas defer building a detector until the
    # feature has been validated.
    runners = {
        "Crowd Detection": lambda path: CrowdDetection().detect_crowd(path),
        "People Tracking": lambda path: PeopleTracking().track_people(path),
        "Fall Detection": lambda path: FallDetection().detect_fall(path),
        "Fight Detection": lambda path: FightDetection().detect_fight(path),
    }
    run = runners.get(feature)
    if run is None:
        known = ", ".join(runners)
        raise ValueError(f"Unknown feature: {feature!r}. Choose one of: {known}")
    if not video:
        raise ValueError("No video was provided.")
    return run(video)
# Multi-feature UI: pick a detector, upload a video, get the annotated video back.
interface = gr.Interface(
    title="YOLOv8 Multitask Video Processing",
    fn=process_video,
    inputs=[
        # Order matters: it matches process_video(feature, video).
        gr.Dropdown(
            label="Select Feature",
            choices=[
                "Crowd Detection",
                "People Tracking",
                "Fall Detection",
                "Fight Detection",
            ],
        ),
        gr.Video(label="Upload Video"),
    ],
    outputs=gr.Video(label="Processed Video"),
)

# Only start the server when the file is executed as a script.
if __name__ == "__main__":
    interface.launch(debug=True)