# SecurityDemo
#
# Sleeping
#
# File size: 9,904 Bytes

import os
import time

import cv2
import gradio as gr
import numpy as np
import spaces
import torch
from ultralytics import YOLO

class CrowdDetection:
    """Annotate a video with person boxes and a per-frame crowd alert.

    The class itself is intentionally NOT wrapped in ``@spaces.GPU``: that
    decorator is meant for the GPU-bound call (``detect_crowd``), not for
    the class object.
    """

    # Class index treated as "person" in the model output.
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, model_path="yolov8n.pt"):
        """Load the YOLO model and move it to CUDA when available.

        Args:
            model_path: Path of the weights file. When it does not exist,
                ``"yolov8n.pt"`` is loaded instead (Ultralytics fetches it
                automatically) and then saved to ``model_path``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        if not os.path.exists(model_path):
            self.model = YOLO("yolov8n.pt")  # This downloads the model automatically
            self.model.save(model_path)  # Save locally
        else:
            self.model = YOLO(model_path)
        self.model.to(self.device)

    @spaces.GPU
    def detect_crowd(self, video_path, crowd_threshold=10):
        """Process a video with YOLO and write an annotated copy.

        Every detected person gets a green box and a "Person" label. Each
        frame also shows either the person count or "Crowd Alert!" when the
        count exceeds ``crowd_threshold``.

        Args:
            video_path: Path of the input video.
            crowd_threshold: A frame with more people than this triggers
                the alert text.

        Returns:
            Path of the annotated video (``"output_crowd.mp4"``).

        Raises:
            ValueError: If the input cannot be opened, the writer cannot be
                created, or the video contains no frames.
            FileNotFoundError: If the output file is missing after writing.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_crowd.mp4"
        out = None
        frame_count = 0

        try:
            # Keep the real frame rate (e.g. 29.97) instead of truncating it,
            # and fall back when the container reports 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break  # End of video

                frame_count += 1

                # Run YOLO inference on the frame
                results = self.model(frame)

                # Count and draw persons in a single pass over the detections.
                person_count = 0
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()

                    for box, cls in zip(boxes, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        person_count += 1
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                        cv2.putText(frame, "Person", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Display the count, or the alert once the threshold is exceeded.
                crowded = person_count > crowd_threshold
                alert_text = "Crowd Alert!" if crowded else f"People: {person_count}"
                alert_color = (0, 0, 255) if crowded else (0, 255, 0)
                cv2.putText(frame, alert_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            alert_color, 2)

                out.write(frame)
        finally:
            # Release the handles even when inference or writing raises.
            cap.release()
            if out is not None:
                out.release()

        if frame_count == 0:
            raise ValueError("❌ No frames were processed!")

        if not os.path.exists(output_path):
            raise FileNotFoundError(f"❌ Output video not found: {output_path}")

        return output_path

# Define Gradio interface function
def process_video(video):
    """Run crowd detection on *video* for the Gradio UI.

    Returns a ``(status message, output path)`` pair. Any exception is
    turned into an ``"Error: ..."`` status with ``None`` as the path, so the
    UI always receives two values.
    """
    try:
        # A fresh detector is built per request (avoids pickling a shared one).
        result_path = CrowdDetection().detect_crowd(video)
    except Exception as exc:
        return f"Error: {exc!s}", None
    return "Crowd detection complete!", result_path

# Build the crowd-detection UI: an upload column (video + button) next to a
# results column (status text + annotated video).
with gr.Blocks() as demo:
    gr.Markdown("# Crowd Detection with YOLOv8")
    gr.Markdown("Upload a video to detect people and get crowd alerts (threshold: 10 people)")
    
    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Upload Video")
            submit_btn = gr.Button("Detect Crowd")
        with gr.Column():
            status_output = gr.Textbox(label="Status")
            video_output = gr.Video(label="Result")
    
    # process_video returns a (status, output path) pair, one value per output.
    submit_btn.click(
        fn=process_video,
        inputs=[video_input],
        outputs=[status_output, video_output]
    )

# NOTE(review): this launch runs at module level, before the classes and the
# second interface defined further down the file. It looks like a leftover from
# an earlier single-feature version; confirm whether it should remain.
demo.launch(debug=True)


class PeopleTracking:
    """Annotate a video with a box and track ID for every tracked person."""

    # Class index treated as "person" in the model output.
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8n.pt"):
        """Load the YOLO model and move it to CUDA when available."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = YOLO(yolo_model_path).to(self.device)

    @staticmethod
    def _resolve_track_ids(raw_ids, count):
        """Return track IDs as an array, or ``0..count-1`` when there are none.

        ``raw_ids`` is ``None`` on frames where the tracker assigned no IDs;
        calling ``.cpu()`` on it would crash, so fall back to positional indices.
        """
        if raw_ids is None:
            return np.arange(count)
        return raw_ids.cpu().numpy()

    def track_people(self, video_path):
        """Track people through a video and write an annotated copy.

        Args:
            video_path: Path of the input video.

        Returns:
            Path of the annotated video (``"output_tracking.mp4"``).

        Raises:
            ValueError: If the input cannot be opened or the writer cannot
                be created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_tracking.mp4"
        out = None

        try:
            # Keep the real frame rate; fall back when it is reported as 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # persist=True keeps tracker state between consecutive frames.
                results = self.model.track(frame, persist=True)
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()
                    ids = self._resolve_track_ids(
                        getattr(result.boxes, "id", None), len(boxes)
                    )

                    for box, cls, obj_id in zip(boxes, classes, ids):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                        cv2.putText(frame, f"ID {int(obj_id)}", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

                out.write(frame)
        finally:
            # Release the handles even when tracking or writing raises.
            cap.release()
            if out is not None:
                out.release()

        return output_path

# Define Fall Detection
class FallDetection:
    """Label each detected person as fallen or standing from box proportions."""

    # Class index treated as "person" in the model output.
    PERSON_CLASS_ID = 0
    # A box whose width/height ratio exceeds this is labelled as a fall.
    FALL_ASPECT_RATIO = 0.55
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8l.pt"):
        """Load the YOLO model from ``yolo_model_path``."""
        self.model = YOLO(yolo_model_path)

    @classmethod
    def _classify_posture(cls, width, height):
        """Return ``(label, BGR color)`` for a box, or ``None`` if degenerate.

        Boxes with a non-positive width or height (possible after integer
        truncation of tiny detections) have no meaningful aspect ratio and
        would otherwise cause a division by zero.
        """
        if width <= 0 or height <= 0:
            return None
        if width / height > cls.FALL_ASPECT_RATIO:
            return "FALL DETECTED", (0, 0, 255)
        return "Standing", (0, 255, 0)

    def detect_fall(self, video_path):
        """Annotate a video with a fall/standing label per detected person.

        Args:
            video_path: Path of the input video.

        Returns:
            Path of the annotated video (``"output_fall.mp4"``).

        Raises:
            ValueError: If the input cannot be opened or the writer cannot
                be created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_fall.mp4"
        out = None

        try:
            # Keep the real frame rate; fall back when it is reported as 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                results = self.model(frame)
                for result in results:
                    boxes = result.boxes.xyxy.cpu().numpy()
                    classes = result.boxes.cls.cpu().numpy()

                    for box, cls in zip(boxes, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        x1, y1, x2, y2 = map(int, box)
                        posture = self._classify_posture(x2 - x1, y2 - y1)
                        if posture is None:
                            continue  # Degenerate box: nothing sensible to draw.
                        label, color = posture

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                        cv2.putText(frame, label, (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                out.write(frame)
        finally:
            # Release the handles even when inference or writing raises.
            cap.release()
            if out is not None:
                out.release()

        return output_path

# Define Fight Detection
class FightDetection:
    """Box every person found by a pose model and label the box.

    NOTE(review): no fight heuristic is implemented here. Every detected
    person is labelled "FIGHT DETECTED", exactly as before. Confirm whether
    a real pose-based rule is still to be added.
    """

    # Class index treated as "person" in the model output.
    PERSON_CLASS_ID = 0
    # Frame rate used when the container does not report a usable one.
    FALLBACK_FPS = 30.0

    def __init__(self, yolo_model_path="yolov8n-pose.pt"):
        """Load the pose model and move it to CUDA when available."""
        self.model = YOLO(yolo_model_path).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    @staticmethod
    def _keypoint_bbox(keypoints):
        """Return ``(x1, y1, x2, y2)`` enclosing the visible keypoints.

        A keypoint at exactly (0, 0) is treated as not detected.
        NOTE(review): Ultralytics is understood to report undetected
        keypoints at the origin; confirm against its documentation.
        Returns ``None`` when no keypoint is visible.
        """
        points = np.asarray(keypoints, dtype=float).reshape(-1, 2)
        visible = points[(points != 0).any(axis=1)]
        if visible.size == 0:
            return None
        x1, y1 = visible.min(axis=0)
        x2, y2 = visible.max(axis=0)
        return int(x1), int(y1), int(x2), int(y2)

    def detect_fight(self, video_path):
        """Annotate a video with a labelled box around each person's keypoints.

        The box spans all visible keypoints. Using only the first and last
        keypoint, as before, does not give a bounding box and collapses to
        the origin whenever either of them is undetected.

        Args:
            video_path: Path of the input video.

        Returns:
            Path of the annotated video (``"output_fight.mp4"``).

        Raises:
            ValueError: If the input cannot be opened or the writer cannot
                be created.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            cap.release()
            raise ValueError(f"❌ Failed to open video: {video_path}")

        output_path = "output_fight.mp4"
        out = None

        try:
            # Keep the real frame rate; fall back when it is reported as 0 or NaN.
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps > 0:
                fps = self.FALLBACK_FPS
            frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                          int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
            if not out.isOpened():
                raise ValueError(f"❌ Failed to initialize video writer for {output_path}")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # persist=True keeps tracker state between consecutive frames.
                results = self.model.track(frame, persist=True)
                for result in results:
                    keypoints = result.keypoints.xy.cpu().numpy() if result.keypoints else []
                    classes = result.boxes.cls.cpu().numpy() if result.boxes else []

                    for kp, cls in zip(keypoints, classes):
                        if int(cls) != self.PERSON_CLASS_ID:
                            continue
                        bbox = self._keypoint_bbox(kp)
                        if bbox is None:
                            continue  # No visible keypoints for this person.
                        x1, y1, x2, y2 = bbox
                        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                        cv2.putText(frame, "FIGHT DETECTED", (x1, y1 - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

                out.write(frame)
        finally:
            # Release the handles even when inference or writing raises.
            cap.release()
            if out is not None:
                out.release()

        return output_path

# Function to process video based on selected feature
def process_video(feature, video):
    """Run the detector selected by *feature* on *video*.

    The method to call is looked up explicitly per feature. Deriving it from
    the label (e.g. ``detect_crowd_detection``) matched none of the real
    method names, so every feature raised ``AttributeError``.

    Args:
        feature: One of "Crowd Detection", "People Tracking",
            "Fall Detection" or "Fight Detection".
        video: The uploaded video, passed to the detector as its video path.

    Returns:
        Whatever the selected detector method returns (the output video path).

    Raises:
        ValueError: If *feature* is not one of the supported labels, for
            example when nothing was selected in the dropdown.
    """
    # Feature label -> name of the method implementing it on its detector.
    method_names = {
        "Crowd Detection": "detect_crowd",
        "People Tracking": "track_people",
        "Fall Detection": "detect_fall",
        "Fight Detection": "detect_fight",
    }
    try:
        method_name = method_names[feature]
    except (KeyError, TypeError):
        supported = ", ".join(method_names)
        raise ValueError(
            f"Unknown feature {feature!r}; select one of: {supported}"
        ) from None

    detectors = {
        "Crowd Detection": CrowdDetection,
        "People Tracking": PeopleTracking,
        "Fall Detection": FallDetection,
        "Fight Detection": FightDetection,
    }

    detector = detectors[feature]()
    return getattr(detector, method_name)(video)

# Multi-feature front end: choose a detector from the dropdown, upload a video,
# and receive the processed video produced by process_video.
interface = gr.Interface(
    title="YOLOv8 Multitask Video Processing",
    fn=process_video,
    inputs=[
        gr.Dropdown(
            label="Select Feature",
            choices=[
                "Crowd Detection",
                "People Tracking",
                "Fall Detection",
                "Fight Detection",
            ],
        ),
        gr.Video(label="Upload Video"),
    ],
    outputs=gr.Video(label="Processed Video"),
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(debug=True)