import gradio as gr
import torch
import cv2
import numpy as np
import time
from ultralytics import YOLO
import os
import spaces

class CrowdDetection:
    """Draw person boxes on a video and flag frames that contain a crowd.

    Args:
        model_path: YOLO weights file to load. When the file does not exist,
            ``yolov8n.pt`` is loaded instead and saved to this path.
        crowd_threshold: A frame is labelled "Crowd Alert!" when it contains
            more than this many detected persons.
    """

    def __init__(self, model_path="yolov8n.pt", crowd_threshold=10):
        self.model_path = model_path
        self.crowd_threshold = crowd_threshold

    @spaces.GPU
    def crowd_detect(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_crowd.mp4``.

        Every frame is downscaled to half size, persons (class 0) are boxed,
        and a people count or crowd alert is drawn in the top-left corner.

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_crowd.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, no frame was processed, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_crowd.mp4"
            frame_count = 0
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = cv2.resize(frame, (width, height))
                    frame_count += 1

                    results = model(frame)
                    person_count = 0
                    for result in results:
                        boxes = result.boxes.xyxy.cpu().numpy()
                        classes = result.boxes.cls.cpu().numpy()
                        for box, cls in zip(boxes, classes):
                            if int(cls) != 0:  # only class 0 is counted/drawn as a person
                                continue
                            person_count += 1
                            x1, y1, x2, y2 = map(int, box)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                            cv2.putText(frame, "Person", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                    crowded = person_count > self.crowd_threshold
                    alert_text = "Crowd Alert!" if crowded else f"People: {person_count}"
                    alert_color = (0, 0, 255) if crowded else (0, 255, 0)
                    cv2.putText(frame, alert_text, (50, 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, alert_color, 2)
                    out.write(frame)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in crowd_detection: {str(e)}") from e

class PeopleTracking:
    """Draw a box and track ID on every person in a video.

    Args:
        yolo_model_path: YOLO weights file to load. When the file does not
            exist, ``yolov8n.pt`` is loaded instead and saved to this path.
    """

    def __init__(self, yolo_model_path="yolov8n.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def people_tracking(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_tracking.mp4``.

        Frames are downscaled to half size and passed to ``model.track`` with
        ``persist=True``; every class-0 box is drawn with its ID.

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_tracking.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, the output file is missing, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_tracking.mp4"
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)
                out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    frame = cv2.resize(frame, (width, height))
                    results = model.track(frame, persist=True)
                    for result in results:
                        boxes = result.boxes.xyxy.cpu().numpy()
                        classes = result.boxes.cls.cpu().numpy()
                        # When no IDs are reported, the position of each box in
                        # this frame's result is used as its label instead.
                        if result.boxes.id is not None:
                            ids = result.boxes.id.cpu().numpy()
                        else:
                            ids = np.arange(len(boxes))

                        for box, cls, obj_id in zip(boxes, classes, ids):
                            if int(cls) != 0:  # only class 0 is drawn as a person
                                continue
                            x1, y1, x2, y2 = map(int, box)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
                            cv2.putText(frame, f"ID {int(obj_id)}", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

                    out.write(frame)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in people_tracking: {str(e)}") from e

class FallDetection:
    """Label every person in a video as fallen or standing by box shape.

    Args:
        yolo_model_path: YOLO weights file to load. When the file does not
            exist, ``yolov8l.pt`` is loaded instead and saved to this path.
    """

    def __init__(self, yolo_model_path="yolov8l.pt"):
        self.model_path = yolo_model_path

    @spaces.GPU
    def fall_detect(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_fall.mp4``.

        Frames are downscaled to half size. A class-0 box whose width/height
        ratio exceeds 0.55 is drawn in red as "FALL DETECTED", otherwise in
        green as "Standing".

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_fall.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, the output file is missing, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8l.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_fall.mp4"
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)
                out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame = cv2.resize(frame, (width, height))
                    results = model(frame)
                    for result in results:
                        boxes = result.boxes.xyxy.cpu().numpy()
                        classes = result.boxes.cls.cpu().numpy()

                        for box, cls in zip(boxes, classes):
                            if int(cls) != 0:  # only class 0 is treated as a person
                                continue
                            x1, y1, x2, y2 = map(int, box)
                            # Box dimensions get their own names: reusing
                            # `width`/`height` here would change the size the
                            # next frame is resized to, which would then no
                            # longer match the writer's frame size.
                            box_w = x2 - x1
                            box_h = y2 - y1
                            aspect_ratio = box_w / box_h if box_h > 0 else float('inf')

                            if aspect_ratio > 0.55:
                                color = (0, 0, 255)
                                label = "FALL DETECTED"
                            else:
                                color = (0, 255, 0)
                                label = "Standing"

                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, label, (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    out.write(frame)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in fall_detection: {str(e)}") from e

class FightDetection:
    """Flag raised-arm poses in a video using a YOLO pose model.

    Args:
        yolo_model_path: YOLO pose weights file to load. When the file does
            not exist, ``yolov8n-pose.pt`` is loaded instead and saved to
            this path.
    """

    def __init__(self, yolo_model_path="yolov8n-pose.pt"):
        self.model_path = yolo_model_path

    @staticmethod
    def _arms_raised(kp, y1, y2):
        """Return True when keypoint 5 or 7 lies in the top 30% of the box.

        Keypoints with ``y == 0`` are skipped.
        NOTE(review): assumes Ultralytics reports undetected keypoints as
        (0, 0), and that indices 5 and 7 are the left shoulder and left elbow
        in the model's keypoint order; confirm both against the model docs.
        """
        if len(kp) <= 7:
            return False
        upper_limit = y1 + (y2 - y1) * 0.3
        return any(0 < kp[idx][1] < upper_limit for idx in (5, 7))

    @spaces.GPU
    def fight_detect(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_fight.mp4``.

        Frames are downscaled to half size. Each class-0 person is boxed in
        red with "FIGHT DETECTED" when that person's own pose matches the
        raised-arm test, otherwise in green with "Person". "FIGHT ALERT!" is
        drawn on a frame when at least one person matches and the frame holds
        more than one person.

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_fight.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, the output file is missing, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n-pose.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_fight.mp4"
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)
                out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break

                    frame = cv2.resize(frame, (width, height))
                    results = model.track(frame, persist=True)
                    fight_detected = False  # frame-wide: did anyone match?
                    person_count = 0

                    for result in results:
                        keypoints = result.keypoints.xy.cpu().numpy() if result.keypoints else []
                        boxes = result.boxes.xyxy.cpu().numpy() if result.boxes else []
                        classes = result.boxes.cls.cpu().numpy() if result.boxes else []

                        for box, kp, cls in zip(boxes, keypoints, classes):
                            if int(cls) != 0:
                                continue
                            person_count += 1
                            x1, y1, x2, y2 = map(int, box)
                            # Per-person flag, so one match does not turn every
                            # person iterated afterwards red as well.
                            arms_raised = self._arms_raised(kp, y1, y2)
                            fight_detected = fight_detected or arms_raised

                            color = (0, 0, 255) if arms_raised else (0, 255, 0)
                            label = "FIGHT DETECTED" if arms_raised else "Person"
                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, label, (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    if fight_detected and person_count > 1:
                        cv2.putText(frame, "FIGHT ALERT!", (50, 50),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    out.write(frame)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if not os.path.exists(output_path):
                raise ValueError("❌ Processing failed")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in fight_detection: {str(e)}") from e

class IntrusionDetection:
    """Mark every confidently detected person in a video as an intruder.

    Args:
        model_path: YOLO weights file to load. When the file does not exist,
            ``yolov8n.pt`` is loaded instead and saved to this path.
        max_intrusion_time: Stored on the instance; not read by
            ``intrusion_detect``.
        iou_threshold: Stored on the instance; not read by
            ``intrusion_detect``.
        conf_threshold: Only detections with a confidence strictly above this
            value are drawn.
    """

    def __init__(self, model_path="yolov8n.pt", max_intrusion_time=300, iou_threshold=0.5, conf_threshold=0.5):
        self.model_path = model_path
        self.max_intrusion_time = max_intrusion_time
        self.iou_threshold = iou_threshold
        self.conf_threshold = conf_threshold

    @spaces.GPU
    def intrusion_detect(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_intrusion.mp4``.

        Frames are downscaled to half size and every class-0 detection above
        the confidence threshold is boxed in red and labelled "Intruder".

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_intrusion.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, no frame was processed, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_intrusion.mp4"
            frame_count = 0
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * 0.5)
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.5)

                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_count += 1
                    frame = cv2.resize(frame, (width, height))

                    results = model(frame)
                    for result in results:
                        boxes = result.boxes.xyxy.cpu().numpy()
                        classes = result.boxes.cls.cpu().numpy()
                        confidences = result.boxes.conf.cpu().numpy()
                        for box, cls, conf in zip(boxes, classes, confidences):
                            # Person class with confidence filter
                            if int(cls) != 0 or conf <= self.conf_threshold:
                                continue
                            x1, y1, x2, y2 = map(int, box)
                            color = (0, 0, 255)
                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(frame, "Intruder", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    out.write(frame)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in detect_intrusion: {str(e)}") from e

class IntrusionDetectionEn:
    """Intrusion detection that skips people whose clothing matches staff colors.

    Args:
        model_path: YOLO weights file to load. When the file does not exist,
            ``yolov8n.pt`` is loaded instead and saved to this path.
        max_intrusion_time: Stored on the instance; not read by
            ``intrusion_detect_en``.
        iou_threshold: Stored on the instance; not read by
            ``intrusion_detect_en``.
        conf_threshold: Only detections with a confidence strictly above this
            value are considered.
    """

    # Maximum Euclidean RGB distance for an average crop color to count as a
    # staff uniform match.
    STAFF_COLOR_TOLERANCE = 30

    def __init__(self, model_path="yolov8n.pt", max_intrusion_time=300, iou_threshold=0.5, conf_threshold=0.7):
        self.model_path = model_path
        self.max_intrusion_time = max_intrusion_time
        self.iou_threshold = iou_threshold
        self.conf_threshold = conf_threshold

        # Predefined staff uniform colors (RGB format)
        self.staff_colors = [
            (139, 143, 133),  # Grayish tone
            (146, 150, 140),  # Light grayish tone
            (146, 152, 141),  # Muted gray-green
            (143, 147, 136),  # Gray-green
            (48, 59, 71)      # Dark blue/gray
        ]

    def is_staff(self, person_crop):
        """Return True when the crop's average color is close to a staff color.

        Args:
            person_crop: Image crop as an (H, W, 3) array in BGR channel order.

        Returns:
            ``True`` if the average color lies within
            ``STAFF_COLOR_TOLERANCE`` of any entry of ``self.staff_colors``;
            ``False`` otherwise, including for an empty crop.
        """
        # An empty crop has no average color; averaging it would yield NaN.
        if person_crop is None or person_crop.size == 0:
            return False

        avg_rgb = np.mean(person_crop, axis=(0, 1))[::-1]  # BGR -> RGB
        staff_rgb = np.asarray(self.staff_colors, dtype=float)
        distances = np.linalg.norm(staff_rgb - avg_rgb, axis=1)
        return bool(np.any(distances < self.STAFF_COLOR_TOLERANCE))

    # NOTE(review): the other detector methods in this file are decorated with
    # @spaces.GPU and this one is not; confirm whether that is intentional.
    def intrusion_detect_en(self, video_path):
        """Annotate ``video_path`` and write the result to ``output_intrusion.mp4``.

        Frames keep their original size. Every class-0 detection above the
        confidence threshold that ``is_staff`` does not match is boxed in red
        and labelled "Intruder".

        Args:
            video_path: Path of the input video.

        Returns:
            The path of the annotated video (``"output_intrusion.mp4"``).

        Raises:
            ValueError: If the video cannot be opened, the writer cannot be
                created, no frame was processed, or any other error occurs
                (the original exception is chained as ``__cause__``).
        """
        try:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not os.path.exists(self.model_path):
                model = YOLO("yolov8n.pt")
                model.save(self.model_path)
            else:
                model = YOLO(self.model_path)
            model.to(device)

            output_path = "output_intrusion.mp4"
            frame_count = 0
            cap = cv2.VideoCapture(video_path)
            out = None
            try:
                if not cap.isOpened():
                    raise ValueError(f"❌ Failed to open video: {video_path}")

                # A reported frame rate of 0 would hand the writer an unusable
                # rate, so fall back to 30.
                fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
                if not out.isOpened():
                    raise ValueError("❌ Failed to initialize video writer")

                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_count += 1

                    results = model(frame)
                    # Draw on a copy so that boxes drawn for earlier persons do
                    # not leak into the color average of later crops.
                    annotated = frame.copy()
                    for result in results:
                        boxes = result.boxes.xyxy.cpu().numpy()
                        classes = result.boxes.cls.cpu().numpy()
                        confidences = result.boxes.conf.cpu().numpy()

                        for box, cls, conf in zip(boxes, classes, confidences):
                            # Person class with confidence filter
                            if int(cls) != 0 or conf <= self.conf_threshold:
                                continue
                            x1, y1, x2, y2 = map(int, box)
                            # Clamp to 0: a negative start index would make the
                            # slice count from the far edge of the frame.
                            person_crop = frame[max(y1, 0):max(y2, 0), max(x1, 0):max(x2, 0)]

                            if self.is_staff(person_crop):
                                continue  # Ignore staff members

                            color = (0, 0, 255)
                            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
                            cv2.putText(annotated, "Intruder", (x1, y1 - 10),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                    out.write(annotated)
            finally:
                # Release the handles on every path, including errors raised
                # in the middle of the frame loop.
                cap.release()
                if out is not None:
                    out.release()

            if frame_count == 0 or not os.path.exists(output_path):
                raise ValueError("❌ Processing failed: No frames processed or output not created")
            return output_path
        except Exception as e:
            raise ValueError(f"Error in detect_intrusion: {str(e)}") from e

import cv2
import numpy as np
from ultralytics import YOLO
from shapely.geometry import Point, Polygon
import time
import tempfile
import moviepy.editor as mpy

class FireAndSmokeDetection:
    """Run a fire/smoke detection model over a video and render its detections.

    Args:
        model_path: YOLO weights file to load with ``task="detect"``.
    """

    def __init__(self, model_path='fire_model.pt'):
        self.model_path = model_path

    @spaces.GPU
    def fire_and_smoke_detect(self, video_path):
        """Annotate ``video_path`` and encode the result as an H.264 MP4.

        Each frame is passed through the model and the plotted result is
        kept; the plotted frames are then assembled with MoviePy.

        Args:
            video_path: Path of the input video.

        Returns:
            The path of a newly created temporary ``.mp4`` file, or ``None``
            when no frame could be read from the input.
        """
        model = YOLO(self.model_path, task="detect")
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model.to(device)

        cap = cv2.VideoCapture(video_path)
        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            if not fps or fps == 0:
                fps = 30
            fps = int(fps)

            # Process while reading and keep only the final RGB frames, rather
            # than holding raw, plotted and converted copies of the whole
            # video in memory at once.
            processed_frames_rgb = []
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                plotted = model(frame)[0].plot()
                # Convert from BGR (OpenCV) to RGB (MoviePy expects RGB)
                processed_frames_rgb.append(cv2.cvtColor(plotted, cv2.COLOR_BGR2RGB))
        finally:
            cap.release()

        if not processed_frames_rgb:
            return None

        # Only the name is needed; close the handle right away so the encoder
        # writes to a file nobody else holds open.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            output_video_path = tmp.name

        # Use MoviePy to assemble the video file using H.264 encoding
        clip = mpy.ImageSequenceClip(processed_frames_rgb, fps=fps)
        try:
            clip.write_videofile(output_video_path, codec='libx264', audio=False, verbose=False, logger=None)
        finally:
            clip.close()

        return output_video_path


class LoiteringDetection:
    """Highlight people who stay inside a predefined zone for too long.

    Args:
        model_path: YOLO weights file to load.
    """

    # Zone polygons per area id, expressed in the pixel coordinates of the
    # fixed FRAME_SIZE that every frame is resized to.
    ZONES = {
        '131': [(842//1.5, 514//1.7), (686//1.5, 290//1.7), (775//1.5, 279//1.7), (961//1.5, 488//1.7)],
        '145': [(153//1.8, 850//1.7), (139//1.8, 535//1.7), (239//1.8, 497//1.7), (291//1.8, 857//1.7)],
    }
    FRAME_SIZE = (1152, 648)  # (width, height)

    def __init__(self, model_path='loitering_model.pt'):
        self.model_path = model_path

    @spaces.GPU
    def loitering_detect(self, video_path, area):
        """Annotate ``video_path`` with zone dwell times per tracked person.

        Boxes are blue outside the zone, green inside it, and red once a
        person has stayed inside for more than 5 seconds of video time. Dwell
        time is derived from the frame index and the frame rate, so it does
        not depend on how fast the frames are processed.

        Args:
            video_path: Path of the input video.
            area: Zone id, ``'131'`` or ``'145'`` (surrounding whitespace is
                ignored).

        Returns:
            The path of the annotated video, ``loitering_video.mp4`` inside
            the system temporary directory.

        Raises:
            ValueError: If ``area`` is not a known zone id, or the video or
                the writer cannot be opened.
        """
        area_key = "" if area is None else str(area).strip()
        zone_points = self.ZONES.get(area_key)
        if zone_points is None:
            raise ValueError(f"Unknown loitering area {area!r}; expected '131' or '145'")
        zone = Polygon(zone_points)
        zone_outline = np.array(zone_points, np.int32)

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = YOLO(self.model_path)
        model.to(device)

        person_info = {}
        time_threshold = 5  # seconds inside the zone before a box turns red
        detection_threshold = 0.6
        width, height = self.FRAME_SIZE

        output_path = os.path.join(tempfile.gettempdir(), "loitering_video.mp4")
        cap = cv2.VideoCapture(video_path)
        out = None
        try:
            if not cap.isOpened():
                raise ValueError(f"Failed to open video: {video_path}")

            # A reported frame rate of 0 would break both the writer and the
            # dwell-time computation, so fall back to 30.
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            if not out.isOpened():
                raise ValueError("Failed to initialize video writer")

            frame_index = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Timestamp of this frame in the written video, in seconds.
                video_time = frame_index / fps
                frame_index += 1

                frame = cv2.resize(frame, (width, height))
                # Perform object detection and tracking (0 is the class ID for person)
                results = model.track(frame, persist=True, classes=[0], conf=detection_threshold)

                # Text lines to display for the people currently in the zone
                time_display = []

                if results[0].boxes.id is not None:
                    boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
                    track_ids = results[0].boxes.id.cpu().numpy().astype(int)

                    for box, track_id in zip(boxes, track_ids):
                        x1, y1, x2, y2 = box
                        center = Point((x1 + x2) / 2, (y1 + y2) / 2)
                        info = person_info.setdefault(
                            track_id, {'in_zone': False, 'start_time': None, 'duration': 0}
                        )

                        if zone.contains(center):
                            if not info['in_zone']:
                                info['in_zone'] = True
                                info['start_time'] = video_time

                            info['duration'] = video_time - info['start_time']

                            if info['duration'] > time_threshold:
                                color = (0, 0, 255)  # Red for loitering
                            else:
                                color = (0, 255, 0)  # Green for in zone

                            time_display.append(f"ID: {track_id}, Time: {info['duration']:.2f}s")
                        else:
                            info['in_zone'] = False
                            info['start_time'] = None
                            info['duration'] = 0
                            color = (255, 0, 0)  # Blue for outside zone

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)

                # Draw polygon zone
                cv2.polylines(frame, [zone_outline], True, (255, 255, 0), 2)

                # Display time information in top left
                for row, text in enumerate(time_display):
                    cv2.putText(frame, text, (10, 30 + row * 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

                out.write(frame)
        finally:
            # Release the handles on every path, including errors raised in
            # the middle of the frame loop.
            cap.release()
            if out is not None:
                out.release()

        return output_path


def process_video(feature, video, area=None):
    """Run the detector selected by ``feature`` on ``video``.

    The detector method name is derived from the feature label: lower-cased,
    spaces replaced by underscores, and "detection" shortened to "detect"
    (e.g. "Fire And Smoke Detection" -> ``fire_and_smoke_detect``).

    Args:
        feature: Feature label; must be a key of the detector table below.
        video: Path of the uploaded video.
        area: Zone id, only forwarded to Loitering Detection.

    Returns:
        A ``(status, output_path)`` tuple. On any failure the status starts
        with ``"Error: "`` and ``output_path`` is ``None``; no exception is
        propagated to the caller.
    """
    detectors = {
        "Crowd Detection": CrowdDetection,
        "People Tracking": PeopleTracking,
        "Fall Detection": FallDetection,
        "Fight Detection": FightDetection,
        "Intrusion Detection": IntrusionDetection,
        "Intrusion Detection En": IntrusionDetectionEn,
        "Loitering Detection": LoiteringDetection,
        "Fire And Smoke Detection": FireAndSmokeDetection,
    }

    try:
        # Explicit checks give a readable message; a bare KeyError on an
        # unselected feature would otherwise surface as "Error: None".
        if feature not in detectors:
            raise ValueError(f"Unknown or missing feature: {feature!r}")
        if not video:
            raise ValueError("No video provided")

        detector = detectors[feature]()
        method_name = feature.lower().replace(" ", "_").replace("detection", "detect")  # Ensures correct method name

        if feature == "Loitering Detection":
            # The area comes from a free-text box, so trim stray whitespace.
            zone_id = area.strip() if isinstance(area, str) else area
            output_path = detector.loitering_detect(video, zone_id)
        else:
            output_path = getattr(detector, method_name)(video)

        # A detector may return None (e.g. when no frame could be read);
        # that must not be reported as a success.
        if output_path is None:
            raise ValueError(f"{feature} produced no output video")

        return f"{feature} completed successfully", output_path
    except Exception as e:
        return f"Error: {str(e)}", None

# Gradio front end: a feature selector, a video upload, and a free-text zone id
# that process_video only forwards to Loitering Detection.
# NOTE(review): "People Tracking" is registered in process_video but is not
# offered in the dropdown below; confirm that this is intentional.
_feature_selector = gr.Dropdown(
    choices=[
        "Crowd Detection",
        "Fall Detection",
        "Fight Detection",
        "Intrusion Detection",
        "Intrusion Detection En",
        "Loitering Detection",
        "Fire And Smoke Detection",
    ],
    label="Select Feature",
)
_video_upload = gr.Video(label="Upload Video")
_area_box = gr.Textbox(label="Loitering Area (131 or 145)")

_status_box = gr.Textbox(label="Status")
_result_video = gr.Video(label="Processed Video")

interface = gr.Interface(
    fn=process_video,
    inputs=[_feature_selector, _video_upload, _area_box],
    outputs=[_status_box, _result_video],
    title="City Stars Features Demo",
    description="Select a feature to process your video Input.",
)

if __name__ == "__main__":
    # Start the web app in debug mode when the file is run as a script.
    interface.launch(debug=True)