Sompote committed
Commit f225817 · verified · 1 Parent(s): ce57fb7

Upload 2 files

Files changed (2):
  1. app.py +1469 -0
  2. requirements.txt +28 -0
app.py ADDED
@@ -0,0 +1,1469 @@
import gradio as gr
import numpy as np
import cv2
import requests
import json
import base64
from PIL import Image
import io
import os
from dotenv import load_dotenv
from collections import defaultdict
import time
from skimage.metrics import structural_similarity as ssim

# Load environment variables
load_dotenv()

# Define API endpoint from environment variable
API_URL = os.getenv("API_URL", "http://122.155.170.240:81")
print(f"Using API URL: {API_URL}")
DEFAULT_CONFIDENCE = float(os.getenv("DEFAULT_CONFIDENCE_THRESHOLD", "0.25"))

def calculate_iou(box1, box2):
    """Calculate Intersection over Union (IoU) between two bounding boxes"""
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection = max(0, x2 - x1) * max(0, y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    return intersection / union if union > 0 else 0

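# Worked example (illustrative values, not from the original source): for
# box1 = [0, 0, 10, 10] and box2 = [5, 5, 15, 15], the overlap is the 5x5
# square from (5, 5) to (10, 10), so
#   intersection = 5 * 5 = 25
#   union = 100 + 100 - 25 = 175
#   IoU = 25 / 175 ≈ 0.143
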
def calculate_bbox_similarity(bbox1, bbox2):
    """Calculate similarity between two bounding boxes using IoU and center distance"""
    try:
        # Calculate IoU
        iou = calculate_iou(bbox1, bbox2)

        # Calculate center distance
        center1 = get_box_center(bbox1)
        center2 = get_box_center(bbox2)

        if center1 is None or center2 is None:
            return 0.0

        distance = np.sqrt((center1[0] - center2[0])**2 + (center1[1] - center2[1])**2)

        # Normalize distance based on bbox size
        bbox_size = max(bbox1[2] - bbox1[0], bbox1[3] - bbox1[1])
        normalized_distance = distance / max(bbox_size, 1)

        # Combine IoU and distance for final similarity score
        similarity = iou * 0.7 + max(0, 1 - normalized_distance * 0.3) * 0.3

        return similarity
    except Exception:
        return 0.0

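# Illustrative combination (assumed inputs): with iou = 0.2 and a center
# offset of half the box size (normalized_distance = 0.5), the blended score
# is 0.2 * 0.7 + max(0, 1 - 0.5 * 0.3) * 0.3 = 0.14 + 0.255 = 0.395.
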
def get_box_center(bbox):
    """Calculate center point of bounding box"""
    try:
        # Handle different bbox formats (x,y,w,h) or (x1,y1,x2,y2)
        if len(bbox) == 4:
            if bbox[2] < bbox[0] or bbox[3] < bbox[1]:  # If it's x1,y1,x2,y2 format
                x = (bbox[0] + bbox[2]) / 2
                y = (bbox[1] + bbox[3]) / 2
            else:  # If it's x,y,w,h format
                x = bbox[0] + bbox[2] / 2
                y = bbox[1] + bbox[3] / 2
        else:
            return None
        return (x, y)
    except Exception:
        return None

def calculate_movement(prev_center, curr_center, min_movement=10):
    """Calculate if there's significant movement between frames"""
    try:
        if prev_center is None or curr_center is None:
            return False
        dx = curr_center[0] - prev_center[0]
        dy = curr_center[1] - prev_center[1]
        distance = np.sqrt(dx * dx + dy * dy)
        return distance > min_movement
    except Exception:
        return False

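# Illustrative check (assumed values): a shift of dx = 6, dy = 8 gives a
# distance of sqrt(36 + 64) = 10, which is not strictly greater than the
# default min_movement of 10, so it does not count as movement.
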
def extract_bbox_image(frame, bbox):
    """Extract image region from bounding box"""
    try:
        if frame is None or len(bbox) != 4:
            return None

        # Convert bbox to integers and ensure valid coordinates
        x1, y1, x2, y2 = map(int, bbox)

        # Handle different bbox formats
        if x2 < x1 or y2 < y1:  # If it's x,y,w,h format
            x1, y1, w, h = bbox
            x2, y2 = x1 + w, y1 + h

        # Ensure coordinates are within frame bounds
        h, w = frame.shape[:2]
        x1 = max(0, min(x1, w - 1))
        y1 = max(0, min(y1, h - 1))
        x2 = max(x1 + 1, min(x2, w))
        y2 = max(y1 + 1, min(y2, h))

        # Extract region
        bbox_img = frame[y1:y2, x1:x2]

        # Resize to standard size for comparison (64x64)
        if bbox_img.size > 0:
            bbox_img = cv2.resize(bbox_img, (64, 64))
            return bbox_img
        return None
    except Exception:
        return None

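# Note: crops are normalized to 64x64 so that the histogram, SSIM, and ORB
# comparisons below always operate on inputs of identical size, regardless of
# how large the detected bounding box was in the original frame.
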
def calculate_histogram_similarity(img1, img2):
    """Calculate histogram-based similarity between two images"""
    try:
        if img1 is None or img2 is None:
            return 0.0

        # Convert to HSV for better color comparison
        hsv1 = cv2.cvtColor(img1, cv2.COLOR_BGR2HSV)
        hsv2 = cv2.cvtColor(img2, cv2.COLOR_BGR2HSV)

        # Calculate histograms
        hist1 = cv2.calcHist([hsv1], [0, 1, 2], None, [50, 60, 60], [0, 180, 0, 256, 0, 256])
        hist2 = cv2.calcHist([hsv2], [0, 1, 2], None, [50, 60, 60], [0, 180, 0, 256, 0, 256])

        # Compare histograms using correlation
        correlation = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)

        # Normalize to 0-1 range
        return max(0, correlation)
    except Exception:
        return 0.0

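# Note: cv2.HISTCMP_CORREL returns values in [-1, 1] (1 = identical
# distributions, negative = anti-correlated), so max(0, correlation) clamps
# the result into the [0, 1] range used by the other similarity terms.
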
def calculate_ssim_similarity(img1, img2):
    """Calculate Structural Similarity Index (SSIM) between two images"""
    try:
        if img1 is None or img2 is None:
            return 0.0

        # Convert to grayscale
        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

        # Calculate SSIM
        similarity_index = ssim(gray1, gray2)

        # Normalize to 0-1 range (SSIM can be negative)
        return max(0, (similarity_index + 1) / 2)
    except Exception:
        return 0.0

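# Mapping examples for (similarity_index + 1) / 2: a raw SSIM of -1 maps to
# 0.0, 0 maps to 0.5, and 1 maps to 1.0, so the returned score is always in
# [0, 1] like the other similarity components.
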
def calculate_feature_similarity(img1, img2):
    """Calculate feature-based similarity using ORB features"""
    try:
        if img1 is None or img2 is None:
            return 0.0

        # Convert to grayscale
        gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)

        # Initialize ORB detector
        orb = cv2.ORB_create(nfeatures=50)

        # Find keypoints and descriptors
        kp1, des1 = orb.detectAndCompute(gray1, None)
        kp2, des2 = orb.detectAndCompute(gray2, None)

        if des1 is None or des2 is None or len(des1) < 5 or len(des2) < 5:
            return 0.0

        # Match features
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        matches = bf.match(des1, des2)

        # Calculate similarity based on good matches
        if len(matches) > 0:
            # Sort matches by distance
            matches = sorted(matches, key=lambda x: x.distance)
            good_matches = [m for m in matches if m.distance < 50]  # Threshold for good matches

            # Similarity based on ratio of good matches
            similarity = len(good_matches) / max(len(kp1), len(kp2))
            return min(1.0, similarity)

        return 0.0
    except Exception:
        return 0.0

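# Note: ORB descriptors are 256-bit binary strings, so the match distance here
# is a Hamming distance (0-256 differing bits); the hard-coded cutoff of 50 is
# this app's notion of a "good" match, and the score is the fraction of
# detected keypoints that found such a match.
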
def calculate_enhanced_bbox_similarity(bbox1, bbox2, frame1=None, frame2=None):
    """Enhanced similarity calculation combining geometric and visual features"""
    try:
        # Geometric similarity (IoU + distance) - 40% weight
        geometric_similarity = calculate_bbox_similarity(bbox1, bbox2)

        # If no frames provided, use only geometric similarity
        if frame1 is None or frame2 is None:
            return geometric_similarity

        # Extract image regions from bounding boxes
        img1 = extract_bbox_image(frame1, bbox1)
        img2 = extract_bbox_image(frame2, bbox2)

        if img1 is None or img2 is None:
            return geometric_similarity

        # Visual similarity components
        hist_similarity = calculate_histogram_similarity(img1, img2)  # Color similarity
        ssim_similarity = calculate_ssim_similarity(img1, img2)  # Structural similarity
        feature_similarity = calculate_feature_similarity(img1, img2)  # Feature similarity

        # Combine all similarities with weights
        final_similarity = (
            geometric_similarity * 0.4 +   # Geometric (IoU + distance)
            hist_similarity * 0.25 +       # Color histogram
            ssim_similarity * 0.25 +       # Structural similarity
            feature_similarity * 0.1       # Feature matching
        )

        return min(1.0, final_similarity)

    except Exception:
        return calculate_bbox_similarity(bbox1, bbox2)  # Fallback to geometric only

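# Worked example of the weighted blend (illustrative scores): with
# geometric = 0.5, histogram = 0.8, SSIM = 0.6, and feature = 0.4, the
# combined similarity is
#   0.5 * 0.4 + 0.8 * 0.25 + 0.6 * 0.25 + 0.4 * 0.1 = 0.59
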
class TrackedObject:
    def __init__(self, obj_id, obj_class, bbox):
        self.id = obj_id
        self.class_name = obj_class
        self.alternative_classes = set()  # Track alternative classes (e.g., person when primary is motorcycle)
        self.trajectory = []  # List of center points
        self.bboxes = []  # List of bounding boxes
        self.frame_images = []  # Store recent frame images for visual comparison
        self.counted = False
        self.last_seen = 0  # Frame number when last seen
        self.first_seen = 0  # Frame number when first seen
        self.frames_in_red_zone = 0  # Number of consecutive frames in red zone
        self.warning_triggered = False  # Whether warning has been triggered
        self.red_zone_entry_frame = None  # Frame when object entered red zone
        self.similarity_scores = []  # Track similarity scores over time
        self.add_detection(bbox)

    def update_class(self, new_class):
        """Update object class, handling motorcycle+person combinations"""
        # Prioritize motorcycle over person (motorcycle with rider)
        if self.class_name == 'person' and new_class == 'motorcycle':
            self.alternative_classes.add(self.class_name)
            self.class_name = new_class
        elif self.class_name == 'motorcycle' and new_class == 'person':
            self.alternative_classes.add(new_class)
            # Keep motorcycle as primary class
        elif new_class != self.class_name:
            # Different class detected, add to alternatives
            self.alternative_classes.add(new_class)

    def get_primary_class(self):
        """Get the primary class for counting purposes"""
        # Always prioritize motorcycle if it's been detected
        if self.class_name == 'motorcycle' or 'motorcycle' in self.alternative_classes:
            return 'motorcycle'
        return self.class_name

    def add_detection(self, bbox, frame_image=None):
        try:
            center = get_box_center(bbox)
            if center is not None:
                self.trajectory.append(center)
                self.bboxes.append(bbox)

                # Store frame image for visual comparison
                if frame_image is not None:
                    self.frame_images.append(frame_image.copy())

                # Keep only recent history to prevent memory issues
                if len(self.trajectory) > 50:
                    self.trajectory = self.trajectory[-25:]
                    self.bboxes = self.bboxes[-25:]
                    self.frame_images = self.frame_images[-25:] if self.frame_images else []
        except Exception:
            pass

    def has_movement(self, min_movement=10):
        try:
            if len(self.trajectory) < 2:
                return False
            return calculate_movement(self.trajectory[-2], self.trajectory[-1], min_movement)
        except Exception:
            return False

    def update_red_zone_status(self, is_in_red_zone, frame_number):
        """Update red zone status and handle warnings"""
        if is_in_red_zone:
            if self.red_zone_entry_frame is None:
                self.red_zone_entry_frame = frame_number
                # Mark as entered red zone immediately when first detected
                return "entered"
            self.frames_in_red_zone += 1

            # Check if warning should be triggered using configurable threshold
            if self.frames_in_red_zone > state.warning_frame_threshold and not self.warning_triggered:
                self.warning_triggered = True
                return "warning"  # Indicate that a warning should be shown
        else:
            # Object left red zone, reset counters
            if self.red_zone_entry_frame is not None:
                # Object was in red zone and now left
                self.frames_in_red_zone = 0
                self.red_zone_entry_frame = None
                self.warning_triggered = False
                return "exited"

        return None

    def get_similarity_with(self, other_bbox, current_frame=None, similarity_threshold=0.5):
        """Calculate enhanced similarity with another bounding box using visual comparison"""
        if len(self.bboxes) == 0:
            return 0.0

        current_bbox = self.bboxes[-1]

        # Get the most recent frame image for comparison
        previous_frame = self.frame_images[-1] if self.frame_images else None

        # Use enhanced similarity calculation with visual comparison
        similarity = calculate_enhanced_bbox_similarity(
            current_bbox,
            other_bbox,
            previous_frame,
            current_frame
        )

        # Store similarity score for debugging
        self.similarity_scores.append({
            'frame': state.frame_count,
            'similarity': similarity,
            'bbox': other_bbox,
            'method': 'enhanced' if (previous_frame is not None and current_frame is not None) else 'geometric'
        })

        # Keep only recent similarity scores to prevent memory issues
        if len(self.similarity_scores) > 20:
            self.similarity_scores = self.similarity_scores[-10:]

        return similarity

def is_similar_object(obj1, obj2, similarity_threshold=0.35):
    """Check if two objects are similar based on class, position and bounding box similarity"""
    try:
        # Allow cross-class matching for motorcycle and person (same object - person on motorcycle)
        class1, class2 = obj1['class'], obj2['class']

        # Check if classes are compatible (same class or motorcycle+person combination)
        compatible_classes = (
            class1 == class2 or  # Same class
            (class1 == 'motorcycle' and class2 == 'person') or  # Person on motorcycle
            (class1 == 'person' and class2 == 'motorcycle')  # Motorcycle with person
        )

        if not compatible_classes:
            return False

        box1 = obj1['bbox']
        box2 = obj2['bbox']

        # Convert to x1,y1,x2,y2 format if needed
        if len(box1) == 4 and len(box2) == 4:
            if box1[2] < box1[0] or box1[3] < box1[1]:  # Already in x1,y1,x2,y2
                bbox1 = box1
            else:  # Convert from x,y,w,h to x1,y1,x2,y2
                bbox1 = [box1[0], box1[1], box1[0] + box1[2], box1[1] + box1[3]]

            if box2[2] < box2[0] or box2[3] < box2[1]:  # Already in x1,y1,x2,y2
                bbox2 = box2
            else:  # Convert from x,y,w,h to x1,y1,x2,y2
                bbox2 = [box2[0], box2[1], box2[0] + box2[2], box2[1] + box2[3]]

            similarity = calculate_bbox_similarity(bbox1, bbox2)

            # Use lower threshold for motorcycle+person combinations
            if class1 != class2 and ('motorcycle' in [class1, class2] and 'person' in [class1, class2]):
                return similarity > (similarity_threshold * 0.7)  # 30% more lenient for cross-class

            return similarity > similarity_threshold
        return False
    except Exception:
        return False

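# Cross-class example: with the default similarity_threshold of 0.35, a
# motorcycle detection and a person detection are treated as the same object
# once their bbox similarity exceeds 0.35 * 0.7 = 0.245, while two same-class
# detections must still exceed the full 0.35.
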
# Global state for protection area and previous detections
class State:
    def __init__(self):
        self.protection_points = []  # Store clicked points
        self.detected_segments = []
        self.segment_image = None
        self.selected_segments = []
        self.previous_detections = None
        self.cached_protection_area = None
        self.current_image = None  # Store current image for drawing
        self.original_dims = None  # Store original image dimensions
        self.display_dims = None  # Store display dimensions
        self.tracked_objects = {}  # Dictionary of tracked objects
        self.next_obj_id = 0  # Counter for generating unique object IDs
        self.object_count = defaultdict(int)  # Count by class
        self.frame_count = 0  # Count processed frames
        self.red_zone_passed_objects = defaultdict(int)  # Objects that passed through red zone
        self.red_zone_warnings = []  # Store warning messages
        self.time_window = 10  # Configurable time window for similarity comparison
        self.similarity_threshold = 0.35  # Configurable similarity threshold (lowered for better matching)
        self.warning_frame_threshold = 3  # Configurable warning threshold (frames in red zone)
        # Enhanced red zone tracking
        self.red_zone_entered_objects = defaultdict(int)  # All objects that entered red zone
        self.red_zone_current_objects = defaultdict(list)  # Objects currently in red zone
        self.red_zone_exited_objects = defaultdict(int)  # Objects that exited red zone

    def reset_tracking(self):
        """Reset all tracking data"""
        self.tracked_objects = {}
        self.next_obj_id = 0
        self.object_count = defaultdict(int)
        self.frame_count = 0
        self.red_zone_passed_objects = defaultdict(int)
        self.red_zone_warnings = []
        # Reset enhanced red zone tracking
        self.red_zone_entered_objects = defaultdict(int)
        self.red_zone_current_objects = defaultdict(list)
        self.red_zone_exited_objects = defaultdict(int)

state = State()

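# Note: `state` is a single module-level instance, so every Gradio callback in
# this app shares one tracking context; two videos processed concurrently from
# separate browser sessions would therefore mix their tracking state.
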
def image_to_bytes(image):
    """Convert PIL Image to bytes for API request"""
    # Log original image size
    original_width, original_height = image.size
    print(f"Original image dimensions: {original_width}x{original_height}")

    # Convert image to bytes without resizing
    img_byte_arr = io.BytesIO()
    image.save(img_byte_arr, format='PNG')
    print(f"Sending image with original dimensions: {original_width}x{original_height}")

    return img_byte_arr.getvalue()

def base64_to_image(base64_str):
    """Convert base64 string to OpenCV image"""
    img_data = base64.b64decode(base64_str)
    nparr = np.frombuffer(img_data, np.uint8)
    return cv2.imdecode(nparr, cv2.IMREAD_COLOR)

def opencv_to_pil(opencv_image):
    """Convert OpenCV image to PIL format"""
    # Convert from BGR to RGB for PIL
    rgb_image = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_image)

def scale_point_to_original(x, y):
    """Scale display coordinates back to original image coordinates"""
    if state.original_dims is None or state.display_dims is None:
        return x, y

    orig_w, orig_h = state.original_dims
    disp_w, disp_h = state.display_dims

    # Calculate scaling factors
    scale_x = orig_w / disp_w
    scale_y = orig_h / disp_h

    # Scale the coordinates
    orig_x = int(x * scale_x)
    orig_y = int(y * scale_y)

    return orig_x, orig_y

def scale_points_to_display(points):
    """Scale points from original image coordinates to display coordinates"""
    if state.original_dims is None or state.display_dims is None:
        return points

    orig_w, orig_h = state.original_dims
    disp_w, disp_h = state.display_dims

    # Calculate scaling factors
    scale_x = disp_w / orig_w
    scale_y = disp_h / orig_h

    # Scale all points
    display_points = []
    for point in points:
        x = int(point[0] * scale_x)
        y = int(point[1] * scale_y)
        display_points.append([x, y])

    return display_points

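# Worked example (illustrative dimensions): for a 1920x1080 original shown at
# 960x540, scale_x = scale_y = 0.5, so the original-space point [100, 200]
# maps to the display-space point [50, 100]; scale_point_to_original applies
# the inverse factors (2.0 here) in the other direction.
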
def draw_protection_area(image):
    """Draw protection area points and lines on the image"""
    img = image.copy()
    points = state.protection_points

    # Draw existing points and lines
    if len(points) > 0:
        # Convert points to numpy array
        points_array = np.array(points, dtype=np.int32)

        # Draw lines between points (closed polygon once 4 points exist)
        if len(points) > 1:
            cv2.polylines(img, [points_array],
                          len(points) == 4,
                          (0, 255, 0), 2)

        # Draw points with numbers
        for i, point in enumerate(points):
            cv2.circle(img, tuple(point), 5, (0, 0, 255), -1)
            cv2.putText(img, str(i + 1),
                        (point[0] + 10, point[1] + 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

        # Fill polygon with semi-transparent color if we have at least 3 points
        if len(points) >= 3:
            overlay = img.copy()
            cv2.fillPoly(overlay, [points_array], (0, 255, 0))
            cv2.addWeighted(overlay, 0.3, img, 0.7, 0, img)

    return img

def update_preview(video):
    if video is None:
        return None, gr.update(choices=[], value=[], visible=False)
    cap = cv2.VideoCapture(video)
    ret, frame = cap.read()
    cap.release()
    if ret:
        # Reset state
        state.protection_points = []
        state.detected_segments = []
        state.segment_image = None
        state.selected_segments = []
        state.previous_detections = None
        state.cached_protection_area = None

        # Store original frame and its dimensions
        state.current_image = frame.copy()  # Store the original frame
        state.original_dims = (frame.shape[1], frame.shape[0])  # (width, height)
        state.display_dims = state.original_dims  # Set display dims same as original

        # Convert to RGB without resizing
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        return frame_rgb, gr.update(choices=[], value=[], visible=False)
    return None, gr.update(choices=[], value=[], visible=False)

def handle_image_click(evt: gr.SelectData, img):
    """Handle mouse clicks on the image"""
    if len(state.protection_points) >= 4:
        # Reset points if we already have 4
        state.protection_points = []

    if state.current_image is None:
        return img, "Error: No image loaded"

    # Get click coordinates from the event - these are in original scale
    click_x, click_y = evt.index[0], evt.index[1]

    # Add point directly (no scaling needed as we're working with original coordinates)
    state.protection_points.append([click_x, click_y])

    # Create a copy of the current image for display
    display_img = state.current_image.copy()

    # Draw points and lines
    for i, point in enumerate(state.protection_points):
        # Draw point
        cv2.circle(display_img, (point[0], point[1]), 5, (0, 0, 255), -1)
        cv2.putText(display_img, str(i + 1),
                    (point[0] + 10, point[1] + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # Draw lines between points
    if len(state.protection_points) > 1:
        points_array = np.array(state.protection_points, dtype=np.int32)

        # Draw lines (closed polygon once 4 points exist)
        cv2.polylines(display_img, [points_array],
                      len(state.protection_points) == 4,
                      (0, 255, 0), 2)

        # Fill polygon with semi-transparent color if we have at least 3 points
        if len(state.protection_points) >= 3:
            overlay = display_img.copy()
            cv2.fillPoly(overlay, [points_array], (0, 255, 0))
            cv2.addWeighted(overlay, 0.3, display_img, 0.7, 0, display_img)

    # Convert to RGB for display
    display_img_rgb = cv2.cvtColor(display_img, cv2.COLOR_BGR2RGB)

    # Return the image and status
    return display_img_rgb, f"Selected {len(state.protection_points)} points\nCoordinates: {state.protection_points}"

def reset_points():
    """Reset protection points"""
    state.protection_points = []
    if state.current_image is not None:
        # Convert original image to RGB for display
        display_img_rgb = cv2.cvtColor(state.current_image.copy(), cv2.COLOR_BGR2RGB)
        return display_img_rgb, "Points reset"
    return None, "Points reset"

def detect_rail_segments(image):
    """Detect rail segments using the API"""
    try:
        # Log original image dimensions
        width, height = image.size
        print(f"Detecting rail segments on image with dimensions: {width}x{height}")

        files = {"file": image_to_bytes(image)}
        response = requests.post(
            f"{API_URL}/detect/rail-segment",
            files=files,
            timeout=60
        )

        if response.status_code == 200:
            result = response.json()
            if "segments" in result:
                return result["segments"], base64_to_image(result["image_base64"])
            else:
                return [], None
        else:
            print(f"API error: {response.status_code} - Image size was {width}x{height}")
            return [], None
    except Exception as e:
        print(f"Error in detect_rail_segments: {str(e)}")
        return [], None

def extract_protection_area(first_frame):
    """Extract and cache protection area points using rail segment detection"""
    try:
        # Log original frame dimensions
        height, width = first_frame.shape[:2]
        print(f"Extracting protection area from frame with dimensions: {width}x{height}")

        # Convert frame to PIL Image without resizing
        first_frame_pil = Image.fromarray(cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB))

        # Verify PIL image dimensions
        pil_width, pil_height = first_frame_pil.size
        print(f"PIL Image dimensions before API call: {pil_width}x{pil_height}")

        # Detect rail segments
        segments, segment_img = detect_rail_segments(first_frame_pil)

        if segments and len(segments) > 0:
            # Verify segment image dimensions
            if segment_img is not None:
                seg_height, seg_width = segment_img.shape[:2]
                print(f"Received segment image dimensions: {seg_width}x{seg_height}")

                # Only resize if dimensions don't match
                if (seg_width, seg_height) != (width, height):
                    print(f"Resizing segment image from {seg_width}x{seg_height} to {width}x{height}")
                    segment_img = cv2.resize(segment_img, (width, height), interpolation=cv2.INTER_LANCZOS4)

            # Store segments and image
            state.detected_segments = segments
            state.segment_image = segment_img

            # Create segment choices with more detailed information
            segment_choices = []
            for i, segment in enumerate(segments):
                # Extract mask dimensions for verification
                mask_points = segment.get('mask', [])
                if mask_points:
                    mask_x = [p[0] for p in mask_points]
                    mask_y = [p[1] for p in mask_points]
                    mask_width = max(mask_x) - min(mask_x)
                    mask_height = max(mask_y) - min(mask_y)
                    print(f"Segment {i+1} mask dimensions: {mask_width}x{mask_height}")

                choice_text = f"Segment {i+1} (Confidence: {segment['confidence']:.2f})"
                segment_choices.append(choice_text)

            state.selected_segments = segment_choices  # Select all segments by default

            # Use the first segment's mask as protection area
            segment = segments[0]
            if 'mask' in segment and segment['mask']:
                mask_points = segment['mask']
                # Convert to list of [x,y] points and ensure integer values
                mask_points = [[int(float(x)), int(float(y))] for x, y in mask_points]
                if len(mask_points) >= 3:  # Need at least 3 points for a valid polygon
                    state.cached_protection_area = mask_points

                    # Convert segment image to RGB for display without resizing
                    if segment_img is not None:
                        display_img = cv2.cvtColor(segment_img, cv2.COLOR_BGR2RGB)
                        return True, "Protection area extracted successfully", display_img

            return False, "Invalid mask points in segment", None
        return False, "No valid rail segments detected", None

    except Exception as e:
        print(f"Error in extract_protection_area: {str(e)}")
        return False, f"Error extracting protection area: {str(e)}", None

def get_segment_index(choice_text):
    """Extract segment index from choice text"""
    try:
        # Extract index from "Segment X (Confidence: Y)" format
        return int(choice_text.split()[1]) - 1
    except (IndexError, ValueError):
        return -1

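# Parsing example: "Segment 3 (Confidence: 0.87)".split() yields
# ["Segment", "3", "(Confidence:", "0.87)"], so element [1] is "3" and the
# returned zero-based index is 2.
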
def update_object_tracking(objects_in_area, current_frame=None):
    """Update object tracking with new detections"""
    try:
        current_tracked = set()  # Keep track of objects seen in this frame
        current_warnings = []  # Collect warnings for this frame

        # Clear current objects list for this frame
        state.red_zone_current_objects = defaultdict(list)

        # Match new detections with existing tracked objects
        for obj in objects_in_area:
            try:
                if 'bbox' not in obj or 'class' not in obj:
                    continue

                bbox = obj['bbox']
                obj_class = obj['class']
                is_in_red_zone = obj.get('in_protection_area', False)
                matched = False
                best_match_id = None
                best_similarity = 0.0

                # Try to match with existing tracked objects using cross-class similarity
                for obj_id, tracked in state.tracked_objects.items():
                    # Check if object was seen recently (within time window)
                    if state.frame_count - tracked.last_seen <= state.time_window:
                        # Create temporary objects for similarity comparison
                        temp_obj1 = {'class': tracked.class_name, 'bbox': tracked.bboxes[-1] if tracked.bboxes else bbox}
                        temp_obj2 = {'class': obj_class, 'bbox': bbox}

                        if is_similar_object(temp_obj1, temp_obj2, state.similarity_threshold):
                            # Use enhanced similarity calculation with visual comparison
                            similarity = tracked.get_similarity_with(bbox, current_frame)

                            # Use the best match above threshold
                            if similarity > best_similarity:
                                best_similarity = similarity
                                best_match_id = obj_id

                # If good match found, update existing object
                if best_match_id is not None:
                    tracked = state.tracked_objects[best_match_id]
                    tracked.add_detection(bbox, current_frame)  # Pass current frame
                    tracked.update_class(obj_class)  # Update class information
                    tracked.last_seen = state.frame_count
                    current_tracked.add(best_match_id)
                    matched = True

                    # Check red zone status and handle state changes
                    zone_status = tracked.update_red_zone_status(is_in_red_zone, state.frame_count)
                    primary_class = tracked.get_primary_class()  # Use primary class for counting

                    if zone_status == "entered":
                        # Object just entered red zone - count it immediately
                        if not tracked.counted:
                            tracked.counted = True
                            state.red_zone_entered_objects[primary_class] += 1

                    elif zone_status == "warning":
                        warning_msg = f"⚠️ WARNING: {primary_class} (ID: {tracked.id}) has been in red zone for {tracked.frames_in_red_zone} frames!"
                        current_warnings.append(warning_msg)
                        state.red_zone_warnings.append({
                            'frame': state.frame_count,
                            'object_id': tracked.id,
                            'class': primary_class,
                            'frames_in_zone': tracked.frames_in_red_zone,
                            'message': warning_msg
                        })

                    elif zone_status == "exited":
                        # Object exited red zone
                        state.red_zone_exited_objects[primary_class] += 1

                    # Add to current objects in red zone if still in zone
                    if is_in_red_zone:
                        display_class = primary_class
                        if tracked.alternative_classes:
                            display_class += f" ({'+'.join(sorted(tracked.alternative_classes))})"

                        state.red_zone_current_objects[primary_class].append({
                            'id': tracked.id,
                            'frames_in_zone': tracked.frames_in_red_zone,
                            'entry_frame': tracked.red_zone_entry_frame,
                            'display_class': display_class
                        })

                # If no match found, create new tracked object
                if not matched:
                    new_obj = TrackedObject(state.next_obj_id, obj_class, bbox)
                    new_obj.add_detection(bbox, current_frame)  # Pass current frame
                    new_obj.last_seen = state.frame_count
                    new_obj.first_seen = state.frame_count
                    state.tracked_objects[state.next_obj_id] = new_obj
                    current_tracked.add(state.next_obj_id)

                    # Check red zone status for new object
                    zone_status = new_obj.update_red_zone_status(is_in_red_zone, state.frame_count)
                    primary_class = new_obj.get_primary_class()

                    if zone_status == "entered":
                        # New object entered red zone immediately
                        new_obj.counted = True
                        state.red_zone_entered_objects[primary_class] += 1

                        # Add to current objects in red zone
                        state.red_zone_current_objects[primary_class].append({
                            'id': new_obj.id,
                            'frames_in_zone': new_obj.frames_in_red_zone,
                            'entry_frame': new_obj.red_zone_entry_frame,
                            'display_class': primary_class
                        })

                    state.next_obj_id += 1

            except Exception:
                continue

        # Update objects not seen in current frame
        for obj_id, tracked in state.tracked_objects.items():
            if obj_id not in current_tracked:
                # Object not seen in current frame, update red zone status
                zone_status = tracked.update_red_zone_status(False, state.frame_count)
                if zone_status == "exited":
                    # Object exited red zone
                    primary_class = tracked.get_primary_class()
                    state.red_zone_exited_objects[primary_class] += 1

        # Remove objects that haven't been seen for a while
        if state.frame_count > state.time_window:
            to_remove = []
            for obj_id, tracked in state.tracked_objects.items():
                if state.frame_count - tracked.last_seen > state.time_window * 2:  # Remove after 2x time window
                    # If object was in red zone when lost, count as exited
                    if tracked.red_zone_entry_frame is not None:
                        primary_class = tracked.get_primary_class()
                        state.red_zone_exited_objects[primary_class] += 1
                    to_remove.append(obj_id)

            for obj_id in to_remove:
                del state.tracked_objects[obj_id]

        # Store current warnings
        if current_warnings:
            print(f"Frame {state.frame_count} Warnings: {current_warnings}")

    except Exception as e:
        print(f"Error in update_object_tracking: {str(e)}")

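# Matching flow in brief: each detection is gated against every recently seen
# track via is_similar_object, scored with the enhanced visual similarity,
# greedily assigned to the single best-scoring track, and otherwise spawns a
# new TrackedObject; tracks unseen for 2x the time window are retired.
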
def get_red_zone_summary():
    """Generate comprehensive summary of objects in red zone with proper grouping"""
    summary = []

    # Header
    summary.append("🔴 RED ZONE MONITORING REPORT")
    summary.append("=" * 40)

    # Objects that entered red zone (all time)
    if state.red_zone_entered_objects:
        summary.append("\n📊 OBJECTS ENTERED RED ZONE:")
        total_entered = sum(state.red_zone_entered_objects.values())
        summary.append(f"Total objects entered: {total_entered}")

        for obj_class, count in sorted(state.red_zone_entered_objects.items()):
            summary.append(f"  • {obj_class}: {count}")
    else:
        summary.append("\n📊 OBJECTS ENTERED RED ZONE:")
        summary.append("No objects have entered the red zone yet")

    # Objects currently in red zone
    current_total = sum(len(objects) for objects in state.red_zone_current_objects.values())
    if current_total > 0:
        summary.append(f"\n🚨 CURRENTLY IN RED ZONE ({current_total} objects):")

        for obj_class, objects in sorted(state.red_zone_current_objects.items()):
            if objects:
                summary.append(f"  {obj_class} ({len(objects)} objects):")
                for obj_info in objects:
                    display_class = obj_info.get('display_class', obj_class)
                    summary.append(f"    - ID {obj_info['id']}: {obj_info['frames_in_zone']} frames (entered: frame {obj_info['entry_frame']}) [{display_class}]")
    else:
        summary.append("\n🚨 CURRENTLY IN RED ZONE:")
        summary.append("No objects currently in red zone")

    # Objects that exited red zone
    if state.red_zone_exited_objects:
        summary.append("\n✅ OBJECTS EXITED RED ZONE:")
        total_exited = sum(state.red_zone_exited_objects.values())
        summary.append(f"Total objects exited: {total_exited}")

        for obj_class, count in sorted(state.red_zone_exited_objects.items()):
            summary.append(f"  • {obj_class}: {count}")

    # Recent warnings
    recent_warnings = [w for w in state.red_zone_warnings if state.frame_count - w['frame'] <= 10]
    if recent_warnings:
        summary.append("\n⚠️ RECENT WARNINGS:")
        for warning in recent_warnings[-5:]:  # Show last 5 warnings
            summary.append(f"  • Frame {warning['frame']}: {warning['class']} (ID: {warning['object_id']}) - {warning['frames_in_zone']} frames in zone")

    # Statistics summary
    summary.append("\n📈 STATISTICS:")
    summary.append(f"  • Total unique objects tracked: {len(state.tracked_objects)}")
    summary.append(f"  • Active warnings: {len([w for w in state.red_zone_warnings if state.frame_count - w['frame'] <= 5])}")
    summary.append(f"  • Frame: {state.frame_count}")
    summary.append(f"  • Warning threshold: {state.warning_frame_threshold} frames")

    # Show object combination info
    combined_objects = sum(1 for tracked in state.tracked_objects.values() if tracked.alternative_classes)

    if combined_objects > 0:
        summary.append(f"  • Objects with combined detections: {combined_objects}")

    return "\n".join(summary)

def process_frame(frame, confidence):
    """Process a video frame using cached protection area"""
    try:
        protection_area = []
        if state.selected_segments and state.detected_segments:
            for choice in state.selected_segments:
                idx = get_segment_index(choice)
                if 0 <= idx < len(state.detected_segments):
                    segment = state.detected_segments[idx]
                    if 'mask' in segment and segment['mask']:
                        protection_area = segment['mask']
                        break
        elif len(state.protection_points) >= 3:
            protection_area = state.protection_points

        if not protection_area:
            return None, "Protection area not set. Please extract protection area first."

        # Ensure frame is valid
        if frame is None or frame.size == 0:
            return None, "Invalid frame"

        success, buffer = cv2.imencode('.png', frame)
        if not success:
            return None, "Failed to encode frame"

        files = {
            "file": ("frame.png", buffer.tobytes(), "image/png")
        }

        protection_area_json = json.dumps(protection_area)

        data = {
            "protection_area": protection_area_json,
            "confidence_threshold": str(confidence)
        }

        if state.previous_detections:
            data["previous_detections"] = json.dumps(state.previous_detections)

        try:
            response = requests.post(
                f"{API_URL}/detect/objects-and-redlight",
                files=files,
                data=data,
                timeout=60
            )

            if response.status_code == 200:
                result = response.json()
                if not result.get("success"):
                    return None, f"API Error: {result.get('detail', 'Unknown error')}"

                result_data = result.get("result", {})
                if not result_data:
                    return None, "No result data received"

                red_light_info = result_data.get("red_light", {})
                red_light_detected = red_light_info.get("detected", False)
                red_light_prob = red_light_info.get("probability", 0)

                img_base64 = result_data.get("image_base64")
                if not img_base64:
                    return None, "No image data received from API"

                try:
                    if ',' in img_base64:
                        img_base64 = img_base64.split(',')[1]

                    img_data = base64.b64decode(img_base64)
                    nparr = np.frombuffer(img_data, np.uint8)
                    processed_img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)

                    if processed_img is None or processed_img.size == 0:
                        return None, "Failed to decode image from API response"

                    objects_in_area = [obj for obj in result_data.get("objects", [])
                                       if obj.get("in_protection_area", False) and
                                       'bbox' in obj and 'class' in obj]

                    # Update object tracking
                    state.frame_count += 1
                    update_object_tracking(objects_in_area, processed_img)

                    # Cache detections for next frame
                    state.previous_detections = objects_in_area

                    processed_img_rgb = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)

                    status = []
                    status.append(f"Red Light: {'YES' if red_light_detected else 'NO'} ({red_light_prob:.2f})")

                    # Add enhanced red zone summary
                    red_zone_summary = get_red_zone_summary()
                    status.append(f"\n{red_zone_summary}")

                    if objects_in_area:
                        status.append("\n📊 CURRENT FRAME DETECTIONS:")
                        for obj in objects_in_area:
                            status.append(f"  • {obj['class']} (confidence: {obj['confidence']:.2f})")

                    # Add tracking statistics
                    active_objects = len([obj for obj in state.tracked_objects.values()
                                          if state.frame_count - obj.last_seen <= 3])
                    status.append("\n📈 TRACKING STATS:")
                    status.append(f"  • Active tracked objects: {active_objects}")
                    status.append(f"  • Frame: {state.frame_count}")
                    status.append(f"  • Time window: {state.time_window} frames")
                    status.append(f"  • Similarity threshold: {state.similarity_threshold:.2f}")

                    return processed_img_rgb, "\n".join(status)

                except Exception as e:
                    return None, f"Error processing detection results: {str(e)}"
            else:
                error_detail = f"API Error: {response.status_code}"
                try:
                    error_json = response.json()
                    if 'detail' in error_json:
                        error_detail += f" - {error_json['detail']}"
                except Exception:
                    error_detail += f" - {response.text}"
                return None, error_detail

        except requests.exceptions.Timeout:
            return None, "API request timed out"
        except requests.exceptions.ConnectionError:
            return None, "Could not connect to API server"
        except Exception as e:
            return None, f"API request failed: {str(e)}"

    except Exception as e:
        return None, f"Error processing frame: {str(e)}"

def process_video(video, confidence=DEFAULT_CONFIDENCE, target_fps=1):
    """Stream processed frames in real-time using cached protection area"""
    cap = cv2.VideoCapture(video)

    if not cap.isOpened():
        yield None, "Error: Could not open video file"
        return

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_interval = max(1, int(fps / target_fps))
    frame_number = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_number += 1
            if frame_number % frame_interval != 0:
                continue

            # Process frame and get results
            processed_frame, result = process_frame(frame, confidence)

            if processed_frame is not None:
                # Frame is already in RGB format from process_frame
                current_status = f"Processing frame {frame_number}/{total_frames}\n{result}"
                yield processed_frame, current_status
            else:
                current_status = f"Frame {frame_number}: {result}"
                yield None, current_status

        # Generate final summary
        final_summary = generate_final_summary()
        yield None, final_summary

    except Exception as e:
        yield None, f"Error processing video: {str(e)}"
    finally:
        cap.release()

def generate_final_summary():
    """Generate comprehensive final summary of video processing"""
    summary_lines = []

    summary_lines.append("🎬 VIDEO PROCESSING COMPLETE")
    summary_lines.append("=" * 50)

    # Processing statistics
    summary_lines.append("📊 PROCESSING STATISTICS:")
    summary_lines.append(f"  • Total frames processed: {state.frame_count}")
    summary_lines.append(f"  • Time window used: {state.time_window} frames")
    summary_lines.append(f"  • Similarity threshold: {state.similarity_threshold:.2f}")
    summary_lines.append(f"  • Warning threshold: {state.warning_frame_threshold} frames")

    # Enhanced red zone summary
    if state.red_zone_entered_objects:
        summary_lines.append("\n🔴 RED ZONE ANALYSIS:")
        total_entered = sum(state.red_zone_entered_objects.values())
        total_exited = sum(state.red_zone_exited_objects.values())

        summary_lines.append(f"  • Total objects entered red zone: {total_entered}")
        summary_lines.append(f"  • Total objects exited red zone: {total_exited}")
        summary_lines.append(f"  • Objects still in red zone: {total_entered - total_exited}")

        summary_lines.append("\n  📋 BREAKDOWN BY OBJECT CLASS:")

        # Combine all object classes that appeared in red zone
        all_classes = set(state.red_zone_entered_objects.keys()) | set(state.red_zone_exited_objects.keys())

        for obj_class in sorted(all_classes):
            entered = state.red_zone_entered_objects.get(obj_class, 0)
            exited = state.red_zone_exited_objects.get(obj_class, 0)
            still_in = entered - exited

            summary_lines.append(f"    {obj_class}:")
            summary_lines.append(f"      - Entered: {entered}")
            summary_lines.append(f"      - Exited: {exited}")
            summary_lines.append(f"      - Still in zone: {still_in}")
    else:
        summary_lines.append("\n🔴 RED ZONE ANALYSIS:")
        summary_lines.append("  • No objects detected in red zone during processing")

    # Object combination analysis
    combined_objects = []
    motorcycle_person_combinations = 0

    for obj_id, tracked in state.tracked_objects.items():
        if tracked.alternative_classes:
            combo_info = f"ID {obj_id}: {tracked.class_name} + {', '.join(sorted(tracked.alternative_classes))}"
            combined_objects.append(combo_info)

            # Count motorcycle+person combinations specifically
            if (tracked.class_name == 'motorcycle' and 'person' in tracked.alternative_classes) or \
               (tracked.class_name == 'person' and 'motorcycle' in tracked.alternative_classes):
                motorcycle_person_combinations += 1

    if combined_objects:
        summary_lines.append("\n🔗 OBJECT COMBINATIONS DETECTED:")
        summary_lines.append(f"  • Total combined detections: {len(combined_objects)}")
        summary_lines.append(f"  • Motorcycle+Person combinations: {motorcycle_person_combinations}")
        summary_lines.append("  • Details:")
        for combo in combined_objects:
            summary_lines.append(f"    - {combo}")

    # Warning summary
    if state.red_zone_warnings:
        summary_lines.append("\n⚠️ WARNING SUMMARY:")
        summary_lines.append(f"  • Total warnings generated: {len(state.red_zone_warnings)}")

        # Group warnings by object class
        warning_by_class = defaultdict(int)
        for warning in state.red_zone_warnings:
            warning_by_class[warning['class']] += 1

        for obj_class, count in sorted(warning_by_class.items()):
            summary_lines.append(f"    - {obj_class}: {count} warnings")

        # Show detailed warning log
        summary_lines.append("\n  📋 Warning Log (last 10):")
        for warning in state.red_zone_warnings[-10:]:  # Last 10 warnings
            summary_lines.append(f"    - Frame {warning['frame']}: {warning['class']} (ID: {warning['object_id']}) - {warning['frames_in_zone']} frames in zone")
    else:
        summary_lines.append("\n⚠️ WARNING SUMMARY:")
        summary_lines.append(f"  • No warnings generated (no objects stayed in red zone > {state.warning_frame_threshold} frames)")

    # Active tracking summary
    total_tracked = len(state.tracked_objects)
    if total_tracked > 0:
        summary_lines.append("\n📈 OBJECT TRACKING SUMMARY:")
        summary_lines.append(f"  • Total unique objects tracked: {total_tracked}")

        # Group by primary class
        objects_by_class = defaultdict(int)
        for obj in state.tracked_objects.values():
            primary_class = obj.get_primary_class()
            objects_by_class[primary_class] += 1

        for obj_class, count in sorted(objects_by_class.items()):
            summary_lines.append(f"    - {obj_class}: {count}")

    summary_lines.append("\n✅ Processing completed successfully!")
    summary_lines.append("\nNote: Objects detected as both motorcycle and person are counted as motorcycle (person riding motorcycle)")

    return "\n".join(summary_lines)

def extract_area_from_video(video):
    if video is None:
        return None, "Please upload a video", gr.update(choices=[], value=[], visible=False)

    cap = cv2.VideoCapture(video)
    ret, frame = cap.read()
    cap.release()

    if not ret:
        return None, "Could not read video frame", gr.update(choices=[], value=[], visible=False)

    success, message, segment_img = extract_protection_area(frame)
    if success and segment_img is not None:
        # extract_protection_area already returns an RGB image, so use it directly
        # (converting again would swap the channels back to BGR)
        segment_img_rgb = segment_img

        # Create segment choices
        segment_choices = [f"Segment {i+1} (Confidence: {segment['confidence']:.2f})"
                           for i, segment in enumerate(state.detected_segments)]

        return segment_img_rgb, message, gr.update(choices=segment_choices, value=segment_choices, visible=True)
    return None, message, gr.update(choices=[], value=[], visible=False)

def update_selected_segments(selected):
    if selected is None:
        selected = []
    state.selected_segments = selected
    return gr.update()

def process_video_wrapper(video, confidence=DEFAULT_CONFIDENCE, target_fps=1, time_window=10, similarity_threshold=0.35, warning_frame_threshold=3):
    """Wrapper around process_video to handle full-size video processing"""
    if video is None:
        yield None, "Please upload a video"
        return

    # Reset tracking state and update parameters
    state.reset_tracking()
    state.time_window = time_window
    state.similarity_threshold = similarity_threshold
    state.warning_frame_threshold = warning_frame_threshold

    protection_area = []
    if state.selected_segments and state.detected_segments:
        for choice in state.selected_segments:
            idx = get_segment_index(choice)
            if 0 <= idx < len(state.detected_segments):
                segment = state.detected_segments[idx]
                if 'mask' in segment and segment['mask']:
                    protection_area = segment['mask']
                    break
    elif len(state.protection_points) >= 3:
        protection_area = state.protection_points

    if not protection_area:
        yield None, "Please extract protection area first"
        return

    try:
        yield None, f"🚀 Starting video processing...\n⚙️ Time window: {time_window} frames\n⚙️ Similarity threshold: {similarity_threshold:.2f}\n⚙️ Warning threshold: {warning_frame_threshold} frames"

        for frame, status in process_video(video, confidence, target_fps):
            yield frame, status

    except Exception as e:
        yield None, f"Error processing video: {str(e)}"

# Enhanced Gradio interface
with gr.Blocks(title="Enhanced Rail Traffic Monitor") as demo:
    gr.Markdown("""
    # Enhanced Rail Traffic Monitoring System

    ## Features:
    - **Smart Object Tracking**: Uses an enhanced similarity method combining geometric and visual features
    - **Visual Similarity Comparison**: Compares actual images within bounding boxes using multiple methods
    - **Comprehensive Red Zone Monitoring**: Reports ALL objects entering the red zone
    - **Enhanced Grouping**: Groups objects by class with detailed statistics
    - **Real-time Status**: Shows objects currently in zone, entered, and exited
    - **Configurable Warning System**: Alerts when objects stay in the red zone for too long
    - **Configurable Parameters**: Adjust time window, similarity threshold, and warning criteria

    ## Enhanced Similarity Methods:
    - **Geometric Similarity** (40%): IoU + center distance
    - **Color Histogram** (25%): HSV color distribution comparison
    - **Structural Similarity** (25%): SSIM for shape and texture
    - **Feature Matching** (10%): ORB keypoint matching
    - **Default Threshold**: 0.35 (more lenient for better object matching)

    ## Red Zone Reporting:
    - **Objects Entered**: Total count of all objects that entered the red zone
    - **Currently in Zone**: Real-time list of objects currently in the red zone
    - **Objects Exited**: Count of objects that have left the red zone
    - **Detailed Grouping**: All statistics grouped by object class (train, car, person, etc.)

    ## Setup Instructions:

    **Method 1 (Manual Protection Area):**
    1. Click 4 points on the image to define the protection area
    2. Click "Reset Points" to start over

    **Method 2 (Automatic Detection):**
    1. Click "Extract Protection Area" to automatically detect rail segments

    **Processing:**
    3. Adjust detection confidence, processing frame rate, time window, similarity threshold, and warning threshold
    4. Click "Process Video" to analyze

    The system will show comprehensive real-time results including:
    - All objects that entered the red zone (grouped by class)
    - Objects currently in the red zone with detailed info
    - Objects that exited the red zone
    - Enhanced tracking with visual similarity comparison
    - Configurable warnings for objects staying too long in the red zone
    - Complete tracking statistics
    """)

    with gr.Row():
        with gr.Column():
            video_input = gr.Video(
                label="Input Video"
            )
            with gr.Row():
                confidence = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=DEFAULT_CONFIDENCE,
                    label="Detection Confidence Threshold",
                    info="Minimum confidence for object detection"
                )
                fps_slider = gr.Slider(
                    minimum=1,
                    maximum=30,
                    value=1,
                    step=1,
                    label="Processing Frame Rate (FPS)",
                    info="Frames per second to process"
                )

            with gr.Row():
                time_window_slider = gr.Slider(
                    minimum=5,
                    maximum=50,
                    value=10,
                    step=1,
                    label="Time Window (frames)",
                    info="Number of frames to consider for object similarity"
                )
                similarity_threshold_slider = gr.Slider(
                    minimum=0.1,
                    maximum=0.9,
                    value=0.35,
                    step=0.05,
                    label="Similarity Threshold",
                    info="Threshold for considering objects as the same (higher = stricter)"
                )

            with gr.Row():
                warning_threshold_slider = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=3,
                    step=1,
                    label="Warning Frame Threshold",
                    info="Number of frames in red zone before triggering warning"
                )
        with gr.Column():
            preview_image = gr.Image(
                label="Click to Select Protection Area (Original Size)",
                interactive=True,
                show_label=True
            )

            # Add segment selection dropdown
            segment_dropdown = gr.Dropdown(
                label="Selected Segments",
                choices=[],
                multiselect=True,
                interactive=True,
                visible=False,
                value=[]
            )

    with gr.Row():
        reset_btn = gr.Button("Reset Points")
        extract_btn = gr.Button("Extract Protection Area")
        process_btn = gr.Button("🚀 Process Video")

    with gr.Row():
        video_output = gr.Image(
            label="Live Processing Output",
            streaming=True,
            interactive=False,
            show_label=True,
            container=True,
            show_download_button=True
        )
        text_output = gr.Textbox(
            label="Detection Results & Red Zone Summary",
            lines=15,
            max_lines=20,
            show_copy_button=True
        )

    # Handle video upload to populate preview
    video_input.change(
        fn=update_preview,
        inputs=[video_input],
        outputs=[preview_image, segment_dropdown]
    )

    extract_btn.click(
        fn=extract_area_from_video,
        inputs=[video_input],
        outputs=[preview_image, text_output, segment_dropdown]
    )

    segment_dropdown.change(
        fn=update_selected_segments,
        inputs=[segment_dropdown],
        outputs=[segment_dropdown]
    )

    process_btn.click(
        fn=process_video_wrapper,
        inputs=[video_input, confidence, fps_slider, time_window_slider, similarity_threshold_slider, warning_threshold_slider],
        outputs=[video_output, text_output]
    )

    # Add click event handler
    preview_image.select(
        fn=handle_image_click,
        inputs=[preview_image],
        outputs=[preview_image, text_output]
    )

    # Add reset button handler
    reset_btn.click(
        fn=reset_points,
        inputs=[],
        outputs=[preview_image, text_output]
    )

if __name__ == "__main__":
    demo.queue().launch()
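# To run the app locally (an assumed setup, not stated in the original commit):
#   pip install -r requirements.txt
#   python app.py
# Gradio serves the UI on http://127.0.0.1:7860 by default; set API_URL in a
# .env file if the detection backend lives elsewhere.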
requirements.txt ADDED
@@ -0,0 +1,28 @@
# Core API dependencies
fastapi==0.110.0
uvicorn==0.27.1
python-multipart==0.0.9
pydantic==2.6.3

# Machine Learning and Computer Vision
torch==2.2.1
torchvision==0.17.1
ultralytics==8.1.28
opencv-python==4.9.0.80
numpy==1.26.3
Pillow==10.2.0
scikit-image==0.22.0

# Data handling and utilities
PyYAML==6.0.1
requests==2.31.0
python-dotenv==1.0.1

# Frontend (Gradio app)
gradio==4.19.2

# Development and testing
tqdm>=4.66.0

# Additional utilities (built-in modules - no need to install)
# os, io, sys, json, base64, logging, traceback, pathlib, threading, time, pickle, concurrent.futures, collections