Create app.py
app.py
ADDED
@@ -0,0 +1,288 @@
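# Runtime dependencies, inferred from the imports below (a sketch only —
# this Space's requirements.txt is not shown here):
#   pip install gradio ultralytics opencv-python numpy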
import gradio as gr
import cv2
import json
import tempfile
import os
from ultralytics import YOLO
import numpy as np
from collections import defaultdict
from typing import Dict, List, Tuple, Any


class HumanTracker:
    def __init__(self):
        # Load YOLOv11 model - using the nano version for faster processing
        # You can change to yolo11s.pt, yolo11m.pt, yolo11l.pt, or yolo11x.pt for better accuracy
        self.model = YOLO("yolo11n.pt")

    def calculate_center(self, x1: float, y1: float, x2: float, y2: float) -> Tuple[float, float]:
        """Calculate center coordinates from bounding box coordinates."""
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        return center_x, center_y

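    # Worked example (illustrative values only): for a bounding box with
    # corners (x1, y1) = (100, 50) and (x2, y2) = (300, 250), calculate_center
    # returns ((100 + 300) / 2, (50 + 250) / 2) = (200.0, 150.0) — the
    # midpoint later used as the text-overlay anchor for that person.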
    def process_video(self, video_path: str, progress_callback=None) -> Dict[str, Any]:
        """
        Process video file and extract human tracking data.

        Args:
            video_path: Path to the input video file
            progress_callback: Optional callback function for progress updates

        Returns:
            Dictionary containing processed tracking data in the required JSON format
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        frame_data = {}
        id_mapping = {}  # Maps original YOLO IDs to simplified sequential IDs
        next_person_id = 1

        print(f"Processing video: {total_frames} frames at {fps} FPS")

        # Process video with YOLO tracking
        # Using stream=True for memory efficiency with large videos
        results = self.model.track(
            video_path,
            classes=[0],  # Only detect humans (class 0)
            persist=True,  # Enable tracking
            stream=True,
            verbose=False
        )

        frame_count = 0
        for result in results:
            if progress_callback:
                # max() guards against videos that report zero frames
                progress = (frame_count + 1) / max(total_frames, 1)
                progress_callback(progress, f"Processing frame {frame_count + 1}/{total_frames}")

            # Check if any detections exist
            if result.boxes is not None and len(result.boxes) > 0:
                # Extract bounding boxes, track IDs, and confidences
                boxes = result.boxes.xyxy.cpu().numpy()  # x1, y1, x2, y2 format
                track_ids = result.boxes.id
                confidences = result.boxes.conf.cpu().numpy()

                # The tracker may not assign IDs on some frames; only iterate
                # detections when IDs exist, rather than zipping over None
                if track_ids is not None:
                    track_ids = track_ids.int().cpu().numpy()

                    people_in_frame = []

                    for box, track_id, confidence in zip(boxes, track_ids, confidences):
                        x1, y1, x2, y2 = box

                        # Map original YOLO ID to simplified sequential ID
                        if track_id not in id_mapping:
                            id_mapping[track_id] = next_person_id
                            next_person_id += 1

                        person_id = id_mapping[track_id]

                        # Calculate center coordinates
                        center_x, center_y = self.calculate_center(x1, y1, x2, y2)

                        # Create person data
                        person_data = {
                            "person_id": person_id,
                            "center_x": float(center_x),
                            "center_y": float(center_y),
                            "confidence": float(confidence),
                            "bbox": {
                                "x1": float(x1),
                                "y1": float(y1),
                                "x2": float(x2),
                                "y2": float(y2)
                            }
                        }
                        people_in_frame.append(person_data)

                    if people_in_frame:
                        # Sort people by person_id for consistency
                        people_in_frame.sort(key=lambda x: x["person_id"])
                        frame_data[frame_count] = people_in_frame

            frame_count += 1

        cap.release()

        # Convert to the required JSON format
        frames_list = []
        sorted_frames = sorted(frame_data.keys())

        for frame_num in sorted_frames:
            frames_list.append({
                "frame": frame_num,
                "people": frame_data[frame_num]
            })

        # Create the final output structure
        output = {
            "metadata": {
                "total_frames": len(frames_list),
                "total_people": len(id_mapping),
                "video_info": {
                    "fps": float(fps),
                    "total_video_frames": total_frames
                },
                "id_mapping": {str(original_id): simplified_id for original_id, simplified_id in id_mapping.items()}
            },
            "frames": frames_list
        }

        return output

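# For reference, the dictionary returned by process_video serializes to JSON
# shaped like the following (values here are illustrative, not real output):
#
# {
#   "metadata": {
#     "total_frames": 120,
#     "total_people": 3,
#     "video_info": {"fps": 29.97, "total_video_frames": 450},
#     "id_mapping": {"1": 1, "4": 2, "7": 3}
#   },
#   "frames": [
#     {
#       "frame": 0,
#       "people": [
#         {
#           "person_id": 1,
#           "center_x": 200.0,
#           "center_y": 150.0,
#           "confidence": 0.91,
#           "bbox": {"x1": 100.0, "y1": 50.0, "x2": 300.0, "y2": 250.0}
#         }
#       ]
#     }
#   ]
# }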
def process_video_gradio(video_file, progress=gr.Progress()):
    """
    Gradio interface function for processing videos.

    Args:
        video_file: Uploaded video file from Gradio
        progress: Gradio progress tracker

    Returns:
        Tuple of (JSON file path, status message, preview of results)
    """
    if video_file is None:
        return None, "❌ Please upload a video file", "No video uploaded"

    try:
        # Initialize the tracker
        tracker = HumanTracker()

        # Create progress callback
        def update_progress(prog, msg):
            progress(prog, desc=msg)

        # Process the video
        progress(0.1, desc="Starting video processing...")
        results = tracker.process_video(video_file, update_progress)

        progress(0.9, desc="Generating JSON output...")

        # Create temporary JSON file
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            json.dump(results, f, indent=2)
            json_path = f.name

        # Create a preview of the results
        metadata = results["metadata"]
        total_frames = metadata["total_frames"]
        total_people = metadata["total_people"]

        # video_info is always populated by process_video, so index directly
        # (a .get() fallback string would break the :.2f format below anyway)
        preview = f"""
📊 **Processing Results:**
- **Total frames with detections:** {total_frames}
- **Unique people detected:** {total_people}
- **Original video frames:** {metadata['video_info']['total_video_frames']}
- **Video FPS:** {metadata['video_info']['fps']:.2f}

🆔 **ID Mapping:**
{json.dumps(metadata["id_mapping"], indent=2)}

🎬 **Sample Frame Data (first frame):**
{json.dumps(results["frames"][:1] if results["frames"] else [], indent=2)}
"""

        progress(1.0, desc="✅ Processing complete!")

        return (
            json_path,
            f"✅ Successfully processed video! Detected {total_people} unique people across {total_frames} frames.",
            preview
        )

    except Exception as e:
        error_msg = f"❌ Error processing video: {str(e)}"
        print(error_msg)
        return None, error_msg, f"Error details: {str(e)}"

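# A minimal headless usage sketch, bypassing the Gradio UI entirely. The
# "input.mp4" / "tracks.json" paths are placeholders, not files shipped with
# this Space:
#
#   tracker = HumanTracker()
#   data = tracker.process_video("input.mp4")
#   with open("tracks.json", "w") as f:
#       json.dump(data, f, indent=2)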
# Create the Gradio interface
def create_interface():
    with gr.Blocks(
        title="YOLOv11 Human Tracking & Coordinate Extraction",
        theme=gr.themes.Soft()
    ) as demo:

        gr.Markdown("""
        # 🎯 YOLOv11 Human Tracking & Coordinate Extraction

        Upload a video to detect and track humans using YOLOv11. The app will:
        - 🔍 Detect humans in each frame
        - 🎯 Track individuals across frames with unique IDs
        - 📐 Extract bounding box coordinates and center points
        - 📄 Generate JSON output for text overlay positioning

        **Supported formats:** MP4, AVI, MOV, WEBM
        """)

        with gr.Row():
            with gr.Column(scale=1):
                video_input = gr.Video(
                    label="📹 Upload Video",
                    height=400
                )

                process_btn = gr.Button(
                    "🚀 Process Video",
                    variant="primary",
                    size="lg"
                )

            with gr.Column(scale=1):
                json_output = gr.File(
                    label="📄 Download JSON Results",
                    file_count="single"
                )

                status_output = gr.Textbox(
                    label="📋 Status",
                    value="Ready to process video...",
                    interactive=False
                )

        with gr.Row():
            preview_output = gr.Textbox(
                label="👁️ Results Preview",
                lines=15,
                interactive=False,
                placeholder="Results preview will appear here after processing..."
            )

        # Event handlers
        process_btn.click(
            fn=process_video_gradio,
            inputs=[video_input],
            outputs=[json_output, status_output, preview_output],
            show_progress=True
        )

        # Example section
        gr.Markdown("""
        ## 📋 Output Format

        The generated JSON file contains:
        - **metadata**: Video info, total people count, ID mappings
        - **frames**: Array of frame data with person detections

        Each person detection includes:
        - `person_id`: Unique identifier for tracking
        - `center_x`, `center_y`: Center coordinates for text overlay positioning
        - `confidence`: Detection confidence score
        - `bbox`: Full bounding box coordinates (x1, y1, x2, y2)
        """)

    return demo

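# Sketch of the downstream use the UI text describes: positioning a text
# overlay at each tracked person's center with OpenCV. draw_label is a
# hypothetical helper, not part of this app; it assumes a decoded frame and
# one entry from the JSON "people" array:
#
#   def draw_label(frame, person, text):
#       # Anchor the label at the detection's center point
#       x, y = int(person["center_x"]), int(person["center_y"])
#       cv2.putText(frame, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX,
#                   0.8, (0, 255, 0), 2)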
if __name__ == "__main__":
    # Create and launch the interface
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",  # Allow external access
        server_port=7860,
        share=False,  # Set to True if you want a public link
        show_error=True
    )