import cv2
import numpy as np
import gradio as gr
from ultralytics import YOLO

# Load YOLOv12x model
MODEL_PATH = "yolov12x.pt"  # Ensure the model is uploaded to the Hugging Face Space
model = YOLO(MODEL_PATH)

# COCO dataset class IDs
PERSON_CLASS_ID = 0  # "person"
TRUCK_CLASS_ID = 7   # "truck"

CONFIDENCE_THRESHOLD = 0.5  # Minimum confidence for a detection to be counted
FRAME_SKIP = 5              # Process every 5th frame for efficiency


def count_objects(video_path):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Unable to open video file."

    frame_count = 0
    object_counts = {"people": [], "trucks": []}

    while True:
        ret, frame = cap.read()
        if not ret:
            break  # End of video

        frame_count += 1
        if frame_count % FRAME_SKIP != 0:
            continue  # Skip frames to improve efficiency

        # Run YOLOv12x inference on the current frame
        results = model(frame, verbose=False)

        people_count, truck_count = 0, 0
        for result in results:
            for box in result.boxes:
                class_id = int(box.cls.item())       # Class ID
                confidence = float(box.conf.item())  # Confidence score

                # Count objects based on their class IDs
                if class_id == PERSON_CLASS_ID and confidence > CONFIDENCE_THRESHOLD:
                    people_count += 1
                elif class_id == TRUCK_CLASS_ID and confidence > CONFIDENCE_THRESHOLD:
                    truck_count += 1

        object_counts["people"].append(people_count)
        object_counts["trucks"].append(truck_count)

    cap.release()

    return {
        "Max People in a Frame": int(np.max(object_counts["people"])) if object_counts["people"] else 0,
        "Max Trucks in a Frame": int(np.max(object_counts["trucks"])) if object_counts["trucks"] else 0,
    }


# Gradio UI function
def analyze_video(video_file):
    if video_file is None:
        return "Error: No video uploaded."
    result = count_objects(video_file)
    if isinstance(result, str):  # count_objects returns an error string on failure
        return result
    return "\n".join(f"{key}: {value}" for key, value in result.items())


# Define Gradio interface
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Textbox(label="Analysis Result"),
    title="YOLOv12x Object Counter",
    description="Upload a video to count people and trucks using YOLOv12x.",
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()