import gradio as gr
import cv2
import numpy as np
import os
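# NOTE: this script assumes the standard YOLOv3 files sit next to it.
# They are commonly obtained from the Darknet project (URLs assumed,
# verify before use):
#   yolov3.weights - https://pjreddie.com/media/files/yolov3.weights
#   yolov3.cfg     - https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg
#   coco.names     - https://github.com/pjreddie/darknet/blob/master/data/coco.names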
# Load the pre-trained YOLOv3 network via OpenCV's DNN module
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Set backend and target (CPU by default)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
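# If OpenCV is built with CUDA support, the same model can run on the GPU
# instead (assumption: a CUDA-enabled OpenCV build; plain pip wheels are not):
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)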
# Load COCO class names (the 80 labels YOLOv3 was trained on)
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Get the names of the YOLO output layers
output_layers_names = net.getUnconnectedOutLayersNames()
def count_people_in_frame(frame):
    """
    Detect people in a single frame (image) and return the annotated
    frame together with the person count.
    """
    height, width, _ = frame.shape

    # Convert the frame to a YOLO input blob (normalized, resized to 416x416)
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass through the network
    layer_outputs = net.forward(output_layers_names)

    # Collect candidate 'person' detections
    boxes, confidences = [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if classes[class_id] == 'person' and confidence > 0.5:
                # Box coordinates are relative to the frame; scale to pixels
                center_x, center_y = int(detection[0] * width), int(detection[1] * height)
                w, h = int(detection[2] * width), int(detection[3] * height)
                x, y = int(center_x - w / 2), int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

    # Apply Non-Maximum Suppression (NMS) to merge overlapping boxes.
    # Older OpenCV versions return nested indices, so flatten before use.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) if boxes else []
    indexes = np.array(indexes).flatten().astype(int)

    # Draw a bounding box around each surviving detection
    for i in indexes:
        x, y, w, h = boxes[i]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Return the processed frame and the number of people detected
    return frame, len(indexes)
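# Quick standalone check of the detector (hypothetical file name):
#   img = cv2.imread('crowd.jpg')
#   annotated, n = count_people_in_frame(img)
#   print(f'{n} people detected')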
def count_people_video(video_path):
    """
    Process a video frame by frame and count people per frame.
    """
    if not os.path.exists(video_path):
        return "Error: Video file not found."

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Unable to open video file."

    frame_count = 0
    people_per_frame = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Count people in the current frame
        _, people_count = count_people_in_frame(frame)
        people_per_frame.append(people_count)
        frame_count += 1
    cap.release()

    # Report the peak per-frame count as the number of people in the video
    return {
        "People in Video": int(np.max(people_per_frame)) if people_per_frame else 0,
        "Frames Processed": frame_count,
    }
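# Example (hypothetical path): count_people_video('clip.mp4') returns a dict
# such as {"People in Video": 3, "Frames Processed": 120} on success, or an
# error string on failure.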
def analyze_video(video_file):
    if video_file is None:
        return "Error: No video provided."
    result = count_people_video(video_file)
    # count_people_video returns a plain error string on failure
    if isinstance(result, str):
        return result
    return "\n".join(f"{key}: {value}" for key, value in result.items())

def analyze_image(image):
    image_cv = np.array(image)  # Convert PIL image to NumPy array
    processed_image, people_count = count_people_in_frame(image_cv)
    return processed_image, f"People in Image: {people_count}"
# Gradio UI: gr.Interface takes a single function, not a list, so image and
# video analysis are built as separate interfaces and combined as tabs
image_ui = gr.Interface(
    fn=analyze_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[gr.Image(label="Processed Image"), gr.Textbox(label="People Counting Results")],
    description="Upload an image to detect and count people using YOLOv3.")
video_ui = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=gr.Textbox(label="People Counting Results"),
    description="Upload a video to detect and count people using YOLOv3.")
interface = gr.TabbedInterface([image_ui, video_ui], tab_names=["Image", "Video"],
                               title="YOLO-based People Counter")
# Launch app
if __name__ == "__main__":
    interface.launch()
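# launch() serves the app locally (Gradio's default is http://127.0.0.1:7860);
# interface.launch(share=True) would additionally create a temporary public link.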