import gradio as gr
import cv2
import numpy as np
import os
from PIL import Image
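
# Optional sanity check (assumes the standard Darknet filenames used below):
# fail fast with a hint if the model files are missing, rather than letting
# cv2.dnn.readNet raise a cryptic error. The weights are typically downloaded
# from https://pjreddie.com/media/files/yolov3.weights.
for required_file in ('yolov3.weights', 'yolov3.cfg', 'coco.names'):
    if not os.path.exists(required_file):
        raise FileNotFoundError(
            f"Missing '{required_file}': place the YOLOv3 weights/config and "
            "COCO class names next to this script before launching."
        )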

# Load YOLO model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Set backend (CPU)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Load class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Get YOLO output layer names
output_layers_names = net.getUnconnectedOutLayersNames()

def count_people_in_frame(frame):
    """
    Detects people in a given frame (image) and returns the count.
    """
    height, width, _ = frame.shape

    # Convert frame to YOLO input format
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)

    # Forward pass
    layer_outputs = net.forward(output_layers_names)

    # Process detections, keeping only confident 'person' boxes
    boxes, confidences = [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if classes[class_id] == 'person' and confidence > 0.5:
                center_x, center_y = int(detection[0] * width), int(detection[1] * height)
                w, h = int(detection[2] * width), int(detection[3] * height)
                x, y = int(center_x - w / 2), int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))

    # Apply Non-Maximum Suppression (NMS) to merge overlapping boxes
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) if boxes else []
    return len(indexes)
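
# Example usage (a sketch; assumes a local image file such as "crowd.jpg"):
#   frame = cv2.imread("crowd.jpg")      # OpenCV loads images as BGR arrays
#   print(count_people_in_frame(frame))  # -> e.g. 3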

def analyze_image(image):
    """
    Processes an image and returns the number of people detected.
    """
    if isinstance(image, np.ndarray):
        image_cv = image  # Already a NumPy array (assumed BGR, as from cv2)
    else:
        # PIL images are RGB; convert to the BGR layout OpenCV expects
        image_cv = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    people_count = count_people_in_frame(image_cv)
    return f"People in Image: {people_count}"

def analyze_video(video_path):
    """
    Processes a video and returns the maximum number of people seen in any frame.
    """
    if not os.path.exists(video_path):
        return "Error: Video file could not be loaded."

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Unable to open video file."

    frame_count = 0
    people_per_frame = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Count people in the frame
        people_count = count_people_in_frame(frame)
        people_per_frame.append(people_count)
        frame_count += 1
    cap.release()

    return f"Max People Detected in Video: {max(people_per_frame) if people_per_frame else 0}"

def process_input(input_file):
    """
    Determines whether the input is an image or a video and dispatches accordingly.
    """
    # gr.File may hand back a tempfile-like object (older Gradio) or a plain
    # file path string (newer Gradio); handle both.
    file_path = input_file.name if hasattr(input_file, "name") else input_file
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension in [".jpg", ".jpeg", ".png", ".bmp"]:
        image = Image.open(file_path)
        return analyze_image(image)
    elif file_extension in [".mp4", ".avi", ".mov", ".mkv"]:
        return analyze_video(file_path)
    else:
        return "Error: Unsupported file format."

# Gradio Interface for Image and Video Processing
app = gr.Interface(
    fn=process_input,
    inputs=gr.File(label="Upload Image or Video"),  # Use File to handle both types
    outputs=[gr.Textbox(label="People Counting Results")],
    title="YOLO People Counter (Image & Video)",
    description="Upload an image or video to detect and count people using YOLOv3."
)

# Launch app
if __name__ == "__main__":
    app.launch()