"""Count people in uploaded images or videos with YOLOv3, served via Gradio."""

import os

import cv2
import gradio as gr
import numpy as np
from PIL import Image

# File extensions routed to the image and video pipelines respectively.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")
VIDEO_EXTENSIONS = (".mp4", ".avi", ".mov", ".mkv")

# Load YOLO model
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Set backend (CPU)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# Load class names
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f]

# Index of the 'person' label; -1 can never equal an argmax result, so a
# names file without that label simply yields zero detections.
_PERSON_CLASS_ID = classes.index('person') if 'person' in classes else -1

# Get YOLO output layer names
output_layers_names = net.getUnconnectedOutLayersNames()


def _file_path(file_obj):
    """Return the filesystem path for an uploaded file, or None.

    Accepts a path string / ``os.PathLike`` as well as an object exposing
    the path through a ``.name`` attribute (e.g. a temp-file wrapper), so
    callers work with either representation of an upload.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return getattr(file_obj, "name", None)


def count_people_in_frame(frame, conf_threshold=0.5, nms_threshold=0.4):
    """Detect people in a single frame and return how many were found.

    Args:
        frame: Image as a NumPy array. Colour frames are expected in BGR
            channel order (what ``cv2.VideoCapture.read`` produces);
            grayscale and 4-channel (BGRA) arrays are converted to BGR.
        conf_threshold: Minimum class score for a detection to be kept.
        nms_threshold: Overlap threshold passed to non-maximum suppression.

    Returns:
        Number of 'person' boxes remaining after non-maximum suppression.
        An empty or missing frame counts as 0.
    """
    if frame is None or frame.size == 0:
        return 0

    # The network expects exactly three colour channels.
    if frame.ndim == 2 or frame.shape[2] == 1:
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
    elif frame.shape[2] == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR)

    height, width = frame.shape[:2]

    # Convert frame to YOLO format (swapRB turns BGR into the RGB the
    # network consumes).
    blob = cv2.dnn.blobFromImage(
        frame, 1 / 255.0, (416, 416), swapRB=True, crop=False
    )
    net.setInput(blob)

    # Forward pass
    layer_outputs = net.forward(output_layers_names)

    # Process detections: each row is [cx, cy, w, h, objectness, scores...]
    # with box values normalised to the frame size.
    boxes, confidences = [], []
    for output in layer_outputs:
        scores = output[:, 5:]
        class_ids = np.argmax(scores, axis=1)
        best_scores = scores[np.arange(scores.shape[0]), class_ids]
        keep = (class_ids == _PERSON_CLASS_ID) & (best_scores > conf_threshold)

        for detection, confidence in zip(output[keep], best_scores[keep]):
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))

    if not boxes:
        return 0

    # Apply Non-Maximum Suppression (NMS) so overlapping boxes for the same
    # person are counted once.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    return len(indexes)


def analyze_image(image):
    """Count the people in an image.

    Args:
        image: A PIL image, or a NumPy array that is passed to the detector
            unchanged (so it is assumed to already be in BGR order).

    Returns:
        Tuple ``(image, message)``: the input image as given and a
        human-readable result string.
    """
    if isinstance(image, np.ndarray):
        image_cv = image  # Already a NumPy array
    else:
        # PIL yields RGB (or palette/alpha/grayscale modes); normalise to
        # 3-channel RGB, then reorder to the BGR layout the detector expects.
        rgb = np.array(image.convert("RGB"))
        image_cv = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    people_count = count_people_in_frame(image_cv)
    return image, f"People in Image: {people_count}"


def analyze_video(video_file):
    """Count people in every frame of a video and report the maximum.

    Args:
        video_file: Path to the video, or an object exposing the path via
            a ``.name`` attribute.

    Returns:
        A result string with the highest per-frame people count, or an
        error string when the file is missing or cannot be opened.
    """
    video_path = _file_path(video_file)
    if not video_path or not os.path.exists(video_path):
        return "Error: Video file could not be loaded."

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return "Error: Unable to open video file."

        max_people = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Count people in the frame
            max_people = max(max_people, count_people_in_frame(frame))
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    return f"Max People Detected in Video: {max_people}"


def process_input(input_file):
    """Dispatch an uploaded file to the image or video pipeline.

    Args:
        input_file: The upload, either as a path string or as an object
            exposing the path via ``.name``.

    Returns:
        A single result or error string, matching the interface's one
        Textbox output.
    """
    file_path = _file_path(input_file)
    if not file_path:
        return "Error: No file uploaded."

    file_extension = os.path.splitext(file_path)[1].lower()

    if file_extension in IMAGE_EXTENSIONS:
        try:
            # Pixel data is decoded lazily, so the analysis must run while
            # the file is still open.
            with Image.open(file_path) as image:
                _, message = analyze_image(image)
        except OSError:
            return "Error: Unable to open image file."
        return message

    if file_extension in VIDEO_EXTENSIONS:
        return analyze_video(file_path)

    return "Error: Unsupported file format."


# Gradio Interface for Image and Video Processing
app = gr.Interface(
    fn=process_input,
    inputs=gr.File(label="Upload Image or Video"),  # Use File to handle both types
    outputs=[gr.Textbox(label="People Counting Results")],
    title="YOLO People Counter (Image & Video)",
    description="Upload an image or video to detect and count people using YOLOv3.",
)

# Launch app
if __name__ == "__main__":
    app.launch()