"""Gradio demo: object detection on uploaded photos with a pre-trained model.

The module loads a torchvision Faster R-CNN detector, exposes
``detect_objects`` for raw predictions, ``live_object_detection`` for a local
OpenCV webcam preview, and a Gradio interface that returns the uploaded image
with the detected boxes drawn on it.
"""

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from torchvision import transforms

# Load the pre-trained object detection model (replace with your own model).
# For example, using a torchvision model for demonstration purposes.
model = torch.hub.load('pytorch/vision:v0.10.0', 'fasterrcnn_resnet50_fpn', pretrained=True)
model.eval()

# Define the transformations for the input image.
transform = transforms.Compose([
    transforms.ToTensor(),
])

# Green is (0, 255, 0) in both RGB and BGR channel order, so the same colour
# constant works for PIL-derived arrays and for OpenCV camera frames.
_BOX_COLOR = (0, 255, 0)


def detect_objects(image):
    """Run the detector on a single image.

    Args:
        image: The image to analyse. PIL images are converted to RGB first;
            any other input is handed to ``transform`` as-is.

    Returns:
        A ``(boxes, labels)`` tuple of NumPy arrays taken from the first (and
        only) prediction of the batch. Each box is used by the drawing code
        as ``(x1, y1, x2, y2)`` corner coordinates.
    """
    # The detector expects 3-channel input; RGBA or grayscale PIL images
    # would otherwise produce a tensor with the wrong channel count.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    # Convert image to a batched tensor of shape (1, C, H, W).
    input_tensor = transform(image).unsqueeze(0)

    # Perform object detection without tracking gradients.
    with torch.no_grad():
        predictions = model(input_tensor)

    # Move to CPU before converting so this also works if the model has been
    # placed on another device.
    boxes = predictions[0]['boxes'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()

    return boxes, labels


def _draw_detections(frame, boxes, labels):
    """Draw every box and its label onto ``frame`` in place."""
    for box, label in zip(boxes, labels):
        x1, y1, x2, y2 = (int(coord) for coord in box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), _BOX_COLOR, 2)
        # Keep the caption inside the image when the box touches the top edge.
        text_y = max(y1 - 10, 10)
        cv2.putText(frame, f"Label: {label}", (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, _BOX_COLOR, 2)


def annotate_image(image):
    """Gradio callback: return ``image`` with the detected boxes drawn on it.

    Args:
        image: PIL image supplied by the Gradio image component, or ``None``
            when the component is empty.

    Returns:
        An RGB NumPy array with the detections drawn, or ``None`` when no
        image was supplied.
    """
    # With live=True the callback may fire while the input is empty.
    if image is None:
        return None

    boxes, labels = detect_objects(image)
    # np.array() makes a writable copy, which OpenCV needs for drawing.
    canvas = np.array(image.convert("RGB"))
    _draw_detections(canvas, boxes, labels)
    return canvas


def live_object_detection():
    """Show live detections from the default camera in an OpenCV window.

    Frames are read from camera index 0, annotated, and displayed until the
    'q' key is pressed or the camera stops delivering frames. The camera and
    the window are always released, even if detection raises.
    """
    # Open a connection to the camera (replace with your own camera setup).
    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Capture frame-by-frame; stop when no frame is available
            # (camera missing, unplugged, or stream ended) instead of
            # passing None on to cvtColor.
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR; the detector is fed an RGB PIL image.
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Perform object detection and draw the results on the frame.
            boxes, labels = detect_objects(frame_pil)
            _draw_detections(frame, boxes, labels)

            # Display the resulting frame.
            cv2.imshow('Object Detection', frame)

            # Break the loop when the 'q' key is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close all windows.
        cap.release()
        cv2.destroyAllWindows()


# Define the Gradio interface. ``fn`` must be a single callable whose return
# value matches ``outputs``, so the interface is wired to ``annotate_image``;
# ``live_object_detection`` opens its own OpenCV window and is meant to be
# called directly rather than through Gradio.
iface = gr.Interface(
    fn=annotate_image,
    inputs=gr.Image(type="pil", label="Upload a photo for object detection"),
    outputs="image",
    live=True,
)

# Launch the Gradio interface.
iface.launch()