techysanoj's picture
creating app.py file
d10f466
raw
history blame
2.31 kB
import gradio as gr
import cv2
import torch
from torchvision import transforms
from PIL import Image
# Pre-trained detector used for demonstration purposes; replace it with your
# own model if needed. The weights are fetched through torch.hub from the
# pinned torchvision tag, and the module is put into evaluation mode right
# away (eval() returns the module itself, so the call can be chained).
model = torch.hub.load(
    'pytorch/vision:v0.10.0',
    'fasterrcnn_resnet50_fpn',
    pretrained=True,
).eval()

# Preprocessing applied to every input image before it is fed to the model:
# a single step that converts the image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])
# Function to perform object detection on an image
def detect_objects(image):
    """Run the module-level detection model on a single image.

    Args:
        image: The picture to analyse, in any form the module-level
            ``transform`` accepts (the callers in this file pass a PIL
            image). PIL images that are not in RGB mode (for example RGBA
            or paletted uploads) are converted to RGB first, because the
            detector is fed three-channel input everywhere else in this file.

    Returns:
        A ``(boxes, labels)`` tuple of NumPy arrays taken from the first
        (and only) prediction of the batch. Each entry of ``boxes`` holds
        four coordinates that the drawing code in this file uses as
        ``(left, top, right, bottom)``; ``labels`` holds the class id the
        model assigned to the corresponding box.
    """
    # Normalise the colour mode so an alpha channel or palette does not
    # reach the model as an unexpected number of channels.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # Convert image to tensor and add the batch dimension the model expects.
    input_tensor = transform(image).unsqueeze(0)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        predictions = model(input_tensor)

    # One image went in, so there is exactly one prediction to read.
    prediction = predictions[0]

    # .cpu() is a no-op for CPU tensors and keeps .numpy() working if the
    # model (and therefore its outputs) is ever moved to another device.
    boxes = prediction['boxes'].cpu().numpy()
    labels = prediction['labels'].cpu().numpy()
    return boxes, labels
# Function for live object detection from the camera
def live_object_detection():
    """Show a live camera feed with detected objects outlined.

    Opens camera index 0 (replace with your own camera setup), runs
    ``detect_objects`` on every captured frame, draws each box and its label
    id on the frame and displays it in an OpenCV window titled
    'Object Detection'. The loop ends when the 'q' key is pressed or when the
    camera stops delivering frames.

    Raises:
        RuntimeError: If the camera cannot be opened at all.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        cap.release()
        raise RuntimeError("Could not open camera 0")

    green = (0, 255, 0)
    try:
        while True:
            # Capture frame-by-frame; a failed read yields no usable frame,
            # so stop instead of passing None on to OpenCV.
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR frames; the detector is given an RGB PIL image.
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Perform object detection
            boxes, labels = detect_objects(frame_pil)

            # Draw bounding boxes on the frame
            for box, label in zip(boxes, labels):
                left, top, right, bottom = (int(coord) for coord in box)
                cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
                # Keep the caption inside the frame when the box touches
                # the top edge.
                text_y = max(top - 10, 10)
                cv2.putText(
                    frame,
                    f"Label: {label}",
                    (left, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    green,
                    2,
                )

            # Display the resulting frame
            cv2.imshow('Object Detection', frame)

            # Break the loop when 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close all windows, even if detection or
        # drawing raised part-way through.
        cap.release()
        cv2.destroyAllWindows()
# Define the Gradio interface
def _annotate_image(uploaded_image, webcam_image=None):
    """Return a copy of the submitted picture with detections drawn on it.

    The interface declares two inputs (an uploaded photo and a webcam
    capture); the uploaded photo is used when present, otherwise the webcam
    capture. Returns ``None`` when neither input holds an image.
    """
    # Local import: only this handler needs the drawing helpers.
    from PIL import ImageDraw

    source = uploaded_image if uploaded_image is not None else webcam_image
    if source is None:
        return None

    # An input that is not already a PIL image is assumed to be an image
    # array that PIL can wrap — TODO confirm for the "webcam" shortcut.
    if not isinstance(source, Image.Image):
        source = Image.fromarray(source)

    # convert() returns a new image, so the caller's input is never drawn on.
    annotated = source.convert("RGB")
    boxes, labels = detect_objects(annotated)

    green = (0, 255, 0)
    draw = ImageDraw.Draw(annotated)
    for box, label in zip(boxes, labels):
        left, top, right, bottom = (int(coord) for coord in box)
        draw.rectangle([left, top, right, bottom], outline=green, width=2)
        # Keep the caption inside the picture when the box touches the top.
        draw.text((left, max(top - 10, 0)), f"Label: {label}", fill=green)
    return annotated


# gr.Interface takes a single callable whose return value matches `outputs`,
# so one handler serves both inputs and returns the annotated image.
iface = gr.Interface(
    fn=_annotate_image,
    inputs=[
        gr.Image(type="pil", label="Upload a photo for object detection"),
        "webcam",
    ],
    outputs="image",
    live=True,
)

# Launch the Gradio interface
iface.launch()