|
import streamlit as st |
|
import torch |
|
import torchvision |
|
import torchvision.transforms as transforms |
|
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor |
|
from torchvision.transforms import ToTensor |
|
from PIL import Image, ImageDraw |
|
import cv2 |
|
import numpy as np |
|
import pandas as pd |
|
import os |
|
|
|
import tempfile |
|
from tempfile import NamedTemporaryFile |
|
|
|
|
|
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a Faster R-CNN (ResNet-50 FPN) with 91 output classes and load the
# weights stored in "frcnn_model.pth".
# NOTE(review): Streamlit presumably re-runs this script on each interaction,
# so the checkpoint is reloaded every time - consider caching the model.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=91)

# map_location lets a checkpoint that was saved on a GPU be loaded on a
# CPU-only host (and vice versa) instead of raising at load time.
model.load_state_dict(torch.load("frcnn_model.pth", map_location=device))

# The input tensors are moved to `device` before inference, so the model's
# weights must live there too; otherwise the forward pass fails with a
# device-mismatch error on CUDA machines.
model.to(device)

# The app only runs inference, so switch to evaluation mode once up front.
model.eval()
|
|
|
|
|
# Human-readable names indexed by the integer label the detector predicts
# (the list is indexed as `classes[label]`). It has 91 entries, matching the
# `num_classes=91` the model is built with; "N/A" marks label ids with no name.
classes = [
    # 0-9
    "__background__", "person", "bicycle", "car", "motorcycle",
    "airplane", "bus", "train", "truck", "boat",
    # 10-19
    "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter",
    "bench", "bird", "cat", "dog", "horse",
    # 20-29
    "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "N/A", "backpack", "umbrella", "N/A",
    # 30-39
    "N/A", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    # 40-49
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "N/A", "wine glass", "cup", "fork", "knife",
    # 50-59
    "spoon", "bowl", "banana", "apple", "sandwich",
    "orange", "broccoli", "carrot", "hot dog", "pizza",
    # 60-69
    "donut", "cake", "chair", "couch", "potted plant",
    "bed", "N/A", "dining table", "N/A", "N/A",
    # 70-79
    "toilet", "N/A", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 80-90
    "toaster", "sink", "refrigerator", "N/A", "book",
    "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]

# Detections whose score is not strictly above this value are discarded.
threshold = 0.5
|
|
|
# Page heading and the introductory blurb shown above the uploader.
_PAGE_TITLE = " Image Object Detections "

_MODEL_INTRO = """ The Faster R-CNN (Region-based Convolutional Neural Network) is a cutting-edge object detection model that combines deep
learning with region proposal networks to achieve highly accurate object detection in images.
It is trained on a large dataset of images and can detect a wide range of objects with high Precision and Recall.
The model is based on the ResNet-50 architecture, which allows it to capture complex visual features from the input image.
It uses a two-stage approach, first proposing regions of interest (RoIs) in the image and then classifying and refining the
object boundaries within these RoIs. This approach makes it extremely efficient and accurate in detecting multiple objects
in a single image.
"""

st.title(_PAGE_TITLE)
st.write(_MODEL_INTRO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sample pictures displayed in the sidebar. The paths are relative, so the
# files are looked up from the app's working directory.
images = ["test2.jpg", "img7.jpg", "img20.jpg", "img23.jpg"]

# Object notation on `st.sidebar` places the elements in the sidebar, the same
# as wrapping the calls in a `with st.sidebar:` block.
st.sidebar.write("Choose an image")
st.sidebar.image(images)
|
|
|
|
|
|
|
def detect_objects(image_path):
    """Detect objects in an image and render the annotated image in the app.

    The image is run through the module-level ``model``; every detection whose
    score is above the module-level ``threshold`` is drawn onto the image as a
    red box with a yellow class name, and the result is shown with Streamlit.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts - a filesystem path or
            an open binary file-like object.

    Returns:
        A list of ``(class_name, score, (x0, y0, x1, y1))`` tuples, one per
        detection that was kept. Earlier versions returned ``None``; callers
        that ignore the return value are unaffected.
    """
    image = Image.open(image_path).convert('RGB')

    # Send the input to whichever device the model's weights actually live on,
    # so the forward pass cannot fail with a CPU/GPU mismatch.
    model_device = next(model.parameters()).device
    image_tensor = ToTensor()(image).to(model_device)

    model.eval()
    with torch.no_grad():
        # The model takes a list of images; only one is passed, so take the
        # single result.
        prediction = model([image_tensor])[0]

    # Keep only detections scoring above the threshold, then convert to plain
    # Python numbers so PIL drawing and list indexing get native floats/ints.
    keep = prediction['scores'] > threshold
    scores = prediction['scores'][keep].cpu().tolist()
    boxes = prediction['boxes'][keep].cpu().tolist()
    labels = prediction['labels'][keep].cpu().tolist()

    draw = ImageDraw.Draw(image)
    detections = []
    for box, label, score in zip(boxes, labels, scores):
        x0, y0, x1, y1 = box
        draw.rectangle([(x0, y0), (x1, y1)], outline='red')

        # Label the box at its top-left corner.
        class_name = classes[label]
        draw.text((x0, y0), class_name, fill='yellow')

        detections.append((class_name, score, (x0, y0, x1, y1)))

    st.write("Objects detected in the image are: ")
    st.image(image, use_column_width=True)

    return detections
|
|
|
|
|
|
|
# Let the user upload a picture, preview it, then run detection on it.
file = st.file_uploader('Upload an Image', type=(["jpeg", "jpg", "png"]))

if file is None:
    st.write("Please upload an image file")
else:
    image = Image.open(file)
    st.write("Input Image")
    st.image(image, use_column_width=True)

    # Hand the in-memory upload straight to the detector instead of copying
    # it to a temporary file: the previous copy was never flushed before being
    # reopened by name (so the detector could read a truncated file), and
    # reopening an open NamedTemporaryFile by name fails on Windows.
    # Rewind first because the preview above has already read from the buffer.
    file.seek(0)
    detect_objects(file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Closing description rendered at the bottom of the page.
_APP_SUMMARY = """ This Streamlit app provides a user-friendly interface for uploading an image and visualizing the output of the Faster R-CNN
model. It displays the uploaded image along with the predicted objects highlighted with bounding box overlays. The app allows
users to explore the detected objects in the image, providing valuable insights and understanding of the model's predictions.
It can be used for a wide range of applications, such as object recognition, image analysis, and visual storytelling.
Whether it's identifying objects in real-world images or understanding the capabilities of state-of-the-art object detection
models, this Streamlit app powered by Faster R-CNN is a powerful tool for computer vision tasks.
"""

st.write(_APP_SUMMARY)
|
|