exorcist123's picture
add crowd counting demo
f4634b9
raw
history blame
3.42 kB
import torch
import torchvision.transforms as standard_transforms
import numpy as np
from PIL import Image
import cv2
from .engine import *
from .models import build_model
import os
import warnings
warnings.filterwarnings("ignore")
class Args:
    """Plain settings holder handed to the model builder.

    Attributes:
        backbone: Name of the backbone network.
        row: Integer setting forwarded to the model builder.
        line: Integer setting forwarded to the model builder.
        output_dir: Directory path for prediction outputs.
        weight_path: Path of the checkpoint file holding trained weights.
    """

    def __init__(
        self,
        backbone: str,
        row: int,
        line: int,
        output_dir: str,
        weight_path: str,
    ) -> None:
        # NOTE: a ``gpu_id`` option used to live here; it is currently disabled.
        self.backbone, self.row, self.line = backbone, row, line
        self.output_dir, self.weight_path = output_dir, weight_path
class CrowdCounter:
    """Load the crowd-counting model once and run it on PIL images.

    The constructor builds the model with ``build_model``, pins it to the
    CPU, loads the trained weights and switches it to eval mode. ``test``
    returns the raw predictions; ``inference`` reduces them to a head count
    plus an annotated image.
    """

    def __init__(self) -> None:
        """Build the model, load its weights and prepare the input transform."""
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
        )
        # Inference is pinned to the CPU; GPU selection is disabled.
        self.device = torch.device("cpu")
        # Build the network (P2PNet) and place it on the chosen device.
        self.model = build_model(self.args)
        self.model.to(self.device)
        if self.args.weight_path is not None:
            # NOTE(review): torch.load unpickles the file, so only trusted
            # checkpoints must ever be placed at weight_path.
            checkpoint = torch.load(
                self.args.weight_path, map_location=self.device
            )
            self.model.load_state_dict(checkpoint["model"])
        # Eval mode: disables training-only layer behaviour (dropout, BN updates).
        self.model.eval()
        # PIL image -> float tensor, normalised with the usual ImageNet
        # per-channel mean / std.
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    @staticmethod
    def _round_down_size(length: int, multiple: int = 128) -> int:
        """Round ``length`` down to a multiple of ``multiple``, never below it."""
        # Without the lower bound, any side shorter than ``multiple`` becomes 0
        # and the subsequent resize fails.
        return max(multiple, length // multiple * multiple)

    def test(
        self,
        args: "Args",
        img_raw: "Image.Image",
        debug: bool = False,
        threshold: float = 0.5,
    ) -> "tuple[list[list[float]], np.ndarray, torch.Tensor]":
        """Predict head locations on ``img_raw`` and draw them.

        Args:
            args: Unused; kept so existing callers keep working.
            img_raw: Input picture. Non-RGB images are converted to RGB.
            debug: Unused; kept so existing callers keep working.
            threshold: Minimum score for a predicted point to be kept.

        Returns:
            A tuple ``(points, drawn, conf)``: ``points`` is a list of
            ``[x, y]`` coordinates in the resized image, ``drawn`` is the
            resized image as a BGR ``numpy`` array with a filled circle on
            every kept point, and ``conf`` holds the scores of those points.
        """
        # The 3-channel normalisation and RGB->BGR conversion below both
        # require exactly three channels.
        if img_raw.mode != "RGB":
            img_raw = img_raw.convert("RGB")

        # Snap both sides down to a multiple of 128 (presumably required by
        # the network's downsampling -- confirm against the model code).
        width, height = img_raw.size
        target_size = (
            self._round_down_size(width),
            self._round_down_size(height),
        )
        img_raw = img_raw.resize(target_size, Image.LANCZOS)

        # Pre-processing: the transform already yields a tensor, so only the
        # batch dimension has to be added.
        samples = self.transform(img_raw).unsqueeze(0).to(self.device)

        # Pure inference: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            outputs = self.model(samples)

        # Score of class index 1 for every candidate of the first batch item
        # (assumes pred_logits is (batch, candidates, classes) -- confirm).
        scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[0, :, 1]
        candidates = outputs["pred_points"][0]

        keep = scores > threshold
        conf = scores[keep]
        points = candidates[keep].cpu().numpy().tolist()

        # Draw on a BGR copy of the resized picture; (255, 0, 0) is blue there.
        radius = 5
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for x, y in points:
            img_to_draw = cv2.circle(
                img_to_draw, (int(x), int(y)), radius, (255, 0, 0), -1
            )
        return points, img_to_draw, conf

    def inference(self, img_raw: "Image.Image") -> "tuple[int, np.ndarray]":
        """Count the people in ``img_raw``.

        Returns:
            ``(count, drawn)`` where ``count`` is the number of kept points
            and ``drawn`` is the annotated BGR ``numpy`` array from ``test``.
        """
        # NOTE(review): the drawn image is BGR; callers that display it as RGB
        # need to convert it first.
        points, img_to_draw, _ = self.test(self.args, img_raw)
        return len(points), img_to_draw