"""CPU inference wrapper around a P2PNet crowd-counting model."""

import os
import warnings

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from .engine import *
from .models import build_model

warnings.filterwarnings("ignore")


class Args:
    """Configuration container handed to ``build_model``."""

    def __init__(
        self,
        backbone: str,
        row: int,
        line: int,
        output_dir: str,
        weight_path: str,
    ) -> None:
        """Store the model/build settings as plain attributes.

        Args:
            backbone: Name of the backbone network (e.g. ``"vgg16_bn"``).
            row: ``row`` setting forwarded to the model builder.
            line: ``line`` setting forwarded to the model builder.
            output_dir: Directory path kept alongside the other settings.
            weight_path: Path of the checkpoint file to load; ``None``
                skips loading weights.
        """
        self.backbone = backbone
        self.row = row
        self.line = line
        self.output_dir = output_dir
        self.weight_path = weight_path


class CrowdCounter:
    """Load the model once and predict head points on PIL images."""

    # Input sides are rounded down to a multiple of this value.
    _SIZE_MULTIPLE = 128
    # Radius (in pixels) of the filled circle drawn for every prediction.
    _POINT_RADIUS = 5
    # Colour of the drawn circles, in OpenCV channel order (B, G, R).
    _POINT_COLOR = (255, 0, 0)

    def __init__(self) -> None:
        """Build the model, load its weights and prepare pre-processing."""
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
        )

        # Inference runs on the CPU.
        self.device = torch.device("cpu")

        self.model = build_model(self.args)
        self.model.to(self.device)

        if self.args.weight_path is not None:
            # NOTE: torch.load unpickles the file; only load checkpoints
            # that come from a trusted source.
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])

        self.model.eval()

        # ToTensor followed by per-channel normalisation (3 channels).
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def test(
        self,
        args: Args,
        img_raw: Image.Image,
        debug: bool = False,
        threshold: float = 0.5,
    ) -> tuple[list[list[float]], np.ndarray, torch.Tensor]:
        """Run the model on one image and draw the kept predictions.

        Args:
            args: Unused; kept so existing callers keep working.
            img_raw: Input image. It is converted to RGB when needed and
                resized so that both sides are multiples of 128.
            debug: Unused; kept so existing callers keep working.
            threshold: Minimum score a predicted point needs to be kept.

        Returns:
            A tuple ``(points, img_to_draw, conf)`` where ``points`` is the
            list of kept point coordinates, ``img_to_draw`` is the resized
            image as a BGR NumPy array with a filled circle on every kept
            point, and ``conf`` is the tensor of scores of the kept points.
        """
        # The normalisation and the RGB->BGR conversion below both need
        # exactly three channels.
        if img_raw.mode != "RGB":
            img_raw = img_raw.convert("RGB")

        # Round each side down to a multiple of 128, but never to zero:
        # images smaller than 128 px would otherwise get an empty target size.
        step = self._SIZE_MULTIPLE
        width, height = img_raw.size
        new_width = max(step, width // step * step)
        new_height = max(step, height // step * step)
        img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)

        samples = self.transform(img_raw).unsqueeze(0).to(self.device)

        # No gradients are needed for inference; skipping autograd avoids
        # keeping the computation graph alive.
        with torch.no_grad():
            outputs = self.model(samples)
            outputs_scores = torch.nn.functional.softmax(
                outputs["pred_logits"], -1
            )[:, :, 1][0]
            outputs_points = outputs["pred_points"][0]

            keep = outputs_scores > threshold
            conf = outputs_scores[keep]
            points = outputs_points[keep].detach().cpu().numpy().tolist()

        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for p in points:
            img_to_draw = cv2.circle(
                img_to_draw,
                (int(p[0]), int(p[1])),
                self._POINT_RADIUS,
                self._POINT_COLOR,
                -1,
            )
        return points, img_to_draw, conf

    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
        """Count the predicted points on an image.

        Args:
            img_raw: Input image.

        Returns:
            A tuple ``(num_points, img_to_draw)``: the number of kept
            predictions and the annotated image produced by :meth:`test`.
        """
        points, img_to_draw, _ = self.test(self.args, img_raw)
        return len(points), img_to_draw