import os
import warnings

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from .engine import *
from .models import build_model

warnings.filterwarnings("ignore")


class Args:
    def __init__(
        self,
        backbone: str,
        row: int,
        line: int,
        output_dir: str,
        weight_path: str,
        # gpu_id: int,
    ) -> None:
        self.backbone = backbone
        self.row = row
        self.line = line
        self.output_dir = output_dir
        self.weight_path = weight_path
        # self.gpu_id = gpu_id


class CrowdCounter:
    def __init__(self) -> None:
        # Create the Args object
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
        )

        # device = torch.device('cuda')
        self.device = torch.device("cpu")

        # build the P2PNet model
        self.model = build_model(self.args)
        # move to the selected device
        self.model.to(self.device)

        # load the trained weights
        if self.args.weight_path is not None:
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])

        # switch to eval mode
        self.model.eval()

        # create the pre-processing transform
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def test(
        self,
        args: Args,
        img_raw: Image.Image,
        debug: bool = False,
    ) -> tuple[list[tuple[int, int]], np.ndarray, torch.Tensor]:
        ori_width, ori_height = img_raw.size

        # resize so the longer side is at most 512 px, then snap both sides
        # down to a multiple of 128 as the network expects
        max_dimension = 512
        scale_factor = max_dimension / max(ori_width, ori_height)
        new_width = int(ori_width * scale_factor)
        new_height = int(ori_height * scale_factor)
        if debug:
            print(new_width, new_height)
        new_width = new_width // 128 * 128
        new_height = new_height // 128 * 128
        img_resized = img_raw.resize((new_width, new_height), Image.LANCZOS)
        if debug:
            print(new_width, new_height)

        # pre-processing
        img = self.transform(img_resized)
        samples = img.unsqueeze(0)
        samples = samples.to(self.device)

        # run inference
        outputs = self.model(samples)
        outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[
            :, :, 1
        ][0]
        outputs_points = outputs["pred_points"][0]

        # filter the predictions by confidence
        threshold = 0.5
        conf = outputs_scores[outputs_scores > threshold]
        points = (
            outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
        )

        # map the predicted points back to the original image resolution
        scale_factor_width = ori_width / new_width
        scale_factor_height = ori_height / new_height
        adjusted_points = []
        for p in points:
            # Adjust each point's coordinates
            adjusted_x = int(p[0] * scale_factor_width)
            adjusted_y = int(p[1] * scale_factor_height)
            adjusted_points.append((adjusted_x, adjusted_y))

        # draw the predictions
        size = 3
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for p in adjusted_points:
            img_to_draw = cv2.circle(
                img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1
            )

        return adjusted_points, img_to_draw, conf

    # Process an image and return the head count and the annotated image
    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
        # Predict points on the image
        points, img_to_draw, conf = self.test(self.args, img_raw)

        # The number of predicted points is the crowd (pilgrims) count
        num_points = len(points)

        # Pilgrims count, drawn image
        return num_points, img_to_draw
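

# Minimal usage sketch (an assumption, not part of the original module): it
# presumes the pretrained weights exist at ./crowd_counter/weights/SHTechA.pth
# and that "example.jpg" (a hypothetical path) is an RGB crowd image.
if __name__ == "__main__":
    counter = CrowdCounter()
    image = Image.open("example.jpg").convert("RGB")
    count, annotated = counter.inference(image)
    print(f"Estimated head count: {count}")
    # the annotated image is a BGR numpy array, so cv2.imwrite can save it directly
    cv2.imwrite("./crowd_counter/preds/annotated.jpg", annotated)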