import argparse
import os
import warnings

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from models import build_model

warnings.filterwarnings('ignore')


def get_args_parser():
    parser = argparse.ArgumentParser('Set parameters for P2PNet evaluation', add_help=False)

    # * Backbone
    parser.add_argument('--backbone', default='vgg16_bn', type=str,
                        help="name of the convolutional backbone to use")
    parser.add_argument('--row', default=2, type=int,
                        help="row number of anchor points")
    parser.add_argument('--line', default=2, type=int,
                        help="line number of anchor points")
    parser.add_argument('--output_dir', default='',
                        help='path where to save the visualized image and point coordinates')
    parser.add_argument('--weight_path', default='',
                        help='path where the trained weights are saved')
    parser.add_argument('--img_path', default='',
                        help='path to the input image')
    parser.add_argument('--gpu_id', default=0, type=int,
                        help='the gpu used for evaluation')

    return parser


def main(args, debug=False):
    os.environ["CUDA_VISIBLE_DEVICES"] = '{}'.format(args.gpu_id)
    print(args.img_path)
    # fall back to CPU when no GPU is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # get the P2PNet
    model = build_model(args)
    # move to GPU (or CPU)
    model.to(device)
    # load trained model; the empty-string default means "no weights given"
    if args.weight_path:
        checkpoint = torch.load(args.weight_path, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
    # convert to eval mode
    model.eval()

    # create the pre-processing transform
    transform = standard_transforms.Compose([
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),
    ])

    clean_img_path = os.path.basename(args.img_path)
    # load the image
    img_raw = Image.open(args.img_path).convert('RGB')
    # round the size down to a multiple of 128, as the backbone stride requires
    width, height = img_raw.size
    new_width = width // 128 * 128
    new_height = height // 128 * 128
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
    # pre-processing: transform already returns a tensor, so just add a batch dim
    img = transform(img_raw)
    samples = img.unsqueeze(0).to(device)

    # run inference
    with torch.no_grad():
        outputs = model(samples)
    outputs_scores = torch.nn.functional.softmax(outputs['pred_logits'], -1)[:, :, 1][0]
    outputs_points = outputs['pred_points'][0]

    threshold = 0.5
    # filter the predictions by confidence score
    points = outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
    predict_cnt = int((outputs_scores > threshold).sum())
    print('predicted count:', predict_cnt)

    # draw the predictions
    size = 5
    img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
    # write the point coordinates to a .txt file so it does not clash with
    # the visualized image, which is saved under the original file name below
    txt_path = os.path.join(args.output_dir, os.path.splitext(clean_img_path)[0] + '.txt')
    with open(txt_path, 'w') as output_file:
        for p in points:
            img_to_draw = cv2.circle(img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1)
            output_file.write(str(p[0]) + " " + str(p[1]) + "\n")
    # save the visualized image
    cv2.imwrite(os.path.join(args.output_dir, clean_img_path), img_to_draw)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('P2PNet evaluation script', parents=[get_args_parser()])
    args = parser.parse_args()
    main(args)
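
# A hypothetical invocation (the checkpoint, image, and output paths below are
# placeholders, not files guaranteed to ship with the repo; substitute your own):
#
#   python run_test.py --weight_path ./weights/SHTechA.pth \
#       --img_path ./vis/demo.jpg --output_dir ./logs
#
# This writes ./logs/demo.jpg (input with predicted head points drawn as red
# dots) and ./logs/demo.txt (one "x y" coordinate pair per line).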