import torch
import torchvision.transforms as standard_transforms
import numpy as np
from PIL import Image
import cv2
from .engine import *
from .models import build_model
import os
import warnings
warnings.filterwarnings("ignore")
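
# Configuration container passed to build_model() in place of an argparse
# namespace (an assumption: only these fields appear to be consumed).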
class Args:
def __init__(
self,
backbone: str,
row: int,
line: int,
output_dir: str,
weight_path: str,
# gpu_id: int,
) -> None:
self.backbone = backbone
self.row = row
self.line = line
self.output_dir = output_dir
self.weight_path = weight_path
# self.gpu_id = gpu_id
class CrowdCounter:
def __init__(self) -> None:
# Create the Args object
self.args = Args(
backbone="vgg16_bn",
row=2,
line=2,
output_dir="./crowd_counter/preds",
weight_path="./crowd_counter/weights/SHTechA.pth",
)
        # inference runs on CPU here; switch to the CUDA line below if a GPU is available
        # self.device = torch.device("cuda")
        self.device = torch.device("cpu")
        # build the P2PNet model
        self.model = build_model(self.args)
        # move the model to the selected device
        self.model.to(self.device)
# load trained model
if self.args.weight_path is not None:
checkpoint = torch.load(self.args.weight_path, map_location="cpu")
self.model.load_state_dict(checkpoint["model"])
# convert to eval mode
self.model.eval()
# create the pre-processing transform
self.transform = standard_transforms.Compose(
[
standard_transforms.ToTensor(),
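                # ImageNet mean/std, matching the pretrained VGG16-BN backbone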
standard_transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
),
]
)
    def test(
        self, args: Args, img_raw: Image.Image, debug: bool = False,
    ) -> tuple[list[tuple[int, int]], np.ndarray, torch.Tensor]:
        ori_width, ori_height = img_raw.size
        # scale the longer side to 512 px, then round both sides down to a
        # multiple of 128 before resizing
        max_dimension = 512
        scale_factor = max_dimension / max(ori_width, ori_height)
        new_width = int(ori_width * scale_factor) // 128 * 128
        new_height = int(ori_height * scale_factor) // 128 * 128
        if debug:
            print(f"Resized input to {new_width}x{new_height}")
        img_resized = img_raw.resize((new_width, new_height), Image.LANCZOS)
        # pre-processing: to tensor, normalize, add batch dimension
        img = self.transform(img_resized)
        samples = img.unsqueeze(0).to(self.device)
        # run inference without tracking gradients
        with torch.no_grad():
            outputs = self.model(samples)
        # confidence of the "person" class for each point proposal
        outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[
            :, :, 1
        ][0]
        outputs_points = outputs["pred_points"][0]
threshold = 0.5
# filter the predictions
conf = outputs_scores[outputs_scores > threshold]
points = (
outputs_points[outputs_scores > threshold].detach().cpu().numpy().tolist()
)
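        # map points from the resized image back to original-image coordinates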
scale_factor_width = ori_width / new_width
scale_factor_height = ori_height / new_height
adjusted_points = []
for p in points:
# Adjust each point's coordinates
adjusted_x = int(p[0] * scale_factor_width)
adjusted_y = int(p[1] * scale_factor_height)
adjusted_points.append((adjusted_x, adjusted_y))
# draw the predictions
size = 3
img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
for p in adjusted_points:
img_to_draw = cv2.circle(
img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1
)
return adjusted_points, img_to_draw, conf
    # Run the full pipeline on one image: predict points, count, annotate
    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
# Predict points on the image
points, img_to_draw, conf = self.test(self.args, img_raw)
        # the crowd count is the number of accepted point predictions
        num_points = len(points)
        # return the count and the annotated BGR image
        return num_points, img_to_draw
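
# A minimal usage sketch (an assumption, not part of the original module);
# "example.jpg" and "annotated.jpg" are placeholder paths.
if __name__ == "__main__":
    counter = CrowdCounter()
    img = Image.open("example.jpg").convert("RGB")
    count, annotated = counter.inference(img)
    print(f"Detected {count} people")
    # the annotated image is a BGR ndarray, so cv2.imwrite saves it directly
    cv2.imwrite("annotated.jpg", annotated)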