File size: 4,060 Bytes
f4634b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5955cc
 
5823599
 
 
 
 
 
 
 
 
b5955cc
 
f4634b9
b5955cc
f4634b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5955cc
 
 
 
 
 
 
 
 
 
f4634b9
b5955cc
f4634b9
 
b5955cc
f4634b9
b5955cc
f4634b9
b5955cc
f4634b9
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import torch
import torchvision.transforms as standard_transforms
import numpy as np

from PIL import Image
import cv2

from .engine import *
from .models import build_model
import os
import warnings

warnings.filterwarnings("ignore")


class Args:
    """Settings object handed to ``build_model`` and kept on the counter.

    Attributes are stored exactly as given; no validation is performed.
    """

    def __init__(
        self,
        backbone: str,
        row: int,
        line: int,
        output_dir: str,
        weight_path: str,
    ) -> None:
        """Store the model configuration.

        Args:
            backbone: Name of the backbone network.
            row: Integer model setting (presumably anchor-grid rows; confirm
                against the model builder).
            line: Integer model setting (presumably anchor-grid columns;
                confirm against the model builder).
            output_dir: Directory path for prediction outputs.
            weight_path: Path to the checkpoint file with trained weights.
        """
        # Model-architecture settings.
        self.backbone, self.row, self.line = backbone, row, line
        # Filesystem locations.
        self.output_dir, self.weight_path = output_dir, weight_path


class CrowdCounter:
    """Count people in an image with a point-prediction model (P2PNet).

    The model is built once in ``__init__`` and runs on the CPU. Use
    ``inference`` for the simple "count + annotated image" result, or
    ``test`` when the individual points and confidences are needed.
    """

    # Longest side of the image fed to the network, in pixels.
    MAX_DIMENSION = 512
    # Both sides are floored to a multiple of this value before inference
    # (presumably required by the network's downsampling; confirm).
    SIZE_MULTIPLE = 128
    # Minimum softmax score for a predicted point to be kept.
    SCORE_THRESHOLD = 0.5
    # Radius, in pixels, of the dot drawn for every kept point.
    POINT_RADIUS = 3

    def __init__(self) -> None:
        """Build the model, load its weights, and prepare the preprocessing."""
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
        )

        # Inference is pinned to the CPU.
        self.device = torch.device("cpu")
        self.model = build_model(self.args)
        self.model.to(self.device)
        if self.args.weight_path is not None:
            # NOTE(security): torch.load unpickles the file, so only trusted
            # checkpoints must ever be placed at weight_path.
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])
        # Evaluation mode for inference.
        self.model.eval()
        # PIL image -> float tensor, then per-channel normalisation (3 channels).
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    @staticmethod
    def _resized_dims(
        width: int,
        height: int,
        max_dimension: int = MAX_DIMENSION,
        multiple: int = SIZE_MULTIPLE,
    ) -> tuple[int, int]:
        """Return the ``(width, height)`` an image is resized to for inference.

        The longest side is scaled to ``max_dimension`` (smaller images are
        scaled up), then both sides are floored to a multiple of ``multiple``.
        Each side is clamped to at least one ``multiple`` so that elongated
        images never collapse to a zero-sized dimension.

        Raises:
            ValueError: If ``width`` or ``height`` is not positive.
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"image dimensions must be positive, got {width}x{height}"
            )
        longest = max(width, height)
        # Integer arithmetic avoids float rounding that could push the longest
        # side just below max_dimension and lose a whole multiple.
        scaled_w = width * max_dimension // longest
        scaled_h = height * max_dimension // longest
        new_w = max(multiple, scaled_w // multiple * multiple)
        new_h = max(multiple, scaled_h // multiple * multiple)
        return new_w, new_h

    def test(
        self, args: "Args", img_raw: "Image.Image", debug: bool = False,
    ) -> tuple[list[tuple[int, int]], np.ndarray, torch.Tensor]:
        """Predict head positions on ``img_raw`` and draw them.

        Args:
            args: Unused; kept so existing callers keep working.
            img_raw: Input PIL image. Non-RGB images are converted to RGB.
            debug: When true, print the size the image is resized to.

        Returns:
            A tuple ``(points, drawn, conf)`` where ``points`` is a list of
            ``(x, y)`` integer coordinates in the original image, ``drawn`` is
            a BGR ``numpy`` array of the original image with a red dot on
            every point, and ``conf`` holds the scores of the kept points.

        Raises:
            ValueError: If the image has a zero-sized dimension.
        """
        # The normalisation and the RGB->BGR conversion both need 3 channels.
        if img_raw.mode != "RGB":
            img_raw = img_raw.convert("RGB")

        ori_width, ori_height = img_raw.size
        new_width, new_height = self._resized_dims(
            ori_width, ori_height, self.MAX_DIMENSION, self.SIZE_MULTIPLE
        )
        if debug:
            print(new_width, new_height)
        img_resized = img_raw.resize((new_width, new_height), Image.LANCZOS)

        # Pre-process and add the batch dimension.
        samples = self.transform(img_resized).unsqueeze(0).to(self.device)

        # No gradients are needed for inference.
        with torch.no_grad():
            outputs = self.model(samples)

        # Score of class index 1 for every candidate of the single batch item.
        scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[0, :, 1]
        candidates = outputs["pred_points"][0]

        keep = scores > self.SCORE_THRESHOLD
        conf = scores[keep]
        points = candidates[keep].cpu().numpy().tolist()

        # Map coordinates from the resized image back to the original one.
        scale_x = ori_width / new_width
        scale_y = ori_height / new_height
        adjusted_points = [
            (int(p[0] * scale_x), int(p[1] * scale_y)) for p in points
        ]

        # Draw a filled red dot (BGR colour order) on each prediction.
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for x, y in adjusted_points:
            img_to_draw = cv2.circle(
                img_to_draw, (x, y), self.POINT_RADIUS, (0, 0, 255), -1
            )
        return adjusted_points, img_to_draw, conf

    def inference(self, img_raw: "Image.Image") -> tuple[int, np.ndarray]:
        """Return the number of predicted points and the annotated image.

        Args:
            img_raw: Input PIL image.

        Returns:
            ``(count, drawn)``: the number of kept points and the BGR
            ``numpy`` image with the points drawn on it.
        """
        points, img_to_draw, _ = self.test(self.args, img_raw)
        return len(points), img_to_draw