File size: 3,422 Bytes
f4634b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import warnings
from dataclasses import dataclass

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from .engine import *
from .models import build_model

warnings.filterwarnings("ignore")


@dataclass
class Args:
    """Configuration passed to ``build_model`` for the P2PNet crowd counter.

    Field order matches the original positional ``__init__`` signature,
    so both positional and keyword construction remain compatible.
    """

    backbone: str      # feature-extractor name, e.g. "vgg16_bn"
    row: int           # anchor rows per feature-map cell
    line: int          # anchor columns per feature-map cell
    output_dir: str    # where prediction images are written
    weight_path: str   # path to the pre-trained checkpoint
    # gpu_id: int      # uncomment to select a CUDA device


class CrowdCounter:
    """Wraps a pre-trained P2PNet model for head-point crowd counting on CPU."""

    def __init__(self) -> None:
        """Build the P2PNet, load its trained weights, and set up preprocessing."""
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
            # gpu_id=0,
        )

        # Inference runs on CPU; switch to torch.device("cuda") for GPU use.
        self.device = torch.device("cpu")
        # Build the P2PNet and move it to the target device.
        self.model = build_model(self.args)
        self.model.to(self.device)
        # Restore trained parameters; the checkpoint stores them under "model".
        if self.args.weight_path is not None:
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])
        # Inference-only usage: freeze batch-norm/dropout behavior.
        self.model.eval()
        # ImageNet normalization — presumably the statistics used during
        # training (standard for a vgg16_bn backbone) — TODO confirm.
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def test(
        self, args: Args, img_raw: Image.Image, debug: bool = False,
    ) -> tuple[list, np.ndarray, torch.Tensor]:
        """Run P2PNet on a single image and draw the predicted head points.

        Args:
            args: unused; kept for backward compatibility (configuration is
                read from ``self.args`` set in ``__init__``).
            img_raw: input RGB image.
            debug: unused; kept for backward compatibility.

        Returns:
            A ``(points, img_to_draw, conf)`` tuple: ``points`` is a list of
            ``[x, y]`` head coordinates (in resized-image pixels),
            ``img_to_draw`` is a BGR ``np.ndarray`` with the points drawn,
            and ``conf`` holds the matching per-point confidence scores.
        """
        # The model expects dimensions that are multiples of 128; floor to the
        # nearest multiple, but never below 128 so small inputs don't collapse
        # to a zero-sized (invalid) resize.
        width, height = img_raw.size
        new_width = max(width // 128, 1) * 128
        new_height = max(height // 128, 1) * 128
        img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)

        # Pre-process: ToTensor + normalize, then add the batch dimension.
        # (The transform already yields a float tensor; no extra copy needed.)
        samples = self.transform(img_raw).unsqueeze(0).to(self.device)

        # Inference only — skip autograd bookkeeping.
        with torch.no_grad():
            outputs = self.model(samples)

        # Class index 1 is the positive ("head") class; take its softmax
        # probability for every proposal point of the single batch element.
        outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[
            :, :, 1
        ][0]
        outputs_points = outputs["pred_points"][0]

        threshold = 0.5
        # Compute the confidence mask once and reuse it for both tensors.
        keep = outputs_scores > threshold
        conf = outputs_scores[keep]
        points = outputs_points[keep].detach().cpu().numpy().tolist()

        # Draw filled circles at each prediction. OpenCV images are BGR, so
        # (255, 0, 0) renders as blue.
        size = 5
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for p in points:
            img_to_draw = cv2.circle(
                img_to_draw, (int(p[0]), int(p[1])), size, (255, 0, 0), -1
            )
        return points, img_to_draw, conf

    # Function to process and save images
    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
        """Count people in ``img_raw``.

        Returns:
            ``(num_points, img_to_draw)``: the number of detected heads and
            the annotated BGR image array.
        """
        points, img_to_draw, conf = self.test(self.args, img_raw)
        return len(points), img_to_draw