Spaces:

ristek-dsa
/

crowd-counting-demo

Sleeping

File size: 4,060 Bytes

import torch
import torchvision.transforms as standard_transforms
import numpy as np

from PIL import Image
import cv2

from .engine import *
from .models import build_model
import os
import warnings

warnings.filterwarnings("ignore")


class Args:
    """Plain settings container for building and loading the counting model.

    The instance only stores the values it is given; no validation is done.

    Attributes:
        backbone: Name of the backbone network to build.
        row: Integer model setting (presumably anchor-point rows of the
            P2PNet grid -- TODO confirm against ``models.build_model``).
        line: Integer model setting (presumably anchor-point columns of the
            P2PNet grid -- TODO confirm against ``models.build_model``).
        output_dir: Directory path intended for prediction outputs.
        weight_path: Path of the checkpoint file holding trained weights.
    """

    # NOTE: a ``gpu_id`` option used to be declared here and is currently
    # disabled; re-add both the parameter and the attribute together.
    def __init__(
        self,
        backbone: str,
        row: int,
        line: int,
        output_dir: str,
        weight_path: str,
    ) -> None:
        # Model-architecture settings.
        self.backbone, self.row, self.line = backbone, row, line
        # Filesystem locations.
        self.output_dir, self.weight_path = output_dir, weight_path


class CrowdCounter:
    """CPU inference wrapper around a P2PNet crowd-counting model.

    On construction the model is built from a fixed ``Args`` configuration,
    the trained weights are loaded and the model is switched to eval mode.
    ``inference`` is the high-level entry point; ``test`` additionally
    returns the confidence of every kept point.
    """

    # Longest image side, in pixels, after rescaling for inference.
    MAX_DIMENSION = 512
    # Resized sides are floored to a multiple of this value (and never go
    # below it).  Presumably required by the network's down-sampling
    # stride -- TODO confirm against the model definition.
    SIZE_MULTIPLE = 128
    # Minimum class-1 softmax score for a predicted point to be kept.
    SCORE_THRESHOLD = 0.5
    # Radius, in pixels, of the dot drawn for each kept point.
    POINT_RADIUS = 3

    def __init__(self) -> None:
        """Build the model, load its weights and prepare the preprocessing."""
        # Create the Args object
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
        )

        # Inference is pinned to the CPU.
        self.device = torch.device("cpu")
        # get the P2PNet
        self.model = build_model(self.args)
        self.model.to(self.device)
        # load trained model
        if self.args.weight_path is not None:
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])
        # convert to eval mode
        self.model.eval()
        # Pre-processing: PIL image -> float tensor -> per-channel
        # normalisation (the usual ImageNet mean/std for three channels).
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    @classmethod
    def _fit_size(cls, width: int, height: int) -> tuple[int, int]:
        """Return the (width, height) an image is resized to for inference.

        The longest side is scaled to ``MAX_DIMENSION`` (aspect ratio kept),
        then each side is floored to a multiple of ``SIZE_MULTIPLE``.  Integer
        arithmetic avoids float rounding that could turn 512 into 511 (and
        hence 384), and the lower clamp keeps very elongated images from
        collapsing to a zero-sized side.
        """
        longest = max(width, height)
        step = cls.SIZE_MULTIPLE

        def snap(side: int) -> int:
            scaled = side * cls.MAX_DIMENSION // longest
            return max(step, scaled // step * step)

        return snap(width), snap(height)

    def test(
        self,
        args: Args,
        img_raw: Image.Image,
        debug: bool = False,
    ) -> tuple[list[tuple[int, int]], np.ndarray, torch.Tensor]:
        """Predict head points on ``img_raw`` and draw them on a copy.

        Args:
            args: Unused; kept so existing callers keep working.
            img_raw: Input picture. Any PIL mode is accepted; it is converted
                to RGB before processing.
            debug: When true, print the size the image is resized to.

        Returns:
            A tuple ``(points, image, conf)`` where ``points`` is a list of
            ``(x, y)`` integer pixel coordinates in the original image,
            ``image`` is a BGR ``numpy`` array of the original picture with a
            red dot on every point, and ``conf`` is a tensor with the score
            of each kept point.

        Raises:
            ValueError: If the image has a zero-sized side.
        """
        # ToTensor/Normalize and COLOR_RGB2BGR below assume three channels.
        if img_raw.mode != "RGB":
            img_raw = img_raw.convert("RGB")

        ori_width, ori_height = img_raw.size
        if ori_width <= 0 or ori_height <= 0:
            raise ValueError(
                f"image must have a non-zero size, got {ori_width}x{ori_height}"
            )

        new_width, new_height = self._fit_size(ori_width, ori_height)
        if debug:
            print(new_width, new_height)
        img_resized = img_raw.resize((new_width, new_height), Image.LANCZOS)

        # pre-processing: add the batch dimension and move to the device
        samples = self.transform(img_resized).unsqueeze(0).to(self.device)

        # run inference; no autograd graph is needed for prediction
        with torch.no_grad():
            outputs = self.model(samples)

        # Score of class index 1 for every candidate point of the single
        # batch element.
        outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[
            0, :, 1
        ]
        outputs_points = outputs["pred_points"][0]

        # filter the predictions
        keep = outputs_scores > self.SCORE_THRESHOLD
        conf = outputs_scores[keep]
        points = outputs_points[keep].detach().cpu().numpy().tolist()

        # Map coordinates from the resized image back to the original one.
        scale_factor_width = ori_width / new_width
        scale_factor_height = ori_height / new_height
        adjusted_points = [
            (int(p[0] * scale_factor_width), int(p[1] * scale_factor_height))
            for p in points
        ]

        # draw the predictions (OpenCV works on BGR arrays, so red is
        # (0, 0, 255); thickness -1 draws a filled circle)
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for point in adjusted_points:
            img_to_draw = cv2.circle(
                img_to_draw, point, self.POINT_RADIUS, (0, 0, 255), -1
            )
        return adjusted_points, img_to_draw, conf

    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
        """Count the predicted points on ``img_raw``.

        Returns:
            ``(count, image)``: the number of predicted points and the BGR
            ``numpy`` array of the picture with the points drawn on it.
        """
        points, img_to_draw, _ = self.test(self.args, img_raw)
        return len(points), img_to_draw