File size: 3,422 Bytes
f4634b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import warnings
from dataclasses import dataclass

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from .engine import *
from .models import build_model

warnings.filterwarnings("ignore")


@dataclass
class Args:
    """Configuration passed to ``build_model`` for the P2PNet crowd counter.

    Field order matches the original positional ``__init__`` signature,
    so both positional and keyword construction remain compatible.
    """

    backbone: str      # feature-extractor name, e.g. "vgg16_bn"
    row: int           # anchor rows per feature-map cell
    line: int          # anchor columns per feature-map cell
    output_dir: str    # where prediction images are written
    weight_path: str   # path to the pre-trained checkpoint
    # gpu_id: int      # uncomment to select a CUDA device


class CrowdCounter:
    """Wraps a pre-trained P2PNet model for head-point crowd counting on CPU."""

    def __init__(self) -> None:
        """Build the P2PNet, load its trained weights, and set up preprocessing."""
        self.args = Args(
            backbone="vgg16_bn",
            row=2,
            line=2,
            output_dir="./crowd_counter/preds",
            weight_path="./crowd_counter/weights/SHTechA.pth",
            # gpu_id=0,
        )

        # Inference runs on CPU; switch to torch.device("cuda") for GPU use.
        self.device = torch.device("cpu")
        # Build the P2PNet and move it to the target device.
        self.model = build_model(self.args)
        self.model.to(self.device)
        # Restore trained parameters; the checkpoint stores them under "model".
        if self.args.weight_path is not None:
            checkpoint = torch.load(self.args.weight_path, map_location="cpu")
            self.model.load_state_dict(checkpoint["model"])
        # Inference-only usage: freeze batch-norm/dropout behavior.
        self.model.eval()
        # ImageNet normalization — presumably the statistics used during
        # training (standard for a vgg16_bn backbone) — TODO confirm.
        self.transform = standard_transforms.Compose(
            [
                standard_transforms.ToTensor(),
                standard_transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def test(
        self, args: Args, img_raw: Image.Image, debug: bool = False,
    ) -> tuple[list, np.ndarray, torch.Tensor]:
        """Run P2PNet on a single image and draw the predicted head points.

        Args:
            args: unused; kept for backward compatibility (configuration is
                read from ``self.args`` set in ``__init__``).
            img_raw: input RGB image.
            debug: unused; kept for backward compatibility.

        Returns:
            A ``(points, img_to_draw, conf)`` tuple: ``points`` is a list of
            ``[x, y]`` head coordinates (in resized-image pixels),
            ``img_to_draw`` is a BGR ``np.ndarray`` with the points drawn,
            and ``conf`` holds the matching per-point confidence scores.
        """
        # The model expects dimensions that are multiples of 128; floor to the
        # nearest multiple, but never below 128 so small inputs don't collapse
        # to a zero-sized (invalid) resize.
        width, height = img_raw.size
        new_width = max(width // 128, 1) * 128
        new_height = max(height // 128, 1) * 128
        img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)

        # Pre-process: ToTensor + normalize, then add the batch dimension.
        # (The transform already yields a float tensor; no extra copy needed.)
        samples = self.transform(img_raw).unsqueeze(0).to(self.device)

        # Inference only — skip autograd bookkeeping.
        with torch.no_grad():
            outputs = self.model(samples)

        # Class index 1 is the positive ("head") class; take its softmax
        # probability for every proposal point of the single batch element.
        outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[
            :, :, 1
        ][0]
        outputs_points = outputs["pred_points"][0]

        threshold = 0.5
        # Compute the confidence mask once and reuse it for both tensors.
        keep = outputs_scores > threshold
        conf = outputs_scores[keep]
        points = outputs_points[keep].detach().cpu().numpy().tolist()

        # Draw filled circles at each prediction. OpenCV images are BGR, so
        # (255, 0, 0) renders as blue.
        size = 5
        img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
        for p in points:
            img_to_draw = cv2.circle(
                img_to_draw, (int(p[0]), int(p[1])), size, (255, 0, 0), -1
            )
        return points, img_to_draw, conf

    # Function to process and save images
    def inference(self, img_raw: Image.Image) -> tuple[int, np.ndarray]:
        """Count people in ``img_raw``.

        Returns:
            ``(num_points, img_to_draw)``: the number of detected heads and
            the annotated BGR image array.
        """
        points, img_to_draw, conf = self.test(self.args, img_raw)
        return len(points), img_to_draw