File size: 6,159 Bytes
994ce72 494c823 994ce72 deb6583 af01b55 d8cab4b deb6583 994ce72 18a2e37 994ce72 af01b55 3734c2f af01b55 994ce72 d075284 af01b55 994ce72 9254d1a 994ce72 af01b55 18a2e37 d075284 af01b55 d075284 af01b55 ff0d189 af01b55 deb6583 494c823 ff0d189 af01b55 ff0d189 af01b55 ff0d189 d8cab4b ff0d189 494c823 ff0d189 494c823 af01b55 ff0d189 af01b55 ff0d189 4d1a0fe ff0d189 af01b55 994ce72 494c823 1047fa8 af01b55 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# Copyright 2021 Tencent
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
import os
import numpy as np
import torch
import warnings
import random
import matplotlib.pyplot as plt
import gradio as gr
import torchvision.transforms as standard_transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from model import SASNet
# Suppress all warnings for the whole process: the filter is installed with
# the 'ignore' action and no category/message/module restriction.
warnings.filterwarnings('ignore')
# define the GPU id to be used
# (disabled: the assignment below is commented out, so CUDA_VISIBLE_DEVICES
# is not modified by this file)
#os.environ['CUDA_VISIBLE_DEVICES'] = '0'
class data(Dataset):
    """Map-style dataset that exposes one in-memory image as its only sample.

    Args:
        img: Image object providing ``convert('RGB')``. The demo passes the
            PIL image delivered by the Gradio image input.
        transform: Optional callable applied to the RGB image. When it
            returns a ``torch.Tensor`` that tensor is returned unchanged.
    """

    def __init__(self, img, transform=None):
        self.image = img
        self.transform = transform

    def __len__(self):
        # Exactly one image is held, so exactly one sample is reported.
        # (A larger hard-coded length makes a DataLoader yield the same
        # image over and over.)
        return 1

    def __getitem__(self, x):
        """Return the wrapped image as a tensor.

        Args:
            x: Sample index; only ``0`` (or ``-1``) is valid.

        Returns:
            The transform's output when it is already a tensor; otherwise
            the RGB image converted via ``np.asarray`` to a ``float32``
            tensor (no channel reordering or normalization is applied).

        Raises:
            IndexError: If ``x`` does not address the single sample. This
                also lets plain ``for item in dataset`` iteration stop.
        """
        if x not in (0, -1):
            raise IndexError(f"index {x} is out of range for a dataset of size 1")

        image = self.image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # A tensor needs no further wrapping; anything else (e.g. the raw
        # image when no transform was given) is converted explicitly instead
        # of going through the legacy ``torch.Tensor(...)`` constructor.
        if isinstance(image, torch.Tensor):
            return image
        return torch.as_tensor(np.asarray(image), dtype=torch.float32)
def loading_data(img):
    """Build the inference DataLoader for a single input image.

    Args:
        img: Image handed to the ``data`` dataset (it must support
            ``convert('RGB')``, which the dataset calls).

    Returns:
        A ``DataLoader`` over ``data(img, transform)`` with batch size 1,
        no shuffling, no worker processes and no dropped batches.
    """
    # Per-channel normalization constants; these match the widely used
    # ImageNet statistics.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # Preprocessing: convert to a tensor, then normalize each channel.
    steps = [
        standard_transforms.ToTensor(),
        standard_transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
    preprocess = standard_transforms.Compose(steps)

    # Create the dataset and wrap it in a loader.
    dataset = data(img=img, transform=preprocess)
    return DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        drop_last=False,
    )
# Checkpoint loaded by the demo.
_MODEL_PATH = "./SHHA.pth"
# The summed network output is divided by this value to obtain the count;
# presumably the factor density maps were scaled by during training —
# TODO confirm against the training code.
_DENSITY_SCALE = 1000
# Process-wide model instance, created lazily by _get_model().
_cached_model = None


def _get_model():
    """Return the CPU SASNet model in eval mode, loading weights only once."""
    global _cached_model
    if _cached_model is None:
        model = SASNet().cpu()
        # load the trained model
        model.load_state_dict(torch.load(_MODEL_PATH, map_location=torch.device('cpu')))
        print('successfully load model from', _MODEL_PATH)
        model.eval()
        _cached_model = model
    return _cached_model


def predict(img):
    """Run crowd-count inference on one image.

    Args:
        img: Input image as delivered by the Gradio image component
            (configured with ``type="pil"``).

    Returns:
        A tuple ``(count, fig)``: the rounded predicted count as ``int`` and
        a matplotlib figure showing the predicted density map without axes.

    Raises:
        ValueError: If ``img`` is ``None`` (e.g. the button was pressed
            before an image was provided).
    """
    if img is None:
        raise ValueError("no input image was provided")

    test_loader = loading_data(img)
    # Reuse the cached model instead of rebuilding it and re-reading the
    # checkpoint from disk on every request.
    model = _get_model()

    with torch.no_grad():
        # Only the first batch is needed: the loader wraps a single image,
        # so further batches would repeat the same input.
        batch = next(iter(test_loader)).cpu()
        pred_map = model(batch).detach().cpu().numpy()

    first_map = pred_map[0]
    pred_cnt = np.sum(first_map) / _DENSITY_SCALE
    den_map = np.squeeze(first_map)

    # Borderless figure whose single axes fills the whole canvas.
    fig = plt.figure(frameon=False)
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    ax.imshow(den_map, aspect='auto')
    # Unregister the figure from pyplot's global state so figures do not
    # accumulate across requests; the Figure object itself stays usable.
    plt.close(fig)

    return int(np.round(pred_cnt, 0)), fig
# Markdown shown above the demo widgets (title, summary and paper abstract).
_INTRO_MARKDOWN = """
# Crowd Counting based on SASNet
<p>
We implemented a image crowd counting model with VGG16 following the paper of Song et. al (2021).
</p>
## Abstract
<p>
In this paper, we address the large scale variation problem in crowd counting by taking full advantage of the multi-scale feature representations in a multi-level network. We
implement such an idea by keeping the counting error of a patch as small as possible with a proper feature level selection strategy, since a specific feature level tends to perform
better for a certain range of scales. However, without scale annotations, it is sub-optimal and error-prone to manually assign the predictions for heads of different scales to
specific feature levels. Therefore, we propose a Scale-Adaptive Selection Network (SASNet), which automatically learns the internal correspondence between the scales and the feature
levels. Instead of directly using the predictions from the most appropriate feature level as the final estimation, our SASNet also considers the predictions from other feature
levels via weighted average, which helps to mitigate the gap between discrete feature levels and continuous scale variation. Since the heads in a local patch share roughly a same
scale, we conduct the adaptive selection strategy in a patch-wise style. However, pixels within a patch contribute different counting errors due to the various difficulty degrees of
learning. Thus, we further propose a Pyramid Region Awareness Loss (PRA Loss) to recursively select the most hard sub-regions within a patch until reaching the pixel level. With
awareness of whether the parent patch is over-estimated or under-estimated, the fine-grained optimization with the PRA Loss for these region-aware hard pixels helps to alleviate the
inconsistency problem between training target and evaluation metric. The state-of-the-art results on four datasets demonstrate the superiority of our approach.
</p>
## Demo
"""

# Markdown shown below the examples (code link and citation).
_REFERENCES_MARKDOWN = """
## References
The code will be available at: https://github.com/TencentYoutuResearch/CrowdCounting-SASNet.
Song, Q., Wang, C., Wang, Y., Tai, Y., Wang, C., Li, J., … Ma, J. (2021). To Choose or to Fuse? Scale Selection for Crowd Counting. The Thirty-Fifth AAAI Conference on Artificial Intelligence (AAAI-21).
"""

# Example images offered under the input widget.
_EXAMPLE_IMAGES = ["IMG_1.jpg", "IMG_2.jpg", "IMG_3.jpg"]

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Row 1: an empty spacer column next to the predicted-count label.
    with gr.Row():
        with gr.Column():
            gr.Markdown("")
        with gr.Column():
            text_output = gr.Label()

    # Row 2: input image on the left, density-map plot on the right.
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil")
        with gr.Column():
            image_output = gr.Plot()

    # Row 3: the trigger button followed by two empty spacer columns.
    with gr.Row():
        with gr.Column():
            image_button = gr.Button("Count the Crowd!", variant="primary")
        with gr.Column():
            gr.Markdown("")
        with gr.Column():
            gr.Markdown("")

    gr.Examples(examples=_EXAMPLE_IMAGES, inputs=image_input)
    gr.Markdown(_REFERENCES_MARKDOWN)

    # Clicking the button runs predict() on the input image and fills the
    # count label and the density-map plot.
    image_button.click(
        fn=predict,
        inputs=image_input,
        outputs=[text_output, image_output],
    )

demo.launch()
|