---
license: apache-2.0
metrics:
- mae
- mse
base_model:
- glasses/vgg16
tags:
- pytorch
- vgg16
- cnn
- computer-vision
- nlp
- image
- detection
- density-map
---

## Model Architecture used

```python
import torch
import torch.nn as nn


class VGG16(nn.Module):
    """VGG16-style convolutional network with a single-value regression head.

    The classifier's first layer expects ``512 * 7 * 7`` features, and the
    feature extractor halves the spatial size five times, so inputs must be
    3-channel images of 224x224 pixels.
    """

    def __init__(self):
        super().__init__()
        # Layer order is kept exactly as trained so that the state_dict keys
        # (features.0, features.2, ...) match the saved checkpoint.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 1)  # Outputting head count as a single value
        )

    def forward(self, x):
        """Return one predicted value per batch element, shape ``(batch, 1)``."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # keep the batch axis, flatten everything else
        x = self.classifier(x)
        return x
```

## Model Usage

```python
import math

import torch
from torchvision import transforms

# NOTE: `models` used in load_model() must be a module that exposes the
# VGG16 and ResNet50 classes (presumably the file containing the
# architecture above) - import it according to your project layout.


# Preprocessing function
def preprocess_image(image, channels=6):
    """Resize an image to 224x224 and return it as a batched tensor on `device`.

    When ``channels == 6`` the tensor is concatenated with itself along the
    channel axis, doubling its channel count. The VGG16 shown above starts
    with ``Conv2d(3, ...)``, so pass ``channels=3`` when feeding that model.
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    image_tensor = transform(image)

    # Simulating 6-channel input if required
    if channels == 6:
        image_tensor = torch.cat([image_tensor, image_tensor], dim=0)

    return image_tensor.unsqueeze(0).to(device)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load model
def load_model(selected_model):
    """Build the selected network, load its weights and return it in eval mode.

    ``'VGG16'`` loads ``vgg16_headcount.pth``; any other value falls back to
    ResNet50 and loads ``resnet50_headcount.pth``.
    """
    if selected_model == 'VGG16':
        model = models.VGG16()
        model_path = "vgg16_headcount.pth"
    else:
        model = models.ResNet50()
        model_path = "resnet50_headcount.pth"

    # weights_only=True restricts unpickling to tensors and plain containers.
    model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
    model.to(device)
    model.eval()
    print(f"{selected_model}.Heavy Model loaded successfully")
    return model


# Prediction Function
def process_image(image, model):
    """Run `model` on a single image and return the head count rounded up.

    The image is resized to 224x224 and normalized with the mean/std values
    below before inference; the scalar model output is passed through
    ``math.ceil``.
    """
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(image).unsqueeze(0)
    input_tensor = input_tensor.to(device)

    with torch.no_grad():
        output = model(input_tensor)

    # .item() requires a single-element tensor, i.e. a batch of one image.
    predicted_count = output.item()
    print(f"Predicted Headcount: {predicted_count}")
    return math.ceil(predicted_count)
```