Updated RM.md
score_it.py (ADDED, +165 -0)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

import timm

from PIL import Image

import matplotlib.pyplot as plt

import os

# Thanks to ( ), a proxy can be essential :)
# os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:10809'
# os.environ['HTTP_PROXY'] = 'http://127.0.0.1:10809'
# os.environ['ALL_PROXY'] = 'socks5://127.0.0.1:10808'

IMG_FILE_LIST = [
    './testcases/14.jpg',
    './testcases/15.jpg',
    './testcases/16.jpg',
    './testcases/17.jpg',
    './testcases/18.jpg',
    './testcases/19.jpg'
]

# Scale applied to the tanh-bounded regression heads
TANH_SCALE = 1


class Scorer(nn.Module):
    def __init__(
        self,
        model_name,
        pretrained=False,
        features_only=True,
        embedding_dim=128  # unused; likely intended for the upload-date embedding (see commented code below)
    ):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, features_only=features_only)
        # Channel widths of the four backbone feature stages
        pooled_dim = 128 + 256 + 512 + 1024
        self.layer_norms = nn.ModuleList([
            nn.LayerNorm(128),
            nn.LayerNorm(256),
            nn.LayerNorm(512),
            nn.LayerNorm(1024)
        ])
        self.mlp = nn.Sequential(
            nn.Linear(pooled_dim, pooled_dim),
            nn.BatchNorm1d(pooled_dim),
            nn.GELU(),
        )
        # Perhaps a BYOL-style "accidental" BatchNorm could help here?
        self.mlp_1 = nn.Sequential(
            nn.Linear(pooled_dim, pooled_dim // 4),
            nn.BatchNorm1d(pooled_dim // 4),
            nn.GELU(),
            nn.Linear(pooled_dim // 4, 3),
            nn.Tanh()
        )
        self.mlp_2 = nn.Sequential(
            nn.Linear(pooled_dim, pooled_dim // 4),
            nn.GELU(),
            nn.Linear(pooled_dim // 4, 1),
        )

    def forward(self, x, upload_date=None, freeze_backbone=False):
        if freeze_backbone:
            with torch.no_grad():
                out_features = self.model(x)
        else:
            out_features = self.model(x)
        # out_features is a list of four feature maps:
        #   torch.Size([N, 128, h, w])
        #   torch.Size([N, 256, h, w])
        #   torch.Size([N, 512, h, w])
        #   torch.Size([N, 1024, h, w])
        # Average-pool each stage's feature map over its spatial dimensions
        pooled_features = [F.adaptive_avg_pool2d(feat, 1).squeeze(-1).squeeze(-1) for feat in out_features]
        # Normalize the pooled features
        pooled_features = [self.layer_norms[i](feat) for i, feat in enumerate(pooled_features)]
        # Embed the upload date
        # date_embedding_features = self.embedding(upload_date)
        # Concatenate the pooled features
        out = torch.cat(pooled_features, dim=-1)
        # Concatenate the date embedding features
        # out = torch.cat([out, date_embedding_features], dim=-1)
        out = self.mlp(out)
        # Three tanh-bounded heads (liking, collection, relative popularity) and one AI-likelihood logit
        rl_out = self.mlp_1(out) * TANH_SCALE
        ai_out = self.mlp_2(out).squeeze(-1)
        return rl_out[:, 0], rl_out[:, 1], torch.sigmoid(ai_out), rl_out[:, 2]
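
# --- Optional sanity check (not part of the original file) ---
# A minimal sketch of how the Scorer can be exercised with random weights,
# assuming the backbone exposes four feature stages with 128/256/512/1024
# channels, as the LayerNorms above expect. The 224x224 input size and the
# helper name are illustrative assumptions, not values used by the script.
def _smoke_test(model_name='convnextv2_base.fcmae'):
    model = Scorer(model_name, pretrained=False)
    model.eval()  # keep BatchNorm1d on running statistics
    with torch.no_grad():
        dummy = torch.randn(2, 3, 224, 224)
        liking, collection, ai_prob, rel_pop = model(dummy)
    # Each head returns one value per image in the batch
    print(liking.shape, collection.shape, ai_prob.shape, rel_pop.shape)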


BACKBONE = 'convnextv2_base.fcmae'
RESOLUTION = 640
SHOW_GRAD = False
GRAD_SCALE = 50

# Objectives to favour in the gradient overlay (used when SHOW_GRAD is True)
MORE_LIKE = False
MORE_COLLECTION = False
LESS_AI = False
MORE_RELATIVE_POP = True

WEIGHT_PATH = './scorer.pt'

DEVICE = 'cuda'


def main():
    model = Scorer(BACKBONE)
    transform = transforms.Compose([
        transforms.Resize((RESOLUTION, RESOLUTION)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])
    model.load_state_dict(torch.load(WEIGHT_PATH))
    model.eval()
    model.to(DEVICE)

    # Show all the images in pyplot horizontally, and mark the predicted values under each image
    fig = plt.figure(figsize=(20, 20))
    for i, img_file in enumerate(IMG_FILE_LIST):
        img = Image.open(img_file, 'r').convert('RGB')
        transformed_img = transform(img).unsqueeze(0).to(DEVICE)
        transformed_img.requires_grad = True
        liking_pred, collection_pred, ai_pred, relative_pop = model(transformed_img, torch.tensor([1]), False)
        ax = fig.add_subplot(1, len(IMG_FILE_LIST), i + 1)

        # Combine the selected objectives into a single scalar to backpropagate
        backwardee = 0
        if MORE_LIKE:
            backwardee -= liking_pred
        if MORE_COLLECTION:
            backwardee -= collection_pred
        if LESS_AI:
            backwardee += ai_pred
        if MORE_RELATIVE_POP:
            backwardee -= relative_pop
        if SHOW_GRAD:
            model.zero_grad()
            # Figure out which part of the image is the most important to popularity
            backwardee.backward()
            # Get the gradients of the image
            gradients = transformed_img.grad
            # Squeeze the batch dimension
            gradients = gradients.squeeze(0).detach()
            # Resize the gradients to the same size as the image
            gradients = transforms.Resize((img.height, img.width))(gradients)
            # Overlay the scaled gradients on the image
            img = transforms.ToTensor()(img)
            img = img + gradients.cpu() * GRAD_SCALE
            img = img.clamp(0, 1)  # keep values in a valid image range before converting back
            img = transforms.ToPILImage()(img.cpu())
        ax.imshow(img)
        del img
        ax.set_title(
            f'Liking: {liking_pred.item():.3f}\n'
            f'Collection: {collection_pred.item():.3f}\n'
            f'AI: {ai_pred.item() * 100:.3f}%\n'
            f'Popularity: {relative_pop.item():.3f}')
    plt.show()


if __name__ == '__main__':
    main()
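
Usage note: with a trained checkpoint at ./scorer.pt and the six images under ./testcases/ in place, the script can be run directly with python score_it.py; it scores each image and plots them side by side with the predicted liking, collection, AI-likelihood and relative-popularity values. DEVICE is set to 'cuda', so a CPU-only machine presumably needs it changed to 'cpu'. Setting SHOW_GRAD = True additionally overlays input gradients (scaled by GRAD_SCALE) to hint at which image regions drive the selected objectives.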