Bill Psomas committed
Commit 7578ae0 · Parent: 02a709c

higher resolution choice

Files changed (1): app.py +10 -15
app.py CHANGED
@@ -15,13 +15,7 @@ import vision_transformer as vits
 arch = "vit_small"
 mode = "simpool"
 gamma = None
-
-patch_size = 16
-input_size = 224
-
 patch_size = 16
-input_size = 448
-
 num_classes = 0
 checkpoint = "checkpoints/vits_dino_simpool_no_gamma_ep100.pth"
 checkpoint_key = "teacher"
@@ -51,14 +45,13 @@ msg = model.load_state_dict(state_dict, strict=True)
 
 model.eval()
 
-# Define transformations
-data_transforms = transforms.Compose([
-    transforms.Resize((input_size, input_size), interpolation=3),
-    transforms.ToTensor(),
-    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
-])
-
-def get_attention_map(img):
+def get_attention_map(img, resolution):
+    input_size = resolution * 14
+    data_transforms = transforms.Compose([
+        transforms.Resize((input_size, input_size), interpolation=3),
+        transforms.ToTensor(),
+        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
+    ])
     x = data_transforms(img)
     attn = model.get_simpool_attention(x[None, :, :, :])
     attn = attn.reshape(1, 1, input_size//patch_size, input_size//patch_size)
@@ -73,7 +66,9 @@ def get_attention_map(img):
 
 attention_interface = gr.Interface(
     fn=get_attention_map,
-    inputs=[gr.Image(type="pil", label="Input Image")],
+    inputs=[gr.Image(type="pil", label="Input Image"),
+            gr.Dropdown(choices=["16", "32", "64", "128"],
+                        label="Attention Map Resolution", value="32", type="index")],
     outputs=gr.Image(type="pil", label="SimPool Attention Map", width=width_display, height=height_display),
     examples=example_list,
     title="Explore the Attention Maps of SimPool🔍",