sneha
committed on
Commit
·
cf87235
1
Parent(s):
4d71014
colormap, slider
Browse files- app.py +6 -6
- attn_helper.py +1 -1
app.py
CHANGED
@@ -64,7 +64,7 @@ def download_bin(model):
|
|
64 |
os.rename(model_bin, bin_path)
|
65 |
|
66 |
|
67 |
-
def run_attn(input_img, model="vc1-large",fusion="min"):
|
68 |
download_bin(model)
|
69 |
model, embedding_dim, transform, metadata = get_model(model)
|
70 |
if input_img.shape[0] != 3:
|
@@ -76,7 +76,7 @@ def run_attn(input_img, model="vc1-large",fusion="min"):
|
|
76 |
input_img = resize_transform(input_img)
|
77 |
x = transform(input_img)
|
78 |
|
79 |
-
attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion)
|
80 |
|
81 |
y = model(x)
|
82 |
mask = attention_rollout.get_attn_mask()
|
@@ -96,12 +96,12 @@ input_img = gr.Image(shape=(250,250))
|
|
96 |
input_button = gr.Radio(["min", "max", "mean"], value="min",label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
|
97 |
output_img = gr.Image(shape=(250,250))
|
98 |
output_plot = gr.Plot()
|
99 |
-
css = ""
|
100 |
-
|
101 |
markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention of the last layer of the transformer.\n \
|
102 |
The user can decide how the attention heads will be combined. \
|
103 |
Along with the attention heatmap, it also displays the embedding values reshaped to a 16x48 or 16x64 grid."
|
104 |
demo = gr.Interface(fn=run_attn, title="Visual Cortex Large Model", description=markdown,
|
105 |
-
examples=[[os.path.join('./imgs',x),None,None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
|
106 |
-
inputs=[input_img,model_type,input_button],outputs=[output_img,output_plot],css=css)
|
107 |
demo.launch()
|
|
|
64 |
os.rename(model_bin, bin_path)
|
65 |
|
66 |
|
67 |
+
def run_attn(input_img, model="vc1-large",fusion="min",slider=0):
|
68 |
download_bin(model)
|
69 |
model, embedding_dim, transform, metadata = get_model(model)
|
70 |
if input_img.shape[0] != 3:
|
|
|
76 |
input_img = resize_transform(input_img)
|
77 |
x = transform(input_img)
|
78 |
|
79 |
+
attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion,discard_ratio=slider)
|
80 |
|
81 |
y = model(x)
|
82 |
mask = attention_rollout.get_attn_mask()
|
|
|
96 |
input_button = gr.Radio(["min", "max", "mean"], value="min",label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
|
97 |
output_img = gr.Image(shape=(250,250))
|
98 |
output_plot = gr.Plot()
|
99 |
+
css = "#component-3, .input-image, .image-preview {height: 240px !important}"
|
100 |
+
slider = gr.Slider(0, 1)
|
101 |
markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention of the last layer of the transformer.\n \
|
102 |
The user can decide how the attention heads will be combined. \
|
103 |
Along with the attention heatmap, it also displays the embedding values reshaped to a 16x48 or 16x64 grid."
|
104 |
demo = gr.Interface(fn=run_attn, title="Visual Cortex Large Model", description=markdown,
|
105 |
+
examples=[[os.path.join('./imgs',x),None,None,None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
|
106 |
+
inputs=[input_img,model_type,input_button,slider],outputs=[output_img,output_plot],css=css)
|
107 |
demo.launch()
|
attn_helper.py
CHANGED
@@ -9,7 +9,7 @@ def overlay_attn(original_image,mask):
|
|
9 |
# Colormap and alpha for attention mask
|
10 |
# COLORMAP_OCEAN
|
11 |
# COLORMAP_OCEAN
|
12 |
-
colormap_attn, alpha_attn = cv2.
|
13 |
|
14 |
# Resize mask to original image size
|
15 |
w, h = original_image.shape[0], original_image.shape[1]
|
|
|
9 |
# Colormap and alpha for attention mask
|
10 |
# COLORMAP_OCEAN
|
11 |
# COLORMAP_OCEAN
|
12 |
+
colormap_attn, alpha_attn = cv2.COLORMAP_VIRIDIS, 1 #0.85
|
13 |
|
14 |
# Resize mask to original image size
|
15 |
w, h = original_image.shape[0], original_image.shape[1]
|