sneha committed on
Commit
cf87235
·
1 Parent(s): 4d71014

colormap, slider

Browse files
Files changed (2) hide show
  1. app.py +6 -6
  2. attn_helper.py +1 -1
app.py CHANGED
@@ -64,7 +64,7 @@ def download_bin(model):
64
  os.rename(model_bin, bin_path)
65
 
66
 
67
- def run_attn(input_img, model="vc1-large",fusion="min"):
68
  download_bin(model)
69
  model, embedding_dim, transform, metadata = get_model(model)
70
  if input_img.shape[0] != 3:
@@ -76,7 +76,7 @@ def run_attn(input_img, model="vc1-large",fusion="min"):
76
  input_img = resize_transform(input_img)
77
  x = transform(input_img)
78
 
79
- attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion)
80
 
81
  y = model(x)
82
  mask = attention_rollout.get_attn_mask()
@@ -96,12 +96,12 @@ input_img = gr.Image(shape=(250,250))
96
  input_button = gr.Radio(["min", "max", "mean"], value="min",label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
97
  output_img = gr.Image(shape=(250,250))
98
  output_plot = gr.Plot()
99
- css = ".output-image, .input-image, .image-preview {height: 600px !important}"
100
-
101
  markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention of the last layer of the transformer.\n \
102
  The user can decide how the attention heads will be combined. \
103
  Along with the attention heatmap, it also displays the embedding values reshaped to a 16x48 or 16x64 grid."
104
  demo = gr.Interface(fn=run_attn, title="Visual Cortex Large Model", description=markdown,
105
- examples=[[os.path.join('./imgs',x),None,None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
106
- inputs=[input_img,model_type,input_button],outputs=[output_img,output_plot],css=css)
107
  demo.launch()
 
64
  os.rename(model_bin, bin_path)
65
 
66
 
67
+ def run_attn(input_img, model="vc1-large",fusion="min",slider=0):
68
  download_bin(model)
69
  model, embedding_dim, transform, metadata = get_model(model)
70
  if input_img.shape[0] != 3:
 
76
  input_img = resize_transform(input_img)
77
  x = transform(input_img)
78
 
79
+ attention_rollout = VITAttentionGradRollout(model,head_fusion=fusion,discard_ratio=slider)
80
 
81
  y = model(x)
82
  mask = attention_rollout.get_attn_mask()
 
96
  input_button = gr.Radio(["min", "max", "mean"], value="min",label="Attention Head Fusion", info="How to combine the last layer attention across all 12 heads of the transformer.")
97
  output_img = gr.Image(shape=(250,250))
98
  output_plot = gr.Plot()
99
+ css = "#component-3, .input-image, .image-preview {height: 240px !important}"
100
+ slider = gr.Slider(0, 1)
101
  markdown ="This is a demo for the Visual Cortex models. When passed an image input, it displays the attention of the last layer of the transformer.\n \
102
  The user can decide how the attention heads will be combined. \
103
  Along with the attention heatmap, it also displays the embedding values reshaped to a 16x48 or 16x64 grid."
104
  demo = gr.Interface(fn=run_attn, title="Visual Cortex Large Model", description=markdown,
105
+ examples=[[os.path.join('./imgs',x),None,None,None]for x in os.listdir(os.path.join(os.getcwd(),'imgs')) if 'jpg' in x],
106
+ inputs=[input_img,model_type,input_button,slider],outputs=[output_img,output_plot],css=css)
107
  demo.launch()
attn_helper.py CHANGED
@@ -9,7 +9,7 @@ def overlay_attn(original_image,mask):
9
  # Colormap and alpha for attention mask
10
  # COLORMAP_OCEAN
11
  # COLORMAP_OCEAN
12
- colormap_attn, alpha_attn = cv2.COLORMAP_JET, 1 #0.85
13
 
14
  # Resize mask to original image size
15
  w, h = original_image.shape[0], original_image.shape[1]
 
9
  # Colormap and alpha for attention mask
10
  # COLORMAP_OCEAN
11
  # COLORMAP_OCEAN
12
+ colormap_attn, alpha_attn = cv2.COLORMAP_VIRIDIS, 1 #0.85
13
 
14
  # Resize mask to original image size
15
  w, h = original_image.shape[0], original_image.shape[1]