Spaces:
Sleeping
Sleeping
Aksel Lenes
committed on
Commit
·
909a82d
1
Parent(s):
77c9ae7
Add fixed_scale parameter for grid plot view
Browse files
To see patterns in longer sequences.
Added a warning that this means each subplot in the grid has an
individual scale, so you cannot compare
attention intensities between grid cells.
hexviz/pages/1_🗺️Identify_Interesting_Heads.py
CHANGED
@@ -85,6 +85,9 @@ truncated_sequence = sequence[slice_start - 1 : slice_end]
|
|
85 |
remove_special_tokens = st.sidebar.checkbox(
|
86 |
"Hide attention to special tokens", key="remove_special_tokens"
|
87 |
)
|
|
|
|
|
|
|
88 |
|
89 |
|
90 |
layer_sequence, head_sequence = select_heads_and_layers(st.sidebar, selected_model)
|
@@ -104,7 +107,7 @@ attention, tokens = get_attention(
|
|
104 |
ec_number=ec_number,
|
105 |
)
|
106 |
|
107 |
-
fig = plot_tiled_heatmap(attention, layer_sequence=layer_sequence, head_sequence=head_sequence)
|
108 |
|
109 |
|
110 |
st.pyplot(fig)
|
@@ -143,5 +146,5 @@ if len(tokens_to_label) > 0:
|
|
143 |
tokens = [token if token in tokens_to_label else "" for token in tokens]
|
144 |
|
145 |
|
146 |
-
single_head_fig = plot_single_heatmap(attention, layer, head, tokens=tokens)
|
147 |
st.pyplot(single_head_fig)
|
|
|
85 |
remove_special_tokens = st.sidebar.checkbox(
|
86 |
"Hide attention to special tokens", key="remove_special_tokens"
|
87 |
)
|
88 |
+
if "fixed_scale" not in st.session_state:
|
89 |
+
st.session_state.fixed_scale = True
|
90 |
+
fixed_scale = st.sidebar.checkbox("Fixed scale", help="For long sequences the default fixed 0 to 1 scale can have very low contrast heatmaps, consider using a relative scale to increase the contrast between high attention and low attention areas. Note that each subplot will have separate color scales so don't compare colors between attention heads if using a non-fixed scale.", key="fixed_scale")
|
91 |
|
92 |
|
93 |
layer_sequence, head_sequence = select_heads_and_layers(st.sidebar, selected_model)
|
|
|
107 |
ec_number=ec_number,
|
108 |
)
|
109 |
|
110 |
+
fig = plot_tiled_heatmap(attention, layer_sequence=layer_sequence, head_sequence=head_sequence, fixed_scale=fixed_scale)
|
111 |
|
112 |
|
113 |
st.pyplot(fig)
|
|
|
146 |
tokens = [token if token in tokens_to_label else "" for token in tokens]
|
147 |
|
148 |
|
149 |
+
single_head_fig = plot_single_heatmap(attention, layer, head, tokens=tokens, fixed_scale=fixed_scale)
|
150 |
st.pyplot(single_head_fig)
|
hexviz/plot.py
CHANGED
@@ -6,7 +6,7 @@ from matplotlib.ticker import FixedLocator
|
|
6 |
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
7 |
|
8 |
|
9 |
-
def plot_tiled_heatmap(tensor, layer_sequence: List[int], head_sequence: List[int]):
|
10 |
tensor = tensor[layer_sequence, :][
|
11 |
:, head_sequence, :, :
|
12 |
] # Slice the tensor according to the provided sequences and sequence_count
|
@@ -18,9 +18,14 @@ def plot_tiled_heatmap(tensor, layer_sequence: List[int], head_sequence: List[in
|
|
18 |
fig, axes = plt.subplots(num_layers, num_heads, figsize=(x_size, y_size), squeeze=False)
|
19 |
for i in range(num_layers):
|
20 |
for j in range(num_heads):
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
24 |
axes[i, j].axis("off")
|
25 |
|
26 |
# Enumerate the axes
|
@@ -33,7 +38,7 @@ def plot_tiled_heatmap(tensor, layer_sequence: List[int], head_sequence: List[in
|
|
33 |
row_label = f"{layer_sequence[i]+1}"
|
34 |
row_pos = ax_row[num_heads - 1].get_position()
|
35 |
fig.text(row_pos.x1 + offset, (row_pos.y1 + row_pos.y0) / 2, row_label, va="center")
|
36 |
-
|
37 |
plt.subplots_adjust(wspace=0.1, hspace=0.1)
|
38 |
return fig
|
39 |
|
@@ -43,11 +48,15 @@ def plot_single_heatmap(
|
|
43 |
layer: int,
|
44 |
head: int,
|
45 |
tokens: list[str],
|
|
|
46 |
):
|
47 |
single_heatmap = tensor[layer, head, :, :].detach().numpy()
|
48 |
|
49 |
fig, ax = plt.subplots(figsize=(10, 10))
|
50 |
-
|
|
|
|
|
|
|
51 |
|
52 |
# Function to adjust font size based on the number of labels
|
53 |
def get_font_size(labels):
|
|
|
6 |
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
7 |
|
8 |
|
9 |
+
def plot_tiled_heatmap(tensor, layer_sequence: List[int], head_sequence: List[int], fixed_scale: bool = True):
|
10 |
tensor = tensor[layer_sequence, :][
|
11 |
:, head_sequence, :, :
|
12 |
] # Slice the tensor according to the provided sequences and sequence_count
|
|
|
18 |
fig, axes = plt.subplots(num_layers, num_heads, figsize=(x_size, y_size), squeeze=False)
|
19 |
for i in range(num_layers):
|
20 |
for j in range(num_heads):
|
21 |
+
if fixed_scale:
|
22 |
+
im = axes[i, j].imshow(
|
23 |
+
tensor[i, j].detach().numpy(), cmap="viridis", aspect="equal", vmin=0, vmax=1
|
24 |
+
)
|
25 |
+
else:
|
26 |
+
im = axes[i, j].imshow(
|
27 |
+
tensor[i, j].detach().numpy(), cmap="viridis", aspect="equal"
|
28 |
+
)
|
29 |
axes[i, j].axis("off")
|
30 |
|
31 |
# Enumerate the axes
|
|
|
38 |
row_label = f"{layer_sequence[i]+1}"
|
39 |
row_pos = ax_row[num_heads - 1].get_position()
|
40 |
fig.text(row_pos.x1 + offset, (row_pos.y1 + row_pos.y0) / 2, row_label, va="center")
|
41 |
+
|
42 |
plt.subplots_adjust(wspace=0.1, hspace=0.1)
|
43 |
return fig
|
44 |
|
|
|
48 |
layer: int,
|
49 |
head: int,
|
50 |
tokens: list[str],
|
51 |
+
fixed_scale : bool = True
|
52 |
):
|
53 |
single_heatmap = tensor[layer, head, :, :].detach().numpy()
|
54 |
|
55 |
fig, ax = plt.subplots(figsize=(10, 10))
|
56 |
+
if fixed_scale:
|
57 |
+
heatmap = ax.imshow(single_heatmap, cmap="viridis", aspect="equal", vmin=0, vmax=1)
|
58 |
+
else:
|
59 |
+
heatmap = ax.imshow(single_heatmap, cmap="viridis", aspect="equal")
|
60 |
|
61 |
# Function to adjust font size based on the number of labels
|
62 |
def get_font_size(labels):
|