Spaces:

samuelstevens
/

saev-semantic-segmentation

Running

App Files Files Community

Samuel Stevens committed on Feb 3

Commit

af47b42

1 Parent(s): 6c9f92c

Add legend; add image uploader

Browse files

Files changed (2) hide show

app.py +77 -74
data.py +9 -2

app.py CHANGED Viewed

@@ -52,6 +52,44 @@ N_SAE_LATENTS = 2
 N_LATENT_EXAMPLES = 4
 """Number of examples per SAE latent to show."""
 ##########
 # Models #
 ##########
@@ -112,9 +150,9 @@ def load_tensors() -> tuple[
     return top_img_i, top_values, mask
-############
-# Datasets #
-############
 @jaxtyped(typechecker=beartype.beartype)
@@ -154,65 +192,43 @@ def add_highlights(
     return Image.alpha_composite(img.convert("RGBA"), overlay)
 #######################
 # Inference Functions #
 #######################
 @beartype.beartype
-class Example(typing.TypedDict):
-    """Represents an example image and its associated label.
-    Used to store examples of SAE latent activations for visualization.
-    """
-    index: int
-    """Dataset index."""
-    orig_url: str
-    """The URL or path to access the original example image."""
-    highlighted_url: str
-    """The URL or path to access the SAE-highlighted image."""
-    seg_url: str
-    """Base64-encoded version of the colored segmentation map."""
-@beartype.beartype
-class SaeActivation(typing.TypedDict):
-    """Represents the activation pattern of a single SAE latent across patches.
-    This captures how strongly a particular SAE latent fires on different patches of an input image.
-    """
-    latent: int
-    """The index of the SAE latent being measured."""
-    highlighted_url: str
-    """The image with the colormaps applied."""
-    activations: list[float]
-    """The activation values of this latent across different patches. Each value represents how strongly this latent fired on a particular patch."""
-    examples: list[Example]
-    """Top examples for this latent."""
-@beartype.beartype
-def get_img(i: int) -> dict[str, object]:
     img_sized = data.to_sized(data.get_img(i))
     seg_sized = data.to_sized(data.get_seg(i))
     seg_u8_sized = data.to_u8(seg_sized)
     seg_img_sized = data.u8_to_img(seg_u8_sized)
     return {
-        "index": i,
         "orig_url": data.img_to_base64(img_sized),
         "seg_url": data.img_to_base64(seg_img_sized),
     }
 @beartype.beartype
 @torch.inference_mode
-def get_sae_latents(img_i: int, patches: list[int]) -> list[SaeActivation]:
     """
     Given a particular cell, returns some highlighted images showing what feature fires most on this cell.
     """
@@ -222,9 +238,7 @@ def get_sae_latents(img_i: int, patches: list[int]) -> list[SaeActivation]:
     split_vit, vit_transform = modeling.load_vit(DEVICE)
     sae = load_sae(DEVICE)
-    img = data.get_img(img_i)
-    x_BCWH = vit_transform(img)[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_BPD = (
@@ -274,10 +288,10 @@ def get_sae_latents(img_i: int, patches: list[int]) -> list[SaeActivation]:
             )
             examples.append({
-                "index": i_im,
                 "orig_url": data.img_to_base64(img_sized),
                 "highlighted_url": data.img_to_base64(highlighted_sized),
                 "seg_url": data.img_to_base64(seg_img_sized),
             })
         sae_activations.append({
@@ -288,12 +302,12 @@ def get_sae_latents(img_i: int, patches: list[int]) -> list[SaeActivation]:
     return sae_activations
 @torch.inference_mode
-def get_orig_preds(i: int) -> dict[str, object]:
-    img = data.get_img(i)
     split_vit, vit_transform = modeling.load_vit(DEVICE)
-    x_BCWH = vit_transform(img)[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_BPD = split_vit.forward_end(x_BPD)
@@ -304,11 +318,10 @@ def get_orig_preds(i: int) -> dict[str, object]:
     logits_WHC = clf(x_WHD)
     pred_WH = logits_WHC.argmax(axis=-1)
-    # preds = einops.rearrange(pred_WH, "w h -> (w h)").tolist()
     return {
-        "index": i,
         "orig_url": data.img_to_base64(data.to_sized(img)),
         "seg_url": data.img_to_base64(data.u8_to_img(upsample(pred_WH))),
     }
@@ -333,16 +346,15 @@ def map_range(
 @beartype.beartype
 @torch.inference_mode
-def get_mod_preds(i: int, latents: dict[str, int | float]) -> dict[str, object]:
     latents = {int(k): float(v) for k, v in latents.items()}
-    img = data.get_img(i)
     split_vit, vit_transform = modeling.load_vit(DEVICE)
     sae = load_sae(DEVICE)
     _, top_values, _ = load_tensors()
     clf = load_clf()
-    x_BCWH = vit_transform(img)[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_hat_BPD, f_x_BPS, _ = sae(x_BPD)
@@ -375,27 +387,12 @@ def get_mod_preds(i: int, latents: dict[str, int | float]) -> dict[str, object]:
     pred_WH = logits_WHC.argmax(axis=-1)
     # pred_WH = einops.rearrange(pred_P, "(w h) -> w h", w=16, h=16)
     return {
-        "index": i,
         "orig_url": data.img_to_base64(data.to_sized(img)),
         "seg_url": data.img_to_base64(data.u8_to_img(upsample(pred_WH))),
     }
-@jaxtyped(typechecker=beartype.beartype)
-@torch.inference_mode
-def upsample(
-    x_WH: Int[Tensor, "width_ps height_ps"],
-) -> UInt8[Tensor, "width_px height_px"]:
-    return (
-        torch.nn.functional.interpolate(
-            x_WH.view((1, 1, 16, 16)).float(),
-            scale_factor=28,
-        )
-        .view((448, 448))
-        .type(torch.uint8)
-    )
 with gr.Blocks() as demo:
     ###########
     # get-img #
@@ -418,13 +415,19 @@ with gr.Blocks() as demo:
     # Inputs
     patches_json = gr.JSON(label="Patches", value=[])
     # Outputs
     get_sae_latents_out = gr.JSON(label="get_sae_latents_out", value=[])
     get_sae_latents_btn = gr.Button(value="Get SAE Latents")
     get_sae_latents_btn.click(
         get_sae_latents,
-        inputs=[img_number, patches_json],
         outputs=[get_sae_latents_out],
         api_name="get-sae-latents",
     )
@@ -439,7 +442,7 @@ with gr.Blocks() as demo:
     get_pred_labels_btn = gr.Button(value="Get Predictions")
     get_pred_labels_btn.click(
         get_orig_preds,
-        inputs=[img_number],
         outputs=[get_orig_preds_out],
         api_name="get-orig-preds",
     )
@@ -457,7 +460,7 @@ with gr.Blocks() as demo:
     get_pred_labels_btn = gr.Button(value="Get Predictions")
     get_pred_labels_btn.click(
         get_mod_preds,
-        inputs=[img_number, latents_json],
         outputs=[get_mod_preds_out],
         api_name="get-mod-preds",
     )

 N_LATENT_EXAMPLES = 4
 """Number of examples per SAE latent to show."""
+@beartype.beartype
+class Example(typing.TypedDict):
+    """Represents an example image and its associated label.
+    Used to store examples of SAE latent activations for visualization.
+    """
+    # The inference functions in this file fill this with data.img_to_base64(...).
+    orig_url: str
+    """The URL or path to access the original example image."""
+    # NotRequired because get_img, get_orig_preds and get_mod_preds return dicts
+    # without this key; the example dicts built in get_sae_latents do set it.
+    highlighted_url: typing.NotRequired[str]
+    """The URL or path to access the SAE-highlighted image."""
+    seg_url: str
+    """Base64-encoded version of the colored segmentation map."""
+    # Filled with data.to_classes(...) applied to the segmentation / prediction map.
+    classes: list[int]
+    """Unique list of all classes in the seg_url."""
+# Element type of the list returned by get_sae_latents.
+@beartype.beartype
+class SaeActivation(typing.TypedDict):
+    """Represents the activation pattern of a single SAE latent across patches.
+    This captures how strongly a particular SAE latent fires on different patches of an input image.
+    """
+    latent: int
+    """The index of the SAE latent being measured."""
+    highlighted_url: str
+    """The image with the colormaps applied."""
+    activations: list[float]
+    """The activation values of this latent across different patches. Each value represents how strongly this latent fired on a particular patch."""
+    # Presumably the Example dicts assembled in get_sae_latents (those carry
+    # highlighted_url) -- TODO confirm against the full function body.
+    examples: list[Example]
+    """Top examples for this latent."""
 ##########
 # Models #
 ##########
     return top_img_i, top_values, mask
+###########
+# Imaging #
+###########
 @jaxtyped(typechecker=beartype.beartype)
     return Image.alpha_composite(img.convert("RGBA"), overlay)
+@jaxtyped(typechecker=beartype.beartype)
+@torch.inference_mode
+def upsample(
+    x_WH: Int[Tensor, "width_ps height_ps"],
+) -> UInt8[Tensor, "width_px height_px"]:
+    return (
+        torch.nn.functional.interpolate(
+            x_WH.view((1, 1, 16, 16)).float(),
+            scale_factor=28,
+        )
+        .view((448, 448))
+        .type(torch.uint8)
+    )
 #######################
 # Inference Functions #
 #######################
 @beartype.beartype
+def get_img(i: int) -> Example:
+    """Load image `i` and its segmentation via `data`, returning both base64-encoded plus the segmentation's classes."""
     img_sized = data.to_sized(data.get_img(i))
     seg_sized = data.to_sized(data.get_seg(i))
     seg_u8_sized = data.to_u8(seg_sized)
     seg_img_sized = data.u8_to_img(seg_u8_sized)
     return {
         "orig_url": data.img_to_base64(img_sized),
         "seg_url": data.img_to_base64(seg_img_sized),
+        "classes": data.to_classes(seg_u8_sized),
     }
 @beartype.beartype
 @torch.inference_mode
+def get_sae_latents(img: Image.Image, patches: list[int]) -> list[SaeActivation]:
     """
     Given a particular cell, returns some highlighted images showing what feature fires most on this cell.
     """
     split_vit, vit_transform = modeling.load_vit(DEVICE)
     sae = load_sae(DEVICE)
+    x_BCWH = vit_transform(img.convert("RGB"))[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_BPD = (
             )
             examples.append({
                 "orig_url": data.img_to_base64(img_sized),
                 "highlighted_url": data.img_to_base64(highlighted_sized),
                 "seg_url": data.img_to_base64(seg_img_sized),
+                "classes": data.to_classes(seg_u8_sized),
             })
         sae_activations.append({
     return sae_activations
+@beartype.beartype
 @torch.inference_mode
+def get_orig_preds(img: Image.Image) -> Example:
     split_vit, vit_transform = modeling.load_vit(DEVICE)
+    x_BCWH = vit_transform(img.convert("RGB"))[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_BPD = split_vit.forward_end(x_BPD)
     logits_WHC = clf(x_WHD)
     pred_WH = logits_WHC.argmax(axis=-1)
     return {
         "orig_url": data.img_to_base64(data.to_sized(img)),
         "seg_url": data.img_to_base64(data.u8_to_img(upsample(pred_WH))),
+        "classes": data.to_classes(pred_WH),
     }
 @beartype.beartype
 @torch.inference_mode
+def get_mod_preds(img: Image.Image, latents: dict[str, int | float]) -> Example:
     latents = {int(k): float(v) for k, v in latents.items()}
     split_vit, vit_transform = modeling.load_vit(DEVICE)
     sae = load_sae(DEVICE)
     _, top_values, _ = load_tensors()
     clf = load_clf()
+    x_BCWH = vit_transform(img.convert("RGB"))[None, ...].to(DEVICE)
     x_BPD = split_vit.forward_start(x_BCWH)
     x_hat_BPD, f_x_BPS, _ = sae(x_BPD)
     pred_WH = logits_WHC.argmax(axis=-1)
     # pred_WH = einops.rearrange(pred_P, "(w h) -> w h", w=16, h=16)
     return {
         "orig_url": data.img_to_base64(data.to_sized(img)),
         "seg_url": data.img_to_base64(data.u8_to_img(upsample(pred_WH))),
+        "classes": data.to_classes(pred_WH),
     }
 with gr.Blocks() as demo:
     ###########
     # get-img #
     # Inputs
     patches_json = gr.JSON(label="Patches", value=[])
+    input_img = gr.Image(
+        label="Input Image",
+        sources=["upload", "clipboard"],
+        type="pil",
+        interactive=True,
+    )
     # Outputs
     get_sae_latents_out = gr.JSON(label="get_sae_latents_out", value=[])
     get_sae_latents_btn = gr.Button(value="Get SAE Latents")
     get_sae_latents_btn.click(
         get_sae_latents,
+        inputs=[input_img, patches_json],
         outputs=[get_sae_latents_out],
         api_name="get-sae-latents",
     )
     get_pred_labels_btn = gr.Button(value="Get Predictions")
     get_pred_labels_btn.click(
         get_orig_preds,
+        inputs=[input_img],
         outputs=[get_orig_preds_out],
         api_name="get-orig-preds",
     )
     get_pred_labels_btn = gr.Button(value="Get Predictions")
     get_pred_labels_btn.click(
         get_mod_preds,
+        inputs=[input_img, latents_json],
         outputs=[get_mod_preds_out],
         api_name="get-mod-preds",
     )

data.py CHANGED Viewed

@@ -8,7 +8,7 @@ import beartype
 import einops.layers.torch
 import numpy as np
 import requests
-from jaxtyping import UInt8, jaxtyped
 from PIL import Image
 from torch import Tensor
 from torchvision.transforms import v2
@@ -48,12 +48,13 @@ def make_colors() -> UInt8[np.ndarray, "n 3"]:
     random.Random(42).shuffle(colors)
     colors = np.array(colors, dtype=np.uint8)
-    # Fixed colors for example 3122
     colors[2] = np.array([201, 249, 255], dtype=np.uint8)
     colors[4] = np.array([151, 204, 4], dtype=np.uint8)
     colors[13] = np.array([104, 139, 88], dtype=np.uint8)
     colors[16] = np.array([54, 48, 32], dtype=np.uint8)
     colors[26] = np.array([45, 125, 210], dtype=np.uint8)
     colors[46] = np.array([238, 185, 2], dtype=np.uint8)
     colors[52] = np.array([88, 91, 86], dtype=np.uint8)
     colors[72] = np.array([76, 46, 5], dtype=np.uint8)
@@ -97,6 +98,12 @@ def u8_to_img(map: UInt8[Tensor, "width height"]) -> Image.Image:
     return Image.fromarray(colored)
 @beartype.beartype
 def img_to_base64(img: Image.Image) -> str:
     buf = io.BytesIO()

 import einops.layers.torch
 import numpy as np
 import requests
+from jaxtyping import Integer, UInt8, jaxtyped
 from PIL import Image
 from torch import Tensor
 from torchvision.transforms import v2
     random.Random(42).shuffle(colors)
     colors = np.array(colors, dtype=np.uint8)
+    # Fixed colors. Must be synced with Segmentation.elm.
     colors[2] = np.array([201, 249, 255], dtype=np.uint8)
     colors[4] = np.array([151, 204, 4], dtype=np.uint8)
     colors[13] = np.array([104, 139, 88], dtype=np.uint8)
     colors[16] = np.array([54, 48, 32], dtype=np.uint8)
     colors[26] = np.array([45, 125, 210], dtype=np.uint8)
+    colors[29] = np.array([116, 142, 84], dtype=np.uint8)
     colors[46] = np.array([238, 185, 2], dtype=np.uint8)
     colors[52] = np.array([88, 91, 86], dtype=np.uint8)
     colors[72] = np.array([76, 46, 5], dtype=np.uint8)
     return Image.fromarray(colored)
+@jaxtyped(typechecker=beartype.beartype)
+def to_classes(map: Integer[Tensor, "width height"]) -> list[int]:
+    # Integer is any signed or unsigned int: https://docs.kidger.site/jaxtyping/api/array/#dtype
+    return list(set(map.view(-1).tolist()))
 @beartype.beartype
 def img_to_base64(img: Image.Image) -> str:
     buf = io.BytesIO()