Spaces: sayedM/DINOv3-PCA-visualization

Running

App Files Community

sayedM committed 4 days ago

Commit

a6e4f18

verified ·

1 Parent(s): 5e20a05

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -33

app.py CHANGED Viewed

@@ -1,18 +1,21 @@
-# app.py
 import torch
 import gradio as gr
 import numpy as np
 from PIL import Image
 import torchvision.transforms.functional as TF
 from matplotlib import colormaps
 from transformers import AutoModel
-import os
 # ----------------------------
 # Configuration
 # ----------------------------
-# 💡 FIX: Use the full, correct model ID from the Hugging Face Hub.
-MODEL_ID = "facebook/dinov3-vith16plus-pretrain-lvd1689m"
 PATCH_SIZE = 16
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -21,32 +24,49 @@ IMAGENET_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_STD = (0.229, 0.224, 0.225)
 # ----------------------------
-# Model Loading (runs once at startup)
 # ----------------------------
-def load_model_from_hub():
-    """Loads the DINOv3 model from the Hugging Face Hub."""
-    print(f"Loading model '{MODEL_ID}' from Hugging Face Hub...")
     try:
-        # This will use the HF_TOKEN secret if you set it in your Space settings.
-        token = os.environ.get("HF_TOKEN")
-        # trust_remote_code is necessary for DINOv3
-        model = AutoModel.from_pretrained(MODEL_ID, token=token, trust_remote_code=True)
-        model.to(DEVICE).eval()
-        print(f"✅ Model loaded successfully on device: {DEVICE}")
-        return model
     except Exception as e:
-        print(f"❌ Failed to load model: {e}")
-        # This will display a clear error message in the Gradio interface
         raise gr.Error(
-            f"Could not load model '{MODEL_ID}'. "
-            "This is a gated model. Please ensure you have accepted the terms on its Hugging Face page "
-            "and set your HF_TOKEN as a secret in your Space settings. "
             f"Original error: {e}"
         )
-# Load the model globally when the app starts
-model = load_model_from_hub()
 # ----------------------------
 # Helper Functions
 # ----------------------------
@@ -85,9 +105,11 @@ def generate_pca_visuals(
     resolution: int,
     cmap_name: str,
     overlay_alpha: float,
     progress=gr.Progress(track_tqdm=True)
 ):
     """Main function to generate PCA visuals."""
     if model is None:
         raise gr.Error("DINOv3 model is not available. Check the startup logs.")
     if image_pil is None:
@@ -105,9 +127,8 @@ def generate_pca_visuals(
     progress(0.5, desc="🦖 Extracting features with DINOv3...")
     outputs = model(t_norm)
-    # 💡 FIX: The model output includes a [CLS] token AND 4 register tokens.
-    # We must skip all of them (total 5) to get only the patch embeddings.
-    n_special_tokens = 5 # 1 [CLS] token + 4 register tokens for ViT-H/16+
     patch_embeddings = outputs.last_hidden_state.squeeze(0)[n_special_tokens:, :]
     # 3. PCA Calculation
@@ -115,8 +136,7 @@ def generate_pca_visuals(
     X_centered = patch_embeddings.float() - patch_embeddings.float().mean(0, keepdim=True)
     U, S, V = torch.pca_lowrank(X_centered, q=3, center=False)
-    # 💡 IMPROVEMENT: Stabilize the signs of the eigenvectors for deterministic output.
-    # This prevents the colors from randomly inverting on different runs.
     for i in range(V.shape[1]):
         max_abs_idx = torch.argmax(torch.abs(V[:, i]))
         if V[max_abs_idx, i] < 0:
@@ -134,7 +154,6 @@ def generate_pca_visuals(
     )
     # 5. Create Visualizations
-    # This part should now work correctly as `scores` has the right shape (Hp*Wp, 3)
     pc1_map = scores[:, 0].reshape(Hp, Wp).cpu().numpy()
     pc1_image_raw = colorize(pc1_map, cmap_name)
@@ -155,10 +174,10 @@ def generate_pca_visuals(
 # ----------------------------
 # Gradio Interface
 # ----------------------------
-with gr.Blocks(theme=gr.themes.Soft(), title="Running on CPU so please wait 🦖 DINOv3 PCA Explorer") as demo:
     gr.Markdown(
         """
-        # Running on CPU so please wait 🦖 DINOv3 PCA Explorer
         Upload an image to visualize the principal components of its patch features.
         This reveals the main axes of semantic variation within the image as understood by the model.
         """
@@ -166,7 +185,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Running on CPU so please wait 🦖
     with gr.Row():
         with gr.Column(scale=2):
-            # Added a default image URL for convenience
             input_image = gr.Image(type="pil", label="Upload Image", value="https://images.squarespace-cdn.com/content/v1/607f89e638219e13eee71b1e/1684821560422-SD5V37BAG28BURTLIXUQ/michael-sum-LEpfefQf4rU-unsplash.jpg")
             with gr.Accordion("⚙️ Visualization Controls", open=True):
@@ -175,6 +193,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Running on CPU so please wait 🦖
                     label="Processing Resolution",
                     info="Higher values capture more detail but are slower."
                 )
                 cmap_dropdown = gr.Dropdown(
                     ['viridis', 'magma', 'inferno', 'plasma', 'cividis', 'jet'],
                     value='viridis',
@@ -201,7 +225,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Running on CPU so please wait 🦖
     run_button.click(
         fn=generate_pca_visuals,
-        inputs=[input_image, resolution_slider, cmap_dropdown, alpha_slider],
         outputs=[output_pc1, output_rgb, output_variance, output_blended, output_processed]
     )

+import os
 import torch
+import torch.nn.functional as F
 import gradio as gr
 import numpy as np
 from PIL import Image
 import torchvision.transforms.functional as TF
 from matplotlib import colormaps
 from transformers import AutoModel
 # ----------------------------
 # Configuration
 # ----------------------------
+# Define available models
+DEFAULT_MODEL_ID = "facebook/dinov3-vits16plus-pretrain-lvd1689m"
+ALT_MODEL_ID = "facebook/dinov3-vith16plus-pretrain-lvd1689m"
+AVAILABLE_MODELS = [DEFAULT_MODEL_ID, ALT_MODEL_ID]
 PATCH_SIZE = 16
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 IMAGENET_STD = (0.229, 0.224, 0.225)
 # ----------------------------
+# Model Loading (with caching)
 # ----------------------------
+_model_cache = {}
+_current_model_id = None
+model = None  # global reference
+def load_model_from_hub(model_id: str):
+    """Loads a DINOv3 model from the Hugging Face Hub."""
+    print(f"Loading model '{model_id}' from Hugging Face Hub...")
     try:
+        token = os.environ.get("HF_TOKEN")  # optional, for gated models
+        mdl = AutoModel.from_pretrained(model_id, token=token, trust_remote_code=True)
+        mdl.to(DEVICE).eval()
+        print(f"✅ Model '{model_id}' loaded successfully on device: {DEVICE}")
+        return mdl
     except Exception as e:
+        print(f"❌ Failed to load model '{model_id}': {e}")
         raise gr.Error(
+            f"Could not load model '{model_id}'. "
+            "If the model is gated, please accept the terms on its Hugging Face page "
+            "and set HF_TOKEN in your environment. "
             f"Original error: {e}"
         )
+def get_model(model_id: str):
+    """Return a cached model if available, otherwise load and cache it."""
+    if model_id in _model_cache:
+        return _model_cache[model_id]
+    mdl = load_model_from_hub(model_id)
+    _model_cache[model_id] = mdl
+    return mdl
+# Load the default model at startup
+model = get_model(DEFAULT_MODEL_ID)
+_current_model_id = DEFAULT_MODEL_ID
+def _ensure_model(model_id: str):
+    """Ensure the global 'model' matches the dropdown selection."""
+    global model, _current_model_id
+    if model_id != _current_model_id:
+        model = get_model(model_id)
+        _current_model_id = model_id
 # ----------------------------
 # Helper Functions
 # ----------------------------
     resolution: int,
     cmap_name: str,
     overlay_alpha: float,
+    model_id: str,
     progress=gr.Progress(track_tqdm=True)
 ):
     """Main function to generate PCA visuals."""
+    _ensure_model(model_id)
     if model is None:
         raise gr.Error("DINOv3 model is not available. Check the startup logs.")
     if image_pil is None:
     progress(0.5, desc="🦖 Extracting features with DINOv3...")
     outputs = model(t_norm)
+    # The model output includes a [CLS] token AND 4 register tokens.
+    n_special_tokens = 5
     patch_embeddings = outputs.last_hidden_state.squeeze(0)[n_special_tokens:, :]
     # 3. PCA Calculation
     X_centered = patch_embeddings.float() - patch_embeddings.float().mean(0, keepdim=True)
     U, S, V = torch.pca_lowrank(X_centered, q=3, center=False)
+    # Stabilize the signs of the eigenvectors for deterministic output.
     for i in range(V.shape[1]):
         max_abs_idx = torch.argmax(torch.abs(V[:, i]))
         if V[max_abs_idx, i] < 0:
     )
     # 5. Create Visualizations
     pc1_map = scores[:, 0].reshape(Hp, Wp).cpu().numpy()
     pc1_image_raw = colorize(pc1_map, cmap_name)
 # ----------------------------
 # Gradio Interface
 # ----------------------------
+with gr.Blocks(theme=gr.themes.Soft(), title="🦖 DINOv3 PCA Explorer") as demo:
     gr.Markdown(
         """
+        # 🦖 DINOv3 PCA Explorer
         Upload an image to visualize the principal components of its patch features.
         This reveals the main axes of semantic variation within the image as understood by the model.
         """
     with gr.Row():
         with gr.Column(scale=2):
             input_image = gr.Image(type="pil", label="Upload Image", value="https://images.squarespace-cdn.com/content/v1/607f89e638219e13eee71b1e/1684821560422-SD5V37BAG28BURTLIXUQ/michael-sum-LEpfefQf4rU-unsplash.jpg")
             with gr.Accordion("⚙️ Visualization Controls", open=True):
                     label="Processing Resolution",
                     info="Higher values capture more detail but are slower."
                 )
+                model_choice = gr.Dropdown(
+                    choices=AVAILABLE_MODELS,
+                    value=DEFAULT_MODEL_ID,
+                    label="Backbone (DINOv3)",
+                    info="ViT-S/16+ is smaller & faster; ViT-H/16+ is larger.",
+                )
                 cmap_dropdown = gr.Dropdown(
                     ['viridis', 'magma', 'inferno', 'plasma', 'cividis', 'jet'],
                     value='viridis',
     run_button.click(
         fn=generate_pca_visuals,
+        inputs=[input_image, resolution_slider, cmap_dropdown, alpha_slider, model_choice],
         outputs=[output_pc1, output_rgb, output_variance, output_blended, output_processed]
     )