Spaces:

broadfield-dev
/

weights

Sleeping

App Files Files Community

broadfield-dev committed on Jun 24

Commit

c1b4423

verified ·

1 Parent(s): 2c4215c

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -148

app.py CHANGED Viewed

@@ -10,194 +10,174 @@ from captum.attr import IntegratedGradients
 import io
 import base64
 from PIL import Image
 # Initialize BERT model and tokenizer
-tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-model = BertModel.from_pretrained('bert-base-uncased')
-model.eval()
-# Alternative MLP model (uncomment to use instead of BERT)
-"""
-# import torch.nn as nn
-# class SimpleMLP(nn.Module):
-#     def __init__(self, input_size=10, hidden_sizes=[64, 32], output_size=2):
-#         super(SimpleMLP, self).__init__()
-#         layers = []
-#         prev_size = input_size
-#         for hidden_size in hidden_sizes:
-#             layers.append(nn.Linear(prev_size, hidden_size))
-#             layers.append(nn.ReLU())
-#             prev_size = hidden_size
-#         layers.append(nn.Linear(prev_size, output_size))
-#         self.network = nn.Sequential(*layers)
-#     def forward(self, x):
-#         return self.network(x)
-# model = SimpleMLP()
-# model.eval()
-"""
 # Store intermediate activations
 activations = {}
 def hook_fn(module, input, output, name):
-    activations[name] = output
-# Register hooks for BERT layers (or MLP layers)
 for name, layer in model.named_modules():
-    if 'layer' in name or 'embeddings' in name:  # Focus on transformer layers
         layer.register_forward_hook(lambda m, i, o, n=name: hook_fn(m, i, o, n))
-    # For MLP, replace with:
-    # if isinstance(layer, nn.Linear) or isinstance(layer, nn.ReLU):
-    #     layer.register_forward_hook(lambda m, i, o, n=name: hook_fn(m, i, o, n))
 def process_input(input_text, layer_name, visualize_option, attribution_target=0):
     """
     Process input text, compute embeddings, activations, and visualizations.
-    Parameters:
-    - input_text: User-provided text input
-    - layer_name: Selected layer for visualization
-    - visualize_option: 'Embeddings', 'Attention', or 'Activations'
-    - attribution_target: Target class for attribution (0 or 1 for binary classification)
     Returns:
-    - Dictionary with plots and dataframes
     """
     global activations
     activations = {}  # Reset activations
-    # Tokenize input
-    inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True, max_length=512)
-    input_ids = inputs['input_ids']
-    attention_mask = inputs['attention_mask']
-    # Forward pass
-    with torch.no_grad():
-        outputs = model(input_ids, attention_mask=attention_mask, output_attentions=True, output_hidden_states=True)
-        embeddings = outputs.last_hidden_state  # [batch, seq_len, hidden_size]
-        attentions = outputs.attentions  # List of attention weights
-        hidden_states = outputs.hidden_states  # List of hidden states
-    # Convert token IDs to tokens
-    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
-    # Initialize output dictionary
-    results = {
-        "plots": [],
-        "dataframes": [],
-        "text": []
-    }
-    # Visualization: Embeddings (t-SNE)
-    if visualize_option == "Embeddings":
-        emb = embeddings[0].detach().numpy()  # [seq_len, hidden_size]
-        if emb.shape[0] > 1:  # Need at least 2 points for t-SNE
-            tsne = TSNE(n_components=2, random_state=42, perplexity=min(5, emb.shape[0]-1))
-            reduced = tsne.fit_transform(emb)
-            fig, ax = plt.subplots()
-            scatter = ax.scatter(reduced[:, 0], reduced[:, 1], c='blue')
-            for i, token in enumerate(tokens):
-                ax.annotate(token, (reduced[i, 0], reduced[i, 1]))
-            ax.set_title("t-SNE of Token Embeddings")
-            # Convert plot to base64 for Gradio
-            buf = io.BytesIO()
-            plt.savefig(buf, format='png')
-            buf.seek(0)
-            img = Image.open(buf)
-            img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
-            results["plots"].append(f"data:image/png;base64,{img_base64}")
-            plt.close()
-    # Visualization: Attention Weights
-    if visualize_option == "Attention":
-        if attentions:
-            attn = attentions[-1][0, 0].detach().numpy()  # Last layer, first head
-            fig, ax = plt.subplots()
-            sns.heatmap(attn, xticklabels=tokens, yticklabels=tokens, cmap='viridis', ax=ax)
-            ax.set_title("Attention Weights (Last Layer, Head 0)")
-            plt.xticks(rotation=45)
-            plt.yticks(rotation=0)
-            # Convert plot to base64
-            buf = io.BytesIO()
-            plt.savefig(buf, format='png')
-            buf.seek(0)
-            img = Image.open(buf)
-            img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
-            results["plots"].append(f"data:image/png;base64,{img_base64}")
-            plt.close()
-    # Visualization: Activations
-    if visualize_option == "Activations":
-        if layer_name in activations:
             act = activations[layer_name]
-            if isinstance(act, tuple):  # Handle attention outputs
                 act = act[0]
-            act = act[0].detach().numpy()  # [seq_len, hidden_size]
             df = pd.DataFrame(act, index=tokens)
-            results["dataframes"].append(df)
-            # Plot mean activation per token
             fig, ax = plt.subplots()
             mean_act = np.mean(act, axis=1)
             ax.bar(range(len(mean_act)), mean_act)
             ax.set_xticks(range(len(mean_act)))
             ax.set_xticklabels(tokens, rotation=45)
             ax.set_title(f"Mean Activations in {layer_name}")
-            # Convert plot to base64
             buf = io.BytesIO()
             plt.savefig(buf, format='png')
             buf.seek(0)
-            img = Image.open(buf)
             img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
-            results["plots"].append(f"data:image/png;base64,{img_base64}")
             plt.close()
-    # Attribution: Integrated Gradients
-    def forward_func(inputs, attention_mask=None):
-        outputs = model(inputs, attention_mask=attention_mask)
-        return outputs.pooler_output[:, attribution_target]
-    ig = IntegratedGradients(forward_func)
-    attributions, delta = ig.attribute(
-        inputs=input_ids,
-        additional_forward_args=(attention_mask,),
-        target=attribution_target,
-        return_convergence_delta=True
-    )
-    attr = attributions[0].detach().numpy()
-    attr_df = pd.DataFrame({"Token": tokens, "Attribution": attr.sum(axis=1)})
-    results["dataframes"].append(attr_df)
-    # Plot attributions
-    fig, ax = plt.subplots()
-    ax.bar(range(len(attr_df)), attr_df["Attribution"])
-    ax.set_xticks(range(len(attr_df)))
-    ax.set_xticklabels(tokens, rotation=45)
-    ax.set_title("Integrated Gradients Attribution")
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    img = Image.open(buf)
-    img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
-    results["plots"].append(f"data:image/png;base64,{img_base64}")
-    plt.close()
-    return (
-        results["plots"] if results["plots"] else None,
-        results["dataframes"] if results["dataframes"] else None,
-        "\n".join(results["text"]) if results["text"] else "Processing complete."
-    )
 # Gradio Interface
 def create_gradio_interface():
     with gr.Blocks(title="Neural Network Visualization Demo") as demo:
         gr.Markdown("# Neural Network Visualization Demo")
-        gr.Markdown("Analyze the paths of a BERT model from input to output. Enter text, select a layer, and choose a visualization option.")
         with gr.Row():
             with gr.Column():
                 input_text = gr.Textbox(label="Input Text", value="The quick brown fox jumps over the lazy dog.")
                 layer_name = gr.Dropdown(
                     label="Select Layer",
-                    choices=[name for name in model.named_modules() if 'layer' in name or 'embeddings' in name],
-                    value="embeddings",
-                    allow_custom_value=True
                 )
                 visualize_option = gr.Radio(
                     label="Visualization Type",
@@ -205,7 +185,7 @@ def create_gradio_interface():
                     value="Embeddings"
                 )
                 attribution_target = gr.Slider(
-                    label="Attribution Target Class (0 or 1 for binary classification)",
                     minimum=0,
                     maximum=1,
                     step=1,
@@ -226,7 +206,11 @@ def create_gradio_interface():
     return demo
-# Launch the demo
 if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch(share=True)

 import io
 import base64
 from PIL import Image
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Initialize BERT model and tokenizer
+try:
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertModel.from_pretrained('bert-base-uncased')
+    model.eval()
+except Exception as e:
+    logger.error(f"Failed to load BERT model: {e}")
+    raise
 # Store intermediate activations
 activations = {}
 def hook_fn(module, input, output, name):
+    activations[str(name)] = output  # Ensure name is a string
+# Register hooks for BERT layers
 for name, layer in model.named_modules():
+    if 'layer' in name or 'embeddings' in name:
         layer.register_forward_hook(lambda m, i, o, n=name: hook_fn(m, i, o, n))
 def process_input(input_text, layer_name, visualize_option, attribution_target=0):
     """
     Process input text, compute embeddings, activations, and visualizations.
     Returns:
+    - List of base64-encoded plot images
+    - List of dictionaries for dataframe display
+    - Status message
     """
     global activations
     activations = {}  # Reset activations
+    try:
+        # Validate input
+        if not input_text.strip():
+            return [], [], "Error: Input text cannot be empty."
+        # Tokenize input
+        inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True, max_length=512)
+        input_ids = inputs['input_ids']
+        attention_mask = inputs['attention_mask']
+        # Forward pass
+        with torch.no_grad():
+            outputs = model(input_ids, attention_mask=attention_mask, output_attentions=True, output_hidden_states=True)
+            embeddings = outputs.last_hidden_state  # [batch, seq_len, hidden_size]
+            attentions = outputs.attentions  # List of attention weights
+        # Convert token IDs to tokens
+        tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
+        # Initialize outputs
+        plots = []
+        dataframes = []
+        # Visualization: Embeddings (t-SNE)
+        if visualize_option == "Embeddings":
+            emb = embeddings[0].detach().numpy()
+            if emb.shape[0] > 1:
+                try:
+                    tsne = TSNE(n_components=2, random_state=42, perplexity=min(5, emb.shape[0]-1))
+                    reduced = tsne.fit_transform(emb)
+                    fig, ax = plt.subplots()
+                    ax.scatter(reduced[:, 0], reduced[:, 1], c='blue')
+                    for i, token in enumerate(tokens):
+                        ax.annotate(token, (reduced[i, 0], reduced[i, 1]))
+                    ax.set_title("t-SNE of Token Embeddings")
+                    buf = io.BytesIO()
+                    plt.savefig(buf, format='png')
+                    buf.seek(0)
+                    img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
+                    plots.append(f"data:image/png;base64,{img_base64}")
+                    plt.close()
+                except Exception as e:
+                    logger.warning(f"t-SNE failed: {e}")
+                    dataframes.append({"Error": ["t-SNE could not be computed."]})
+        # Visualization: Attention Weights
+        if visualize_option == "Attention":
+            if attentions:
+                attn = attentions[-1][0, 0].detach().numpy()
+                fig, ax = plt.subplots()
+                sns.heatmap(attn, xticklabels=tokens, yticklabels=tokens, cmap='viridis', ax=ax)
+                ax.set_title("Attention Weights (Last Layer, Head 0)")
+                plt.xticks(rotation=45)
+                plt.yticks(rotation=0)
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png')
+                buf.seek(0)
+                img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
+                plots.append(f"data:image/png;base64,{img_base64}")
+                plt.close()
+        # Visualization: Activations
+        if visualize_option == "Activations" and layer_name in activations:
             act = activations[layer_name]
+            if isinstance(act, tuple):
                 act = act[0]
+            act = act[0].detach().numpy()
             df = pd.DataFrame(act, index=tokens)
+            dataframes.append(df.to_dict())  # Convert to dict for serialization
             fig, ax = plt.subplots()
             mean_act = np.mean(act, axis=1)
             ax.bar(range(len(mean_act)), mean_act)
             ax.set_xticks(range(len(mean_act)))
             ax.set_xticklabels(tokens, rotation=45)
             ax.set_title(f"Mean Activations in {layer_name}")
             buf = io.BytesIO()
             plt.savefig(buf, format='png')
             buf.seek(0)
             img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
+            plots.append(f"data:image/png;base64,{img_base64}")
             plt.close()
+        # Attribution: Integrated Gradients
+        def forward_func(inputs, attention_mask=None):
+            outputs = model(inputs, attention_mask=attention_mask)
+            return outputs.pooler_output[:, int(attribution_target)]
+        ig = IntegratedGradients(forward_func)
+        try:
+            attributions, _ = ig.attribute(
+                inputs=input_ids,
+                additional_forward_args=(attention_mask,),
+                target=int(attribution_target),
+                return_convergence_delta=True
+            )
+            attr = attributions[0].detach().numpy().sum(axis=1)
+            attr_df = pd.DataFrame({"Token": tokens, "Attribution": attr})
+            dataframes.append(attr_df.to_dict())
+            fig, ax = plt.subplots()
+            ax.bar(range(len(attr)), attr)
+            ax.set_xticks(range(len(attr)))
+            ax.set_xticklabels(tokens, rotation=45)
+            ax.set_title("Integrated Gradients Attribution")
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png')
+            buf.seek(0)
+            img_base64 = base64.b64encode(buf.getvalue()).decode('utf-8')
+            plots.append(f"data:image/png;base64,{img_base64}")
+            plt.close()
+        except Exception as e:
+            logger.warning(f"Integrated Gradients failed: {e}")
+            dataframes.append({"Error": ["Attribution could not be computed."]})
+        return plots, dataframes, "Processing complete."
+    except Exception as e:
+        logger.error(f"Processing failed: {e}")
+        return [], [{"Error": [str(e)]}], f"Error: {e}"
 # Gradio Interface
 def create_gradio_interface():
     with gr.Blocks(title="Neural Network Visualization Demo") as demo:
         gr.Markdown("# Neural Network Visualization Demo")
+        gr.Markdown("Analyze BERT's neural network paths. Enter text, select a layer, and choose a visualization.")
         with gr.Row():
             with gr.Column():
                 input_text = gr.Textbox(label="Input Text", value="The quick brown fox jumps over the lazy dog.")
                 layer_name = gr.Dropdown(
                     label="Select Layer",
+                    choices=[str(name) for name, _ in model.named_modules() if 'layer' in name or 'embeddings' in name],
+                    value="embeddings"
                 )
                 visualize_option = gr.Radio(
                     label="Visualization Type",
                     value="Embeddings"
                 )
                 attribution_target = gr.Slider(
+                    label="Attribution Target Class (0 or 1)",
                     minimum=0,
                     maximum=1,
                     step=1,
     return demo
+# Launch the demo locally
 if __name__ == "__main__":
+    try:
+        demo = create_gradio_interface()
+        demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
+    except Exception as e:
+        logger.error(f"Failed to launch Gradio demo: {e}")
+        print(f"Error launching demo: {e}. Try running locally without share=True.")