Spaces:

hiyata
/

HostClassifier

Running

App Files Files Community

hiyata committed on Feb 27

Commit

bc5e648

verified ·

1 Parent(s): 287ec7d

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -71

app.py CHANGED Viewed

@@ -85,78 +85,23 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
 # 3. SHAP-VALUE (ABLATION) CALCULATION
 ###############################################################################
-def calculate_shap_values(model, x_tensor, baselines=None, steps=100, n_baselines=5):
-    """
-    Calculate feature attributions using Integrated Gradients with multiple baselines.
-    Args:
-        model: A PyTorch model.
-        x_tensor: Input tensor of shape (1, num_features).
-        baselines: A list of baseline tensors, each of shape (1, num_features).
-                   A single non-list value is wrapped into a one-element list.
-                   If None, defaults to n_baselines copies of the zero vector.
-        steps: Number of interpolation steps between the baseline and the input.
-        n_baselines: Number of baselines to use if baselines is None.
-    Returns:
-        avg_attributions: A numpy array of shape (num_features,) with averaged feature attributions.
-        avg_full_prob: The model's predicted probability for the target class ('human')
-                       computed on the full input, averaged over baselines.
-    Note:
-        The default baselines are all identical zero tensors, so with
-        baselines=None the average over baselines equals the result for a
-        single zero baseline; n_baselines only affects the amount of work done.
-    """
     model.eval()
-    # If no baselines are provided, generate a list of zero-vectors.
-    if baselines is None:
-        baselines = [torch.zeros_like(x_tensor) for _ in range(n_baselines)]
-    elif not isinstance(baselines, list):
-        baselines = [baselines]
-    all_attributions = []
-    full_probs = []
-    # For each baseline, compute integrated gradients.
-    for baseline in baselines:
-        # Compute the model's full prediction using the actual input.
-        # This does not depend on the baseline, so the same forward pass is
-        # repeated once per baseline.
-        with torch.no_grad():
-            full_output = model(x_tensor)
-            full_prob = torch.softmax(full_output, dim=1)[0, 1].item()
-            full_probs.append(full_prob)
-        # Create interpolated inputs from baseline to x_tensor.
-        scaled_inputs = [
-            baseline + (float(i) / steps) * (x_tensor - baseline)
-            for i in range(steps + 1)
-        ]
-        scaled_inputs = torch.cat(scaled_inputs, dim=0)  # Shape: (steps+1, num_features)
-        scaled_inputs.requires_grad = True
-        # Forward pass: compute outputs and target class probabilities for all interpolated inputs.
-        outputs = model(scaled_inputs)
-        probs = torch.softmax(outputs, dim=1)[:, 1]  # Probabilities for 'human' class
-        # Backward pass: compute gradients of the probabilities with respect to inputs.
-        grads = torch.autograd.grad(
-            outputs=probs,
-            inputs=scaled_inputs,
-            grad_outputs=torch.ones_like(probs),
-            create_graph=False,
-            retain_graph=False
-        )[0]  # Shape: (steps+1, num_features)
-        # Approximate the integral using the trapezoidal rule.
-        avg_grads = (grads[:-1] + grads[1:]) / 2.0  # Average gradients between successive steps.
-        # avg_grads has one row per interval (steps rows), so this is the mean over intervals.
-        integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # Mean over all steps.
-        # Multiply by the input difference to get attributions.
-        attributions = (x_tensor - baseline) * integrated_grad  # Shape: (1, num_features)
-        all_attributions.append(attributions)
-    # Average attributions over all baselines.
-    avg_attributions = torch.stack(all_attributions, dim=0).mean(dim=0)
-    avg_full_prob = np.mean(full_probs)
-    return avg_attributions.squeeze().cpu().detach().numpy(), avg_full_prob
 ###############################################################################

 # 3. SHAP-VALUE (ABLATION) CALCULATION
 ###############################################################################
+def calculate_shap_values(model, x_tensor):
+    """
+    Score each input feature by leave-one-feature-out ablation.
+
+    The model is evaluated once on the unmodified input, then once more per
+    feature with that single feature set to 0.0. A feature's score is the
+    drop in the class-1 ('human') softmax probability caused by zeroing it.
+    Despite the function name, this is a single-feature ablation, not a
+    Shapley-value computation: feature interactions are not averaged over.
+
+    Args:
+        model: Callable applied to x_tensor whose output is softmaxed over
+            dim=1; it is switched to eval mode before use.
+        x_tensor: 2-D input tensor; only row 0 is ablated and scored
+            (presumably shape (1, num_features) -- confirm against caller).
+            It is cloned, so the caller's tensor is not modified.
+
+    Returns:
+        A tuple (shap_values, baseline_prob): a numpy array with one entry
+        per column of x_tensor holding baseline_prob minus the ablated
+        probability (positive means the feature raised the class-1
+        probability), and the class-1 probability of the unmodified input
+        as a float.
+    """
     model.eval()
+    # No gradients are needed: every score is a difference of forward passes.
+    with torch.no_grad():
+        baseline_output = model(x_tensor)
+        baseline_probs = torch.softmax(baseline_output, dim=1)
+        baseline_prob = baseline_probs[0, 1].item()  # Prob of 'human'
+        shap_values = []
+        # Work on a copy so features can be zeroed and restored in place.
+        x_zeroed = x_tensor.clone()
+        # One forward pass per feature.
+        for i in range(x_tensor.shape[1]):
+            original_val = x_zeroed[0, i].item()
+            x_zeroed[0, i] = 0.0
+            output = model(x_zeroed)
+            probs = torch.softmax(output, dim=1)
+            prob = probs[0, 1].item()
+            shap_values.append(baseline_prob - prob)
+            # Restore the feature so only one is ablated at a time.
+            x_zeroed[0, i] = original_val
+    return np.array(shap_values), baseline_prob
 ###############################################################################