Update app.py
app.py CHANGED
@@ -84,53 +84,97 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
 ###############################################################################

 import shap
+from sklearn.linear_model import Ridge

 def calculate_shap_values(model, x_tensor):
+    """
+    Calculate SHAP values with three possible methods:
+    1. Try SHAP's GradientExplainer (better for deep models with unsupported layers)
+    2. Fall back to SHAP's KernelExplainer with fixed parameters if #1 fails
+    3. Fall back to original feature ablation method if both SHAP methods fail
+    """
     model.eval()
     device = next(model.parameters()).device

-    #
-
+    # Get human probability for baseline
+    with torch.no_grad():
+        output = model(x_tensor)
+        probs = torch.softmax(output, dim=1)
+        prob_human = probs[0, 1].item()

+    # Try GradientExplainer first (better for neural nets with unsupported ops)
     try:
-        #
-
-
-
+        # Create synthetic background data (more samples to avoid errors)
+        background = torch.zeros((20, x_tensor.shape[1]), device=device)
+        for i in range(20):
+            # Add small random noise to avoid singular matrices
+            background[i] = torch.randn_like(x_tensor[0]) * 0.01
+
+        explainer = shap.GradientExplainer(model, background)
         shap_values_all = explainer.shap_values(x_tensor)

-        #
-
+        # For classification, shap_values is a list of arrays, one for each class
+        # We want the values for the "human" class (index 1)
+        if isinstance(shap_values_all, list) and len(shap_values_all) > 1:
+            shap_values = shap_values_all[1][0].cpu().numpy()
+        else:
+            shap_values = shap_values_all[0].cpu().numpy()
+
+        print("Using GradientExplainer for SHAP values")
+        return np.array(shap_values), prob_human

     except Exception as e:
-        print(f"
+        print(f"GradientExplainer failed: {str(e)}, trying KernelExplainer")

-
-
+    try:
+        # Create model wrapper function
+        def model_predict(x):
+            with torch.no_grad():
+                tensor_x = torch.FloatTensor(x).to(device)
+                output = model(tensor_x)
+                probs = torch.softmax(output, dim=1)[:, 1]  # Human probability
+                return probs.cpu().numpy()
+
+        # Create more background samples (50 samples with random noise)
+        background = np.zeros((50, x_tensor.shape[1]))
+        for i in range(50):
+            # Small random values to create better background distribution
+            background[i] = np.random.normal(0, 0.01, x_tensor.shape[1])
+
+        # Force using Ridge regression instead of default LassoLarsIC
+        explainer = shap.KernelExplainer(
+            model_predict,
+            background,
+            link="identity",  # Use raw output, not logit
+            l1_reg="num_features(10)",  # Simplified regularization
+            model_regressor=Ridge(alpha=0.01)  # Use Ridge instead of LassoLarsIC
+        )
+
+        # Calculate SHAP values with more samples
+        x_numpy = x_tensor.cpu().numpy()
+        shap_values = explainer.shap_values(x_numpy, nsamples=300)
+
+        print("Using KernelExplainer for SHAP values")
+        return np.array(shap_values), prob_human
+
+    except Exception as e:
+        print(f"KernelExplainer failed: {str(e)}, falling back to ablation method")
+
+    # Fall back to original feature ablation method
     with torch.no_grad():
-
-
-
-
-
-
-
-
-
-
-
-
-
-        shap_values = explainer.shap_values(x_numpy, nsamples=100)
-
-        # Get human probability
-        with torch.no_grad():
-            output = model(x_tensor)
-            probs = torch.softmax(output, dim=1)
-            prob_human = probs[0, 1].item()
-
-        return np.array(shap_values), prob_human
-
+        shap_values = []
+        x_zeroed = x_tensor.clone()
+        for i in range(x_tensor.shape[1]):
+            original_val = x_zeroed[0, i].item()
+            x_zeroed[0, i] = 0.0
+            output = model(x_zeroed)
+            probs = torch.softmax(output, dim=1)
+            prob = probs[0, 1].item()
+            shap_values.append(prob_human - prob)
+            x_zeroed[0, i] = original_val
+
+    print("Using ablation method for SHAP values")
+    return np.array(shap_values), prob_human
 ###############################################################################
 # 4. PER-BASE SHAP AGGREGATION
 ###############################################################################
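
For orientation, a minimal usage sketch of the updated helper as it might be called elsewhere in app.py. The example sequence and the `model` variable are illustrative assumptions; `sequence_to_kmer_vector` is the function named in the hunk header, and numpy/torch are assumed to be imported at the top of app.py as in the code above.

# Illustrative only: assumes a trained PyTorch classifier `model` is already loaded in app.py
sequence = "ACGTACGTACGTACGTACGT"                        # hypothetical input DNA sequence
kmer_vec = sequence_to_kmer_vector(sequence, k=4)        # np.ndarray of k-mer features
x_tensor = torch.FloatTensor(kmer_vec).unsqueeze(0)      # add batch dimension -> (1, 4**k)
x_tensor = x_tensor.to(next(model.parameters()).device)  # keep input on the model's device

shap_values, prob_human = calculate_shap_values(model, x_tensor)
print(f"P(human) = {prob_human:.3f}")
print(f"Most influential k-mer index: {int(np.argmax(np.abs(shap_values)))}")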