hiyata committed
Commit 287ec7d · verified · Parent(s): 0e88365

Update app.py

Files changed (1):
  1. app.py +62 -46
app.py CHANGED
@@ -85,61 +85,77 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
 # 3. SHAP-VALUE (ABLATION) CALCULATION
 ###############################################################################
 
-def calculate_shap_values(model, x_tensor, baseline=None, steps=50):
+def calculate_shap_values(model, x_tensor, baselines=None, steps=100, n_baselines=5):
     """
-    Calculate feature attributions using Integrated Gradients.
+    Calculate feature attributions using Integrated Gradients with multiple baselines.
 
     Args:
         model: A PyTorch model.
         x_tensor: Input tensor of shape (1, num_features).
-        baseline: Tensor of the same shape as x_tensor to use as the reference.
-            If None, defaults to a tensor of zeros.
-        steps: Number of steps in the Riemann approximation of the integral.
+        baselines: A list of baseline tensors, each of shape (1, num_features).
+            If None, defaults to n_baselines copies of the zero vector.
+        steps: Number of interpolation steps between the baseline and the input.
+        n_baselines: Number of baselines to use if baselines is None.
 
     Returns:
-        attributions: A numpy array of shape (num_features,) with feature attributions.
-        full_prob: The model's predicted probability for the target class (human)
-            when using the actual input.
+        avg_attributions: A numpy array of shape (num_features,) with averaged feature attributions.
+        avg_full_prob: The model's predicted probability for the target class ('human'),
+            computed on the full input, averaged over baselines.
     """
     model.eval()
-    if baseline is None:
-        baseline = torch.zeros_like(x_tensor)
-
-    # Compute the model's prediction for the full input.
-    with torch.no_grad():
-        full_output = model(x_tensor)
-        full_probs = torch.softmax(full_output, dim=1)
-        full_prob = full_probs[0, 1].item()  # Probability for 'human'
-
-    # Generate interpolated inputs between the baseline and the actual input.
-    scaled_inputs = [
-        baseline + (float(i) / steps) * (x_tensor - baseline)
-        for i in range(steps + 1)
-    ]
-    scaled_inputs = torch.cat(scaled_inputs, dim=0)  # Shape: (steps+1, num_features)
-    scaled_inputs.requires_grad = True
-
-    # Forward pass: compute model outputs for all interpolated inputs.
-    outputs = model(scaled_inputs)  # Shape: (steps+1, num_classes)
-    probs = torch.softmax(outputs, dim=1)[:, 1]  # Probability for 'human'
-
-    # Backward pass: compute gradients of the probability with respect to inputs.
-    grads = torch.autograd.grad(
-        outputs=probs,
-        inputs=scaled_inputs,
-        grad_outputs=torch.ones_like(probs),
-        create_graph=False,
-        retain_graph=False
-    )[0]  # Shape: (steps+1, num_features)
-
-    # Approximate the integral using the trapezoidal rule.
-    avg_grads = (grads[:-1] + grads[1:]) / 2.0  # Average gradient between steps.
-    integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # Mean over all steps.
-
-    # Scale the integrated gradients by the difference between the input and baseline.
-    attributions = (x_tensor - baseline) * integrated_grad
-
-    return attributions.squeeze().cpu().numpy(), full_prob
+
+    # If no baselines are provided, generate a list of zero-vectors.
+    if baselines is None:
+        baselines = [torch.zeros_like(x_tensor) for _ in range(n_baselines)]
+    elif not isinstance(baselines, list):
+        baselines = [baselines]
+
+    all_attributions = []
+    full_probs = []
+
+    # For each baseline, compute integrated gradients.
+    for baseline in baselines:
+        # Compute the model's full prediction using the actual input.
+        with torch.no_grad():
+            full_output = model(x_tensor)
+            full_prob = torch.softmax(full_output, dim=1)[0, 1].item()
+        full_probs.append(full_prob)
+
+        # Create interpolated inputs from baseline to x_tensor.
+        scaled_inputs = [
+            baseline + (float(i) / steps) * (x_tensor - baseline)
+            for i in range(steps + 1)
+        ]
+        scaled_inputs = torch.cat(scaled_inputs, dim=0)  # Shape: (steps+1, num_features)
+        scaled_inputs.requires_grad = True
+
+        # Forward pass: compute outputs and target class probabilities for all interpolated inputs.
+        outputs = model(scaled_inputs)
+        probs = torch.softmax(outputs, dim=1)[:, 1]  # Probabilities for 'human' class
+
+        # Backward pass: compute gradients of the probabilities with respect to inputs.
+        grads = torch.autograd.grad(
+            outputs=probs,
+            inputs=scaled_inputs,
+            grad_outputs=torch.ones_like(probs),
+            create_graph=False,
+            retain_graph=False
+        )[0]  # Shape: (steps+1, num_features)
+
+        # Approximate the integral using the trapezoidal rule.
+        avg_grads = (grads[:-1] + grads[1:]) / 2.0  # Average gradients between successive steps.
+        integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # Mean over all steps.
+
+        # Multiply by the input difference to get attributions.
+        attributions = (x_tensor - baseline) * integrated_grad  # Shape: (1, num_features)
+        all_attributions.append(attributions)
+
+    # Average attributions over all baselines.
+    avg_attributions = torch.stack(all_attributions, dim=0).mean(dim=0)
+    avg_full_prob = np.mean(full_probs)
+
+    return avg_attributions.squeeze().cpu().detach().numpy(), avg_full_prob
+
 
 
 
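For reference, a minimal usage sketch of the updated function (not part of the commit). It assumes `model` is the loaded PyTorch classifier and `sequence_to_kmer_vector` is the k-mer helper visible in the hunk header; the sequence itself is a made-up example.

    import torch

    seq = "ATGCGTACGTTAGC"  # hypothetical input sequence
    kmer_vec = sequence_to_kmer_vector(seq, k=4)  # 4-mer frequency vector (numpy)
    x_tensor = torch.tensor(kmer_vec, dtype=torch.float32).unsqueeze(0)  # (1, num_features)

    # Default call: five zero baselines, 100 interpolation steps.
    attributions, human_prob = calculate_shap_values(model, x_tensor)

    # Distinct baselines are where the new averaging matters; with the default
    # all-zero baselines every run is identical, so the mean is a no-op.
    custom = [torch.zeros_like(x_tensor),
              torch.full_like(x_tensor, x_tensor.mean().item())]
    attributions, human_prob = calculate_shap_values(model, x_tensor, baselines=custom)

Note that the commit also raises the default steps from 50 to 100, trading runtime for a tighter trapezoidal approximation of the path integral.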