Update app.py
app.py
CHANGED
@@ -4,10 +4,13 @@ import joblib
 import numpy as np
 from itertools import product
 import torch.nn as nn
+import matplotlib
+matplotlib.use("Agg")  # In case we're running in a no-display environment
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
-import shap
+import shap
+
 
 ###############################################################################
 # Model Definition
@@ -32,13 +35,15 @@ class VirusClassifier(nn.Module):
     def forward(self, x):
         return self.network(x)
 
+
 ###############################################################################
 # Torch Model Wrapper for SHAP
 ###############################################################################
 class TorchModelWrapper:
     """
     A simple callable that takes a PyTorch model and device,
-
+    allowing SHAP to pass in NumPy arrays. We convert them
+    to torch tensors, run the model, and return NumPy outputs.
     """
     def __init__(self, model: nn.Module, device='cpu'):
         self.model = model
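The hunk cuts off inside __init__, so the wrapper's call path isn't visible here. A minimal sketch of what such a __call__ presumably looks like (assumed, not shown in this commit; it also assumes __init__ stores the device as self.device):

    def __call__(self, x_np):
        # Assumed body: SHAP hands us a NumPy batch; convert it to a tensor,
        # run the network without gradients, and return NumPy logits.
        x_t = torch.FloatTensor(x_np).to(self.device)
        with torch.no_grad():
            out = self.model(x_t)
        return out.cpu().numpy()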
@@ -87,7 +92,6 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
     Convert a single nucleotide sequence to a k-mer frequency vector
     of length 4^k (e.g., for k=4, length=256).
     """
-    from itertools import product
     kmers = [''.join(p) for p in product("ACGT", repeat=k)]
     kmer_dict = {km: i for i, km in enumerate(kmers)}
     vec = np.zeros(len(kmers), dtype=np.float32)
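The counting loop that fills vec is outside this hunk; a typical continuation (assumed, not part of the diff) slides a window of length k over the sequence and normalizes counts to frequencies:

    # Assumed tail of sequence_to_kmer_vector: count each k-length window that
    # maps to a known k-mer, then normalize so the vector sums to 1.
    for i in range(len(sequence) - k + 1):
        kmer = sequence[i:i + k]
        if kmer in kmer_dict:
            vec[kmer_dict[kmer]] += 1
    total = vec.sum()
    if total > 0:
        vec = vec / total
    return vec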
@@ -118,47 +122,54 @@ def create_freq_sigma_plot(
     Creates a bar plot showing top-10 k-mers (by absolute SHAP value),
     with frequency (%) and sigma from mean on a twin-axis.
 
-    single_shap_values: shape=(256,)
-    raw_freq_vector:
-    scaled_vector:
-    kmer_list:
+    single_shap_values: shape=(256,) SHAP values for the "human" class
+    raw_freq_vector: shape=(256,) original frequencies for this sample
+    scaled_vector: shape=(256,) scaled (Z-score) values for this sample
+    kmer_list: list of length=256 of all k-mers
     """
-
+    # Identify the top 10 k-mers by absolute shap
+    abs_vals = np.abs(single_shap_values)  # shape=(256,)
     top_k = 10
-    top_indices = np.argsort(abs_vals)[-top_k:][::-1]  #
+    top_indices = np.argsort(abs_vals)[-top_k:][::-1]  # indices of largest -> smallest
+
     top_data = []
     for idx in top_indices:
+        idx_int = int(idx)  # ensure integer
         top_data.append({
-            "kmer": kmer_list[
-            "shap": single_shap_values[
-            "abs_shap": abs_vals[
-            "frequency": raw_freq_vector[
-            "sigma": scaled_vector[
+            "kmer": kmer_list[idx_int],
+            "shap": single_shap_values[idx_int],
+            "abs_shap": abs_vals[idx_int],
+            "frequency": raw_freq_vector[idx_int] * 100.0,  # percentage
+            "sigma": scaled_vector[idx_int]
         })
 
     # Sort top_data by abs_shap descending
     top_data.sort(key=lambda x: x["abs_shap"], reverse=True)
 
+    # Prepare for plotting
     kmers = [d["kmer"] for d in top_data]
     freqs = [d["frequency"] for d in top_data]
     sigmas = [d["sigma"] for d in top_data]
-    # color by sign (positive=green, negative=red)
+    # color by sign (positive=green => pushes "human", negative=red => pushes "non-human")
     colors = ["green" if d["shap"] >= 0 else "red" for d in top_data]
 
-    import matplotlib.pyplot as plt
     x = np.arange(len(kmers))
     width = 0.4
 
     fig, ax = plt.subplots(figsize=(8, 5))
     # Frequency
-    ax.bar(
+    ax.bar(
+        x - width/2, freqs, width, color=colors, alpha=0.7, label="Frequency (%)"
+    )
     ax.set_ylabel("Frequency (%)", color='black')
-    if freqs:
+    if len(freqs) > 0:
         ax.set_ylim(0, max(freqs)*1.2)
 
     # Twin axis for sigma
     ax2 = ax.twinx()
-    ax2.bar(
+    ax2.bar(
+        x + width/2, sigmas, width, color="gray", alpha=0.5, label="σ from Mean"
+    )
     ax2.set_ylabel("Standard Deviations (σ)", color='black')
 
     ax.set_xticks(x)
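The hunk stops at ax.set_xticks(x). Since update_freq_plot later calls freq_sigma_fig.savefig(...), the function has to return the figure; its tail presumably finishes roughly like this (assumed, not shown in the diff):

    # Assumed finishing touches: label ticks with the k-mers, add the title
    # passed in by the caller, and hand the figure back.
    ax.set_xticklabels(kmers, rotation=45, ha='right')
    ax.set_title(title)
    fig.tight_layout()
    return fig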
@@ -182,9 +193,9 @@ def run_classification_and_shap(file_obj):
     Reads one or more FASTA sequences from file_obj or text.
     Returns:
       - Table of results (list of dicts) for each sequence
-      - shap_values object (SHAP values for the entire batch)
-      - array
-      - list of k-mers
+      - shap_values object (SHAP values for the entire batch, shape=(num_samples, 2, num_features))
+      - array of scaled vectors
+      - list of k-mers
       - error message or None
     """
     # 1. Basic read
@@ -194,12 +205,12 @@ def run_classification_and_shap(file_obj):
     try:
         text = file_obj.decode("utf-8")
     except Exception as e:
-        return None, None, f"Error reading file: {str(e)}"
+        return None, None, None, None, f"Error reading file: {str(e)}"
 
     # 2. Parse FASTA
     sequences = parse_fasta(text)
     if len(sequences) == 0:
-        return None, None, "No valid FASTA sequences found!"
+        return None, None, None, None, "No valid FASTA sequences found!"
 
     # 3. Convert each sequence to k-mer vector
     k = 4
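parse_fasta itself isn't touched by this commit, but from the way it is used (a list of (header, sequence) tuples indexed as sequences[i][0] / sequences[i][1]), it presumably looks something like this minimal sketch:

def parse_fasta(text):
    # Assumed helper (not in this diff): return (header, sequence) tuples
    # from FASTA-formatted text, concatenating wrapped sequence lines.
    sequences, header, chunks = [], None, []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        if line.startswith(">"):
            if header is not None:
                sequences.append((header, "".join(chunks).upper()))
            header, chunks = line[1:], []
        else:
            chunks.append(line)
    if header is not None:
        sequences.append((header, "".join(chunks).upper()))
    return sequences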
@@ -219,15 +230,14 @@ def run_classification_and_shap(file_obj):
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
         model = VirusClassifier(input_shape=4**k).to(device)
-        #
+        # Use weights_only=True to suppress future warnings about untrusted pickles
         state_dict = torch.load("model.pt", map_location=device, weights_only=True)
         model.load_state_dict(state_dict)
         model.eval()
 
         scaler = joblib.load("scaler.pkl")
-
     except Exception as e:
-        return None, None, f"Error loading model or scaler: {str(e)}"
+        return None, None, None, None, f"Error loading model or scaler: {str(e)}"
 
     # 5. Scale data
     scaled_data = scaler.transform(all_raw_vectors)  # shape=(num_seqs, 256)
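weights_only=True is only accepted by reasonably recent PyTorch releases; if the Space ever has to run on an older torch, a small compatibility shim along these lines (hypothetical, not part of the commit) keeps the load working:

        try:
            # Newer torch: refuse arbitrary pickled objects in the checkpoint.
            state_dict = torch.load("model.pt", map_location=device, weights_only=True)
        except TypeError:
            # Older torch without the weights_only keyword.
            state_dict = torch.load("model.pt", map_location=device)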
@@ -236,6 +246,7 @@ def run_classification_and_shap(file_obj):
     X_tensor = torch.FloatTensor(scaled_data).to(device)
     with torch.no_grad():
         logits = model(X_tensor)
+        # shape=(num_seqs, 2)
         probs = torch.softmax(logits, dim=1).cpu().numpy()
     preds = np.argmax(probs, axis=1)  # 0 or 1
 
@@ -243,29 +254,30 @@ def run_classification_and_shap(file_obj):
     for i, (hdr, seq) in enumerate(zip(headers, seqs)):
         results_table.append({
             "header": hdr,
-            "sequence": seq[:50] + ("..." if len(seq)>50 else ""),
+            "sequence": seq[:50] + ("..." if len(seq) > 50 else ""),
             "pred_label": "human" if preds[i] == 1 else "non-human",
             "human_prob": float(probs[i][1]),
             "non_human_prob": float(probs[i][0]),
-            "confidence": float(max(probs[i]))
+            "confidence": float(np.max(probs[i]))
         })
 
     # 7. SHAP Explainer
-    #
+    # For large data, pick a smaller background subset
     if scaled_data.shape[0] > 50:
         background_data = scaled_data[:50]
     else:
         background_data = scaled_data
 
-    # Wrap the model so it can handle numpy -> tensor
     wrapped_model = TorchModelWrapper(model, device)
     explainer = shap.Explainer(wrapped_model, background_data)
-    shap_values
+    # shap_values shape=(num_samples, num_features) if single-output
+    # but here we have 2 outputs => shape=(num_samples, 2, num_features).
+    shap_values = explainer(scaled_data)
 
-    # k-mer list
-    from itertools import product
+    # Prepare k-mer list
     kmer_list = [''.join(p) for p in product("ACGT", repeat=k)]
 
+    # Return everything
     return (results_table, shap_values, scaled_data, kmer_list, None)
 
 
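Given the shape comments above, callers can sanity-check the returned Explanation before plotting; the slicing pattern below is the same one the plot helpers later in the file rely on (shapes assume N sequences and 256 k-mer features):

# results, shap_values, scaled_data, kmer_list, err = run_classification_and_shap(...)
# shap_values.values.shape       -> (N, 2, 256)
# shap_values.base_values.shape  -> (N, 2)
# shap_values.data.shape         -> (N, 256)
human_shap = shap_values.values[:, 1, :]  # class=1 ("human") attributions only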
@@ -274,9 +286,8 @@ def run_classification_and_shap(file_obj):
 ###############################################################################
 def main_predict(file_obj):
     """
-
-
-    the shap values plus data needed for subsequent plots.
+    Triggered by the 'Run Classification' button in Gradio.
+    Returns a markdown table plus states for subsequent plots.
     """
     results, shap_vals, scaled_data, kmer_list, err = run_classification_and_shap(file_obj)
     if err:
@@ -294,28 +305,44 @@ def main_predict(file_obj):
             f"| {i} | {row['header']} | {row['pred_label']} | "
             f"{row['confidence']:.4f} | {row['human_prob']:.4f} | {row['non_human_prob']:.4f} |\n"
         )
-    md += "\nSelect a sequence index below to view SHAP Waterfall & Frequency plots."
+    md += "\nSelect a sequence index below to view SHAP Waterfall & Frequency plots (class=1/human)."
 
     return (md, shap_vals, scaled_data, kmer_list, results)
 
+
 def update_waterfall_plot(selected_index, shap_values_obj):
     """
-    Build a waterfall plot for the user-selected sample
+    Build a waterfall plot for the user-selected sample, but ONLY for class=1 (human).
+    shap_values_obj has shape=(num_samples, 2, num_features).
+    We do shap_values_obj[selected_index, 1] => shape=(num_features,)
+    for a single-sample single-class explanation.
     """
     if shap_values_obj is None:
         return None
 
     import matplotlib.pyplot as plt
-    import shap
 
     try:
         selected_index = int(selected_index)
     except:
         selected_index = 0
 
-    #
+    # We only visualize class=1 ("human") SHAP values
+    # shap_values_obj.values shape => (num_samples, 2, num_features)
+    single_ex_values = shap_values_obj.values[selected_index, 1, :]  # shape=(256,)
+    single_ex_base = shap_values_obj.base_values[selected_index, 1]  # scalar
+    single_ex_data = shap_values_obj.data[selected_index]  # shape=(256,)
+
+    # Construct a shap.Explanation object for just this one sample & class
+    single_expl = shap.Explanation(
+        values=single_ex_values,
+        base_values=single_ex_base,
+        data=single_ex_data,
+        feature_names=[f"feat_{i}" for i in range(single_ex_values.shape[0])]
+    )
+
     shap_plots_fig = plt.figure(figsize=(8, 5))
-    shap.plots.waterfall(
+    shap.plots.waterfall(single_expl, max_display=14, show=False)
     buf = io.BytesIO()
     plt.savefig(buf, format='png', bbox_inches='tight', dpi=120)
     buf.seek(0)
@@ -324,18 +351,35 @@ def update_waterfall_plot(selected_index, shap_values_obj):
 
     return wf_img
 
+
 def update_beeswarm_plot(shap_values_obj):
     """
-    Build a beeswarm plot across all samples.
+    Build a beeswarm plot across all samples, but only for class=1 (human).
+    We slice shap_values_obj to pick shap_values_obj.values[:, 1, :]
+    => shape=(num_samples, num_features).
     """
     if shap_values_obj is None:
         return None
 
     import matplotlib.pyplot as plt
-
+
+    # For multi-output, shap_values_obj.values shape => (num_samples, 2, num_features)
+    # We'll create a new Explanation object for class=1:
+    class1_vals = shap_values_obj.values[:, 1, :]  # shape=(num_samples, num_features)
+    class1_base = shap_values_obj.base_values[:, 1]  # shape=(num_samples,)
+    class1_data = shap_values_obj.data  # shape=(num_samples, num_features)
+
+    # Some versions of shap store data in a 2D array, which is fine
+    # We'll re-wrap them in a shap.Explanation:
+    class1_expl = shap.Explanation(
+        values=class1_vals,
+        base_values=class1_base,
+        data=class1_data,
+        feature_names=[f"feat_{i}" for i in range(class1_vals.shape[1])]
+    )
 
     beeswarm_fig = plt.figure(figsize=(8, 5))
-    shap.plots.beeswarm(
+    shap.plots.beeswarm(class1_expl, show=False)
     buf = io.BytesIO()
     plt.savefig(buf, format='png', bbox_inches='tight', dpi=120)
     buf.seek(0)
@@ -344,14 +388,17 @@ def update_beeswarm_plot(shap_values_obj):
 
     return bs_img
 
+
 def update_freq_plot(selected_index, shap_values_obj, scaled_data, kmer_list, file_obj):
     """
-    Create the frequency &
-
+    Create the frequency & σ bar chart for the selected sequence's top-10 k-mers (by abs SHAP).
+    Again, we'll use class=1 SHAP values only.
     """
     if shap_values_obj is None or scaled_data is None or kmer_list is None:
         return None
 
+    import matplotlib.pyplot as plt
+
     try:
         selected_index = int(selected_index)
     except:
@@ -364,20 +411,23 @@ def update_freq_plot(selected_index, shap_values_obj, scaled_data, kmer_list, file_obj):
     text = file_obj.decode('utf-8')
 
     sequences = parse_fasta(text)
+    # If out of range, clamp to 0
     if selected_index >= len(sequences):
         selected_index = 0
 
     seq = sequences[selected_index][1]
-    raw_vec = sequence_to_kmer_vector(seq, k=4)
+    raw_vec = sequence_to_kmer_vector(seq, k=4)  # shape=(256,)
 
-
+    # SHAP for class=1 => shape=(num_samples, 2, 256)
+    single_shap_values = shap_values_obj.values[selected_index, 1, :]
     freq_sigma_fig = create_freq_sigma_plot(
-        single_shap_values,
-        raw_freq_vector=raw_vec,
+        single_shap_values,
+        raw_freq_vector=raw_vec,
         scaled_vector=scaled_data[selected_index],
         kmer_list=kmer_list,
         title=f"Sample #{selected_index} — {sequences[selected_index][0]}"
     )
+
     buf = io.BytesIO()
     freq_sigma_fig.savefig(buf, format='png', bbox_inches='tight', dpi=120)
     buf.seek(0)
@@ -391,16 +441,19 @@ def update_freq_plot(selected_index, shap_values_obj, scaled_data, kmer_list, file_obj):
 # Gradio Interface
 ###############################################################################
 with gr.Blocks(title="Multi-Sequence Virus Host Classifier with SHAP") as demo:
-    shap.initjs()  # load shap JS if needed for
+    shap.initjs()  # load shap JS if needed for HTML-based plots (optional)
 
     gr.Markdown(
         """
-        # **
-
+        # **Virus Host Classifier**
+        Upload a FASTA file with one or more nucleotide sequences.
         This app will:
         1. Predict each sequence's **host** (human vs. non-human).
-        2. Provide **SHAP** explanations
-        3.
+        2. Provide **SHAP** explanations focusing on the 'human' class (index=1).
+        3. Display:
+           - A **waterfall** plot per-sequence (top features).
+           - A **beeswarm** plot across all sequences (global summary).
+           - A **frequency & σ** bar chart for the top-10 k-mers of any selected sequence.
         """
     )
 
@@ -408,23 +461,20 @@ with gr.Blocks(title="Multi-Sequence Virus Host Classifier with SHAP") as demo:
     file_input = gr.File(label="Upload FASTA", type="binary")
     run_btn = gr.Button("Run Classification")
 
-    # Store intermediate results in
+    # Store intermediate results in Gradio states
     shap_values_state = gr.State()
     scaled_data_state = gr.State()
     kmer_list_state = gr.State()
     results_state = gr.State()
-    # We'll also store the "raw input" so we can reconstruct freq data for each sample
     file_data_state = gr.State()
 
-    # TABS for outputs
     with gr.Tabs():
         with gr.Tab("Results Table"):
             md_out = gr.Markdown()
 
         with gr.Tab("SHAP Waterfall"):
-            # We'll let user pick the sequence index from a dropdown or input
             with gr.Row():
-
+                seq_index_input = gr.Number(label="Sequence Index (0-based)", value=0, precision=0)
                 update_wf_btn = gr.Button("Update Waterfall")
 
             wf_plot = gr.Image(label="SHAP Waterfall Plot")
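The context between this hunk and the next (old lines 431-433 / new lines 481-483) is not shown, but it must define bs_plot, which the beeswarm callback below writes to; presumably something like:

        with gr.Tab("SHAP Beeswarm"):
            bs_plot = gr.Image(label="SHAP Beeswarm Plot")  # assumed, not visible in the diff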
@@ -434,44 +484,43 @@ with gr.Blocks(title="Multi-Sequence Virus Host Classifier with SHAP") as demo:
 
         with gr.Tab("Top-10 Frequency & Sigma"):
             with gr.Row():
-
+                seq_index_input2 = gr.Number(label="Sequence Index (0-based)", value=0, precision=0)
                 update_fs_btn = gr.Button("Update Frequency Chart")
             fs_plot = gr.Image(label="Top-10 Frequency & σ Chart")
 
-    #
-    # 1) The main classification run
+    # 1) Main classification
     run_btn.click(
         fn=main_predict,
         inputs=[file_input],
         outputs=[md_out, shap_values_state, scaled_data_state, kmer_list_state, results_state]
     )
-    # Also store raw file data for subsequent freq usage
     run_btn.click(
        fn=lambda x: x,
        inputs=file_input,
        outputs=file_data_state
     )
 
-    # 2) Waterfall
+    # 2) Update Waterfall
     update_wf_btn.click(
         fn=update_waterfall_plot,
-        inputs=[
+        inputs=[seq_index_input, shap_values_state],
         outputs=[wf_plot]
     )
 
-    # 3) Beeswarm
+    # 3) Update Beeswarm right after classification
     run_btn.click(
         fn=update_beeswarm_plot,
         inputs=[shap_values_state],
         outputs=[bs_plot]
     )
 
-    # 4) Frequency
+    # 4) Update Frequency & σ
     update_fs_btn.click(
         fn=update_freq_plot,
-        inputs=[
+        inputs=[seq_index_input2, shap_values_state, scaled_data_state, kmer_list_state, file_data_state],
        outputs=[fs_plot]
     )
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
+
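For completeness, a hypothetical smoke test (not part of the commit) that exercises the pipeline without the Gradio UI, assuming model.pt and scaler.pkl sit next to app.py:

from app import run_classification_and_shap

fasta_bytes = b">seq1\nACGTACGTACGTACGTACGT\n>seq2\nTTTTGGGGCCCCAAAATTTT\n"
results, shap_values, scaled_data, kmer_list, err = run_classification_and_shap(fasta_bytes)
if err:
    print("Error:", err)
else:
    for row in results:
        print(row["header"], row["pred_label"], f"{row['human_prob']:.3f}")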