Update app.py
app.py CHANGED
@@ -33,14 +33,14 @@ class VirusClassifier(nn.Module):
 
     def get_feature_importance(self, x):
         """
-        Calculate gradient-based feature importance
-
+        Calculate gradient-based feature importance, specifically for the
+        'human' class (index=1) by computing gradient of that probability wrt x.
         """
         x.requires_grad_(True)
         output = self.network(x)
         probs = torch.softmax(output, dim=1)
 
-        #
+        # Probability of 'human' class (index=1)
         human_prob = probs[..., 1]
         if x.grad is not None:
             x.grad.zero_()
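The hunk ends at the gradient reset, before any backward pass appears. Based on how predict() later unpacks importance, hum_prob_grad = model.get_feature_importance(X_tensor), a minimal sketch of the assumed continuation; the backward() call and return values are assumptions, not shown in this commit:

    # Assumed continuation of get_feature_importance (not part of the diff):
    human_prob.sum().backward()             # fills x.grad with d(P_human)/d(x_i)
    importance = x.grad.detach()            # shape [batch, 256] for 4-mer features
    return importance, human_prob.detach()  # later unpacked as (importance, hum_prob_grad)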
@@ -94,127 +94,160 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
 ###############################################################################
 # Visualization
 ###############################################################################
-def create_visualization(important_kmers, human_prob, title):
-    gs = plt.GridSpec(2, 2, width_ratios=[1.2, 1], height_ratios=[1.2, 1], hspace=0.35, wspace=0.3)
-    # -------------------------------------------------------------------------
-    # 1. Waterfall-like Plot (top-left subplot)
-    # -------------------------------------------------------------------------
-    ax_waterfall = plt.subplot(gs[0, 0])
-            if i == 0:  # baseline
-                ax_waterfall.annotate(kmer, (i, prob), textcoords="offset points", xytext=(0, -15), ha='center', color='black')
-                continue
-            color = "green" if change > 0 else "red"
-            ax_waterfall.annotate(
-                f"{kmer}\n({change:+.3f})",
-                (i, prob),
-                textcoords="offset points",
-                xytext=(0, -15),
-                ha='center',
-                color=color,
-                fontsize=9
-            )
-    # 2. Frequency & σ from Mean (top-right subplot)
-    ax_bar = plt.subplot(gs[0, 1])
-    kmers = [k["kmer"] for k in important_kmers]
-    frequencies = [k["occurrence"] for k in important_kmers]  # in %
-    sigmas = [k["sigma"] for k in important_kmers]
-    directions = [k["direction"] for k in important_kmers]
-    # X-locations
-    ax_bar.set_title("Frequency vs. σ from Mean")
-    ax_bar.set_xticklabels(kmers, rotation=45, ha='right')
-    ax_bar.legend(lines1 + lines2, labels1 + labels2, loc=
-    # 3. Top Feature Importances (Bottom, spanning both columns)
-    ax_imp = plt.subplot(gs[1, :])
-    plt.suptitle(title, fontsize=14, y=1.02)
+def create_shap_waterfall_plot(important_kmers, all_kmer_importance, human_prob, title):
     """
+    Create a SHAP-like waterfall plot:
+    - Start at baseline = 0.5
+    - Add a bar for "Other" which is the combined effect of all less-important k-mers
+    - Then apply each of the top k-mers in descending order of absolute importance
+    - Show final predicted human probability as the endpoint
     """
 
+    # 1) Sort 'important_kmers' by absolute impact descending
+    sorted_kmers = sorted(important_kmers, key=lambda x: x['impact'], reverse=True)
 
+    # 2) Compute the total effect of "other" k-mers.
+    #    We have 256 total features and selected the top 10; sum the rest.
+    top_ids = set([km['idx'] for km in sorted_kmers])
+    other_contributions = []
+    for i, val in enumerate(all_kmer_importance):
+        if i not in top_ids:
+            other_contributions.append(val)
+    other_sum = np.sum(other_contributions)
+    # The "impact" for "Other" is the absolute value; direction depends on sign
+    other_impact = float(abs(other_sum))
+    other_direction = "human" if other_sum > 0 else "non-human"
+
+    # 3) Build a list of all bars: first "Other", then each top k-mer.
+    #    We store (label, contribution); the sign indicates direction.
+    bars = []
+    bars.append(("Other", other_sum))  # lumps the leftover k-mers
+
+    for km in sorted_kmers:
+        # Re-inject the sign on the raw gradient (only the absolute value
+        # was stored in "impact", so create a signed value)
+        signed_val = km['impact'] if km['direction'] == 'human' else -km['impact']
+        bars.append((km['kmer'], signed_val))
+
+    # 4) Waterfall plot data: accumulate partial sums from baseline = 0.5
     baseline = 0.5
+    running_val = baseline
+    x_labels = []
+    y_vals = []
+    bar_colors = []
+
+    # Green for positive contributions (pushing toward 'human'),
+    # red for negative contributions (pushing away from 'human')
+    for (label, contrib) in bars:
+        x_labels.append(label)
+        # New value after adding this contribution,
+        # scaled by 0.05 for better display. Adjust as desired.
+        new_val = running_val + (0.05 * contrib)
+        y_vals.append((running_val, new_val))
+        running_val = new_val
+        if contrib >= 0:
+            bar_colors.append("green")
+        else:
+            bar_colors.append("red")
 
+    final_prob = running_val
+    # The endpoint approximates the model's predicted probability (not always
+    # exact, but this is the SHAP-like idea). To force final_prob == human_prob:
+    #     correction = human_prob - running_val
+    #     running_val += correction
+    # For now, keep the waterfall purely additive from the gradient.
 
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    # Create the bars manually
+    x_positions = np.arange(len(x_labels))
+
+    for i, ((start_val, end_val), color) in enumerate(zip(y_vals, bar_colors)):
+        # The bar's height is the difference
+        height = end_val - start_val
+        ax.bar(i, height, bottom=start_val, color=color, edgecolor='black', alpha=0.7)
+        ax.text(i, (start_val + end_val) / 2, f"{height:+.3f}", ha='center', va='center', color='white', fontsize=8)
+
+    ax.axhline(y=baseline, color='black', linestyle='--', linewidth=1)
+    ax.set_xticks(x_positions)
+    ax.set_xticklabels(x_labels, rotation=45, ha='right')
+    ax.set_ylim(0, 1)
+    ax.set_ylabel("Running Probability (Human)")
+    ax.set_title(f"SHAP-like Waterfall — Final Probability: {final_prob:.3f} (Model Probability: {human_prob:.3f})")
 
+    plt.tight_layout()
+    return fig
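Because each step is scaled by a fixed 0.05, the waterfall's endpoint only approximates the model output, as the comments in the function concede. A hypothetical alternative, not what this commit does, would normalize the contributions so the last bar lands exactly on human_prob:

    # Hypothetical replacement for the fixed 0.05 step scaling:
    total = sum(contrib for _, contrib in bars)
    scale = (human_prob - baseline) / total if total != 0 else 0.0
    # With new_val = running_val + scale * contrib, the contributions sum to
    # human_prob - baseline, so the waterfall ends exactly at human_prob.
    # Caveat: if total and (human_prob - baseline) have opposite signs, the
    # scale is negative and flips bar directions; one reason to keep a fixed scale.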
 
+def create_frequency_sigma_plot(important_kmers, title):
+    """Creates a bar plot of the top k-mers (by importance) showing frequency (%) and σ from mean."""
+    # Sort by absolute impact
+    sorted_kmers = sorted(important_kmers, key=lambda x: x['impact'], reverse=True)
+    kmers = [k["kmer"] for k in sorted_kmers]
+    frequencies = [k["occurrence"] for k in sorted_kmers]  # in %
+    sigmas = [k["sigma"] for k in sorted_kmers]
+    directions = [k["direction"] for k in sorted_kmers]
 
     x = np.arange(len(kmers))
     width = 0.4
 
+    fig, ax_bar = plt.subplots(figsize=(10, 6))
+
+    # Bar for frequency
+    bars_freq = ax_bar.bar(
+        x - width/2, frequencies, width, alpha=0.7,
+        color=["green" if d == "human" else "red" for d in directions],
+        label="Frequency (%)"
+    )
     ax_bar.set_ylabel("Frequency (%)")
     ax_bar.set_ylim(0, max(frequencies) * 1.2 if frequencies else 1)
 
     # Twin axis for σ
     ax_bar_twin = ax_bar.twinx()
+    bars_sigma = ax_bar_twin.bar(
+        x + width/2, sigmas, width, alpha=0.5, color="gray", label="σ from Mean"
+    )
     ax_bar_twin.set_ylabel("Standard Deviations (σ)")
 
+    ax_bar.set_title(f"Frequency & σ from Mean for Top k-mers — {title}")
     ax_bar.set_xticks(x)
+    ax_bar.set_xticklabels(kmers, rotation=45, ha='right')
+
+    # Combined legend
     lines1, labels1 = ax_bar.get_legend_handles_labels()
     lines2, labels2 = ax_bar_twin.get_legend_handles_labels()
+    ax_bar.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
 
+    plt.tight_layout()
+    return fig
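The σ values on the twin axis are just the scaled features, so each bar reads as "how unusual this k-mer's frequency is relative to the training set", assuming the pickled scaler is a StandardScaler as the code comments suggest. A worked illustration with made-up numbers:

    # Illustration only (hypothetical training statistics):
    # if the training mean frequency of 'ACGT' is 0.0040 with std 0.0010,
    # a sequence where 'ACGT' occurs at frequency 0.0062 plots at
    #     sigma = (0.0062 - 0.0040) / 0.0010 = +2.2   (2.2σ above the mean)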
+
+def create_importance_bar_plot(important_kmers, title):
+    """
+    Create a simple bar chart showing the absolute gradient magnitude
+    for the top k-mers, sorted descending.
+    """
     sorted_kmers = sorted(important_kmers, key=lambda x: x['impact'], reverse=True)
+    kmers = [k['kmer'] for k in sorted_kmers]
+    impacts = [k['impact'] for k in sorted_kmers]
+    directions = [k["direction"] for k in sorted_kmers]
 
+    x = np.arange(len(kmers))
+
+    fig, ax = plt.subplots(figsize=(10, 6))
+    bar_colors = ["green" if d == "human" else "red" for d in directions]
 
+    ax.bar(x, impacts, color=bar_colors, alpha=0.7)
+    ax.set_xticks(x)
+    ax.set_xticklabels(kmers, rotation=45, ha='right')
+    ax.set_title(f"Absolute Feature Importance (Top k-mers) — {title}")
+    ax.set_ylabel("Gradient Magnitude")
+    ax.grid(axis="y", alpha=0.3)
 
     plt.tight_layout()
     return fig
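The hunk header above shows the signature sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray, but its body sits outside this diff. A plausible minimal implementation, consistent with predict() multiplying entries by 100 to get percentages (so the vector holds normalized frequencies); the sliding-window counting is an assumption:

    from itertools import product

    import numpy as np

    def sequence_to_kmer_vector_sketch(sequence: str, k: int = 4) -> np.ndarray:
        """Hypothetical stand-in for the app's sequence_to_kmer_vector."""
        kmers = [''.join(p) for p in product("ACGT", repeat=k)]
        index = {km: i for i, km in enumerate(kmers)}
        vec = np.zeros(len(kmers), dtype=np.float64)
        for i in range(len(sequence) - k + 1):
            window = sequence[i:i + k].upper()
            if window in index:  # windows containing N or other symbols are skipped
                vec[index[window]] += 1.0
        total = vec.sum()
        return vec / total if total > 0 else vec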
@@ -224,149 +257,213 @@ def create_visualization(important_kmers, human_prob, title):
-    Main function
-    1. Reads the uploaded FASTA file
-    # Read text from file
-    kmers = [''.join(p) for p in product("ACGT", repeat=k)]
-    kmer_dict = {km: i for i, km in enumerate(kmers)}
-    # Load model & scaler
-    except Exception as e:
-        return f"Error loading model or scaler: {str(e)}", None
-    try:
-        # Parse FASTA
-        sequences = parse_fasta(text)
-        if len(sequences) == 0:
-            return "No valid FASTA sequences found. Please check your input.", None
-        header, seq = sequences[0]  # For simplicity, we'll only classify the first sequence
-        raw_freq_vector = sequence_to_kmer_vector(seq)
-        kmer_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
-        X_tensor = torch.FloatTensor(kmer_vector).to(device)
-        # Inference
-            direction = 'human' if kmer_importance[idx] > 0 else 'non-human'
-            freq = float(raw_freq_vector[idx] * 100)  # frequency in %
-            sigma = float(kmer_vector[0][idx])  # scaled value (Z-score if standard scaler)
-        human_prob = float(probs[0][1])
-        non_human_prob = float(probs[0][0])
-        conf = float(max(probs[0]))  # confidence in the predicted class
-        # Generate text results
-        results_text = (
-            f"occurrence = {occurrence_text}, "
-            f"({sigma_text})\n"
-        return results_text, plot_image

 ###############################################################################
 def predict(file_obj):
     """
+    Main function for Gradio:
+    1. Reads the uploaded FASTA file or text.
     2. Loads the model and scaler.
     3. Generates predictions, probabilities, and top k-mers.
+    4. Returns multiple outputs:
+       - A textual summary (Markdown).
+       - Waterfall plot.
+       - Frequency & sigma plot.
+       - Absolute importance bar plot.
     """
+    # 0. Basic file read
     if file_obj is None:
+        return (
+            "Please upload a FASTA file.",
+            None,
+            None,
+            None
+        )
 
     try:
+        # If user provided raw text, use that
         if isinstance(file_obj, str):
             text = file_obj
         else:
+            # If user uploaded a file, decode it
             text = file_obj.decode('utf-8')
     except Exception as e:
+        return (
+            f"Error reading file: {str(e)}",
+            None,
+            None,
+            None
+        )
 
+    # 1. Parse FASTA
+    sequences = parse_fasta(text)
+    if len(sequences) == 0:
+        return (
+            "No valid FASTA sequences found. Please check your input.",
+            None,
+            None,
+            None
+        )
+    # We'll just classify the first sequence for demonstration
+    header, seq = sequences[0]
+
+    # 2. Create k-mer vector & load model
     k = 4
     try:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        # Prepare raw freq vector & scale
+        raw_freq_vector = sequence_to_kmer_vector(seq, k=k)
+
+        # Load model & scaler
+        model = VirusClassifier(input_shape=4**k).to(device)
         state_dict = torch.load('model.pt', map_location=device)
         model.load_state_dict(state_dict)
         scaler = joblib.load('scaler.pkl')
         model.eval()
 
+        scaled_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
+        X_tensor = torch.FloatTensor(scaled_vector).to(device)
 
+        # 3. Inference
         with torch.no_grad():
+            logits = model(X_tensor)
+            probs = torch.softmax(logits, dim=1)
+        human_prob = float(probs[0][1])
+        non_human_prob = float(probs[0][0])
+        pred_class = 1 if human_prob >= non_human_prob else 0
+        pred_label = "human" if pred_class == 1 else "non-human"
+        confidence = float(max(probs[0]))
 
+        # 4. Feature importance
         importance, hum_prob_grad = model.get_feature_importance(X_tensor)
+        # shape: [1, 256]
+        kmer_importances = importance[0].cpu().numpy()
+
+        # Build the k-mer strings in canonical order (index -> k-mer)
+        kmers_list = [''.join(p) for p in product("ACGT", repeat=k)]
+        kmer_dict = {km: i for i, km in enumerate(kmers_list)}
 
+        # 5. Get the top 10 k-mers by absolute importance
+        abs_importance = np.abs(kmer_importances)
         top_k = 10
+        top_idxs = np.argsort(abs_importance)[-top_k:][::-1]  # descending
         important_kmers = []
+        for idx in top_idxs:
+            kmer_str = kmers_list[idx]
+            direction = "human" if kmer_importances[idx] > 0 else "non-human"
+            # frequency in % from raw_freq_vector
+            freq_percent = float(raw_freq_vector[idx] * 100)
+            # sigma from the scaled vector (Z-score under a standard scaler)
+            sigma_val = float(scaled_vector[0][idx])
             important_kmers.append({
+                'kmer': kmer_str,
+                'idx': idx,
+                'impact': float(abs_importance[idx]),
                 'direction': direction,
+                'occurrence': freq_percent,
+                'sigma': sigma_val
             })
 
+        # 6. Text Summary
+        summary_text = (
             f"**Sequence Header**: {header}\n\n"
             f"**Predicted Label**: {pred_label}\n"
+            f"**Confidence**: {confidence:.4f}\n\n"
             f"**Human Probability**: {human_prob:.4f}\n"
             f"**Non-human Probability**: {non_human_prob:.4f}\n\n"
             "### Most Influential k-mers:\n"
         )
+        for km in important_kmers:
+            direction_text = f"(pushes toward {km['direction']})"
+            freq_text = f"{km['occurrence']:.2f}%"
+            sigma_text = f"{abs(km['sigma']):.2f}σ " + ("above" if km['sigma'] > 0 else "below") + " mean"
+            summary_text += (
+                f"- **{km['kmer']}**: impact={km['impact']:.4f}, {direction_text}, "
+                f"occurrence={freq_text}, ({sigma_text})\n"
             )
 
+        # 7. Plots
+        # a) SHAP-like Waterfall Plot
+        fig_waterfall = create_shap_waterfall_plot(
+            important_kmers,
+            kmer_importances,
+            human_prob,
+            f"{header}"
+        )
+        buf1 = io.BytesIO()
+        fig_waterfall.savefig(buf1, format='png', bbox_inches='tight', dpi=120)
+        buf1.seek(0)
+        waterfall_img = Image.open(buf1)
+        plt.close(fig_waterfall)
+
+        # b) Frequency & σ Plot (top 10 k-mers)
+        fig_freq_sigma = create_frequency_sigma_plot(
+            important_kmers,
+            f"{header}"
+        )
+        buf2 = io.BytesIO()
+        fig_freq_sigma.savefig(buf2, format='png', bbox_inches='tight', dpi=120)
+        buf2.seek(0)
+        freq_sigma_img = Image.open(buf2)
+        plt.close(fig_freq_sigma)
+
+        # c) Absolute Importance Bar Plot
+        fig_imp = create_importance_bar_plot(
+            important_kmers,
+            f"{header}"
+        )
+        buf3 = io.BytesIO()
+        fig_imp.savefig(buf3, format='png', bbox_inches='tight', dpi=120)
+        buf3.seek(0)
+        importance_img = Image.open(buf3)
+        plt.close(fig_imp)
+
+        return summary_text, waterfall_img, freq_sigma_img, importance_img
+
     except Exception as e:
+        return (
+            f"Error during prediction or visualization: {str(e)}",
+            None,
+            None,
+            None
+        )
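parse_fasta(text) is called above but defined outside the changed region. A minimal sketch of what it presumably does, returning a list of (header, sequence) tuples as the unpacking header, seq = sequences[0] requires; the implementation details are assumptions:

    from typing import List, Tuple

    def parse_fasta_sketch(text: str) -> List[Tuple[str, str]]:
        """Hypothetical stand-in for the app's parse_fasta."""
        sequences, header, chunks = [], None, []
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                if header is not None:
                    sequences.append((header, "".join(chunks)))
                header, chunks = line[1:].strip(), []
            else:
                chunks.append(line.upper())
        if header is not None:
            sequences.append((header, "".join(chunks)))
        return sequences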
 
 ###############################################################################
 # Gradio Interface
 ###############################################################################
+with gr.Blocks(title="Advanced Virus Host Classifier") as demo:
+    gr.Markdown(
+        """
+        # Advanced Virus Host Classifier
+        **Upload a FASTA file** containing a single nucleotide sequence.
+        The model will predict whether this sequence is **human** or **non-human**,
+        provide a confidence score, and highlight the most influential k-mers
+        (using a SHAP-like waterfall plot) along with two additional plots.
+        """
+    )
+
+    with gr.Row():
+        file_in = gr.File(label="Upload FASTA", type="binary")
+        btn = gr.Button("Run Prediction")
+
+    # We will create multiple tabs for our outputs
+    with gr.Tabs():
+        with gr.Tab("Prediction Results"):
+            md_out = gr.Markdown()
+        with gr.Tab("SHAP-like Waterfall Plot"):
+            water_out = gr.Image()
+        with gr.Tab("Frequency & σ Plot"):
+            freq_out = gr.Image()
+        with gr.Tab("Importance Bar Plot"):
+            imp_out = gr.Image()
+
+    # Link the button
+    btn.click(
+        fn=predict,
+        inputs=[file_in],
+        outputs=[md_out, water_out, freq_out, imp_out]
+    )
 
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
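For completeness, a Space running this app would also need its dependencies declared. An assumed requirements.txt, not part of this commit, inferred only from the imports visible above:

    # requirements.txt (assumed; not shown in this commit)
    torch
    gradio
    numpy
    matplotlib
    scikit-learn   # provides the pickled StandardScaler loaded via joblib
    joblib
    Pillow         # Image.open on the rendered PNG buffers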