Spaces:

hiyata
/

HostClassifier

Running

App Files Files Community

hiyata committed on Jan 11

Commit

b5edb58

verified ·

1 Parent(s): 3b775b7

Update app.py

Browse files

Files changed (1) hide show

app.py +186 -393

app.py CHANGED Viewed

@@ -2,17 +2,12 @@ import gradio as gr
 import torch
 import joblib
 import numpy as np
-import shap
-import random
 from itertools import product
 import torch.nn as nn
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
-###############################################################################
-# Model Definition
-###############################################################################
 class VirusClassifier(nn.Module):
     def __init__(self, input_shape: int):
         super(VirusClassifier, self).__init__()
@@ -34,28 +29,38 @@ class VirusClassifier(nn.Module):
         return self.network(x)
     def get_feature_importance(self, x):
-        """
-        Calculate gradient-based feature importance, specifically for the
-        'human' class (index=1) by computing gradient of that probability wrt x.
-        """
         x.requires_grad_(True)
         output = self.network(x)
         probs = torch.softmax(output, dim=1)
-        # Probability of 'human' class (index=1)
         human_prob = probs[..., 1]
         if x.grad is not None:
             x.grad.zero_()
         human_prob.backward()
-        importance = x.grad  # shape: (batch_size, n_features)
         return importance, float(human_prob)
-###############################################################################
-# Utility Functions
-###############################################################################
 def parse_fasta(text):
-    """Parses text input in FASTA format into a list of (header, sequence)."""
     sequences = []
     current_header = None
     current_sequence = []
@@ -75,213 +80,97 @@ def parse_fasta(text):
         sequences.append((current_header, ''.join(current_sequence)))
     return sequences
-def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
-    """Convert a single nucleotide sequence to a k-mer frequency vector."""
-    kmers = [''.join(p) for p in product("ACGT", repeat=k)]
-    kmer_dict = {km: i for i, km in enumerate(kmers)}
-    vec = np.zeros(len(kmers), dtype=np.float32)
-    for i in range(len(sequence) - k + 1):
-        kmer = sequence[i:i+k]
-        if kmer in kmer_dict:
-            vec[kmer_dict[kmer]] += 1
-    total_kmers = len(sequence) - k + 1
-    if total_kmers > 0:
-        vec = vec / total_kmers  # normalize frequencies
-    return vec
-###############################################################################
-# Additional Plots
-###############################################################################
-def create_probability_bar_plot(prob_human, prob_nonhuman):
-    """
-    Simple bar plot comparing human vs. non-human probabilities.
-    """
-    labels = ["Non-human", "Human"]
-    probs = [prob_nonhuman, prob_human]
-    colors = ["red", "green"]
-    fig, ax = plt.subplots(figsize=(6, 4))
-    ax.bar(labels, probs, color=colors, alpha=0.7)
-    ax.set_ylim(0, 1)
-    for i, v in enumerate(probs):
-        ax.text(i, v+0.02, f"{v:.3f}", ha='center', color='black', fontsize=11)
-    ax.set_title("Predicted Probabilities")
-    ax.set_ylabel("Probability")
-    plt.tight_layout()
-    return fig
-def create_frequency_sigma_plot(important_kmers, title):
-    """
-    Creates a bar plot of the top k-mers (by importance) showing
-    frequency (%) and σ from mean.
-    """
-    # Sort by absolute impact
-    sorted_kmers = sorted(important_kmers, key=lambda x: x['impact'], reverse=True)
-    kmers = [k["kmer"] for k in sorted_kmers]
-    frequencies = [k["occurrence"] for k in sorted_kmers]  # in %
-    sigmas = [k["sigma"] for k in sorted_kmers]
-    directions = [k["direction"] for k in sorted_kmers]
     x = np.arange(len(kmers))
-    width = 0.4
-    fig, ax_bar = plt.subplots(figsize=(10, 5))
-    # Bar for frequency
-    bars_freq = ax_bar.bar(
-        x - width/2, frequencies, width, alpha=0.7,
-        color=["green" if d=="human" else "red" for d in directions],
-        label="Frequency (%)"
-    )
-    ax_bar.set_ylabel("Frequency (%)")
-    ax_bar.set_ylim(0, max(frequencies) * 1.2 if len(frequencies) > 0 else 1)
-    # Twin axis for σ
-    ax_bar_twin = ax_bar.twinx()
-    bars_sigma = ax_bar_twin.bar(
-        x + width/2, sigmas, width, alpha=0.5, color="gray", label="σ from Mean"
-    )
-    ax_bar_twin.set_ylabel("Standard Deviations (σ)")
-    ax_bar.set_title(f"Frequency & σ from Mean for Top k-mers — {title}")
-    ax_bar.set_xticks(x)
-    ax_bar.set_xticklabels(kmers, rotation=45, ha='right')
-    # Combined legend
-    lines1, labels1 = ax_bar.get_legend_handles_labels()
-    lines2, labels2 = ax_bar_twin.get_legend_handles_labels()
-    ax_bar.legend(lines1 + lines2, labels1 + labels2, loc="upper right")
-    plt.tight_layout()
-    return fig
-def create_importance_bar_plot(important_kmers, title):
-    """
-    Create a simple bar chart showing the absolute gradient magnitude
-    for the top k-mers, sorted descending.
-    """
-    sorted_kmers = sorted(important_kmers, key=lambda x: x['impact'], reverse=True)
-    kmers = [k['kmer'] for k in sorted_kmers]
-    impacts = [k['impact'] for k in sorted_kmers]
-    directions = [k["direction"] for k in sorted_kmers]
-    x = np.arange(len(kmers))
-    fig, ax = plt.subplots(figsize=(10, 5))
-    bar_colors = ["green" if d=="human" else "red" for d in directions]
-    ax.bar(x, impacts, color=bar_colors, alpha=0.7, edgecolor='black')
-    ax.set_xticks(x)
-    ax.set_xticklabels(kmers, rotation=45, ha='right')
-    ax.set_title(f"Absolute Feature Importance (Top k-mers) — {title}")
-    ax.set_ylabel("Gradient Magnitude")
-    ax.grid(axis="y", alpha=0.3)
-    plt.tight_layout()
-    return fig
-###############################################################################
-# SHAP Beeswarm
-###############################################################################
-def create_shap_beeswarm_plot(
-    model,
-    input_vector: np.ndarray,
-    background_data: np.ndarray,
-    feature_names: list
-):
-    """
-    Creates a SHAP beeswarm plot using KernelExplainer for the given model and data.
-    Parameters
-    ----------
-    model : nn.Module
-        Trained PyTorch model (binary classifier).
-    input_vector : np.ndarray
-        The 1-sample input (or multiple samples) we want SHAP values for.
-    background_data : np.ndarray
-        Background samples for KernelExplainer. Should have shape (N, #features).
-    feature_names : list
-        Names for each feature (k-mers).
-    Returns
-    -------
-    fig : matplotlib Figure
-        Beeswarm plot figure.
-    """
-    # We'll define a prediction function that shap can call
-    # The model outputs logits for shape [N, 2]
-    # We want the raw outputs for each class. SHAP will handle the link function if needed.
-    def predict_fn(data):
-        """
-        data: shape (N, #features)
-        returns: shape (N, 2) for 2-class logits
-        """
-        with torch.no_grad():
-            x = torch.FloatTensor(data)
-            logits = model(x)
-            return logits.detach().cpu().numpy()
-    # Create KernelExplainer
-    explainer = shap.KernelExplainer(
-        model=predict_fn,
-        data=background_data
-    )
-    # Compute SHAP values
-    # For a 2-class model, shap_values is a list of length 2 => [class0 array, class1 array]
-    # Each array is shape (N, #features).
-    shap_values = explainer.shap_values(input_vector)
-    # We’ll produce a beeswarm for the 'human' class (class index=1).
-    # If we have only 1 sample, the beeswarm won't be too interesting, but let's do it anyway.
-    class_idx = 1  # 'human'
-    # If we only have one sample, place it in an array for shap summary plotting:
-    # We can do shap_values[class_idx].shape => (1, #features) for a single sample
-    # Beeswarm typically expects multiple samples. We'll plot anyway.
-    shap.plots.beeswarm(
-        shap_values[class_idx],
-        feature_names=feature_names,
-        show=False
-    )
-    fig = plt.gcf()
-    fig.set_size_inches(8, 6)
-    plt.title("SHAP Beeswarm Plot (Class: Human)")
     plt.tight_layout()
     return fig
-###############################################################################
-# Prediction Function
-###############################################################################
 def predict(file_obj):
-    """
-    Main function for Gradio:
-      1. Reads the uploaded FASTA file or text.
-      2. Loads the model and scaler.
-      3. Generates predictions, probabilities, and top k-mers.
-      4. Creates multiple outputs:
-         - Text summary (Markdown)
-         - Probability Bar Plot
-         - SHAP Beeswarm Plot
-         - Frequency & σ Plot
-         - Absolute Feature Importance Bar Plot
-    """
-    # 0. Basic file read
     if file_obj is None:
-        return (
-            "Please upload a FASTA file.",
-            None,
-            None,
-            None,
-            None
-        )
     try:
         if isinstance(file_obj, str):
@@ -289,202 +178,106 @@ def predict(file_obj):
         else:
             text = file_obj.decode('utf-8')
     except Exception as e:
-        return (
-            f"Error reading file: {str(e)}",
-            None,
-            None,
-            None,
-            None
-        )
-    # 1. Parse FASTA
-    sequences = parse_fasta(text)
-    if len(sequences) == 0:
-        return (
-            "No valid FASTA sequences found. Please check your input.",
-            None,
-            None,
-            None,
-            None
-        )
-    header, seq = sequences[0]  # We'll classify only the first sequence
-    # 2. Prepare model, scaler, and input
     k = 4
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     try:
-        raw_freq_vector = sequence_to_kmer_vector(seq, k=k)
-        # Load model & scaler
-        model = VirusClassifier(input_shape=4**k).to(device)
-        state_dict = torch.load("model.pt", map_location=device)
         model.load_state_dict(state_dict)
-        scaler = joblib.load("scaler.pkl")
         model.eval()
-        scaled_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
-        X_tensor = torch.FloatTensor(scaled_vector).to(device)
-        # 3. Predict
         with torch.no_grad():
-            logits = model(X_tensor)
-            probs = torch.softmax(logits, dim=1)
-        human_prob = float(probs[0][1])
-        non_human_prob = float(probs[0][0])
-        pred_label = "human" if human_prob >= non_human_prob else "non-human"
-        confidence = float(max(probs[0]))
-        # 4. Gradient-based feature importance
-        importance, hum_prob_grad = model.get_feature_importance(X_tensor)
-        importances = importance[0].cpu().numpy()  # shape: (#features,)
-        abs_importances = np.abs(importances)
-        # 5. Gather k-mer strings
-        kmers_list = [''.join(p) for p in product("ACGT", repeat=k)]
-        # top 10 by absolute importance
         top_k = 10
-        top_idxs = np.argsort(abs_importances)[-top_k:][::-1]
         important_kmers = []
-        for idx in top_idxs:
-            direction = "human" if importances[idx] > 0 else "non-human"
-            freq_percent = float(raw_freq_vector[idx] * 100.0)
-            sigma_val = float(scaled_vector[0][idx])  # scaled / standardized val
             important_kmers.append({
-                'kmer': kmers_list[idx],
-                'idx': idx,
-                'impact': abs_importances[idx],
                 'direction': direction,
-                'occurrence': freq_percent,
-                'sigma': sigma_val
             })
-        # 6. Generate text summary
-        text_summary = (
-            f"**Sequence Header**: {header}\n\n"
-            f"**Predicted Label**: {pred_label}\n"
-            f"**Confidence**: {confidence:.4f}\n\n"
-            f"**Human Probability**: {human_prob:.4f}\n"
-            f"**Non-human Probability**: {non_human_prob:.4f}\n\n"
-            "### Most Influential k-mers:\n"
-        )
-        for km in important_kmers:
-            direction_text = f"(pushes toward {km['direction']})"
-            freq_text = f"{km['occurrence']:.2f}%"
-            sigma_text = (
-                f"{abs(km['sigma']):.2f}σ "
-                + ("above" if km['sigma'] > 0 else "below")
-                + " mean"
-            )
-            text_summary += (
-                f"- **{km['kmer']}**: impact={km['impact']:.4f}, {direction_text}, "
-                f"occurrence={freq_text}, ({sigma_text})\n"
-            )
-        # 7. Probability Bar Plot
-        fig_prob = create_probability_bar_plot(human_prob, non_human_prob)
-        buf_prob = io.BytesIO()
-        fig_prob.savefig(buf_prob, format='png', bbox_inches='tight', dpi=120)
-        buf_prob.seek(0)
-        prob_img = Image.open(buf_prob)
-        plt.close(fig_prob)
-        # 8. SHAP Beeswarm Plot
-        # We need some background data for KernelExplainer. Let's create a small random sample
-        # or sample from the scaled_vector itself in a repeated manner. Real usage: choose a valid background set.
-        background_size = 5  # keep small for speed
-        # We'll pick random sequences from normal(0,1) or from scaled_vector repeated
-        background_data = []
-        for _ in range(background_size):
-            # Option A: random small variations around scaled_vector
-            # new_sample = scaled_vector[0] + np.random.normal(0, 0.5, size=scaled_vector.shape[1])
-            # Option B: just clone the same scaled vector multiple times
-            new_sample = scaled_vector[0]
-            background_data.append(new_sample)
-        background_data = np.stack(background_data, axis=0)  # shape (5, #features)
-        fig_bee = create_shap_beeswarm_plot(
-            model=model,
-            input_vector=scaled_vector,      # our single sample
-            background_data=background_data, # background for KernelExplainer
-            feature_names=kmers_list
-        )
-        buf_bee = io.BytesIO()
-        fig_bee.savefig(buf_bee, format='png', bbox_inches='tight', dpi=120)
-        buf_bee.seek(0)
-        bee_img = Image.open(buf_bee)
-        plt.close(fig_bee)
-        # 9. Frequency & σ Plot
-        fig_freq = create_frequency_sigma_plot(important_kmers, header)
-        buf_freq = io.BytesIO()
-        fig_freq.savefig(buf_freq, format='png', bbox_inches='tight', dpi=120)
-        buf_freq.seek(0)
-        freq_img = Image.open(buf_freq)
-        plt.close(fig_freq)
-        # 10. Absolute Feature Importance Bar Plot
-        fig_imp = create_importance_bar_plot(important_kmers, header)
-        buf_imp = io.BytesIO()
-        fig_imp.savefig(buf_imp, format='png', bbox_inches='tight', dpi=120)
-        buf_imp.seek(0)
-        imp_img = Image.open(buf_imp)
-        plt.close(fig_imp)
-        return text_summary, prob_img, bee_img, freq_img, imp_img
-    except Exception as e:
-        return (
-            f"Error during prediction or visualization: {str(e)}",
-            None,
-            None,
-            None,
-            None
-        )
-###############################################################################
-# Gradio Interface
-###############################################################################
-with gr.Blocks(title="Advanced Virus Host Classifier with SHAP Beeswarm") as demo:
-    gr.Markdown(
-        """
-        # Advanced Virus Host Classifier (SHAP Beeswarm Edition)
-        **Upload a FASTA file** containing a single nucleotide sequence.
-        The model will predict whether this sequence is **human** or **non-human**,
-        provide a confidence score, and highlight the most influential k-mers.
-        We also produce a **SHAP beeswarm** plot for the features.
-        ---
-        **Note**: Beeswarm plots are usually most insightful with multiple samples.
-        Here, we demonstrate usage with a single sample plus a small synthetic background.
-        """
-    )
-    with gr.Row():
-        file_in = gr.File(label="Upload FASTA", type="binary")
-        btn = gr.Button("Run Prediction")
-    # We will create multiple tabs for our outputs
-    with gr.Tabs():
-        with gr.Tab("Prediction Results"):
-            md_out = gr.Markdown()
-        with gr.Tab("Probability Plot"):
-            prob_out = gr.Image()
-        with gr.Tab("SHAP Beeswarm Plot"):
-            bee_out = gr.Image()
-        with gr.Tab("Frequency & σ Plot"):
-            freq_out = gr.Image()
-        with gr.Tab("Importance Bar Plot"):
-            imp_out = gr.Image()
-    # Link the button
-    btn.click(
-        fn=predict,
-        inputs=[file_in],
-        outputs=[md_out, prob_out, bee_out, freq_out, imp_out]
-    )
 if __name__ == "__main__":
-    # By default, share=False. You can set share=True for external access.
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

 import torch
 import joblib
 import numpy as np
 from itertools import product
 import torch.nn as nn
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 class VirusClassifier(nn.Module):
     def __init__(self, input_shape: int):
         super(VirusClassifier, self).__init__()
         return self.network(x)
     def get_feature_importance(self, x):
         """Calculate gradient-based feature importance for the human class.

         Computes the gradient of the softmax probability of class index 1
         ("human") with respect to the input features.

         The caller's tensor is not modified: the gradient is taken on a
         detached copy, and ``torch.autograd.grad`` is used so that nothing is
         accumulated into the ``.grad`` of the network parameters.

         Args:
             x: Input feature tensor. It must be at least 2-D (softmax is
                 taken over ``dim=1``) and must contain exactly one sample,
                 because the probability is returned as a Python float.

         Returns:
             A tuple ``(importance, human_prob)`` where ``importance`` is a
             tensor with the same shape as ``x`` holding the gradient, and
             ``human_prob`` is the class-1 probability as a float.
         """
         # Private leaf copy: leaves x.requires_grad / x.grad of the caller alone.
         features = x.detach().clone().requires_grad_(True)
         # enable_grad keeps this working even when called inside torch.no_grad().
         with torch.enable_grad():
             output = self.network(features)
             probs = torch.softmax(output, dim=1)
             # Get importance for human class (index 1)
             human_prob = probs[..., 1]
             # .sum() yields a scalar to differentiate; with one sample it is
             # the probability itself.
             (importance,) = torch.autograd.grad(human_prob.sum(), features)
         return importance, float(human_prob)
def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
    """Convert a nucleotide sequence to a k-mer frequency vector.

    The vector has one entry per k-mer over the alphabet ``ACGT``, ordered as
    produced by ``itertools.product("ACGT", repeat=k)``. Matching is
    case-insensitive, so lower-case (e.g. soft-masked) input is counted the
    same as upper-case input instead of silently yielding all zeros.

    Args:
        sequence: The nucleotide sequence.
        k: The k-mer length.

    Returns:
        A ``float32`` array of length ``4**k``. Each entry is the k-mer's
        count divided by the total number of length-``k`` windows in the
        sequence. Windows containing any character outside ``ACGT`` are not
        counted but still contribute to the denominator. If the sequence is
        shorter than ``k`` the vector is all zeros.
    """
    kmer_index = {
        ''.join(letters): position
        for position, letters in enumerate(product("ACGT", repeat=k))
    }
    vec = np.zeros(len(kmer_index), dtype=np.float32)

    sequence = sequence.upper()
    total_kmers = len(sequence) - k + 1
    if total_kmers <= 0:
        # No complete window fits; nothing to count or normalise.
        return vec

    for start in range(total_kmers):
        position = kmer_index.get(sequence[start:start + k])
        if position is not None:
            vec[position] += 1

    return vec / total_kmers
 def parse_fasta(text):
     sequences = []
     current_header = None
     current_sequence = []
         sequences.append((current_header, ''.join(current_sequence)))
     return sequences
def create_visualization(important_kmers, human_prob, title):
    """Create a two-panel figure summarising the most influential k-mers.

    Top panel: a step plot that starts at 0.5 and adds each k-mer's signed
    impact in the order given (negative when ``direction`` is
    ``'non-human'``, positive otherwise). The running total is not clamped,
    so points outside the fixed ``[0, 1]`` y-range are not visible.

    Bottom panel: grouped bars per k-mer, with occurrence (%) on the left
    axis and sigma on a twin right axis.

    All axes are created through the figure's own methods rather than through
    pyplot's implicit "current figure", so the result does not depend on
    which figure happens to be active.

    Args:
        important_kmers: Sequence of dicts with the keys ``'kmer'``,
            ``'impact'``, ``'direction'``, ``'occurrence'`` and ``'sigma'``.
        human_prob: Value shown in the top panel's title.
        title: Currently unused; kept so existing callers keep working.

    Returns:
        The matplotlib figure. It is created via ``plt.figure``, so the
        caller should close it (e.g. ``plt.close(fig)``) when done.
    """
    fig = plt.figure(figsize=(15, 10))

    # Create grid for subplots
    grid = fig.add_gridspec(2, 1, height_ratios=[1.5, 1], hspace=0.3)

    # 1. Probability step plot
    ax_steps = fig.add_subplot(grid[0])

    running = 0.5
    steps = [('Start', running, 0)]
    for entry in important_kmers:
        sign = -1 if entry['direction'] == 'non-human' else 1
        change = entry['impact'] * sign
        running += change
        steps.append((entry['kmer'], running, change))

    step_x = range(len(steps))
    step_y = [value for _, value, _ in steps]

    ax_steps.step(step_x, step_y, 'b-', where='post', label='Probability', linewidth=2)
    ax_steps.plot(step_x, step_y, 'b.', markersize=10)

    # Reference line at the starting value
    ax_steps.axhline(y=0.5, color='r', linestyle='--', label='Neutral (0.5)')

    ax_steps.grid(True, linestyle='--', alpha=0.7)
    ax_steps.set_ylim(0, 1)
    ax_steps.set_ylabel('Human Probability')
    ax_steps.set_title(f'K-mer Contributions to Prediction (final prob: {human_prob:.3f})')

    # Label every point; alternate the offsets so neighbouring labels and
    # change values do not sit on top of each other.
    for i, (label, prob, change) in enumerate(steps):
        even = i % 2 == 0
        ax_steps.annotate(
            label,
            (i, prob),
            xytext=(0, 10 if even else -20),
            textcoords='offset points',
            ha='center',
            rotation=45,
        )
        if i > 0:
            ax_steps.annotate(
                f'{change:+.3f}',
                (i, prob),
                xytext=(0, -20 if even else 10),
                textcoords='offset points',
                ha='center',
                color='green' if change > 0 else 'red',
            )

    ax_steps.legend()

    # 2. K-mer frequency and sigma plot
    ax_freq = fig.add_subplot(grid[1])

    kmers = [entry['kmer'] for entry in important_kmers]
    frequencies = [entry['occurrence'] for entry in important_kmers]
    sigmas = [entry['sigma'] for entry in important_kmers]
    colors = ['g' if entry['direction'] == 'human' else 'r' for entry in important_kmers]

    x = np.arange(len(kmers))
    width = 0.35

    ax_freq.bar(x - width/2, frequencies, width, label='Frequency (%)', color=colors, alpha=0.6)

    ax_sigma = ax_freq.twinx()
    # Sigma bars keep the direction colour only when sigma is positive.
    sigma_colors = [c if s > 0 else 'gray' for c, s in zip(colors, sigmas)]
    ax_sigma.bar(x + width/2, sigmas, width, label='σ from mean', color=sigma_colors, alpha=0.3)

    ax_freq.set_xticks(x)
    ax_freq.set_xticklabels(kmers, rotation=45)
    ax_freq.set_ylabel('Frequency (%)')
    ax_sigma.set_ylabel('Standard Deviations (σ) from Mean')
    ax_freq.set_title('K-mer Frequencies and Statistical Significance')

    # One combined legend for both y-axes
    handles_freq, labels_freq = ax_freq.get_legend_handles_labels()
    handles_sigma, labels_sigma = ax_sigma.get_legend_handles_labels()
    ax_freq.legend(handles_freq + handles_sigma, labels_freq + labels_sigma, loc='upper right')

    fig.tight_layout()
    return fig
 def predict(file_obj):
     if file_obj is None:
+        return "Please upload a FASTA file", None
     try:
         if isinstance(file_obj, str):
         else:
             text = file_obj.decode('utf-8')
     except Exception as e:
+        return f"Error reading file: {str(e)}", None
     k = 4
+    kmers = [''.join(p) for p in product("ACGT", repeat=k)]
+    kmer_dict = {km: i for i, km in enumerate(kmers)}
     try:
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        model = VirusClassifier(256).to(device)
+        state_dict = torch.load('model.pt', map_location=device)
         model.load_state_dict(state_dict)
+        scaler = joblib.load('scaler.pkl')
         model.eval()
+    except Exception as e:
+        return f"Error loading model: {str(e)}", None
+    results_text = ""
+    plot_image = None
+    try:
+        sequences = parse_fasta(text)
+        header, seq = sequences[0]
+        raw_freq_vector = sequence_to_kmer_vector(seq)
+        kmer_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
+        X_tensor = torch.FloatTensor(kmer_vector).to(device)
+        # Get model predictions
         with torch.no_grad():
+            output = model(X_tensor)
+            probs = torch.softmax(output, dim=1)
+        # Get feature importance
+        importance, _ = model.get_feature_importance(X_tensor)
+        kmer_importance = importance[0].cpu().numpy()
+        # Get top k-mers
         top_k = 10
+        top_indices = np.argsort(np.abs(kmer_importance))[-top_k:][::-1]
         important_kmers = []
+        for idx in top_indices:
+            kmer = list(kmer_dict.keys())[list(kmer_dict.values()).index(idx)]
+            imp = float(abs(kmer_importance[idx]))
+            direction = 'human' if kmer_importance[idx] > 0 else 'non-human'
+            freq = float(raw_freq_vector[idx] * 100)  # Convert to percentage
+            sigma = float(kmer_vector[0][idx])
             important_kmers.append({
+                'kmer': kmer,
+                'impact': imp,
                 'direction': direction,
+                'occurrence': freq,
+                'sigma': sigma
             })
+        # Generate text results
+        pred_class = 1 if probs[0][1] > probs[0][0] else 0
+        pred_label = 'human' if pred_class == 1 else 'non-human'
+        human_prob = float(probs[0][1])
+        results_text = f"""Sequence: {header}
+Prediction: {pred_label}
+Confidence: {float(max(probs[0])):0.4f}
+Human probability: {human_prob:0.4f}
+Non-human probability: {float(probs[0][0]):0.4f}
+Most influential k-mers (ranked by importance):"""
+        for kmer in important_kmers:
+            results_text += f"\n  {kmer['kmer']}: "
+            results_text += f"pushes toward {kmer['direction']} (impact={kmer['impact']:.4f}), "
+            results_text += f"occurrence={kmer['occurrence']:.2f}% of sequence "
+            results_text += f"(appears {abs(kmer['sigma']):.2f}σ "
+            results_text += "more" if kmer['sigma'] > 0 else "less"
+            results_text += " than average)"
+        # Create visualization
+        fig = create_visualization(important_kmers, human_prob, header)
+        # Save plot
+        buf = io.BytesIO()
+        fig.savefig(buf, format='png', bbox_inches='tight', dpi=300)
+        buf.seek(0)
+        plot_image = Image.open(buf)
+        plt.close(fig)
+    except Exception as e:
+        return f"Error processing sequences: {str(e)}", None
+    return results_text, plot_image
# Gradio UI: one uploaded FASTA file in, a text summary and one image out.
# The two output components line up with the (text, image) pairs that
# predict returns.
iface = gr.Interface(
    title="Virus Host Classifier",
    fn=predict,
    inputs=gr.File(type="binary", label="Upload FASTA file"),
    outputs=[
        gr.Textbox(label="Results"),
        gr.Image(label="K-mer Analysis Visualization"),
    ],
)

if __name__ == "__main__":
    # share=True asks Gradio to expose the app for external access.
    iface.launch(share=True)