# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os
# --- Load Model ---
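# ChatNT exposes a custom pipeline (hence trust_remote_code=True); as used below, it
# takes an English question containing <DNA> placeholders plus a list of DNA sequences.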
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)
# --- Logs ---
log_file = "logs.txt"
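# Small callable helper: Gradio re-evaluates callable component values on page load,
# so passing a Log instance to gr.Markdown displays the current contents of the log file.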
class Log:
    def __init__(self, log_file):
        self.log_file = log_file

    def __call__(self):
        if not os.path.exists(self.log_file):
            return ""
        with open(self.log_file, "r") as f:
            return f.read()
# --- Main Function ---
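# @spaces.GPU requests a ZeroGPU slot per call, so the Space only holds a GPU while
# a request is actually being processed.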
@spaces.GPU
def run_chatnt(input_file, custom_question):
    with open(log_file, "a") as log:
        log.write("Request started\n\n")

    if not custom_question or custom_question.strip() == "":
        return None

    # Read DNA sequences
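    # A FASTA file alternates header lines (starting with ">") and sequence lines, e.g.:
    #   >seq1
    #   ATGCCGTA
    #   >seq2
    #   GGGTTTCA
    # Each ">" header starts a new record; its sequence lines are concatenated below.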
    dna_sequences = []
    if input_file is not None:
        with open(input_file.name, "r") as f:
            sequence = ""
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith(">"):
                    if sequence:
                        dna_sequences.append(sequence)
                        sequence = ""
                else:
                    sequence += line
            if sequence:
                dna_sequences.append(sequence)

    with open(log_file, "a") as log:
        for i, seq in enumerate(dna_sequences):
            log.write(f"DNA sequence {i+1} : {seq}\n")
    # Build prompt
    num_sequences = len(dna_sequences)
    num_placeholders = custom_question.count("<DNA>")
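    # Example: with one uploaded sequence, "Does this sequence contain a donor splice
    # site?" becomes "Does this sequence contain a donor splice site? <DNA>".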
    if num_sequences == 1:
        # With a single DNA sequence, append <DNA> at the end if it was not specified
        if num_placeholders == 0:
            english_sequence = custom_question + " <DNA>"
        elif num_placeholders == 1:
            english_sequence = custom_question
        else:
            raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
    elif num_sequences > 1:
        # With multiple DNA sequences, the user must mark the position of every
        # sequence with its own <DNA> placeholder
        if num_placeholders != num_sequences:
            raise ValueError(
                f"You provided {num_sequences} DNA sequences but {num_placeholders} "
                "<DNA> placeholders. Please include exactly one <DNA> per sequence."
            )
        english_sequence = custom_question
    else:
        return None
    with open(log_file, "a") as log:
        log.write(f"Initial user question : {custom_question}\n")
        log.write(f"Full english prompt : {english_sequence}\n")

    # Call model
    with open(log_file, "a") as log:
        log.write("Calling model\n")

    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences,
        }
    )

    with open(log_file, "a") as log:
        log.write(f"Output : {output}\n")

    return output
# --- Gradio Interface ---
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""
with gr.Blocks(css=css) as demo:
gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")
with gr.Row():
with gr.Column(scale=1):
input_file = gr.File(
label="Upload DNA Sequence File (.fasta)",
file_types=[".fasta", ".fa"]
)
custom_question = gr.Textbox(
label="English Question (required)",
placeholder="e.g., Does this sequence contain a donor splice site?"
)
submit_btn = gr.Button("Run Query", variant="primary")
with gr.Row():
output = gr.Textbox(label="Output Text", lines=6)
submit_btn.click(
run_chatnt,
inputs=[input_file, custom_question],
outputs=output,
)
gr.Markdown("""
**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
""")
    with gr.Accordion("Logs", open=True):
        log_display = Log(log_file)
        gr.Markdown(log_display)
# --- Launch ---
if __name__ == "__main__":
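    # queue() enables Gradio's request queue so concurrent users are served in turn.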
    demo.queue()
    demo.launch(debug=True, show_error=True)