Spaces:

InstaDeepAI
/

ChatNT_demo

Running on Zero

File size: 4,345 Bytes

90b1023
f809a7e
90b1023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f809a7e
cdc5652
90b1023
419f959
90b1023
cdc5652
 
 
 
 
 
 
23e3a2e
 
 
cdc5652
23e3a2e
cdc5652
 
 
 
 
 
90b1023
0daa8ac
cdc5652
 
 
 
 
 
 
 
90b1023
 
0daa8ac
23e3a2e
cdc5652
 
 
0daa8ac
cdc5652
0daa8ac
 
 
cdc5652
90b1023
d6d387a
cdc5652
 
23e3a2e
 
90b1023
 
 
 
 
 
 
cdc5652
 
 
419f959
cdc5652
23e3a2e
0daa8ac
90b1023
 
 
 
 
 
 
 
 
d6d387a
90b1023
 
 
cdc5652
 
 
 
90b1023
cdc5652
 
 
 
 
 
90b1023
23e3a2e
cdc5652
90b1023
 
 
 
0daa8ac
23e3a2e
90b1023
 
 
cdc5652
0daa8ac
90b1023
 
 
cdc5652
 
 
 
90b1023

# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os

# --- Load Model ---
# The pipeline is built once at import time and reused by every request.
# trust_remote_code=True allows transformers to execute the custom pipeline
# code published in the InstaDeepAI/ChatNT Hub repository.
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)

# --- Logs ---
# Path of the plain-text log file: run_chatnt appends to it and the Log
# callable reads it back for display in the UI.
log_file = "logs.txt"

class Log:
    """Callable that returns the current contents of a log file.

    An instance can be handed to a UI component as its value provider: each
    call re-reads the file from disk.
    """

    def __init__(self, log_file):
        """Remember the path of the log file to read on each call."""
        self.log_file = log_file

    def __call__(self):
        """Return the whole log as a string, or "" if the file does not exist."""
        path = self.log_file
        if os.path.exists(path):
            with open(path, "r") as handle:
                contents = handle.read()
            return contents
        # Nothing has been logged yet.
        return ""

# --- Main Function ---
def _append_log(*messages):
    """Append the given messages, in order, to the shared log file."""
    with open(log_file, "a") as log:
        log.writelines(messages)


def _read_uploaded_text(uploaded):
    """Return the text content of a file handed over by the upload widget.

    Handles the shapes an upload can arrive in: a filesystem path (str or
    PathLike), raw bytes, an object exposing the temp-file path as ``.name``,
    or any object with a ``.read()`` method.
    """
    if isinstance(uploaded, bytes):
        raw = uploaded
    elif isinstance(uploaded, (str, os.PathLike)):
        with open(uploaded, "rb") as handle:
            raw = handle.read()
    else:
        path = getattr(uploaded, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            # Re-open by path: the handle we were given may already have been
            # consumed or may not be positioned at the start of the file.
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = uploaded.read()

    if isinstance(raw, bytes):
        # utf-8-sig also accepts plain UTF-8 and drops a leading BOM, which
        # would otherwise stop the first ">" header from being recognised.
        return raw.decode("utf-8-sig")
    return raw.lstrip("\ufeff")


def _parse_fasta(text):
    """Split FASTA text into one sequence string per record.

    Header lines (starting with ">") delimit records and blank lines are
    ignored. Text without any header yields a single record; records with no
    sequence lines are dropped.
    """
    records = []
    current = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            if current:
                records.append("".join(current))
            current = []
        else:
            current.append(line)
    if current:
        records.append("".join(current))
    return records


@spaces.GPU
def run_chatnt(dna_text, fasta_file, custom_question):
    """Answer an English question about a single DNA sequence with ChatNT.

    Args:
        dna_text: DNA sequence pasted by the user; takes precedence over
            ``fasta_file`` when it is non-blank.
        fasta_file: Uploaded FASTA/plain-text file (path, bytes or file-like
            object), or None.
        custom_question: The user's question. When a DNA sequence is supplied
            it must contain the ``<DNA>`` token exactly once.

    Returns:
        The pipeline output, or a human-readable validation message when the
        inputs are not usable. When no DNA sequence is supplied the output is
        returned as a string with a warning note appended.
    """
    _append_log("Request started\n\n")

    # Collect the DNA sequence(s) from the text field or, failing that, the file.
    if dna_text and dna_text.strip():
        # Strip every line so CRLF line endings and stray indentation do not
        # leak into the sequence.
        dna_sequences = ["".join(line.strip() for line in dna_text.splitlines())]
    elif fasta_file is not None:
        # One entry per FASTA record, so a multi-record upload is rejected
        # below instead of being silently concatenated into one sequence.
        dna_sequences = _parse_fasta(_read_uploaded_text(fasta_file))
    else:
        dna_sequences = []

    _append_log(f"DNA sequences found: {dna_sequences}\n")

    # Validate inputs.
    if len(dna_sequences) > 1:
        return "You must use only one DNA sequence."

    if not custom_question or not custom_question.strip():
        return "Please provide a question."

    if dna_sequences:
        num_placeholders = custom_question.count("<DNA>")
        if num_placeholders == 0:
            return "Your question must include the <DNA> token at the position where the DNA sequence should be inserted."
        if num_placeholders > 1:
            return "You can only provide one DNA sequence, so you must use exactly one <DNA> placeholder."
    # NOTE(review): a question containing <DNA> but no sequence is passed to
    # the model unchanged, as before — confirm the pipeline tolerates that.

    # The question is used verbatim as the prompt.
    english_sequence = custom_question

    _append_log(
        f"Initial user question: {custom_question}\n",
        f"Full english prompt: {english_sequence}\n",
        "Calling model\n",
    )

    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences,
        }
    )

    # Log the answer before any early return so every request is traceable.
    _append_log(f"Output: {output}\n")

    if not dna_sequences:
        return f"{output}\n\nNote: Careful, you did not provide any DNA sequence."

    return output

# --- Gradio Interface ---
# Custom styling: sans-serif font, black buttons, and a hidden Gradio footer.
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")

    with gr.Row():
        # Input column: the sequence (pasted or uploaded) and the question.
        with gr.Column(scale=1):
            dna_text = gr.Textbox(
                label="Paste your DNA sequence",
                placeholder="ATGCATGCATGC...",
                lines=4
            )

            fasta_file = gr.File(
                label="Or upload your FASTA file",
                file_types=[".fasta", ".fa", ".txt"]
            )

            custom_question = gr.Textbox(
                label="English Question",
                placeholder="e.g., Does this sequence <DNA> contain a donor splice site?"
            )

            submit_btn = gr.Button("Run Query", variant="primary")

        # Output area showing the model's answer or a validation message.
        with gr.Row():
            output = gr.Textbox(label="Answer", lines=6)

    # Wire the button to the inference function.
    submit_btn.click(
        run_chatnt,
        inputs=[dna_text, fasta_file, custom_question],
        outputs=output,
    )

    # The example escapes the token as &lt;DNA&gt;: a raw <DNA> outside a code
    # span is parsed as an HTML tag and would not be displayed.
    gr.Markdown("""
**Note:**  
✅ You must use **exactly one DNA sequence** (either paste it or upload a file).  
✅ Your question must include the `<DNA>` token **exactly once** at the position where the DNA will be inserted.  
Example: *"Does this sequence &lt;DNA&gt; contain a donor splice site?"*
    """)

    with gr.Accordion("Logs", open=True):
        # A callable is passed as the Markdown value, so the log file is
        # re-read whenever the component's value is (re)computed by Gradio.
        log_display = Log(log_file)
        gr.Markdown(log_display)

# --- Launch ---
if __name__ == "__main__":
    # Only start the server when executed as a script, not when imported.
    launch_options = {"debug": True, "show_error": True}
    demo.queue()  # enable request queuing before launching
    demo.launch(**launch_options)