File size: 4,218 Bytes
90b1023
f809a7e
90b1023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f809a7e
23e3a2e
90b1023
419f959
90b1023
 
23e3a2e
90b1023
23e3a2e
90b1023
23e3a2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90b1023
0daa8ac
 
 
90b1023
 
0daa8ac
 
23e3a2e
0daa8ac
 
 
 
 
 
23e3a2e
0daa8ac
 
23e3a2e
0daa8ac
 
23e3a2e
 
0daa8ac
419f959
 
90b1023
d6d387a
23e3a2e
 
90b1023
 
 
 
 
 
 
419f959
23e3a2e
 
0daa8ac
90b1023
 
 
 
 
 
 
 
 
d6d387a
90b1023
 
 
23e3a2e
 
 
 
90b1023
 
23e3a2e
 
90b1023
 
 
 
0daa8ac
23e3a2e
90b1023
 
 
23e3a2e
0daa8ac
90b1023
 
 
23e3a2e
 
 
90b1023
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os

# --- Load Model ---
# ChatNT multimodal DNA/English pipeline. trust_remote_code=True is required
# because the pipeline implementation ships inside the model repository.
# NOTE(review): this executes code from the Hub — consider pinning a revision.
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)

# --- Logs ---
# Append-only log file; written by run_chatnt and read back by Log for the UI.
log_file = "logs.txt"

class Log:
    """Callable that returns the current contents of a log file.

    Instances are callable so a UI component can invoke them to refresh
    its displayed value; a missing file yields an empty string.
    """

    def __init__(self, log_file):
        # Path of the log file to read on each call.
        self.log_file = log_file

    def __call__(self):
        # Nothing logged yet — report an empty log rather than failing.
        if not os.path.exists(self.log_file):
            return ""
        with open(self.log_file, "r") as handle:
            contents = handle.read()
        return contents

# --- Main Function ---
def _parse_fasta(fasta_text):
    """Parse FASTA-formatted text into a list of DNA sequence strings.

    Header lines (starting with ">") delimit records; sequence lines within a
    record are concatenated. Blank lines are ignored. Returns [] for empty or
    None input.
    """
    sequences = []
    current = []
    for raw_line in (fasta_text or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # New record header: flush any sequence accumulated so far.
            if current:
                sequences.append("".join(current))
                current = []
        else:
            current.append(line)
    if current:
        sequences.append("".join(current))
    return sequences


def _log(*messages):
    """Append each message (newline-terminated) to the shared log file."""
    with open(log_file, "a") as log:
        for message in messages:
            log.write(message + "\n")


@spaces.GPU
def run_chatnt(fasta_text, custom_question):
    """Answer an English question about DNA sequences pasted in FASTA format.

    Args:
        fasta_text: DNA sequences in FASTA format (may be empty or None).
        custom_question: English question. It may contain one "<DNA>"
            placeholder per sequence; for a single sequence with no
            placeholder, " <DNA>" is appended automatically.

    Returns:
        The model output, or a human-readable error string when the input
        is invalid.
    """
    _log("Request started\n")

    if not custom_question or custom_question.strip() == "":
        return "Please provide a question."

    dna_sequences = _parse_fasta(fasta_text)
    if not dna_sequences:
        return "No DNA sequences found in the input."

    _log(*(f"DNA sequence {i + 1} : {seq}" for i, seq in enumerate(dna_sequences)))

    # Build prompt: the model expects exactly one <DNA> placeholder per
    # DNA sequence in the English prompt.
    num_sequences = len(dna_sequences)
    num_placeholders = custom_question.count("<DNA>")

    if num_sequences == 1:
        if num_placeholders == 0:
            english_sequence = custom_question + " <DNA>"
        elif num_placeholders == 1:
            english_sequence = custom_question
        else:
            return "Too many <DNA> placeholders for a single DNA sequence."
    else:
        # Bug fix: the original message said "only N placeholders" even when
        # there were MORE placeholders than sequences; report the mismatch
        # neutrally. (The original trailing "no sequences" branch was
        # unreachable — the empty case returns above.)
        if num_placeholders != num_sequences:
            return (
                f"You provided {num_sequences} DNA sequences but "
                f"{num_placeholders} <DNA> placeholders. Please specify one "
                "<DNA> for each sequence."
            )
        english_sequence = custom_question

    _log(
        f"Initial user question : {custom_question}",
        f"Full english prompt : {english_sequence}",
        "Calling model",
    )

    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences,
        }
    )

    _log(f"Output : {output}")

    return output

# --- Gradio Interface ---
# Minimal CSS overrides: sans-serif font, black primary buttons, and the
# default Gradio footer hidden.
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: FASTA input, English question, and submit button.
            fasta_text = gr.Textbox(
                label="Paste your DNA sequences in FASTA format",
                placeholder=">seq1\nATGC...\n>seq2\nCGTA...",
                lines=8
            )
            custom_question = gr.Textbox(
                label="English Question",
                placeholder="e.g., Does this sequence contain a donor splice site? <DNA>"
            )

            submit_btn = gr.Button("Run Query", variant="primary")

        # NOTE(review): this Row is nested inside the outer Row rather than
        # being a sibling Column — confirm the Answer box is laid out as
        # intended (a gr.Column here may have been meant).
        with gr.Row():
            output = gr.Textbox(label="Answer", lines=6)

    # Wire the button to the inference function defined above.
    submit_btn.click(
        run_chatnt,
        inputs=[fasta_text, custom_question],
        outputs=output,
    )

    # NOTE(review): the bare <DNA> tokens in the quoted example below are
    # parsed as HTML tags by Markdown and may not render — consider wrapping
    # them in backticks like the first occurrence.
    gr.Markdown("""
**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences.  
Example if your FASTA text contains two sequences :  
"Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
    """)

    with gr.Accordion("Logs", open=True):
        # Log instances are callable, so Gradio can invoke log_display to
        # populate/refresh the Markdown value from logs.txt.
        log_display = Log(log_file)
        gr.Markdown(log_display)

# --- Launch ---
if __name__ == "__main__":
    # Enable request queuing before launching, then serve with verbose
    # error reporting in the UI.
    demo.queue()
    demo.launch(debug=True, show_error=True)