File size: 4,345 Bytes
90b1023
f809a7e
90b1023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f809a7e
cdc5652
90b1023
419f959
90b1023
cdc5652
 
 
 
 
 
 
23e3a2e
 
 
cdc5652
23e3a2e
cdc5652
 
 
 
 
 
90b1023
0daa8ac
cdc5652
 
 
 
 
 
 
 
90b1023
 
0daa8ac
23e3a2e
cdc5652
 
 
0daa8ac
cdc5652
0daa8ac
 
 
cdc5652
90b1023
d6d387a
cdc5652
 
23e3a2e
 
90b1023
 
 
 
 
 
 
cdc5652
 
 
419f959
cdc5652
23e3a2e
0daa8ac
90b1023
 
 
 
 
 
 
 
 
d6d387a
90b1023
 
 
cdc5652
 
 
 
90b1023
cdc5652
 
 
 
 
 
90b1023
23e3a2e
cdc5652
90b1023
 
 
 
0daa8ac
23e3a2e
90b1023
 
 
cdc5652
0daa8ac
90b1023
 
 
cdc5652
 
 
 
90b1023
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os

# --- Load Model ---
# Build the ChatNT pipeline once at import time; run_chatnt reuses this
# module-level object for every request.
# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped in the model repository -- presumably required for this model's
# custom pipeline; confirm, and consider pinning a revision.
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)

# --- Logs ---
# Path of the plain-text log file that run_chatnt appends to and that the
# Log helper reads back for display in the UI.
log_file = "logs.txt"

class Log:
    """Callable that returns the current text of a log file.

    Calling an instance returns the whole file content as a string, or an
    empty string when the file does not exist yet.
    """

    def __init__(self, log_file):
        # Path of the log file to read on each call.
        self.log_file = log_file

    def __call__(self):
        """Return the log file's content, or "" if the file is missing."""
        path = self.log_file
        if os.path.exists(path):
            with open(path, "r") as handle:
                contents = handle.read()
            return contents
        return ""

# --- Main Function ---
def _append_log(*entries):
    """Append the given text entries to the shared log file."""
    # Opened with the platform default encoding on purpose: Log.__call__
    # reads the same file back without an explicit encoding.
    with open(log_file, "a") as log:
        log.writelines(entries)


def _read_uploaded_text(fasta_file):
    """Return the text content of an uploaded file.

    Accepts the shapes an upload can arrive in: raw bytes, a filesystem path
    (str / os.PathLike), an object exposing the path as ``.name``, or a
    file-like object with ``.read()`` returning bytes or str.

    Raises:
        OSError: the path cannot be opened.
        UnicodeDecodeError: the content is not valid UTF-8.
    """
    if isinstance(fasta_file, (bytes, bytearray)):
        return bytes(fasta_file).decode("utf-8")

    if isinstance(fasta_file, (str, os.PathLike)):
        with open(fasta_file, "r", encoding="utf-8") as handle:
            return handle.read()

    # Temp-file wrappers carry the on-disk path in ``.name``; re-opening it
    # avoids depending on the wrapper's current read position or mode.
    path = getattr(fasta_file, "name", None)
    if isinstance(path, str) and os.path.isfile(path):
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    data = fasta_file.read()
    return data.decode("utf-8") if isinstance(data, bytes) else data


def _split_dna_records(text):
    """Split plain or FASTA-formatted text into one sequence string per record.

    Lines starting with ">" are treated as record headers and dropped; all
    whitespace (including "\\r" from Windows line endings) is removed from the
    sequence lines. Text without any header yields at most one record.
    """
    records = []
    current = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # A header closes the record collected so far, if any.
            if current:
                records.append("".join(current))
                current = []
            continue
        current.append("".join(line.split()))
    if current:
        records.append("".join(current))
    return records


@spaces.GPU
def run_chatnt(dna_text, fasta_file, custom_question):
    """Answer an English question about at most one DNA sequence with ChatNT.

    Args:
        dna_text: Sequence pasted by the user (plain or FASTA text). Takes
            precedence over ``fasta_file`` when non-blank.
        fasta_file: Uploaded file, used only when ``dna_text`` is blank. May
            be a path, bytes, or a file-like object; ``None`` if absent.
        custom_question: The English question. When a DNA sequence is given
            it must contain the ``<DNA>`` placeholder exactly once.

    Returns:
        The pipeline output on success (with a warning note appended as a
        string when no DNA sequence was supplied), or a human-readable error
        message string when the inputs are invalid.
    """
    _append_log("Request started\n\n")

    # Read DNA sequence(s) from the text field or, failing that, the file.
    if dna_text and dna_text.strip():
        dna_sequences = _split_dna_records(dna_text)
    elif fasta_file is not None:
        try:
            file_content = _read_uploaded_text(fasta_file)
        except (OSError, UnicodeDecodeError) as err:
            _append_log(f"Could not read uploaded file: {err}\n")
            return "Could not read the uploaded file. Please upload a UTF-8 text or FASTA file."
        dna_sequences = _split_dna_records(file_content)
    else:
        dna_sequences = []

    _append_log(f"DNA sequences found: {dna_sequences}\n")

    # Records are kept separate, so a multi-record FASTA is rejected here
    # instead of being silently concatenated into one sequence.
    if len(dna_sequences) > 1:
        return "You must use only one DNA sequence."

    if not custom_question or not custom_question.strip():
        return "Please provide a question."

    # Build prompt: the placeholder count only matters when DNA is present.
    if dna_sequences:
        num_placeholders = custom_question.count("<DNA>")
        if num_placeholders == 0:
            return "Your question must include the <DNA> token at the position where the DNA sequence should be inserted."
        if num_placeholders > 1:
            return "You can only provide one DNA sequence, so you must use exactly one <DNA> placeholder."
    english_sequence = custom_question

    _append_log(
        f"Initial user question: {custom_question}\n",
        f"Full english prompt: {english_sequence}\n",
        "Calling model\n",
    )

    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences
        }
    )

    # Log before either return so the no-DNA path is recorded as well.
    _append_log(f"Output: {output}\n")

    if not dna_sequences:
        return f"{output}\n\nNote: Careful, you did not provide any DNA sequence."

    return output

# --- Gradio Interface ---
# Custom styling: sans-serif font, black buttons, hidden Gradio footer.
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")

    with gr.Row():
        # Input column: pasted sequence, optional file upload, and question.
        with gr.Column(scale=1):
            dna_text = gr.Textbox(
                label="Paste your DNA sequence",
                placeholder="ATGCATGCATGC...",
                lines=4
            )

            fasta_file = gr.File(
                label="Or upload your FASTA file",
                file_types=[".fasta", ".fa", ".txt"]
            )

            custom_question = gr.Textbox(
                label="English Question",
                placeholder="e.g., Does this sequence <DNA> contain a donor splice site?"
            )

            submit_btn = gr.Button("Run Query", variant="primary")

        with gr.Row():
            output = gr.Textbox(label="Answer", lines=6)

    # Wire the button to the model call; the returned value fills the answer box.
    submit_btn.click(
        run_chatnt,
        inputs=[dna_text, fasta_file, custom_question],
        outputs=output,
    )

    # The <DNA> token is wrapped in backticks everywhere in this note: written
    # bare, Markdown treats it as an HTML tag and it vanishes from the page.
    gr.Markdown("""
**Note:**  
✅ You must use **exactly one DNA sequence** (either paste it or upload a file).  
✅ Your question must include the `<DNA>` token **exactly once** at the position where the DNA will be inserted.  
Example: *"Does this sequence `<DNA>` contain a donor splice site?"*
    """)

    with gr.Accordion("Logs", open=True):
        # A callable is passed as the Markdown value, so the log file is read
        # by Log.__call__ rather than captured once as a fixed string here.
        log_display = Log(log_file)
        gr.Markdown(log_display)

# --- Launch ---
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # Enable Gradio's request queue before starting the server.
    demo.queue()
    # show_error=True asks Gradio to surface handler errors in the UI;
    # debug=True turns on Gradio's debug mode for the launched server.
    demo.launch(debug=True, show_error=True)