File size: 4,200 Bytes
90b1023
f809a7e
90b1023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f809a7e
90b1023
 
 
 
 
47d9b5d
90b1023
 
 
 
 
47d9b5d
 
 
 
90b1023
47d9b5d
 
 
 
 
 
 
 
90b1023
47d9b5d
 
 
90b1023
 
47d9b5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90b1023
 
47d9b5d
90b1023
 
 
 
 
 
 
47d9b5d
 
90b1023
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47d9b5d
 
90b1023
 
 
 
47d9b5d
90b1023
 
 
47d9b5d
90b1023
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import pandas as pd
import os

# --- Load Model ---
# Built once at import time and shared by every request handled by run_chatnt.
# NOTE(review): trust_remote_code=True presumably lets transformers execute
# Python code shipped in the "InstaDeepAI/ChatNT" model repository — confirm
# that repository (and ideally a pinned revision) is trusted.
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)

# --- Logs ---
# Relative path of the text file that run_chatnt appends to and that the
# "Logs" accordion reads back through the Log class.
log_file = "logs.txt"

class Log:
    """Callable reader that returns the current text of a log file."""

    def __init__(self, log_file):
        # Path that is re-read from disk on every call.
        self.log_file = log_file

    def __call__(self):
        """Return the whole file as a string, or "" when it does not exist."""
        path = self.log_file
        if os.path.exists(path):
            with open(path, "r") as handle:
                contents = handle.read()
            return contents
        return ""

# --- Main Function ---
def _append_log(message):
    """Append `message` as a single line to the shared log file."""
    # Open/close per message so the handle is never used after being closed.
    with open(log_file, "a") as log:
        log.write(f"{message}\n")


def _read_fasta_sequences(path):
    """Return the sequences of a FASTA-style text file, in file order.

    Lines starting with ">" are treated as record separators and discarded;
    blank lines are skipped; all other lines are concatenated (after
    stripping surrounding whitespace) into the current sequence.
    """
    sequences = []
    chunks = []
    with open(path, "r") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # A header closes the sequence collected so far, if any.
                if chunks:
                    sequences.append("".join(chunks))
                    chunks = []
            else:
                chunks.append(line)
    if chunks:
        sequences.append("".join(chunks))
    return sequences


@spaces.GPU
def run_chatnt(input_file, custom_question):
    """Run the ChatNT pipeline on an uploaded sequence file and a question.

    Args:
        input_file: The uploaded file from the Gradio File component, either
            an object exposing the path as ``.name`` or the path itself as a
            string. ``None`` means no file was uploaded.
        custom_question: The English question. It may contain ``<DNA>``
            placeholders marking where each sequence is inserted.

    Returns:
        The value returned by ``pipe``, or ``None`` when the question is
        empty or the file yields no sequence.

    Raises:
        ValueError: If the number of ``<DNA>`` placeholders does not fit the
            number of sequences read from the file.
    """
    _append_log("Request started")

    # The question is required: without it there is nothing to ask the model
    # (and a None value would crash on .count() below).
    if not custom_question or custom_question.strip() == "":
        _append_log("No question provided")
        return None

    # Read DNA sequences
    dna_sequences = []
    if input_file is not None:
        # Accept both a file-like wrapper with .name and a plain path string.
        path = getattr(input_file, "name", input_file)
        dna_sequences = _read_fasta_sequences(path)

    for i, seq in enumerate(dna_sequences):
        _append_log(f"DNA sequence {i+1} : {seq}")

    # Build prompt
    num_sequences = len(dna_sequences)
    num_placeholders = custom_question.count("<DNA>")
    if num_sequences == 0:
        return None
    if num_sequences == 1:
        # If there is one DNA sequence, add the <DNA> at the end if it was not specified
        if num_placeholders == 0:
            english_sequence = custom_question + " <DNA>"
        elif num_placeholders == 1:
            english_sequence = custom_question
        else:
            raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
    else:
        # With multiple DNA sequences, the user must mark the position of
        # every sequence explicitly.
        if num_placeholders != num_sequences:
            raise ValueError(
                f"You provided {num_sequences} DNA sequences but {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
            )
        english_sequence = custom_question
    _append_log(f"English prompt : {english_sequence}")

    # Call model
    _append_log("Calling model")
    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences
        }
    )

    _append_log(f"Output : {output}")
    return output

# --- Gradio Interface ---
# Stylesheet handed to gr.Blocks below: sans-serif font on the container,
# black buttons with white text, and a hidden page footer.
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""

# Page layout. Components are created in the order they appear, so statement
# order and `with` nesting below define the rendered structure.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🧬 ChatNT — DNA Sequence Query Assistant")

    with gr.Row():
        # Input column: sequence file upload, question box and submit button.
        with gr.Column(scale=1):
            input_file = gr.File(
                label="Upload DNA Sequence File (.fasta or .txt)",
                file_types=[".fasta", ".fa", ".txt"]
            )
            custom_question = gr.Textbox(
                label="English Question (required)",
                placeholder="e.g., Does this sequence contain a donor splice site?"
            )

            submit_btn = gr.Button("Run Query", variant="primary")

        # Output area: a Row nested inside the outer Row, next to the column.
        with gr.Row():
            output = gr.Textbox(label="Output Text", lines=6)

    # Clicking the button calls run_chatnt(input_file, custom_question) and
    # writes its return value into the output textbox.
    submit_btn.click(
        run_chatnt,
        inputs=[input_file, custom_question],
        outputs=output,
    )

    gr.Markdown("""
**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
    """)

    # Log viewer: a Log instance (callable returning the log file's text) is
    # passed as the Markdown value.
    # NOTE(review): presumably Gradio evaluates a callable value on page load
    # only, so the panel will not refresh after each request — confirm.
    with gr.Accordion("Logs", open=True):
        log_display = Log(log_file)
        gr.Markdown(log_display)

# --- Launch ---
# Only start the server when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    # Enable the request queue before launching.
    demo.queue()
    # debug/show_error are passed through to Gradio's launch().
    # NOTE(review): presumably these surface handler exceptions (such as the
    # ValueErrors raised by run_chatnt) in the UI/console — confirm.
    demo.launch(debug=True, show_error=True)