Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,345 Bytes
90b1023 f809a7e 90b1023 f809a7e cdc5652 90b1023 419f959 90b1023 cdc5652 23e3a2e cdc5652 23e3a2e cdc5652 90b1023 0daa8ac cdc5652 90b1023 0daa8ac 23e3a2e cdc5652 0daa8ac cdc5652 0daa8ac cdc5652 90b1023 d6d387a cdc5652 23e3a2e 90b1023 cdc5652 419f959 cdc5652 23e3a2e 0daa8ac 90b1023 d6d387a 90b1023 cdc5652 90b1023 cdc5652 90b1023 23e3a2e cdc5652 90b1023 0daa8ac 23e3a2e 90b1023 cdc5652 0daa8ac 90b1023 cdc5652 90b1023 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os
# --- Load Model ---
# Build the ChatNT pipeline once at import time; run_chatnt reuses this single
# module-level instance for every request.
# NOTE(review): trust_remote_code=True lets transformers execute code shipped
# in the model repository — confirm the repository is trusted / pinned.
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)
# --- Logs ---
# Path of the plain-text log file: run_chatnt appends to it and Log reads it
# back for display in the UI.
log_file = "logs.txt"
class Log:
    """Callable that returns the current contents of a log file.

    An instance is handed to ``gr.Markdown`` as its value, so it must be
    callable with no arguments and return a string.
    """

    def __init__(self, log_file):
        """Remember the path of the log file to read on each call."""
        self.log_file = log_file

    def __call__(self):
        """Return the whole log as text, or ``""`` if the file does not exist."""
        # EAFP: opening directly avoids the window between an existence check
        # and the open call in which the file could disappear.
        try:
            with open(self.log_file, "r") as f:
                return f.read()
        except FileNotFoundError:
            return ""
# --- Main Function ---
def _append_log(message):
    """Append ``message`` verbatim to the shared ``log_file``."""
    with open(log_file, "a") as log:
        log.write(message)


def _read_upload(fasta_file):
    """Return the text content of an uploaded file.

    Accepts either a filesystem path or a file-like object, because the value
    a ``gr.File`` input passes to its handler differs between Gradio versions
    and component ``type`` settings.
    """
    if isinstance(fasta_file, (str, os.PathLike)):
        with open(fasta_file, "r", encoding="utf-8") as handle:
            return handle.read()
    data = fasta_file.read()
    # Binary handles yield bytes; text handles already yield str.
    return data.decode("utf-8") if isinstance(data, bytes) else data


def _parse_sequences(text):
    """Split raw or FASTA-formatted text into one sequence string per record.

    Lines starting with ``>`` are headers and start a new record; blank lines
    are ignored; the remaining lines of a record are stripped and joined.
    Text without any header yields at most one sequence.
    """
    records = []
    current = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            # A header closes the record collected so far (if any).
            if current:
                records.append("".join(current))
                current = []
            continue
        current.append(line)
    if current:
        records.append("".join(current))
    return records


@spaces.GPU
def run_chatnt(dna_text, fasta_file, custom_question):
    """Answer an English question about at most one DNA sequence with ChatNT.

    Args:
        dna_text: Sequence pasted by the user (raw or FASTA-formatted). Takes
            precedence over ``fasta_file`` when it is not blank.
        fasta_file: Uploaded FASTA file (path or file-like object), or None.
        custom_question: The English question. When a sequence is supplied it
            must contain the ``<DNA>`` placeholder exactly once.

    Returns:
        The pipeline output, or a human-readable message when the inputs are
        invalid. If no sequence was supplied, a warning note is appended to
        the output.
    """
    _append_log("Request started\n\n")

    # Read DNA sequence(s) from the text field or, failing that, the file.
    if dna_text and dna_text.strip():
        dna_sequences = _parse_sequences(dna_text)
    elif fasta_file is not None:
        dna_sequences = _parse_sequences(_read_upload(fasta_file))
    else:
        dna_sequences = []
    _append_log(f"DNA sequences found: {dna_sequences}\n")

    # Records are kept separate by the parser, so a multi-record input is
    # rejected here instead of being silently merged into one sequence.
    if len(dna_sequences) > 1:
        return "You must use only one DNA sequence."
    if not custom_question or custom_question.strip() == "":
        return "Please provide a question."

    # With a sequence present, the question needs exactly one placeholder.
    if dna_sequences:
        num_placeholders = custom_question.count("<DNA>")
        if num_placeholders == 0:
            return "Your question must include the <DNA> token at the position where the DNA sequence should be inserted."
        if num_placeholders > 1:
            return "You can only provide one DNA sequence, so you must use exactly one <DNA> placeholder."
    # NOTE(review): a question containing <DNA> with no sequence supplied is
    # still forwarded to the model unchanged — confirm the pipeline accepts it.
    english_sequence = custom_question

    _append_log(
        f"Initial user question: {custom_question}\n"
        f"Full english prompt: {english_sequence}\n"
        "Calling model\n"
    )
    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences,
        }
    )
    # Log before branching so the output is recorded on every path.
    _append_log(f"Output: {output}\n")

    if not dna_sequences:
        return f"{output}\n\nNote: Careful, you did not provide any DNA sequence."
    return output
# --- Gradio Interface ---
# Custom stylesheet passed to gr.Blocks: sans-serif font for the container,
# black buttons with white text, and a hidden page footer.
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""
# Page layout: title, input column, answer box, usage note and a log viewer.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")

    # Inputs: a pasted sequence or an uploaded file, plus the question.
    with gr.Row():
        with gr.Column(scale=1):
            dna_text = gr.Textbox(
                label="Paste your DNA sequence",
                placeholder="ATGCATGCATGC...",
                lines=4,
            )
            fasta_file = gr.File(
                label="Or upload your FASTA file",
                file_types=[".fasta", ".fa", ".txt"],
            )
            custom_question = gr.Textbox(
                label="English Question",
                placeholder="e.g., Does this sequence <DNA> contain a donor splice site?",
            )
            submit_btn = gr.Button("Run Query", variant="primary")

    # NOTE(review): the source's indentation was lost; the answer row is placed
    # as a sibling below the input row — confirm this is the intended layout.
    with gr.Row():
        output = gr.Textbox(label="Answer", lines=6)

    # Clicking the button sends the three inputs to run_chatnt and shows the
    # returned text in the answer box.
    submit_btn.click(
        run_chatnt,
        inputs=[dna_text, fasta_file, custom_question],
        outputs=output,
    )

    # In the example line the placeholder is written with HTML entities:
    # a raw <DNA> outside a code span is parsed by Markdown as an HTML tag and
    # would not be visible in the rendered note.
    gr.Markdown("""
**Note:**
✅ You must use **exactly one DNA sequence** (either paste it or upload a file).
✅ Your question must include the `<DNA>` token **exactly once** at the position where the DNA will be inserted.
Example: *"Does this sequence &lt;DNA&gt; contain a donor splice site?"*
""")

    # Log viewer: the Markdown value is a callable that reads the log file.
    with gr.Accordion("Logs", open=True):
        log_display = Log(log_file)
        gr.Markdown(log_display)
# --- Launch ---
if __name__ == "__main__":
    # Turn on the request queue, then start the app with debug output and
    # error messages surfaced in the UI.
    demo.queue().launch(debug=True, show_error=True)