# --- Imports ---
import spaces
import gradio as gr
from transformers import pipeline
import os
# --- Load Model ---
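# ChatNT exposes a custom pipeline (hence trust_remote_code=True); as used below, it
# takes an English question containing <DNA> placeholders plus a list of DNA sequences.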
pipe = pipeline(model="InstaDeepAI/ChatNT", trust_remote_code=True)
# --- Logs ---
log_file = "logs.txt"
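# Small callable helper: Gradio re-evaluates callable component values on page load,
# so passing a Log instance to gr.Markdown displays the current contents of the log file.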
class Log:
    def __init__(self, log_file):
        self.log_file = log_file

    def __call__(self):
        if not os.path.exists(self.log_file):
            return ""
        with open(self.log_file, "r") as f:
            return f.read()
# --- Main Function ---
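# @spaces.GPU requests a ZeroGPU slot per call, so the Space only holds a GPU while
# a request is actually being processed.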
@spaces.GPU
def run_chatnt(input_file, custom_question):
    with open(log_file, "a") as log:
        log.write("Request started\n\n")

    if not custom_question or custom_question.strip() == "":
        return None

    # Read DNA sequences
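    # A FASTA file alternates header lines (starting with ">") and sequence lines, e.g.:
    #   >seq1
    #   ATGCCGTA
    #   >seq2
    #   GGGTTTCA
    # Each ">" header starts a new record; its sequence lines are concatenated below.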
    dna_sequences = []
    if input_file is not None:
        with open(input_file.name, "r") as f:
            sequence = ""
            for line in f:
                line = line.strip()
                if not line:
                    continue
                if line.startswith(">"):
                    if sequence:
                        dna_sequences.append(sequence)
                        sequence = ""
                else:
                    sequence += line
            if sequence:
                dna_sequences.append(sequence)

    with open(log_file, "a") as log:
        for i, seq in enumerate(dna_sequences):
            log.write(f"DNA sequence {i+1} : {seq}\n")
    # Build prompt
    num_sequences = len(dna_sequences)
    num_placeholders = custom_question.count("<DNA>")
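    # Example: with one uploaded sequence, "Does this sequence contain a donor splice
    # site?" becomes "Does this sequence contain a donor splice site? <DNA>".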
    if num_sequences == 1:
        # With a single DNA sequence, append <DNA> at the end if it was not specified
        if num_placeholders == 0:
            english_sequence = custom_question + " <DNA>"
        elif num_placeholders == 1:
            english_sequence = custom_question
        else:
            raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
    elif num_sequences > 1:
        # With multiple DNA sequences, the user must mark the position of every
        # sequence with its own <DNA> placeholder
        if num_placeholders != num_sequences:
            raise ValueError(
                f"You provided {num_sequences} DNA sequences but {num_placeholders} "
                "<DNA> placeholders. Please include exactly one <DNA> per sequence."
            )
        english_sequence = custom_question
    else:
        return None
    with open(log_file, "a") as log:
        log.write(f"Initial user question : {custom_question}\n")
        log.write(f"Full english prompt : {english_sequence}\n")

    # Call model
    with open(log_file, "a") as log:
        log.write("Calling model\n")

    output = pipe(
        inputs={
            "english_sequence": english_sequence,
            "dna_sequences": dna_sequences,
        }
    )

    with open(log_file, "a") as log:
        log.write(f"Output : {output}\n")

    return output
# --- Gradio Interface ---
css = """
.gradio-container { font-family: sans-serif; }
.gr-button { color: white; border-color: black; background: black; }
footer { display: none !important; }
"""
with gr.Blocks(css=css) as demo:
gr.Markdown("# 🧬 ChatNT: A Multimodal Conversational Agent for DNA, RNA and Protein Tasks")
with gr.Row():
with gr.Column(scale=1):
input_file = gr.File(
label="Upload DNA Sequence File (.fasta)",
file_types=[".fasta", ".fa"]
)
custom_question = gr.Textbox(
label="English Question (required)",
placeholder="e.g., Does this sequence contain a donor splice site?"
)
submit_btn = gr.Button("Run Query", variant="primary")
with gr.Row():
output = gr.Textbox(label="Output Text", lines=6)
submit_btn.click(
run_chatnt,
inputs=[input_file, custom_question],
outputs=output,
)
gr.Markdown("""
**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
""")
    with gr.Accordion("Logs", open=True):
        log_display = Log(log_file)
        gr.Markdown(log_display)
# --- Launch ---
if __name__ == "__main__":
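    # queue() enables Gradio's request queue so concurrent users are served in turn.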
    demo.queue()
    demo.launch(debug=True, show_error=True)