Spaces:

InstaDeepAI
/

ChatNT_demo

Running on Zero

App Files Community

Update app.py

by Yanisadel - opened 6 days ago

base: refs/heads/main

←

from: refs/pr/9

Discussion Files changed

+41

-40

Files changed (1) hide show

app.py +41 -40

app.py CHANGED Viewed

@@ -2,7 +2,6 @@
 import spaces
 import gradio as gr
 from transformers import pipeline
-import pandas as pd
 import os
 # --- Load Model ---
@@ -23,30 +22,33 @@ class Log:
 # --- Main Function ---
 @spaces.GPU
-def run_chatnt(input_file, custom_question):
     with open(log_file, "a") as log:
         log.write("Request started\n\n")
     if not custom_question or custom_question.strip() == "":
-        return None
-    # Read DNA sequences
     dna_sequences = []
-    if input_file is not None:
-        with open(input_file.name, "r") as f:
-            sequence = ""
-            for line in f:
-                line = line.strip()
-                if not line:
-                    continue
-                if line.startswith(">"):
-                    if sequence:
-                        dna_sequences.append(sequence)
-                        sequence = ""
-                else:
-                    sequence += line
-            if sequence:
-                dna_sequences.append(sequence)
     with open(log_file, "a") as log:
         for i, seq in enumerate(dna_sequences):
@@ -55,32 +57,28 @@ def run_chatnt(input_file, custom_question):
     # Build prompt
     num_sequences = len(dna_sequences)
     num_placeholders = custom_question.count("<DNA>")
     if num_sequences == 1:
-        # If there is one DNA sequence, add the <DNA> at the end if it was not specified
         if num_placeholders == 0:
             english_sequence = custom_question + " <DNA>"
         elif num_placeholders == 1:
             english_sequence = custom_question
         else:
-            raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
     elif num_sequences > 1:
-        # If there are multiple DNA sequences, the user must specify himself all
-        # positions of DNA sequences
         if num_placeholders != num_sequences:
-            raise ValueError(
-                f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
-            )
         english_sequence = custom_question
     else:
-        return None
     with open(log_file, "a") as log:
         log.write(f"Initial user question : {custom_question}\n")
         log.write(f"Full english prompt : {english_sequence}\n")
-    # Call model
     with open(log_file, "a") as log:
-        log.write("Calling model")
     output = pipe(
         inputs={
             "english_sequence": english_sequence,
@@ -89,8 +87,8 @@ def run_chatnt(input_file, custom_question):
     )
     with open(log_file, "a") as log:
-        log.write(f"Output : {output}")
     return output
 # --- Gradio Interface ---
@@ -105,28 +103,31 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column(scale=1):
-            input_file = gr.File(
-                label="Upload DNA Sequence File (.fasta)",
-                file_types=[".fasta", ".fa"]
             )
             custom_question = gr.Textbox(
-                label="English Question (required)",
-                placeholder="e.g., Does this sequence contain a donor splice site?"
             )
             submit_btn = gr.Button("Run Query", variant="primary")
         with gr.Row():
-            output = gr.Textbox(label="Output Text", lines=6)
     submit_btn.click(
         run_chatnt,
-        inputs=[input_file, custom_question],
         outputs=output,
     )
     gr.Markdown("""
-**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
     """)
     with gr.Accordion("Logs", open=True):

 import spaces
 import gradio as gr
 from transformers import pipeline
 import os
 # --- Load Model ---
 # --- Main Function ---
 @spaces.GPU
+def run_chatnt(fasta_text, custom_question):
     with open(log_file, "a") as log:
         log.write("Request started\n\n")
     if not custom_question or custom_question.strip() == "":
+        return "Please provide a question."
+    # Read DNA sequences from pasted text
     dna_sequences = []
+    if fasta_text:
+        lines = fasta_text.splitlines()
+        sequence = ""
+        for line in lines:
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith(">"):
+                if sequence:
+                    dna_sequences.append(sequence)
+                    sequence = ""
+            else:
+                sequence += line
+        if sequence:
+            dna_sequences.append(sequence)
+    if not dna_sequences:
+        return "No DNA sequences found in the input."
     with open(log_file, "a") as log:
         for i, seq in enumerate(dna_sequences):
     # Build prompt
     num_sequences = len(dna_sequences)
     num_placeholders = custom_question.count("<DNA>")
     if num_sequences == 1:
         if num_placeholders == 0:
             english_sequence = custom_question + " <DNA>"
         elif num_placeholders == 1:
             english_sequence = custom_question
         else:
+            return "Too many <DNA> placeholders for a single DNA sequence."
     elif num_sequences > 1:
         if num_placeholders != num_sequences:
+            return f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
         english_sequence = custom_question
     else:
+        return "No DNA sequences detected."
     with open(log_file, "a") as log:
         log.write(f"Initial user question : {custom_question}\n")
         log.write(f"Full english prompt : {english_sequence}\n")
     with open(log_file, "a") as log:
+        log.write("Calling model\n")
     output = pipe(
         inputs={
             "english_sequence": english_sequence,
     )
     with open(log_file, "a") as log:
+        log.write(f"Output : {output}\n")
     return output
 # --- Gradio Interface ---
     with gr.Row():
         with gr.Column(scale=1):
+            fasta_text = gr.Textbox(
+                label="Paste your DNA sequences in FASTA format",
+                placeholder=">seq1\nATGC...\n>seq2\nCGTA...",
+                lines=8
             )
             custom_question = gr.Textbox(
+                label="English Question",
+                placeholder="e.g., Does this sequence contain a donor splice site? <DNA>"
             )
             submit_btn = gr.Button("Run Query", variant="primary")
         with gr.Row():
+            output = gr.Textbox(label="Answer", lines=6)
     submit_btn.click(
         run_chatnt,
+        inputs=[fasta_text, custom_question],
         outputs=output,
     )
     gr.Markdown("""
+**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences.
+Example if your FASTA text contains two sequences :
+"Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
     """)
     with gr.Accordion("Logs", open=True):