Spaces:

InstaDeepAI
/

ChatNT_demo

Running on Zero

App Files Community

Yanisadel committed on Jul 7

Commit

47d9b5d

verified ·

1 Parent(s): f809a7e

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -32

app.py CHANGED Viewed

@@ -28,25 +28,56 @@ def run_chatnt(input_file, custom_question):
         log.write("Request started\n")
     if not custom_question or custom_question.strip() == "":
-        return pd.DataFrame(), None
     # Read DNA sequences
     dna_sequences = []
     if input_file is not None:
         with open(input_file.name, "r") as f:
-            lines = f.readlines()
-            for line in lines:
-                if line.startswith(">"):
                     continue
-                dna_sequences.append(line.strip())
-    if not dna_sequences:
-        return pd.DataFrame(), None
     # Build prompt
-    english_sequence = custom_question + " <DNA>"
     # Call model
     output = pipe(
         inputs={
             "english_sequence": english_sequence,
@@ -54,22 +85,8 @@ def run_chatnt(input_file, custom_question):
         }
     )
-    # Wrap output
-    results = []
-    if isinstance(output, list):
-        for item in output:
-            results.append({"Result": item})
-    else:
-        results.append({"Result": output})
-    df = pd.DataFrame(results)
-    output_file = "output.csv"
-    df.to_csv(output_file, index=False)
-    with open(log_file, "a") as log:
-        log.write("Request finished\n")
-    return df, output_file
 # --- Gradio Interface ---
 css = """
@@ -94,21 +111,17 @@ with gr.Blocks(css=css) as demo:
             submit_btn = gr.Button("Run Query", variant="primary")
-        with gr.Column(scale=2):
-            output_df = gr.DataFrame(
-                label="Results",
-                headers=["Result"]
-            )
-            output_file = gr.File(label="Download Results (CSV)")
     submit_btn.click(
         run_chatnt,
         inputs=[input_file, custom_question],
-        outputs=[output_df, output_file],
     )
     gr.Markdown("""
-**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences.
     """)
     with gr.Accordion("Logs", open=True):

         log.write("Request started\n")
     if not custom_question or custom_question.strip() == "":
+        None
     # Read DNA sequences
     dna_sequences = []
     if input_file is not None:
         with open(input_file.name, "r") as f:
+            sequence = ""
+            for line in f:
+                line = line.strip()
+                if not line:
                     continue
+                if line.startswith(">"):
+                    if sequence:
+                        dna_sequences.append(sequence)
+                        sequence = ""
+                else:
+                    sequence += line
+            if sequence:
+                dna_sequences.append(sequence)
+    with open(log_file, "a") as log:
+        for i, seq in enumerate(dna_sequences):
+            log.write(f"DNA sequence {i+1} : {seq}\n")
     # Build prompt
+    num_sequences = len(dna_sequences)
+    num_placeholders = custom_question.count("<DNA>")
+    if num_sequences == 1:
+        # If there is one DNA sequence, add the <DNA> at the end if it was not specified
+        if num_placeholders == 0:
+            english_sequence = custom_question + " <DNA>"
+        elif num_placeholders == 1:
+            english_sequence = custom_question
+        else:
+            raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
+    elif num_sequences > 1:
+        # If there are multiple DNA sequences, the user must specify himself all
+        # positions of DNA sequences
+        if num_placeholders != num_sequences:
+            raise ValueError(
+                f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
+            )
+        english_sequence = custom_question
+    else:
+        return None
+    with open(log_file, "a") as log:
+        log.write(f"English prompt : {english_sequence}")
     # Call model
+    log.write("Calling model")
     output = pipe(
         inputs={
             "english_sequence": english_sequence,
         }
     )
+    log.write(f"Output : {output}")
+    return output
 # --- Gradio Interface ---
 css = """
             submit_btn = gr.Button("Run Query", variant="primary")
+        with gr.Row():
+            output = gr.Textbox(label="Output Text", lines=6)
     submit_btn.click(
         run_chatnt,
         inputs=[input_file, custom_question],
+        outputs=output,
     )
     gr.Markdown("""
+**Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
     """)
     with gr.Accordion("Logs", open=True):