Files changed (1) hide show
  1. app.py +41 -40
app.py CHANGED
@@ -2,7 +2,6 @@
2
  import spaces
3
  import gradio as gr
4
  from transformers import pipeline
5
- import pandas as pd
6
  import os
7
 
8
  # --- Load Model ---
@@ -23,30 +22,33 @@ class Log:
23
 
24
  # --- Main Function ---
25
  @spaces.GPU
26
- def run_chatnt(input_file, custom_question):
27
  with open(log_file, "a") as log:
28
  log.write("Request started\n\n")
29
 
30
  if not custom_question or custom_question.strip() == "":
31
- return None
32
 
33
- # Read DNA sequences
34
  dna_sequences = []
35
- if input_file is not None:
36
- with open(input_file.name, "r") as f:
37
- sequence = ""
38
- for line in f:
39
- line = line.strip()
40
- if not line:
41
- continue
42
- if line.startswith(">"):
43
- if sequence:
44
- dna_sequences.append(sequence)
45
- sequence = ""
46
- else:
47
- sequence += line
48
- if sequence:
49
- dna_sequences.append(sequence)
 
 
 
50
 
51
  with open(log_file, "a") as log:
52
  for i, seq in enumerate(dna_sequences):
@@ -55,32 +57,28 @@ def run_chatnt(input_file, custom_question):
55
  # Build prompt
56
  num_sequences = len(dna_sequences)
57
  num_placeholders = custom_question.count("<DNA>")
 
58
  if num_sequences == 1:
59
- # If there is one DNA sequence, add the <DNA> at the end if it was not specified
60
  if num_placeholders == 0:
61
  english_sequence = custom_question + " <DNA>"
62
  elif num_placeholders == 1:
63
  english_sequence = custom_question
64
  else:
65
- raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
66
  elif num_sequences > 1:
67
- # If there are multiple DNA sequences, the user must specify himself all
68
- # positions of DNA sequences
69
  if num_placeholders != num_sequences:
70
- raise ValueError(
71
- f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
72
- )
73
  english_sequence = custom_question
74
  else:
75
- return None
 
76
  with open(log_file, "a") as log:
77
  log.write(f"Initial user question : {custom_question}\n")
78
  log.write(f"Full english prompt : {english_sequence}\n")
79
 
80
- # Call model
81
  with open(log_file, "a") as log:
82
- log.write("Calling model")
83
-
84
  output = pipe(
85
  inputs={
86
  "english_sequence": english_sequence,
@@ -89,8 +87,8 @@ def run_chatnt(input_file, custom_question):
89
  )
90
 
91
  with open(log_file, "a") as log:
92
- log.write(f"Output : {output}")
93
-
94
  return output
95
 
96
  # --- Gradio Interface ---
@@ -105,28 +103,31 @@ with gr.Blocks(css=css) as demo:
105
 
106
  with gr.Row():
107
  with gr.Column(scale=1):
108
- input_file = gr.File(
109
- label="Upload DNA Sequence File (.fasta)",
110
- file_types=[".fasta", ".fa"]
 
111
  )
112
  custom_question = gr.Textbox(
113
- label="English Question (required)",
114
- placeholder="e.g., Does this sequence contain a donor splice site?"
115
  )
116
 
117
  submit_btn = gr.Button("Run Query", variant="primary")
118
 
119
  with gr.Row():
120
- output = gr.Textbox(label="Output Text", lines=6)
121
 
122
  submit_btn.click(
123
  run_chatnt,
124
- inputs=[input_file, custom_question],
125
  outputs=output,
126
  )
127
 
128
  gr.Markdown("""
129
- **Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
 
 
130
  """)
131
 
132
  with gr.Accordion("Logs", open=True):
 
2
  import spaces
3
  import gradio as gr
4
  from transformers import pipeline
 
5
  import os
6
 
7
  # --- Load Model ---
 
22
 
23
  # --- Main Function ---
24
  @spaces.GPU
25
+ def run_chatnt(fasta_text, custom_question):
26
  with open(log_file, "a") as log:
27
  log.write("Request started\n\n")
28
 
29
  if not custom_question or custom_question.strip() == "":
30
+ return "Please provide a question."
31
 
32
+ # Read DNA sequences from pasted text
33
  dna_sequences = []
34
+ if fasta_text:
35
+ lines = fasta_text.splitlines()
36
+ sequence = ""
37
+ for line in lines:
38
+ line = line.strip()
39
+ if not line:
40
+ continue
41
+ if line.startswith(">"):
42
+ if sequence:
43
+ dna_sequences.append(sequence)
44
+ sequence = ""
45
+ else:
46
+ sequence += line
47
+ if sequence:
48
+ dna_sequences.append(sequence)
49
+
50
+ if not dna_sequences:
51
+ return "No DNA sequences found in the input."
52
 
53
  with open(log_file, "a") as log:
54
  for i, seq in enumerate(dna_sequences):
 
57
  # Build prompt
58
  num_sequences = len(dna_sequences)
59
  num_placeholders = custom_question.count("<DNA>")
60
+
61
  if num_sequences == 1:
 
62
  if num_placeholders == 0:
63
  english_sequence = custom_question + " <DNA>"
64
  elif num_placeholders == 1:
65
  english_sequence = custom_question
66
  else:
67
+ return "Too many <DNA> placeholders for a single DNA sequence."
68
  elif num_sequences > 1:
 
 
69
  if num_placeholders != num_sequences:
70
+ return f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
 
 
71
  english_sequence = custom_question
72
  else:
73
+ return "No DNA sequences detected."
74
+
75
  with open(log_file, "a") as log:
76
  log.write(f"Initial user question : {custom_question}\n")
77
  log.write(f"Full english prompt : {english_sequence}\n")
78
 
 
79
  with open(log_file, "a") as log:
80
+ log.write("Calling model\n")
81
+
82
  output = pipe(
83
  inputs={
84
  "english_sequence": english_sequence,
 
87
  )
88
 
89
  with open(log_file, "a") as log:
90
+ log.write(f"Output : {output}\n")
91
+
92
  return output
93
 
94
  # --- Gradio Interface ---
 
103
 
104
  with gr.Row():
105
  with gr.Column(scale=1):
106
+ fasta_text = gr.Textbox(
107
+ label="Paste your DNA sequences in FASTA format",
108
+ placeholder=">seq1\nATGC...\n>seq2\nCGTA...",
109
+ lines=8
110
  )
111
  custom_question = gr.Textbox(
112
+ label="English Question",
113
+ placeholder="e.g., Does this sequence contain a donor splice site? <DNA>"
114
  )
115
 
116
  submit_btn = gr.Button("Run Query", variant="primary")
117
 
118
  with gr.Row():
119
+ output = gr.Textbox(label="Answer", lines=6)
120
 
121
  submit_btn.click(
122
  run_chatnt,
123
+ inputs=[fasta_text, custom_question],
124
  outputs=output,
125
  )
126
 
127
  gr.Markdown("""
128
+ **Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences.
129
+ Example if your FASTA text contains two sequences :
130
+ "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
131
  """)
132
 
133
  with gr.Accordion("Logs", open=True):