bernardo-de-almeida Yanisadel committed on
Commit
0daa8ac
·
verified ·
1 Parent(s): f809a7e

Update app.py (#5)

Browse files

- Update app.py (47d9b5dcbbda71589594a3d8e07c117304acc40c)


Co-authored-by: Yanis Adel <[email protected]>

Files changed (1) hide show
  1. app.py +45 -32
app.py CHANGED
@@ -28,25 +28,56 @@ def run_chatnt(input_file, custom_question):
28
  log.write("Request started\n")
29
 
30
  if not custom_question or custom_question.strip() == "":
31
- return pd.DataFrame(), None
32
 
33
  # Read DNA sequences
34
  dna_sequences = []
35
  if input_file is not None:
36
  with open(input_file.name, "r") as f:
37
- lines = f.readlines()
38
- for line in lines:
39
- if line.startswith(">"):
 
40
  continue
41
- dna_sequences.append(line.strip())
 
 
 
 
 
 
 
42
 
43
- if not dna_sequences:
44
- return pd.DataFrame(), None
 
45
 
46
  # Build prompt
47
- english_sequence = custom_question + " <DNA>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  # Call model
 
50
  output = pipe(
51
  inputs={
52
  "english_sequence": english_sequence,
@@ -54,22 +85,8 @@ def run_chatnt(input_file, custom_question):
54
  }
55
  )
56
 
57
- # Wrap output
58
- results = []
59
- if isinstance(output, list):
60
- for item in output:
61
- results.append({"Result": item})
62
- else:
63
- results.append({"Result": output})
64
-
65
- df = pd.DataFrame(results)
66
- output_file = "output.csv"
67
- df.to_csv(output_file, index=False)
68
-
69
- with open(log_file, "a") as log:
70
- log.write("Request finished\n")
71
-
72
- return df, output_file
73
 
74
  # --- Gradio Interface ---
75
  css = """
@@ -94,21 +111,17 @@ with gr.Blocks(css=css) as demo:
94
 
95
  submit_btn = gr.Button("Run Query", variant="primary")
96
 
97
- with gr.Column(scale=2):
98
- output_df = gr.DataFrame(
99
- label="Results",
100
- headers=["Result"]
101
- )
102
- output_file = gr.File(label="Download Results (CSV)")
103
 
104
  submit_btn.click(
105
  run_chatnt,
106
  inputs=[input_file, custom_question],
107
- outputs=[output_df, output_file],
108
  )
109
 
110
  gr.Markdown("""
111
- **Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences.
112
  """)
113
 
114
  with gr.Accordion("Logs", open=True):
 
28
  log.write("Request started\n")
29
 
30
  if not custom_question or custom_question.strip() == "":
31
+ None
32
 
33
  # Read DNA sequences
34
  dna_sequences = []
35
  if input_file is not None:
36
  with open(input_file.name, "r") as f:
37
+ sequence = ""
38
+ for line in f:
39
+ line = line.strip()
40
+ if not line:
41
  continue
42
+ if line.startswith(">"):
43
+ if sequence:
44
+ dna_sequences.append(sequence)
45
+ sequence = ""
46
+ else:
47
+ sequence += line
48
+ if sequence:
49
+ dna_sequences.append(sequence)
50
 
51
+ with open(log_file, "a") as log:
52
+ for i, seq in enumerate(dna_sequences):
53
+ log.write(f"DNA sequence {i+1} : {seq}\n")
54
 
55
  # Build prompt
56
+ num_sequences = len(dna_sequences)
57
+ num_placeholders = custom_question.count("<DNA>")
58
+ if num_sequences == 1:
59
+ # If there is one DNA sequence, add the <DNA> at the end if it was not specified
60
+ if num_placeholders == 0:
61
+ english_sequence = custom_question + " <DNA>"
62
+ elif num_placeholders == 1:
63
+ english_sequence = custom_question
64
+ else:
65
+ raise ValueError("Too many <DNA> placeholders for a single DNA sequence.")
66
+ elif num_sequences > 1:
67
+ # If there are multiple DNA sequences, the user must specify himself all
68
+ # positions of DNA sequences
69
+ if num_placeholders != num_sequences:
70
+ raise ValueError(
71
+ f"You provided {num_sequences} DNA sequences but only {num_placeholders} <DNA> placeholders. Please specify one <DNA> for each sequence."
72
+ )
73
+ english_sequence = custom_question
74
+ else:
75
+ return None
76
+ with open(log_file, "a") as log:
77
+ log.write(f"English prompt : {english_sequence}")
78
 
79
  # Call model
80
+ log.write("Calling model")
81
  output = pipe(
82
  inputs={
83
  "english_sequence": english_sequence,
 
85
  }
86
  )
87
 
88
+ log.write(f"Output : {output}")
89
+ return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # --- Gradio Interface ---
92
  css = """
 
111
 
112
  submit_btn = gr.Button("Run Query", variant="primary")
113
 
114
+ with gr.Row():
115
+ output = gr.Textbox(label="Output Text", lines=6)
 
 
 
 
116
 
117
  submit_btn.click(
118
  run_chatnt,
119
  inputs=[input_file, custom_question],
120
+ outputs=output,
121
  )
122
 
123
  gr.Markdown("""
124
+ **Note:** Your question **must** include the `<DNA>` token if needed for multiple sequences. Example if your FASTA file contains two sequences : "Does the sequence <DNA> contain a donor splice site? And the sequence <DNA> ?"
125
  """)
126
 
127
  with gr.Accordion("Logs", open=True):