VyLala committed on
Commit 48a1ec2 · verified · 1 Parent(s): ddf863c

delete app.py

Files changed (1): app.py +0 -532
app.py DELETED
@@ -1,532 +0,0 @@
<<<<<<< HEAD
# ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback

import gradio as gr
from collections import Counter
import csv
import os
from functools import lru_cache
from mtdna_classifier import classify_sample_location
import subprocess
import json

@lru_cache(maxsize=128)
def classify_sample_location_cached(accession):
    return classify_sample_location(accession)
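
# Usage note (a sketch, not part of the original app): lru_cache memoizes up to 128
# distinct accessions per process, so repeated lookups of the same ID skip the full
# pipeline. The cache can be reset with the standard functools API:
#   classify_sample_location_cached.cache_clear()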

# Count and suggest final location
def compute_final_suggested_location(rows):
    candidates = [
        row.get("Predicted Location", "").strip()
        for row in rows
        if row.get("Predicted Location", "").strip().lower() not in ["", "sample id not found", "unknown"]
    ] + [
        row.get("Inferred Region", "").strip()
        for row in rows
        if row.get("Inferred Region", "").strip().lower() not in ["", "sample id not found", "unknown"]
    ]

    if not candidates:
        return Counter(), ("Unknown", 0)

    counts = Counter(candidates)
    top_location, count = counts.most_common(1)[0]
    return counts, (top_location, count)
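
# A minimal sketch of the majority vote above, with hypothetical row dicts shaped like
# the ones summarize_results builds below (location names are made up for illustration):
#   counts, (top, n) = compute_final_suggested_location([
#       {"Predicted Location": "Vietnam", "Inferred Region": ""},
#       {"Predicted Location": "Vietnam", "Inferred Region": "East Asia"},
#   ])
#   # counts == Counter({"Vietnam": 2, "East Asia": 1}); top == "Vietnam", n == 2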

# Store feedback (with required fields)
import gspread
from oauth2client.service_account import ServiceAccountCredentials

def store_feedback_to_google_sheets(accession, answer1, answer2, contact=""):
    if not answer1.strip() or not answer2.strip():
        return "⚠️ Please answer both questions before submitting."

    try:
        # Load service-account credentials from the Hugging Face secret
        creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)

        # Connect to the Google Sheet
        client = gspread.authorize(creds)
        sheet = client.open("feedback_mtdna").sheet1  # make sure the sheet name matches

        # Append feedback
        sheet.append_row([accession, answer1, answer2, contact])
        return "✅ Feedback submitted. Thank you!"

    except Exception as e:
        return f"❌ Error submitting feedback: {e}"
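
# For local testing without the Hugging Face secret, one option (an assumption, not part
# of this app) is to load a downloaded service-account file into the same env var:
#   os.environ["GCP_CREDS_JSON"] = open("credentials.json").read()
# The "feedback_mtdna" sheet must also be shared with the service account's client_email,
# or gspread's open() will fail with a permission error.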

def summarize_results(accession):
    try:
        output = classify_sample_location_cached(accession)
        print(output)
    except Exception as e:
        return [], f"❌ Error: {e}"

    if accession not in output:
        return [], "❌ Accession not found in results."

    isolate = next((k for k in output if k != accession), None)
    row_score = []
    rows = []

    for key in [accession, isolate]:
        if key not in output:
            continue
        sample_id_label = f"{key} ({'accession number' if key == accession else 'isolate of accession'})"
        for section, techniques in output[key].items():
            for technique, content in techniques.items():
                source = content.get("source", "")
                predicted = content.get("predicted_location", "")
                haplogroup = content.get("haplogroup", "")
                inferred = content.get("inferred_location", "")
                context = content.get("context_snippet", "")[:300] if "context_snippet" in content else ""

                row = {
                    "Sample ID": sample_id_label,
                    "Technique": technique,
                    "Source": f"The region of haplogroup is inferred\nby using this source: {source}" if technique == "haplogroup" else source,
                    "Predicted Location": "" if technique == "haplogroup" else predicted,
                    "Haplogroup": haplogroup if technique == "haplogroup" else "",
                    "Inferred Region": inferred if technique == "haplogroup" else "",
                    "Context Snippet": context
                }

                row_score.append(row)
                rows.append(list(row.values()))

    location_counts, (final_location, count) = compute_final_suggested_location(row_score)
    summary_lines = ["### 🧭 Location Frequency Summary", "After counting all predicted and inferred locations:\n"]
    summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
    summary_lines.append(f"\n**Final Suggested Location:** 🗺️ **{final_location}** (mentioned {count} times)")
    summary = "\n".join(summary_lines)

    return rows, summary

# Gradio UI
with gr.Blocks() as interface:
    gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
    gr.Markdown("Enter an accession number to infer geographic origin. You'll see predictions, confidence scores, and can submit feedback.")

    with gr.Row():
        accession = gr.Textbox(label="Enter Accession Number (e.g., KU131308)")
        run_button = gr.Button("🔍 Submit and Classify")
        reset_button = gr.Button("🔄 Reset")

    status = gr.Markdown(visible=False)
    headers = ["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"]
    output_table = gr.Dataframe(headers=headers, interactive=False)
    output_summary = gr.Markdown()

    gr.Markdown("---")
    gr.Markdown("### 💬 Feedback (required)")
    q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
    q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
    contact = gr.Textbox(label="📧 Your email or institution (optional)")
    submit_feedback = gr.Button("✅ Submit Feedback")
    feedback_status = gr.Markdown()

    def classify_with_loading(accession):
        return gr.update(value="⏳ Please wait... processing...", visible=True)

    def classify_main(accession):
        table, summary = summarize_results(accession)
        return table, summary, gr.update(visible=False)

    def reset_fields():
        return "", "", "", "", "", [], "", gr.update(visible=False)

    run_button.click(fn=classify_with_loading, inputs=accession, outputs=status)
    run_button.click(fn=classify_main, inputs=accession, outputs=[output_table, output_summary, status])
    submit_feedback.click(fn=store_feedback_to_google_sheets, inputs=[accession, q1, q2, contact], outputs=feedback_status)
    reset_button.click(fn=reset_fields, inputs=[], outputs=[accession, q1, q2, contact, feedback_status, output_table, output_summary, status])

interface.launch(share=True)
=======
# ✅ Optimized mtDNA MVP UI with Faster Pipeline & Required Feedback

import gradio as gr
from collections import Counter
import csv
import os
from functools import lru_cache
from mtdna_classifier import classify_sample_location
import subprocess
import json
import pandas as pd
import io
import re
import tempfile
import gspread
from oauth2client.service_account import ServiceAccountCredentials

@lru_cache(maxsize=128)
def classify_sample_location_cached(accession):
    return classify_sample_location(accession)

# Count and suggest final location
def compute_final_suggested_location(rows):
    candidates = [
        row.get("Predicted Location", "").strip()
        for row in rows
        if row.get("Predicted Location", "").strip().lower() not in ["", "sample id not found", "unknown"]
    ] + [
        row.get("Inferred Region", "").strip()
        for row in rows
        if row.get("Inferred Region", "").strip().lower() not in ["", "sample id not found", "unknown"]
    ]

    if not candidates:
        return Counter(), ("Unknown", 0)

    counts = Counter(candidates)
    top_location, count = counts.most_common(1)[0]
    return counts, (top_location, count)

# Store feedback (with required fields)
def store_feedback_to_google_sheets(accession, answer1, answer2, contact=""):
    if not answer1.strip() or not answer2.strip():
        return "⚠️ Please answer both questions before submitting."

    try:
        # Load service-account credentials from the Hugging Face secret
        creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)

        # Connect to the Google Sheet
        client = gspread.authorize(creds)
        sheet = client.open("feedback_mtdna").sheet1  # make sure the sheet name matches

        # Append feedback
        sheet.append_row([accession, answer1, answer2, contact])
        return "✅ Feedback submitted. Thank you!"

    except Exception as e:
        return f"❌ Error submitting feedback: {e}"

# Helper function to extract accessions from a file and/or pasted text
def extract_accessions_from_input(file=None, raw_text=""):
    print(f"RAW TEXT RECEIVED: {raw_text}")
    accessions = []
    seen = set()
    if file:
        try:
            if file.name.endswith(".csv"):
                df = pd.read_csv(file)
            elif file.name.endswith(".xlsx"):
                df = pd.read_excel(file)
            else:
                return [], "Unsupported file format. Please upload CSV or Excel."
            for acc in df.iloc[:, 0].dropna().astype(str).str.strip():
                if acc not in seen:
                    accessions.append(acc)
                    seen.add(acc)
        except Exception as e:
            return [], f"Failed to read file: {e}"

    if raw_text:
        text_ids = [s.strip() for s in re.split(r"[\n,;\t]", raw_text) if s.strip()]
        for acc in text_ids:
            if acc not in seen:
                accessions.append(acc)
                seen.add(acc)

    return list(accessions), None
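
# A quick sketch of the parsing rules above ("AB123456" is a made-up ID for illustration):
#   extract_accessions_from_input(raw_text="KU131308, AB123456\nKU131308")
#   # -> (["KU131308", "AB123456"], None)
# IDs are split on newline/comma/semicolon/tab and deduplicated in first-seen order.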

def summarize_results(accession):
    try:
        output, labelAncient_Modern, explain_label = classify_sample_location_cached(accession)
        print(output)
    except Exception as e:
        # Return the same arity as the success path so callers can always unpack four values
        return [], f"Error: {e}", "", ""

    if accession not in output:
        return [], "Accession not found in results.", "", ""

    isolate = next((k for k in output if k != accession), None)
    row_score = []
    rows = []

    for key in [accession, isolate]:
        if key not in output:
            continue
        sample_id_label = f"{key} ({'accession number' if key == accession else 'isolate of accession'})"
        for section, techniques in output[key].items():
            for technique, content in techniques.items():
                source = content.get("source", "")
                predicted = content.get("predicted_location", "")
                haplogroup = content.get("haplogroup", "")
                inferred = content.get("inferred_location", "")
                context = content.get("context_snippet", "")[:300] if "context_snippet" in content else ""

                row = {
                    "Sample ID": sample_id_label,
                    "Technique": technique,
                    "Source": f"The region of haplogroup is inferred\nby using this source: {source}" if technique == "haplogroup" else source,
                    "Predicted Location": "" if technique == "haplogroup" else predicted,
                    "Haplogroup": haplogroup if technique == "haplogroup" else "",
                    "Inferred Region": inferred if technique == "haplogroup" else "",
                    "Context Snippet": context
                }

                row_score.append(row)
                rows.append(list(row.values()))

    location_counts, (final_location, count) = compute_final_suggested_location(row_score)
    summary_lines = ["### 🧭 Location Frequency Summary", "After counting all predicted and inferred locations:\n"]
    summary_lines += [f"- **{loc}**: {cnt} times" for loc, cnt in location_counts.items()]
    summary_lines.append(f"\n**Final Suggested Location:** 🗺️ **{final_location}** (mentioned {count} times)")
    summary = "\n".join(summary_lines)

    return rows, summary, labelAncient_Modern, explain_label

# Save the batch output to an Excel file
def save_to_excel(all_rows, summary_text, flag_text, filename):
    with pd.ExcelWriter(filename) as writer:
        # Save table
        df = pd.DataFrame(all_rows, columns=["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"])
        df.to_excel(writer, sheet_name="Detailed Results", index=False)

        # Save summary
        summary_df = pd.DataFrame({"Summary": [summary_text]})
        summary_df.to_excel(writer, sheet_name="Summary", index=False)

        # Save flag
        flag_df = pd.DataFrame({"Flag": [flag_text]})
        flag_df.to_excel(writer, sheet_name="Ancient_Modern_Flag", index=False)

# Save the batch output to a JSON file
def save_to_json(all_rows, summary_text, flag_text, filename):
    output_dict = {
        "Detailed_Results": all_rows,
        "Summary_Text": summary_text,
        "Ancient_Modern_Flag": flag_text
    }
    with open(filename, "w") as f:
        json.dump(output_dict, f, indent=2)

# Save the batch output to a text file
def save_to_txt(all_rows, summary_text, flag_text, filename):
    with open(filename, "w") as f:
        f.write("=== Detailed Results ===\n")
        for row in all_rows:
            f.write(", ".join(str(x) for x in row) + "\n")

        f.write("\n=== Summary ===\n")
        f.write(summary_text + "\n")

        f.write("\n=== Ancient/Modern Flag ===\n")
        f.write(flag_text + "\n")

def save_batch_output(all_rows, summary_text, flag_text, output_type):
    tmp_dir = tempfile.mkdtemp()

    if output_type == "Excel":
        file_path = f"{tmp_dir}/batch_output.xlsx"
        save_to_excel(all_rows, summary_text, flag_text, file_path)
    elif output_type == "JSON":
        file_path = f"{tmp_dir}/batch_output.json"
        save_to_json(all_rows, summary_text, flag_text, file_path)
    elif output_type == "TXT":
        file_path = f"{tmp_dir}/batch_output.txt"
        save_to_txt(all_rows, summary_text, flag_text, file_path)
    else:
        return None  # invalid option

    return file_path
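
# Example (a sketch; the row, summary, and flag values are placeholders): pick a format
# and get back a temp-file path suitable for a gr.File output.
#   path = save_batch_output([["S1", "t", "src", "Asia", "", "", ""]], "summary", "flag", "JSON")
#   # -> ".../batch_output.json"; returns None for an unrecognized format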

# Run the batch
def summarize_batch(file=None, raw_text=""):
    accessions, error = extract_accessions_from_input(file, raw_text)
    if error:
        # Match the five outputs wired to run_button: table, summary, flag, results_group, status
        return [], f"Error: {error}", "", gr.update(visible=False), gr.update(visible=False)

    all_rows = []
    all_summaries = []
    all_flags = []

    for acc in accessions:
        try:
            rows, summary, label, explain = summarize_results(acc)
            all_rows.extend(rows)
            all_summaries.append(f"**{acc}**\n{summary}")
            all_flags.append(f"**{acc}**: {label}\n_Explanation:_ {explain}")
        except Exception as e:
            all_summaries.append(f"**{acc}**: Failed - {e}")

    summary_text = "\n\n---\n\n".join(all_summaries)
    flag_text = "\n\n".join(all_flags)

    return all_rows, summary_text, flag_text, gr.update(visible=True), gr.update(visible=False)

# Gradio UI
with gr.Blocks() as interface:
    gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")

    inputMode = gr.Radio(choices=["Single Accession", "Batch Input"], value="Single Accession", label="Choose Input Mode")

    with gr.Group() as single_input_group:
        single_accession = gr.Textbox(label="Enter Single Accession (e.g., KU131308)")

    with gr.Group(visible=False) as batch_input_group:
        raw_text = gr.Textbox(label="🧬 Paste Accession Numbers")
        file_upload = gr.File(label="📁 Or Upload CSV/Excel File", file_types=[".csv", ".xlsx"], interactive=True, elem_id="file-upload-box")
        # Make the file box smaller
        gr.HTML('<style>#file-upload-box { width: 200px; }</style>')

    with gr.Row():
        run_button = gr.Button("🔍 Submit and Classify")
        reset_button = gr.Button("🔄 Reset")

    status = gr.Markdown(visible=False)

    with gr.Group(visible=False) as results_group:
        with gr.Row():
            with gr.Column():
                output_summary = gr.Markdown()
            with gr.Column():
                output_flag = gr.Markdown()

        gr.Markdown("---")
        output_table = gr.Dataframe(
            headers=["Sample ID", "Technique", "Source", "Predicted Location", "Haplogroup", "Inferred Region", "Context Snippet"],
            interactive=False,
            row_count=(5, "dynamic")
        )

        with gr.Row():
            output_type = gr.Dropdown(choices=["Excel", "JSON", "TXT"], label="Select Output Format", value="Excel")
            download_button = gr.Button("⬇️ Download Output")
            download_file = gr.File(label="Download File Here")

    gr.Markdown("---")

    gr.Markdown("### 💬 Feedback (required)")
    q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
    q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
    contact = gr.Textbox(label="📧 Your email or institution (optional)")
    submit_feedback = gr.Button("✅ Submit Feedback")
    feedback_status = gr.Markdown()

    # Functions

    def toggle_input_mode(mode):
        if mode == "Single Accession":
            return gr.update(visible=True), gr.update(visible=False)
        else:
            return gr.update(visible=False), gr.update(visible=True)

    def classify_with_loading():
        return gr.update(value="⏳ Please wait... processing...", visible=True)

    def classify_dynamic(single_accession, file, text, mode):
        print(f"MODE: {mode} | RAW TEXT: {text}")
        if mode == "Single Accession":
            return classify_main(single_accession)
        else:
            return summarize_batch(file, text)

    def classify_main(accession):
        table, summary, labelAncient_Modern, explain_label = summarize_results(accession)
        flag_output = f"### 🏺 Ancient/Modern Flag\n**{labelAncient_Modern}**\n\n_Explanation:_ {explain_label}"
        return (
            table,
            summary,
            flag_output,
            gr.update(visible=True),
            gr.update(visible=False)
        )

    def reset_fields():
        return (
            gr.update(value=""),                  # single_accession
            gr.update(value=""),                  # raw_text
            gr.update(value=None),                # file_upload
            gr.update(value="Single Accession"),  # inputMode
            gr.update(value=[], visible=True),    # output_table
            gr.update(value="", visible=True),    # output_summary
            gr.update(value="", visible=True),    # output_flag
            gr.update(visible=False),             # status
            gr.update(visible=False)              # results_group
        )

    inputMode.change(fn=toggle_input_mode, inputs=inputMode, outputs=[single_input_group, batch_input_group])
    run_button.click(fn=classify_with_loading, inputs=[], outputs=status)
    run_button.click(
        fn=classify_dynamic,
        inputs=[single_accession, file_upload, raw_text, inputMode],
        outputs=[output_table, output_summary, output_flag, results_group, status]
    )
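
    # Note on the double run_button.click wiring above: both handlers fire on one click,
    # so the lightweight status update can render while classify_dynamic does the slow
    # work; classify_dynamic's last output then hides the status again (the intent
    # inferred from this code, not documented Gradio ordering guarantees).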
    reset_button.click(
        fn=reset_fields,
        inputs=[],
        outputs=[
            single_accession, raw_text, file_upload, inputMode,
            output_table, output_summary, output_flag,
            status, results_group
        ]
    )

    download_button.click(
        save_batch_output, [output_table, output_summary, output_flag, output_type], download_file
    )
    submit_feedback.click(
        fn=store_feedback_to_google_sheets, inputs=[single_accession, q1, q2, contact], outputs=feedback_status
    )

interface.launch(share=True)
>>>>>>> 597aa7c (WIP: Save local changes which mainly updated appUI before moving to UpdateAppUI)