VyLala committed on
Commit
dcdfdba
·
verified ·
1 Parent(s): 0a5dc24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +792 -792
app.py CHANGED
@@ -1,793 +1,793 @@
1
- import gradio as gr
2
- import mtdna_backend
3
- import json
4
- import data_preprocess, model, pipeline
5
- import os
6
- import hashlib
7
- import threading
8
- # Gradio UI
9
- #stop_flag = gr.State(value=False)
10
class StopFlag:
    """Mutable boolean holder used to signal that a running batch should stop.

    The flag lives in the public ``value`` attribute so that any code holding
    a reference to the same instance can read or flip it.
    """

    def __init__(self):
        # False means "no stop requested".
        self.value = False

    def __repr__(self):
        # Readable form for print-style debugging instead of an object address.
        return f"{type(self).__name__}(value={self.value!r})"


global_stop_flag = StopFlag()  # Shared between run + stop
14
-
15
# Read the static HTML shown in the product tab once, at import time.
with open("offer.html", mode="r", encoding="utf-8") as f:
    pricing_html = f.read()
17
-
18
- with gr.Blocks() as interface:
19
- with gr.Tab("CURIOUS ABOUT THIS PRODUCT?"):
20
- gr.HTML(value=pricing_html)
21
- with gr.Tab("🧬 Classifier"):
22
- gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
23
- #inputMode = gr.Radio(choices=["Single Accession", "Batch Input"], value="Single Accession", label="Choose Input Mode")
24
- user_email = gr.Textbox(label="πŸ“§ Your email (used to track free quota)")
25
- usage_display = gr.Markdown("", visible=False)
26
-
27
- # with gr.Group() as single_input_group:
28
- # single_accession = gr.Textbox(label="Enter Single Accession (e.g., KU131308)")
29
-
30
- # with gr.Group(visible=False) as batch_input_group:
31
- # raw_text = gr.Textbox(label="🧬 Paste Accession Numbers (e.g., MF362736.1,MF362738.1,KU131308,MW291678)")
32
- # resume_file = gr.File(label="πŸ—ƒοΈ Previously saved Excel output (optional)", file_types=[".xlsx"], interactive=True)
33
- # gr.HTML("""<a href="https://drive.google.com/file/d/1t-TFeIsGVu5Jh3CUZS-VE9jQWzNFCs_c/view?usp=sharing" download target="_blank">Download Example CSV Format</a>""")
34
- # gr.HTML("""<a href="https://docs.google.com/spreadsheets/d/1lKqPp17EfHsshJGZRWEpcNOZlGo3F5qU/edit?usp=sharing&ouid=112390323314156876153&rtpof=true&sd=true" download target="_blank">Download Example Excel Format</a>""")
35
- # file_upload = gr.File(label="πŸ“ Or Upload CSV/Excel File", file_types=[".csv", ".xlsx"], interactive=True, elem_id="file-upload-box")
36
- raw_text = gr.Textbox(label="🧚 Input Accession Number(s) (single (KU131308) or comma-separated (e.g., MF362736.1,MF362738.1,KU131308,MW291678))")
37
- #resume_file = gr.File(label="πŸ—ƒοΈ Previously saved Excel output (optional)", file_types=[".xlsx"], interactive=True)
38
- gr.HTML("""<a href="https://docs.google.com/spreadsheets/d/1lKqPp17EfHsshJGZRWEpcNOZlGo3F5qU/edit?usp=sharing" download target="_blank">Download Example Excel Format</a>""")
39
- file_upload = gr.File(label="πŸ“ Or Upload CSV/Excel File", file_types=[".csv", ".xlsx"], interactive=True)
40
-
41
- with gr.Row():
42
- run_button = gr.Button("πŸ” Submit and Classify")
43
- stop_button = gr.Button("❌ Stop Batch", visible=True)
44
- reset_button = gr.Button("πŸ”„ Reset")
45
-
46
- status = gr.Markdown(visible=False)
47
-
48
- with gr.Group(visible=False) as results_group:
49
- # with gr.Accordion("Open to See the Result", open=False) as results:
50
- # with gr.Row():
51
- # output_summary = gr.Markdown(elem_id="output-summary")
52
- # output_flag = gr.Markdown(elem_id="output-flag")
53
-
54
- # gr.Markdown("---")
55
-
56
- with gr.Accordion("Open to See the Output Table", open=False) as table_accordion:
57
- output_table = gr.HTML(render=True)
58
- #with gr.Row():
59
- #output_type = gr.Dropdown(choices=["Excel", "JSON", "TXT"], label="Select Output Format", value="Excel")
60
- #download_button = gr.Button("⬇️ Download Output")
61
- #download_file = gr.File(label="Download File Here",visible=False)
62
- report_button = gr.Button("Report")
63
- report_textbox = gr.Textbox(
64
- label="Describe the issue",
65
- lines=4,
66
- placeholder="e.g. DQ981467: it gives me unknown when I can in fact search it on NCBI \n DQ981467: same reason as above",
67
- visible=False)
68
- submit_report_button = gr.Button("Submit", visible=False)
69
- status_report = gr.Markdown(visible=False)
70
-
71
- download_file = gr.File(label="Download File Here", visible=False, interactive=True)
72
- progress_box = gr.Textbox(label="Live Processing Log", lines=20, interactive=False)
73
-
74
- gr.Markdown("---")
75
-
76
- gr.Markdown("### πŸ’¬ Feedback (required)")
77
- q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
78
- q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
79
- contact = gr.Textbox(label="πŸ“§ Your email or institution (optional)")
80
- submit_feedback = gr.Button("βœ… Submit Feedback")
81
- feedback_status = gr.Markdown()
82
-
83
- # Functions
84
- # def toggle_input_mode(mode):
85
- # if mode == "Single Accession":
86
- # return gr.update(visible=True), gr.update(visible=False)
87
- # else:
88
- # return gr.update(visible=False), gr.update(visible=True)
89
-
90
def classify_with_loading():
    """Return a Gradio update that shows the "please wait" processing message."""
    loading_message = "⏳ Please wait... processing..."
    return gr.update(value=loading_message, visible=True)
92
-
93
- # def classify_dynamic(single_accession, file, text, resume, email, mode):
94
- # if mode == "Single Accession":
95
- # return classify_main(single_accession) + (gr.update(visible=False),)
96
- # else:
97
- # #return summarize_batch(file, text) + (gr.update(visible=False),) # Hide processing message
98
- # return classify_mulAcc(file, text, resume) + (gr.update(visible=False),) # Hide processing message
99
- # Logging helpers defined early to avoid NameError
100
-
101
-
102
- # def classify_dynamic(single_accession, file, text, resume, email, mode):
103
- # if mode == "Single Accession":
104
- # return classify_main(single_accession) + (gr.update(value="", visible=False),)
105
- # else:
106
- # return classify_mulAcc(file, text, resume, email, log_callback=real_time_logger, log_collector=log_collector)
107
-
108
- # for single accession
109
- # def classify_main(accession):
110
- # #table, summary, labelAncient_Modern, explain_label = mtdna_backend.summarize_results(accession)
111
- # table = mtdna_backend.summarize_results(accession)
112
- # #flag_output = f"### 🏺 Ancient/Modern Flag\n**{labelAncient_Modern}**\n\n_Explanation:_ {explain_label}"
113
- # return (
114
- # #table,
115
- # make_html_table(table),
116
- # # summary,
117
- # # flag_output,
118
- # gr.update(visible=True),
119
- # gr.update(visible=False),
120
- # gr.update(visible=False)
121
- # )
122
-
123
- #stop_flag = gr.State(value=False)
124
- #stop_flag = StopFlag()
125
-
126
- # def stop_batch(stop_flag):
127
- # stop_flag.value = True
128
- # return gr.update(value="❌ Stopping...", visible=True), stop_flag
129
def stop_batch():
    """Request that the running batch stop and return a visible status update.

    Only flips the shared ``global_stop_flag``; the batch runner is the one
    that polls the flag and actually stops.
    """
    global_stop_flag.value = True
    stopping_notice = gr.update(value="❌ Stopping...", visible=True)
    return stopping_notice
132
-
133
- # def threaded_batch_runner(file, text, email):
134
- # global_stop_flag.value = False
135
- # log_lines = []
136
-
137
- # def update_log(line):
138
- # log_lines.append(line)
139
- # yield (
140
- # gr.update(visible=False), # output_table (not yet)
141
- # gr.update(visible=False), # results_group
142
- # gr.update(visible=False), # download_file
143
- # gr.update(visible=False), # usage_display
144
- # gr.update(value="⏳ Still processing...", visible=True), # status
145
- # gr.update(value="\n".join(log_lines)) # progress_box
146
- # )
147
-
148
- # # Start a dummy update to say "Starting..."
149
- # yield from update_log("πŸš€ Starting batch processing...")
150
-
151
- # rows, file_path, count, final_log, warning = mtdna_backend.summarize_batch(
152
- # file=file,
153
- # raw_text=text,
154
- # resume_file=None,
155
- # user_email=email,
156
- # stop_flag=global_stop_flag,
157
- # yield_callback=lambda line: (yield from update_log(line))
158
- # )
159
-
160
- # html = make_html_table(rows)
161
- # file_update = gr.update(value=file_path, visible=True) if os.path.exists(file_path) else gr.update(visible=False)
162
- # usage_or_warning_text = f"**{count}** samples used by this email." if email.strip() else warning
163
-
164
- # yield (
165
- # html,
166
- # gr.update(visible=True), # results_group
167
- # file_update, # download_file
168
- # gr.update(value=usage_or_warning_text, visible=True),
169
- # gr.update(value="βœ… Done", visible=True),
170
- # gr.update(value=final_log)
171
- # )
172
-
173
- # def threaded_batch_runner(file=None, text="", email=""):
174
- # print("πŸ“§ EMAIL RECEIVED:", email)
175
- # import tempfile
176
- # from mtdna_backend import (
177
- # extract_accessions_from_input,
178
- # summarize_results,
179
- # save_to_excel,
180
- # hash_user_id,
181
- # increment_usage,
182
- # )
183
- # import os
184
-
185
- # global_stop_flag.value = False # reset stop flag
186
-
187
- # tmp_dir = tempfile.mkdtemp()
188
- # output_file_path = os.path.join(tmp_dir, "batch_output_live.xlsx")
189
- # limited_acc = 50 + (10 if email.strip() else 0)
190
-
191
- # # Step 1: Parse input
192
- # accessions, error = extract_accessions_from_input(file, text)
193
- # print(accessions)
194
- # if error:
195
- # yield (
196
- # "", # output_table
197
- # gr.update(visible=False), # results_group
198
- # gr.update(visible=False), # download_file
199
- # "", # usage_display
200
- # "❌ Error", # status
201
- # str(error) # progress_box
202
- # )
203
- # return
204
-
205
- # total = len(accessions)
206
- # if total > limited_acc:
207
- # accessions = accessions[:limited_acc]
208
- # warning = f"⚠️ Only processing first {limited_acc} accessions."
209
- # else:
210
- # warning = f"βœ… All {total} accessions will be processed."
211
-
212
- # all_rows = []
213
- # processed_accessions = 0 # βœ… tracks how many accessions were processed
214
- # email_tracked = False
215
- # log_lines = []
216
-
217
- # # Step 2: Loop through accessions
218
- # for i, acc in enumerate(accessions):
219
- # if global_stop_flag.value:
220
- # log_lines.append(f"πŸ›‘ Stopped at {acc} ({i+1}/{total})")
221
- # usage_text = ""
222
- # if email.strip() and not email_tracked:
223
- # # user_hash = hash_user_id(email)
224
- # # usage_count = increment_usage(user_hash, len(all_rows))
225
- # print("print(processed_accessions at stop) ",processed_accessions)
226
- # usage_count = increment_usage(email, processed_accessions)
227
- # email_tracked = True
228
- # usage_text = f"**{usage_count}** samples used by this email. Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
229
- # else:
230
- # usage_text = f"The limited accession is 50. The user has used {processed_accessions}, and only {50-processed_accessions} left."
231
- # yield (
232
- # make_html_table(all_rows),
233
- # gr.update(visible=True),
234
- # gr.update(value=output_file_path, visible=True),
235
- # gr.update(value=usage_text, visible=True),
236
- # "πŸ›‘ Stopped",
237
- # "\n".join(log_lines)
238
- # )
239
- # return
240
-
241
- # log_lines.append(f"[{i+1}/{total}] Processing {acc}")
242
- # yield (
243
- # make_html_table(all_rows),
244
- # gr.update(visible=True),
245
- # gr.update(visible=False),
246
- # "",
247
- # "⏳ Processing...",
248
- # "\n".join(log_lines)
249
- # )
250
-
251
- # try:
252
- # print(acc)
253
- # rows = summarize_results(acc)
254
- # all_rows.extend(rows)
255
- # processed_accessions += 1 # βœ… count only successful accessions
256
- # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
257
- # log_lines.append(f"βœ… Processed {acc} ({i+1}/{total})")
258
- # except Exception as e:
259
- # log_lines.append(f"❌ Failed to process {acc}: {e}")
260
-
261
- # yield (
262
- # make_html_table(all_rows),
263
- # gr.update(visible=True),
264
- # gr.update(visible=False),
265
- # "",
266
- # "⏳ Processing...",
267
- # "\n".join(log_lines)
268
- # )
269
-
270
- # # Final update
271
- # usage_text = ""
272
-
273
- # if email.strip() and not email_tracked:
274
- # # user_hash = hash_user_id(email)
275
- # # usage_count = increment_usage(user_hash, len(all_rows))
276
- # print("print(processed_accessions final) ",processed_accessions)
277
- # usage_count = increment_usage(email, processed_accessions)
278
- # usage_text = f"**{usage_count}** samples used by this email. Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
279
- # elif not email.strip():
280
- # usage_text = f"The limited accession is 50. The user has used {processed_accessions}, and only {50-processed_accessions} left."
281
- # yield (
282
- # make_html_table(all_rows),
283
- # gr.update(visible=True),
284
- # gr.update(value=output_file_path, visible=True),
285
- # gr.update(value=usage_text, visible=True),
286
- # "βœ… Done",
287
- # "\n".join(log_lines)
288
- # )
289
-
290
def threaded_batch_runner(file=None, text="", email=""):
    """Stream classification results for a batch of accessions to the UI.

    Generator wired to the run button. Every yielded 6-tuple maps, in order,
    onto that handler's outputs: output_table, results_group, download_file,
    usage_display, status, progress_box.

    Args:
        file: Uploaded CSV/Excel file (or None); forwarded unchanged to
            ``extract_accessions_from_input``.
        text: Raw accession text; forwarded the same way.
        email: User email. When non-blank, quota is read and recorded through
            ``increment_usage`` and results are saved to an Excel file that is
            offered for download. When blank (or None), at most 30 accessions
            are processed and no file is produced.

    Yields:
        tuple: One UI frame per progress step, ending with a "Done",
        "Stopped", "Error" or quota frame.
    """
    import os
    import tempfile

    from mtdna_backend import (
        extract_accessions_from_input,
        increment_usage,
        save_to_excel,
        summarize_results,
    )

    print("📧 EMAIL RECEIVED:", repr(email))

    global_stop_flag.value = False  # a new run must not inherit an old stop request

    email = email or ""  # tolerate None coming from the UI
    has_email = bool(email.strip())
    anonymous_limit = 30  # accessions allowed per run without an email

    all_rows = []
    log_lines = []
    processed_accessions = 0  # counts only accessions that were summarized successfully
    output_file_path = None  # stays None for anonymous runs (no download offered)

    def joined_log():
        return "\n".join(log_lines)

    def processing_frame():
        # Intermediate frame: table so far, download hidden, live log refreshed.
        return (
            make_html_table(all_rows),
            gr.update(visible=True),
            gr.update(visible=False),
            "",
            "⏳ Processing...",
            joined_log(),
        )

    def download_update():
        # Only expose the file if it was really written; the path is created
        # up front but nothing exists on disk until one accession succeeds.
        file_ready = bool(output_file_path) and os.path.exists(output_file_path)
        return gr.update(value=output_file_path if file_ready else None, visible=file_ready)

    def closing_frame(status_text, stage):
        # Last frame of a run (finished or stopped): record usage exactly once
        # for identified users and build the usage summary line.
        if has_email:
            print(f"🧪 increment_usage at {stage}: {email=} {processed_accessions=}")
            usage_count, max_allowed = increment_usage(email, processed_accessions)
            usage_text = f"**{usage_count}**/{max_allowed} allowed samples used by this email."
        else:
            usage_text = (
                f"The limited accession is {anonymous_limit}. "
                f"The user has used {processed_accessions}, "
                f"and only {anonymous_limit - processed_accessions} left."
            )
        return (
            make_html_table(all_rows),
            gr.update(visible=True),
            download_update(),
            gr.update(value=usage_text, visible=True),
            status_text,
            joined_log(),
        )

    if has_email:
        # Called with 0 processed accessions before any work is done;
        # presumably this only reads the current usage - confirm in mtdna_backend.
        usage_count, max_allowed = increment_usage(email, processed_accessions)
        if int(usage_count) >= int(max_allowed):
            quota_message = "❌ You have reached your quota. Please contact us to unlock more."
            log_lines.append(quota_message)

            # Minimal blank yield to trigger UI rendering
            yield (
                make_html_table([]),
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(value="", visible=True),
                "⛔️ Quota limit",
                "⛔️ Quota limit",
            )

            # Actual warning frame
            yield (
                make_html_table([]),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(value=quota_message, visible=True),
                "❌ Quota Exceeded",
                joined_log(),
            )
            return
        # Same int() coercion as the quota check above, so both agree.
        limited_acc = int(max_allowed) - int(usage_count)
        # The temp dir is only needed when a downloadable file will be produced.
        output_file_path = os.path.join(tempfile.mkdtemp(), "batch_output_live.xlsx")
    else:
        log_lines.append(
            "📥 Provide your email to receive a downloadable Excel report and get 20 more free queries."
        )
        limited_acc = anonymous_limit

    # Step 1: Parse input
    accessions, error = extract_accessions_from_input(file, text)
    print("🧪 Accessions received:", accessions)
    if error:
        yield (
            "",  # output_table
            gr.update(visible=False),  # results_group
            gr.update(visible=False),  # download_file
            "",  # usage_display
            "❌ Error",  # status
            str(error),  # progress_box
        )
        return

    requested = len(accessions)
    if requested > limited_acc:
        accessions = accessions[:limited_acc]
        warning = f"⚠️ Only processing first {limited_acc} accessions."
    else:
        warning = f"✅ All {requested} accessions will be processed."
    # Surface the notice in the live log so a truncated batch is not silent.
    log_lines.append(warning)
    # Progress counters refer to what will actually be processed.
    total = len(accessions)

    # Step 2: Loop through accessions
    for i, acc in enumerate(accessions):
        if global_stop_flag.value:
            log_lines.append(f"🛑 Stopped at {acc} ({i+1}/{total})")
            yield closing_frame("🛑 Stopped", "STOP")
            return

        log_lines.append(f"[{i+1}/{total}] Processing {acc}")
        yield processing_frame()

        try:
            print("📄 Processing accession:", acc)
            rows = summarize_results(acc)
            all_rows.extend(rows)
            processed_accessions += 1  # only count success
            if has_email:
                # Rewrite the whole file after every success so a stopped run
                # still leaves a usable partial report.
                save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
            log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
        except Exception as e:
            # Best effort: one failing accession must not abort the batch.
            log_lines.append(f"❌ Failed to process {acc}: {e}")

        yield processing_frame()

    # Step 3: Final usage update
    yield closing_frame("✅ Done", "END")
471
-
472
- # SUBMIT REPORT UI
473
- # 1. Google Sheets setup
474
def get_worksheet(sheet_name="Report"):
    """Return the first worksheet of the Google Sheet named *sheet_name*.

    Credentials are read as JSON from the ``GCP_CREDS_JSON`` environment
    variable. Any failure (variable missing, invalid JSON, client library
    not importable, sheet not reachable) is printed and reported by
    returning ``None`` instead of raising.

    Args:
        sheet_name: Title of the spreadsheet to open.

    Returns:
        The spreadsheet's ``sheet1`` worksheet, or ``None`` on any failure.
    """
    import json
    import os

    raw_creds = os.environ.get("GCP_CREDS_JSON")
    if not raw_creds:
        print(f"❌ Error loading Google Sheet '{sheet_name}':", "GCP_CREDS_JSON is not set")
        return None

    try:
        creds_dict = json.loads(raw_creds)

        # Imported inside the try so a missing client library follows the
        # same "print and return None" contract as every other failure.
        import gspread
        from oauth2client.service_account import ServiceAccountCredentials

        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
        client = gspread.authorize(creds)
        return client.open(sheet_name).sheet1
    except Exception as e:
        # Deliberate catch-all: callers only distinguish "sheet" from "no sheet".
        print(f"❌ Error loading Google Sheet '{sheet_name}':", e)
        return None
488
-
489
- # 2. Submit function to send report to the Google Sheet
490
def submit_report(report_text, user_email=""):
    """Append user-reported issues to the report sheet.

    Each line of *report_text* shaped like ``ACCESSION: message`` becomes one
    row ``[accession, message, user_email]``; lines without a colon are
    ignored.

    Args:
        report_text: Free text typed by the user, one issue per line.
        user_email: Optional email stored alongside each row.

    Returns:
        A status message for the UI: success, a notice that nothing could be
        parsed, or an error description. Never raises.
    """
    user = (user_email or "").strip()

    # Parse first so an empty or malformed report never claims success and
    # never needs the sheet at all.
    entries = []
    for line in (report_text or "").strip().split("\n"):
        if ":" not in line:
            continue
        accession, message = line.split(":", 1)
        entries.append([accession.strip(), message.strip(), user])

    if not entries:
        return "⚠️ Nothing to submit: write each issue as 'ACCESSION: message'."

    try:
        sheet = get_worksheet()
        if sheet is None:
            # get_worksheet signals failure with None; say so explicitly
            # instead of surfacing an AttributeError on None.
            return "❌ Error submitting report: the report sheet is unavailable."
        for entry in entries:
            sheet.append_row(entry)
        return "✅ Report submitted successfully!"
    except Exception as e:
        return f"❌ Error submitting report: {str(e)}"
505
def show_report_ui():
    """Reveal the report textbox and its submit button; hide the report status.

    The three updates are returned in the order report_textbox,
    submit_report_button, status_report.
    """
    reveal_textbox = gr.update(visible=True)
    reveal_submit = gr.update(visible=True)
    hide_status = gr.update(visible=False)
    return reveal_textbox, reveal_submit, hide_status
507
-
508
def handle_submission(text, user_email):
    """Submit the report, show its outcome, and hide the report form again.

    The three updates are returned in the order status_report,
    report_textbox, submit_report_button.
    """
    outcome = submit_report(text, user_email)
    show_outcome = gr.update(value=outcome, visible=True)
    hide_textbox = gr.update(visible=False)
    hide_submit = gr.update(visible=False)
    return show_outcome, hide_textbox, hide_submit
511
- # def threaded_batch_runner(file=None, text="", email=""):
512
- # global_stop_flag.value = False
513
-
514
- # # Dummy test output that matches expected schema
515
- # return (
516
- # "<div>βœ… Dummy output table</div>", # HTML string
517
- # gr.update(visible=True), # Group visibility
518
- # gr.update(visible=False), # Download file
519
- # "**0** samples used.", # Markdown
520
- # "βœ… Done", # Status string
521
- # "Processing finished." # Progress string
522
- # )
523
-
524
-
525
- # def classify_mulAcc(file, text, resume, email, log_callback=None, log_collector=None):
526
- # stop_flag.value = False
527
- # return threaded_batch_runner(file, text, resume, email, status, stop_flag, log_callback=log_callback, log_collector=log_collector)
528
-
529
-
530
def make_html_table(rows):
    """Render result rows as a scrollable HTML table.

    Args:
        rows: Iterable of rows; each row is a sequence of cell values in the
            column order of ``headers`` below. Extra trailing cells are
            rendered with the default cell style.

    Returns:
        str: A complete HTML fragment (wrapper divs, table, header row and one
        ``<tr>`` per row). With no rows, only the header row is produced.
    """
    from html import escape

    headers = ["Sample ID", "Predicted Country", "Country Explanation", "Predicted Sample Type", "Sample Type Explanation", "Sources", "Time cost"]
    wrapping_columns = {"Country Explanation", "Sample Type Explanation"}
    single_line_columns = {"Sample ID", "Predicted Country", "Predicted Sample Type", "Time cost"}
    base_style = "padding: 10px; border: 1px solid #555; vertical-align: top;"

    parts = [
        "<div style='overflow-x: auto; padding: 10px;'>",
        "<div style='max-height: 400px; overflow-y: auto; border: 1px solid #ccc; border-radius: 8px;'>",
        "<table style='width:100%; border-collapse: collapse; table-layout: auto; font-size: 14px; color: inherit; background-color: inherit;'>",
        # Open the header row explicitly so the closing </tr></thead> below is balanced.
        "<thead><tr>",
    ]
    parts.extend(
        f"<th style='padding: 10px; border: 1px solid #555; text-align: left; white-space: nowrap;'>{h}</th>"
        for h in headers
    )
    parts.append("</tr></thead><tbody>")

    for row in rows:
        parts.append("<tr>")
        for i, col in enumerate(row):
            # Rows longer than the header list get an unnamed, default-styled cell.
            header = headers[i] if i < len(headers) else ""
            style = base_style

            # Long explanations wrap; short identifier-like columns stay on one line.
            if header in wrapping_columns:
                style += " max-width: 400px; word-wrap: break-word; white-space: normal;"
            elif header in single_line_columns:
                style += " white-space: nowrap; text-overflow: ellipsis; max-width: 200px; overflow: hidden;"

            if header == "Sources" and isinstance(col, str):
                items = []
                for url in col.strip().split("\n"):
                    url = url.strip()
                    if not url:
                        continue
                    safe_url = escape(url, quote=True)
                    # Only http(s) entries become links; anything else is shown
                    # as plain text so a crafted scheme cannot become an href.
                    if url.lower().startswith(("http://", "https://")):
                        items.append(
                            f"<a href='{safe_url}' target='_blank' rel='noopener noreferrer' "
                            f"style='color: #4ea1f3; text-decoration: underline;'>{safe_url}</a>"
                        )
                    else:
                        items.append(safe_url)
                cell = "- " + "<br>- ".join(items) if items else ""
            else:
                # Cell text is treated as plain text: escape it, then keep its line breaks.
                # NOTE(review): assumes the backend never sends intentional HTML in cells - confirm.
                cell = escape(str(col)).replace("\n", "<br>")

            parts.append(f"<td style='{style}'>{cell}</td>")
        parts.append("</tr>")

    parts.append("</tbody></table></div></div>")
    return "".join(parts)
589
-
590
-
591
- # def reset_fields():
592
- # global_stop_flag.value = False # πŸ’‘ Add this to reset the flag
593
- # return (
594
- # #gr.update(value=""), # single_accession
595
- # gr.update(value=""), # raw_text
596
- # gr.update(value=None), # file_upload
597
- # #gr.update(value=None), # resume_file
598
- # #gr.update(value="Single Accession"), # inputMode
599
- # gr.update(value=[], visible=True), # output_table
600
- # # gr.update(value="", visible=True), # output_summary
601
- # # gr.update(value="", visible=True), # output_flag
602
- # gr.update(visible=False), # status
603
- # gr.update(visible=False), # results_group
604
- # gr.update(value="", visible=False), # usage_display
605
- # gr.update(value="", visible=False), # progress_box
606
- # )
607
def reset_fields():
    """Clear inputs and outputs and return the UI to its initial state.

    Also clears the shared stop flag so the next run is not cancelled by a
    stale stop request. The returned updates are ordered to match the reset
    button's outputs: raw_text, file_upload, output_table, status,
    results_group, usage_display, progress_box, report_textbox,
    submit_report_button, status_report.
    """
    global_stop_flag.value = False  # Reset the stop flag

    return (
        gr.update(value=""),  # raw_text
        gr.update(value=None),  # file_upload
        # output_table is a gr.HTML component, so it is cleared with an empty
        # string rather than a list.
        gr.update(value="", visible=True),  # output_table
        gr.update(value="", visible=True),  # status - reset and make visible again
        gr.update(visible=False),  # results_group
        gr.update(value="", visible=True),  # usage_display - reset and make visible again
        gr.update(value="", visible=True),  # progress_box - reset and keep visible
        # report-related reset below
        gr.update(value="", visible=False),  # report_textbox
        gr.update(visible=False),  # submit_report_button
        gr.update(value="", visible=False),  # status_report
    )
623
- #inputMode.change(fn=toggle_input_mode, inputs=inputMode, outputs=[single_input_group, batch_input_group])
624
- #run_button.click(fn=classify_with_loading, inputs=[], outputs=[status])
625
- # run_button.click(
626
- # fn=classify_dynamic,
627
- # inputs=[single_accession, file_upload, raw_text, resume_file,user_email,inputMode],
628
- # outputs=[output_table,
629
- # #output_summary, output_flag,
630
- # results_group, download_file, usage_display,status, progress_box]
631
- # )
632
-
633
- # run_button.click(
634
- # fn=threaded_batch_runner,
635
- # #inputs=[file_upload, raw_text, resume_file, user_email],
636
- # inputs=[file_upload, raw_text, user_email],
637
- # outputs=[output_table, results_group, download_file, usage_display, status, progress_box]
638
- # )
639
- # run_button.click(
640
- # fn=threaded_batch_runner,
641
- # inputs=[file_upload, raw_text, user_email],
642
- # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
643
- # every=0.5 # <-- this tells Gradio to expect streaming
644
- # )
645
- # output_table = gr.HTML()
646
- # results_group = gr.Group(visible=False)
647
- # download_file = gr.File(visible=False)
648
- # usage_display = gr.Markdown(visible=False)
649
- # status = gr.Markdown(visible=False)
650
- # progress_box = gr.Textbox(visible=False)
651
-
652
- # run_button.click(
653
- # fn=threaded_batch_runner,
654
- # inputs=[file_upload, raw_text, user_email],
655
- # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
656
- # every=0.5, # streaming enabled
657
- # show_progress="full"
658
- # )
659
-
660
- # interface.stream(
661
- # fn=threaded_batch_runner,
662
- # inputs=[file_upload, raw_text, user_email],
663
- # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
664
- # trigger=run_button,
665
- # every=0.5,
666
- # show_progress="full",
667
- # )
668
- interface.queue() # No arguments here!
669
-
670
- run_button.click(
671
- fn=threaded_batch_runner,
672
- inputs=[file_upload, raw_text, user_email],
673
- outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
674
- concurrency_limit=1, # βœ… correct in Gradio 5.x
675
- queue=True, # βœ… ensure the queue is used
676
- #every=0.5
677
- )
678
-
679
-
680
-
681
-
682
- stop_button.click(fn=stop_batch, inputs=[], outputs=[status])
683
-
684
- # reset_button.click(
685
- # #fn=reset_fields,
686
- # fn=lambda: (
687
- # gr.update(value=""), gr.update(value=""), gr.update(value=None), gr.update(value=None), gr.update(value="Single Accession"),
688
- # gr.update(value=[], visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False), gr.update(value="", visible=False)
689
- # ),
690
- # inputs=[],
691
- # outputs=[
692
- # single_accession, raw_text, file_upload, resume_file,inputMode,
693
- # output_table,# output_summary, output_flag,
694
- # status, results_group, usage_display, progress_box
695
- # ]
696
- # )
697
- #stop_button.click(fn=lambda sf: (gr.update(value="❌ Stopping...", visible=True), setattr(sf, "value", True) or sf), inputs=[gr.State(stop_flag)], outputs=[status, gr.State(stop_flag)])
698
-
699
- reset_button.click(
700
- fn=reset_fields,
701
- inputs=[],
702
- #outputs=[raw_text, file_upload, resume_file, output_table, status, results_group, usage_display, progress_box]
703
- outputs=[raw_text, file_upload, output_table, status, results_group, usage_display, progress_box,
704
- report_textbox,
705
- submit_report_button,
706
- status_report]
707
- )
708
-
709
- # download_button.click(
710
- # fn=mtdna_backend.save_batch_output,
711
- # #inputs=[output_table, output_summary, output_flag, output_type],
712
- # inputs=[output_table, output_type],
713
- # outputs=[download_file])
714
-
715
- # submit_feedback.click(
716
- # fn=mtdna_backend.store_feedback_to_google_sheets,
717
- # inputs=[single_accession, q1, q2, contact], outputs=feedback_status
718
- # )
719
- report_button.click(fn=show_report_ui, outputs=[report_textbox, submit_report_button, status_report])
720
- submit_report_button.click(fn=handle_submission, inputs=[report_textbox, user_email], outputs=[status_report, report_textbox, submit_report_button])
721
-
722
- submit_feedback.click(
723
- fn=mtdna_backend.store_feedback_to_google_sheets,
724
- inputs=[raw_text, q1, q2, contact],
725
- outputs=[feedback_status]
726
- )
727
- gr.HTML("""
728
- <style>
729
- body, html {
730
- background-color: #121212 !important;
731
- color: #ffffff !important;
732
- }
733
-
734
- .gradio-container, .gr-block, .gr-box, textarea, input, select, .prose, .prose * {
735
- background-color: #1e1e1e !important;
736
- color: #ffffff !important;
737
- border-color: #333 !important;
738
- }
739
-
740
- textarea::placeholder,
741
- input::placeholder {
742
- color: #aaa !important;
743
- }
744
-
745
- button {
746
- background-color: #2d2d2d !important;
747
- color: #fff !important;
748
- border: 1px solid #444 !important;
749
- }
750
-
751
- a {
752
- color: #4ea1f3 !important;
753
- }
754
- </style>
755
- """)
756
-
757
- # # Custom CSS styles
758
- # gr.HTML("""
759
- # <style>
760
- # /* Ensures both sections are equally spaced with the same background size */
761
- # #output-summary, #output-flag {
762
- # background-color: #f0f4f8; /* Light Grey for both */
763
- # padding: 20px;
764
- # border-radius: 10px;
765
- # margin-top: 10px;
766
- # width: 100%; /* Ensure full width */
767
- # min-height: 150px; /* Ensures both have a minimum height */
768
- # box-sizing: border-box; /* Prevents padding from increasing size */
769
- # display: flex;
770
- # flex-direction: column;
771
- # justify-content: space-between;
772
- # }
773
-
774
- # /* Specific background colors */
775
- # #output-summary {
776
- # background-color: #434a4b;
777
- # }
778
-
779
- # #output-flag {
780
- # background-color: #141616;
781
- # }
782
-
783
- # /* Ensuring they are in a row and evenly spaced */
784
- # .gradio-row {
785
- # display: flex;
786
- # justify-content: space-between;
787
- # width: 100%;
788
- # }
789
- # </style>
790
- # """)
791
-
792
-
793
  interface.launch(share=True,debug=True)
 
1
+ import gradio as gr
2
+ import mtdna_backend
3
+ import json
4
+ import data_preprocess, model, pipeline
5
+ import os
6
+ import hashlib
7
+ import threading
8
+ # Gradio UI
9
+ #stop_flag = gr.State(value=False)
10
class StopFlag:
    """Mutable boolean holder used to signal that a running batch should stop.

    A single instance is shared between the handler that runs the batch and
    the handler behind the stop button; both read and write ``value``.
    """

    def __init__(self):
        # False means "keep going"; a handler sets this to True to request a stop.
        self.value: bool = False
13
+ global_stop_flag = StopFlag() # Shared between run + stop
14
+
15
+ with open("offer.html", "r", encoding="utf-8") as f:
16
+ pricing_html = f.read()
17
+
18
+ with gr.Blocks() as interface:
19
+ with gr.Tab("CURIOUS ABOUT THIS PRODUCT?"):
20
+ gr.HTML(value=pricing_html)
21
+ with gr.Tab("🧬 Classifier"):
22
+ gr.Markdown("# 🧬 mtDNA Location Classifier (MVP)")
23
+ #inputMode = gr.Radio(choices=["Single Accession", "Batch Input"], value="Single Accession", label="Choose Input Mode")
24
+ user_email = gr.Textbox(label="πŸ“§ Your email (used to track free quota)")
25
+ usage_display = gr.Markdown("", visible=False)
26
+
27
+ # with gr.Group() as single_input_group:
28
+ # single_accession = gr.Textbox(label="Enter Single Accession (e.g., KU131308)")
29
+
30
+ # with gr.Group(visible=False) as batch_input_group:
31
+ # raw_text = gr.Textbox(label="🧬 Paste Accession Numbers (e.g., MF362736.1,MF362738.1,KU131308,MW291678)")
32
+ # resume_file = gr.File(label="πŸ—ƒοΈ Previously saved Excel output (optional)", file_types=[".xlsx"], interactive=True)
33
+ # gr.HTML("""<a href="https://drive.google.com/file/d/1t-TFeIsGVu5Jh3CUZS-VE9jQWzNFCs_c/view?usp=sharing" download target="_blank">Download Example CSV Format</a>""")
34
+ # gr.HTML("""<a href="https://docs.google.com/spreadsheets/d/1lKqPp17EfHsshJGZRWEpcNOZlGo3F5qU/edit?usp=sharing&ouid=112390323314156876153&rtpof=true&sd=true" download target="_blank">Download Example Excel Format</a>""")
35
+ # file_upload = gr.File(label="πŸ“ Or Upload CSV/Excel File", file_types=[".csv", ".xlsx"], interactive=True, elem_id="file-upload-box")
36
+ raw_text = gr.Textbox(label="🧚 Input Accession Number(s) (single (KU131308) or comma-separated (e.g., MF362736.1,MF362738.1,KU131308,MW291678))")
37
+ #resume_file = gr.File(label="πŸ—ƒοΈ Previously saved Excel output (optional)", file_types=[".xlsx"], interactive=True)
38
+ gr.HTML("""<a href="https://docs.google.com/spreadsheets/d/1lKqPp17EfHsshJGZRWEpcNOZlGo3F5qU/edit?usp=sharing" download target="_blank">Download Example Excel Format</a>""")
39
+ file_upload = gr.File(label="πŸ“ Or Upload CSV/Excel File", file_types=[".csv", ".xlsx"], interactive=True)
40
+
41
+ with gr.Row():
42
+ run_button = gr.Button("πŸ” Submit and Classify")
43
+ stop_button = gr.Button("❌ Stop Batch", visible=True)
44
+ reset_button = gr.Button("πŸ”„ Reset")
45
+
46
+ status = gr.Markdown(visible=False)
47
+
48
+ with gr.Group(visible=False) as results_group:
49
+ # with gr.Accordion("Open to See the Result", open=False) as results:
50
+ # with gr.Row():
51
+ # output_summary = gr.Markdown(elem_id="output-summary")
52
+ # output_flag = gr.Markdown(elem_id="output-flag")
53
+
54
+ # gr.Markdown("---")
55
+
56
+ with gr.Accordion("Open to See the Output Table", open=False) as table_accordion:
57
+ output_table = gr.HTML(render=True)
58
+ #with gr.Row():
59
+ #output_type = gr.Dropdown(choices=["Excel", "JSON", "TXT"], label="Select Output Format", value="Excel")
60
+ #download_button = gr.Button("⬇️ Download Output")
61
+ #download_file = gr.File(label="Download File Here",visible=False)
62
+ report_button = gr.Button("Report")
63
+ report_textbox = gr.Textbox(
64
+ label="Describe the issue",
65
+ lines=4,
66
+ placeholder="e.g. DQ981467: it gives me unknown when I can in fact search it on NCBI \n DQ981467: cannot find the result in batch output when the live processing did show already processed",
67
+ visible=False)
68
+ submit_report_button = gr.Button("Submit", visible=False)
69
+ status_report = gr.Markdown(visible=False)
70
+
71
+ download_file = gr.File(label="Download File Here", visible=False, interactive=True)
72
+ progress_box = gr.Textbox(label="Live Processing Log", lines=20, interactive=False)
73
+
74
+ gr.Markdown("---")
75
+
76
+ gr.Markdown("### πŸ’¬ Feedback (required)")
77
+ q1 = gr.Textbox(label="1️⃣ Was the inferred location accurate or helpful? Please explain.")
78
+ q2 = gr.Textbox(label="2️⃣ What would improve your experience with this tool?")
79
+ contact = gr.Textbox(label="πŸ“§ Your email or institution (optional)")
80
+ submit_feedback = gr.Button("βœ… Submit Feedback")
81
+ feedback_status = gr.Markdown()
82
+
83
+ # Functions
84
+ # def toggle_input_mode(mode):
85
+ # if mode == "Single Accession":
86
+ # return gr.update(visible=True), gr.update(visible=False)
87
+ # else:
88
+ # return gr.update(visible=False), gr.update(visible=True)
89
+
90
def classify_with_loading():
    """Return a component update that shows the "please wait" message."""
    loading_banner = gr.update(
        value="⏳ Please wait... processing...",
        visible=True,
    )
    return loading_banner
92
+
93
+ # def classify_dynamic(single_accession, file, text, resume, email, mode):
94
+ # if mode == "Single Accession":
95
+ # return classify_main(single_accession) + (gr.update(visible=False),)
96
+ # else:
97
+ # #return summarize_batch(file, text) + (gr.update(visible=False),) # Hide processing message
98
+ # return classify_mulAcc(file, text, resume) + (gr.update(visible=False),) # Hide processing message
99
+ # Logging helpers defined early to avoid NameError
100
+
101
+
102
+ # def classify_dynamic(single_accession, file, text, resume, email, mode):
103
+ # if mode == "Single Accession":
104
+ # return classify_main(single_accession) + (gr.update(value="", visible=False),)
105
+ # else:
106
+ # return classify_mulAcc(file, text, resume, email, log_callback=real_time_logger, log_collector=log_collector)
107
+
108
+ # for single accession
109
+ # def classify_main(accession):
110
+ # #table, summary, labelAncient_Modern, explain_label = mtdna_backend.summarize_results(accession)
111
+ # table = mtdna_backend.summarize_results(accession)
112
+ # #flag_output = f"### 🏺 Ancient/Modern Flag\n**{labelAncient_Modern}**\n\n_Explanation:_ {explain_label}"
113
+ # return (
114
+ # #table,
115
+ # make_html_table(table),
116
+ # # summary,
117
+ # # flag_output,
118
+ # gr.update(visible=True),
119
+ # gr.update(visible=False),
120
+ # gr.update(visible=False)
121
+ # )
122
+
123
+ #stop_flag = gr.State(value=False)
124
+ #stop_flag = StopFlag()
125
+
126
+ # def stop_batch(stop_flag):
127
+ # stop_flag.value = True
128
+ # return gr.update(value="❌ Stopping...", visible=True), stop_flag
129
def stop_batch():
    """Request that the running batch stop and show a "stopping" notice.

    Sets the shared ``global_stop_flag``; the batch runner checks it at the
    top of each accession iteration.
    """
    global_stop_flag.value = True
    stopping_notice = gr.update(value="❌ Stopping...", visible=True)
    return stopping_notice
132
+
133
+ # def threaded_batch_runner(file, text, email):
134
+ # global_stop_flag.value = False
135
+ # log_lines = []
136
+
137
+ # def update_log(line):
138
+ # log_lines.append(line)
139
+ # yield (
140
+ # gr.update(visible=False), # output_table (not yet)
141
+ # gr.update(visible=False), # results_group
142
+ # gr.update(visible=False), # download_file
143
+ # gr.update(visible=False), # usage_display
144
+ # gr.update(value="⏳ Still processing...", visible=True), # status
145
+ # gr.update(value="\n".join(log_lines)) # progress_box
146
+ # )
147
+
148
+ # # Start a dummy update to say "Starting..."
149
+ # yield from update_log("πŸš€ Starting batch processing...")
150
+
151
+ # rows, file_path, count, final_log, warning = mtdna_backend.summarize_batch(
152
+ # file=file,
153
+ # raw_text=text,
154
+ # resume_file=None,
155
+ # user_email=email,
156
+ # stop_flag=global_stop_flag,
157
+ # yield_callback=lambda line: (yield from update_log(line))
158
+ # )
159
+
160
+ # html = make_html_table(rows)
161
+ # file_update = gr.update(value=file_path, visible=True) if os.path.exists(file_path) else gr.update(visible=False)
162
+ # usage_or_warning_text = f"**{count}** samples used by this email." if email.strip() else warning
163
+
164
+ # yield (
165
+ # html,
166
+ # gr.update(visible=True), # results_group
167
+ # file_update, # download_file
168
+ # gr.update(value=usage_or_warning_text, visible=True),
169
+ # gr.update(value="βœ… Done", visible=True),
170
+ # gr.update(value=final_log)
171
+ # )
172
+
173
+ # def threaded_batch_runner(file=None, text="", email=""):
174
+ # print("πŸ“§ EMAIL RECEIVED:", email)
175
+ # import tempfile
176
+ # from mtdna_backend import (
177
+ # extract_accessions_from_input,
178
+ # summarize_results,
179
+ # save_to_excel,
180
+ # hash_user_id,
181
+ # increment_usage,
182
+ # )
183
+ # import os
184
+
185
+ # global_stop_flag.value = False # reset stop flag
186
+
187
+ # tmp_dir = tempfile.mkdtemp()
188
+ # output_file_path = os.path.join(tmp_dir, "batch_output_live.xlsx")
189
+ # limited_acc = 50 + (10 if email.strip() else 0)
190
+
191
+ # # Step 1: Parse input
192
+ # accessions, error = extract_accessions_from_input(file, text)
193
+ # print(accessions)
194
+ # if error:
195
+ # yield (
196
+ # "", # output_table
197
+ # gr.update(visible=False), # results_group
198
+ # gr.update(visible=False), # download_file
199
+ # "", # usage_display
200
+ # "❌ Error", # status
201
+ # str(error) # progress_box
202
+ # )
203
+ # return
204
+
205
+ # total = len(accessions)
206
+ # if total > limited_acc:
207
+ # accessions = accessions[:limited_acc]
208
+ # warning = f"⚠️ Only processing first {limited_acc} accessions."
209
+ # else:
210
+ # warning = f"βœ… All {total} accessions will be processed."
211
+
212
+ # all_rows = []
213
+ # processed_accessions = 0 # βœ… tracks how many accessions were processed
214
+ # email_tracked = False
215
+ # log_lines = []
216
+
217
+ # # Step 2: Loop through accessions
218
+ # for i, acc in enumerate(accessions):
219
+ # if global_stop_flag.value:
220
+ # log_lines.append(f"πŸ›‘ Stopped at {acc} ({i+1}/{total})")
221
+ # usage_text = ""
222
+ # if email.strip() and not email_tracked:
223
+ # # user_hash = hash_user_id(email)
224
+ # # usage_count = increment_usage(user_hash, len(all_rows))
225
+ # print("print(processed_accessions at stop) ",processed_accessions)
226
+ # usage_count = increment_usage(email, processed_accessions)
227
+ # email_tracked = True
228
+ # usage_text = f"**{usage_count}** samples used by this email. Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
229
+ # else:
230
+ # usage_text = f"The limited accession is 50. The user has used {processed_accessions}, and only {50-processed_accessions} left."
231
+ # yield (
232
+ # make_html_table(all_rows),
233
+ # gr.update(visible=True),
234
+ # gr.update(value=output_file_path, visible=True),
235
+ # gr.update(value=usage_text, visible=True),
236
+ # "πŸ›‘ Stopped",
237
+ # "\n".join(log_lines)
238
+ # )
239
+ # return
240
+
241
+ # log_lines.append(f"[{i+1}/{total}] Processing {acc}")
242
+ # yield (
243
+ # make_html_table(all_rows),
244
+ # gr.update(visible=True),
245
+ # gr.update(visible=False),
246
+ # "",
247
+ # "⏳ Processing...",
248
+ # "\n".join(log_lines)
249
+ # )
250
+
251
+ # try:
252
+ # print(acc)
253
+ # rows = summarize_results(acc)
254
+ # all_rows.extend(rows)
255
+ # processed_accessions += 1 # βœ… count only successful accessions
256
+ # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
257
+ # log_lines.append(f"βœ… Processed {acc} ({i+1}/{total})")
258
+ # except Exception as e:
259
+ # log_lines.append(f"❌ Failed to process {acc}: {e}")
260
+
261
+ # yield (
262
+ # make_html_table(all_rows),
263
+ # gr.update(visible=True),
264
+ # gr.update(visible=False),
265
+ # "",
266
+ # "⏳ Processing...",
267
+ # "\n".join(log_lines)
268
+ # )
269
+
270
+ # # Final update
271
+ # usage_text = ""
272
+
273
+ # if email.strip() and not email_tracked:
274
+ # # user_hash = hash_user_id(email)
275
+ # # usage_count = increment_usage(user_hash, len(all_rows))
276
+ # print("print(processed_accessions final) ",processed_accessions)
277
+ # usage_count = increment_usage(email, processed_accessions)
278
+ # usage_text = f"**{usage_count}** samples used by this email. Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
279
+ # elif not email.strip():
280
+ # usage_text = f"The limited accession is 50. The user has used {processed_accessions}, and only {50-processed_accessions} left."
281
+ # yield (
282
+ # make_html_table(all_rows),
283
+ # gr.update(visible=True),
284
+ # gr.update(value=output_file_path, visible=True),
285
+ # gr.update(value=usage_text, visible=True),
286
+ # "βœ… Done",
287
+ # "\n".join(log_lines)
288
+ # )
289
+
290
def threaded_batch_runner(file=None, text="", email=""):
    """Classify a batch of accessions, streaming progress to the UI.

    This is a generator: every ``yield`` is a 6-tuple that the click handler
    maps onto ``(output_table, results_group, download_file, usage_display,
    status, progress_box)``.

    Args:
        file: Uploaded CSV/Excel file (or None), forwarded to
            ``extract_accessions_from_input``.
        text: Raw accession text, forwarded to ``extract_accessions_from_input``.
        email: User email. When non-blank it is passed to ``increment_usage``
            for quota tracking and an Excel workbook is written after each
            successful accession; when blank the run is capped at 30
            accessions and no workbook is produced.

    Yields:
        UI update tuples: one before and one after each accession, plus a
        final "done", "stopped", "error" or "quota exceeded" frame.
    """
    print("πŸ“§ EMAIL RECEIVED:", repr(email))
    import os
    import tempfile
    from mtdna_backend import (
        extract_accessions_from_input,
        summarize_results,
        save_to_excel,
        increment_usage,
    )

    anonymous_limit = 30  # accessions allowed per run when no email is given
    has_email = bool(email and email.strip())

    global_stop_flag.value = False  # reset stop flag

    all_rows = []
    processed_accessions = 0  # counts only accessions that were summarised successfully
    log_lines = []
    output_file_path = None  # stays None for anonymous runs (no download offered)

    def download_update():
        # Offer the workbook only once it has actually been written: if the
        # run is stopped before the first success, or every accession fails,
        # the path exists only as a name and must not be handed to gr.File.
        if output_file_path and os.path.exists(output_file_path):
            return gr.update(value=output_file_path, visible=True)
        return gr.update(value=None, visible=False)

    def progress_frame():
        # Intermediate frame: current table and log, download hidden.
        return (
            make_html_table(all_rows),
            gr.update(visible=True),
            gr.update(visible=False),
            "",
            "⏳ Processing...",
            "\n".join(log_lines),
        )

    def usage_summary(stage):
        # Record the successes of this run against the email (if any) and
        # build the text shown in usage_display.
        if has_email:
            print(f"πŸ§ͺ increment_usage at {stage}: {email=} {processed_accessions=}")
            usage_count, max_allowed = increment_usage(email, processed_accessions)
            return f"**{usage_count}**/{max_allowed} allowed samples used by this email."
        return (
            f"The limited accession is {anonymous_limit}. "
            f"The user has used {processed_accessions}, "
            f"and only {anonymous_limit - processed_accessions} left."
        )

    if has_email:
        # processed_accessions is still 0 here, so this adds nothing to the count.
        # NOTE(review): presumably this only reads current usage; confirm in mtdna_backend.
        usage_count, max_allowed = increment_usage(email, processed_accessions)
        if int(usage_count) >= int(max_allowed):
            log_lines.append("❌ You have reached your quota. Please contact us to unlock more.")

            # Minimal blank yield to trigger UI rendering
            yield (
                make_html_table([]),
                gr.update(visible=True),
                gr.update(visible=False),
                gr.update(value="", visible=True),
                "⛔️ Quota limit",
                "⛔️ Quota limit",
            )

            # Actual warning frame
            yield (
                make_html_table([]),
                gr.update(visible=False),
                gr.update(visible=False),
                gr.update(value="❌ You have reached your quota. Please contact us to unlock more.", visible=True),
                "❌ Quota Exceeded",
                "\n".join(log_lines),
            )
            return
        # Coerce both operands the same way the comparison above does.
        limited_acc = int(max_allowed) - int(usage_count)
        # Create the temp directory only when a workbook can actually be produced.
        output_file_path = os.path.join(tempfile.mkdtemp(), "batch_output_live.xlsx")
    else:
        log_lines.append("πŸ“₯ Provide your email to receive a downloadable Excel report and get 20 more free queries.")
        limited_acc = anonymous_limit

    # Step 1: Parse input
    accessions, error = extract_accessions_from_input(file, text)
    print("πŸ§ͺ Accessions received:", accessions)
    if error:
        yield (
            "",                        # output_table
            gr.update(visible=False),  # results_group
            gr.update(visible=False),  # download_file
            "",                        # usage_display
            "❌ Error",                # status
            str(error),                # progress_box
        )
        return

    requested = len(accessions)
    if requested > limited_acc:
        accessions = accessions[:limited_acc]
        # Tell the user about the truncation instead of silently dropping input.
        log_lines.append(f"⚠️ Only processing first {limited_acc} accessions.")
    else:
        log_lines.append(f"βœ… All {requested} accessions will be processed.")
    # Progress counters refer to what will actually be processed.
    total = len(accessions)

    # Step 2: Loop through accessions
    for i, acc in enumerate(accessions):
        if global_stop_flag.value:
            log_lines.append(f"πŸ›‘ Stopped at {acc} ({i+1}/{total})")
            usage_text = usage_summary("STOP")
            yield (
                make_html_table(all_rows),
                gr.update(visible=True),
                download_update(),
                gr.update(value=usage_text, visible=True),
                "πŸ›‘ Stopped",
                "\n".join(log_lines),
            )
            return

        log_lines.append(f"[{i+1}/{total}] Processing {acc}")
        yield progress_frame()

        try:
            print("πŸ“„ Processing accession:", acc)
            rows = summarize_results(acc)
            all_rows.extend(rows)
            processed_accessions += 1  # only count success
            if has_email:
                # Rewrite the workbook after each success so a stopped run
                # still has a file with everything processed so far.
                save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
            log_lines.append(f"βœ… Processed {acc} ({i+1}/{total})")
        except Exception as e:
            # Deliberate best-effort boundary: one failing accession is logged
            # and must not abort the rest of the batch.
            log_lines.append(f"❌ Failed to process {acc}: {e}")

        yield progress_frame()

    # Step 3: Final usage update
    usage_text = usage_summary("END")
    yield (
        make_html_table(all_rows),
        gr.update(visible=True),
        download_update(),
        gr.update(value=usage_text, visible=True),
        "βœ… Done",
        "\n".join(log_lines),
    )
471
+
472
+ # SUBMIT REPORT UI
473
+ # 1. Google Sheets setup
474
def get_worksheet(sheet_name="Report"):
    """Open the first worksheet of the Google spreadsheet named ``sheet_name``.

    Service-account credentials are read as JSON from the ``GCP_CREDS_JSON``
    environment variable.

    Returns:
        The spreadsheet's ``sheet1`` object, or None when anything inside the
        ``try`` fails (the error is printed).
    """
    import os, json
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials

    try:
        key_data = json.loads(os.environ["GCP_CREDS_JSON"])
        scopes = [
            "https://spreadsheets.google.com/feeds",
            "https://www.googleapis.com/auth/drive",
        ]
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(key_data, scopes)
        return gspread.authorize(credentials).open(sheet_name).sheet1
    except Exception as e:
        print(f"❌ Error loading Google Sheet '{sheet_name}':", e)
        return None
488
+
489
+ # 2. Submit function to send report to the Google Sheet
490
def submit_report(report_text, user_email=""):
    """Append a user's issue report to the report worksheet.

    Each non-blank line of ``report_text`` becomes one row of
    ``[accession, message, user]``. A line of the form ``ACCESSION: message``
    is split at its first colon; a line without a colon is stored with an
    empty accession so the user's text is not lost.

    Args:
        report_text: Free-form report, one issue per line.
        user_email: Optional email stored in the third column (stripped).

    Returns:
        A status string for the UI: success, or a message starting with
        "❌ Error submitting report:".
    """
    try:
        user = (user_email or "").strip()

        # Parse first so an empty report never triggers a sheet round-trip.
        entries = []
        for raw_line in (report_text or "").strip().split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            accession, separator, message = line.partition(":")
            if not separator:
                # No "ACCESSION:" prefix: keep the whole line as the message.
                accession, message = "", line
            entries.append([accession.strip(), message.strip(), user])

        if not entries:
            return "❌ Error submitting report: the report is empty."

        sheet = get_worksheet()
        if sheet is None:
            # get_worksheet returns None on failure; report that explicitly
            # rather than surfacing an AttributeError on None.
            return "❌ Error submitting report: could not open the report sheet."

        for entry in entries:
            sheet.append_row(entry)
        return "βœ… Report submitted successfully!"
    except Exception as e:
        # UI boundary: any failure is turned into a message for the user.
        return f"❌ Error submitting report: {str(e)}"
505
def show_report_ui():
    """Return updates that reveal the report form and hide the report status.

    The three updates are wired, in order, to the report textbox, the submit
    button and the report status component.
    """
    reveal_textbox = gr.update(visible=True)
    reveal_submit = gr.update(visible=True)
    hide_status = gr.update(visible=False)
    return reveal_textbox, reveal_submit, hide_status
507
+
508
def handle_submission(text,user_email):
    """Submit the report, then show its status and hide the report form.

    Returns three updates, in order: the status message (made visible), the
    report textbox (hidden) and the submit button (hidden).
    """
    outcome = submit_report(text, user_email)
    status_update = gr.update(value=outcome, visible=True)
    hide_textbox = gr.update(visible=False)
    hide_submit = gr.update(visible=False)
    return status_update, hide_textbox, hide_submit
511
+ # def threaded_batch_runner(file=None, text="", email=""):
512
+ # global_stop_flag.value = False
513
+
514
+ # # Dummy test output that matches expected schema
515
+ # return (
516
+ # "<div>βœ… Dummy output table</div>", # HTML string
517
+ # gr.update(visible=True), # Group visibility
518
+ # gr.update(visible=False), # Download file
519
+ # "**0** samples used.", # Markdown
520
+ # "βœ… Done", # Status string
521
+ # "Processing finished." # Progress string
522
+ # )
523
+
524
+
525
+ # def classify_mulAcc(file, text, resume, email, log_callback=None, log_collector=None):
526
+ # stop_flag.value = False
527
+ # return threaded_batch_runner(file, text, resume, email, status, stop_flag, log_callback=log_callback, log_collector=log_collector)
528
+
529
+
530
def make_html_table(rows):
    """Render result rows as a scrollable HTML table.

    Args:
        rows: Iterable of rows; each row is a sequence of cell values matched
            by position against the fixed header list below. Cells past the
            last header are still rendered, with the base cell style only.

    Returns:
        An HTML string: two wrapper ``<div>`` elements around one ``<table>``
        with a header row and one ``<tr>`` per input row.

    Cell text is HTML-escaped before insertion, with newlines turned into
    ``<br>``. In the "Sources" column each non-blank line becomes one bullet;
    only ``http://`` / ``https://`` entries become links, anything else is
    shown as plain text.
    """
    from html import escape

    headers = ["Sample ID", "Predicted Country", "Country Explanation", "Predicted Sample Type", "Sample Type Explanation", "Sources", "Time cost"]
    wrapping_columns = {"Country Explanation", "Sample Type Explanation"}
    compact_columns = {"Sample ID", "Predicted Country", "Predicted Sample Type", "Time cost"}
    base_style = "padding: 10px; border: 1px solid #555; vertical-align: top;"

    def render_sources(raw):
        # One bullet per non-blank line; empty input gives an empty cell
        # instead of a dangling "- ".
        items = []
        for url in raw.strip().split("\n"):
            url = url.strip()
            if not url:
                continue
            safe = escape(url, quote=True)
            if url.lower().startswith(("http://", "https://")):
                items.append(
                    f"<a href='{safe}' target='_blank' style='color: #4ea1f3; text-decoration: underline;'>{safe}</a>"
                )
            else:
                # Not a web URL (e.g. "javascript:..."): never emit it as href.
                items.append(safe)
        return ("- " + "<br>- ".join(items)) if items else ""

    chunks = [
        "\n"
        "<div style='overflow-x: auto; padding: 10px;'>\n"
        "<div style='max-height: 400px; overflow-y: auto; border: 1px solid #ccc; border-radius: 8px;'>\n"
        "<table style='width:100%; border-collapse: collapse; table-layout: auto; font-size: 14px; color: inherit; background-color: inherit;'>\n"
        # Open the header section that is closed after the <th> cells below.
        "<thead><tr>"
    ]
    chunks.extend(
        f"<th style='padding: 10px; border: 1px solid #555; text-align: left; white-space: nowrap;'>{h}</th>"
        for h in headers
    )
    chunks.append("</tr></thead><tbody>")

    for row in rows:
        chunks.append("<tr>")
        for i, col in enumerate(row):
            # Rows wider than the header list must not raise IndexError.
            header = headers[i] if i < len(headers) else ""
            style = base_style

            if header in wrapping_columns:
                style += " max-width: 400px; word-wrap: break-word; white-space: normal;"
            elif header in compact_columns:
                style += " white-space: nowrap; text-overflow: ellipsis; max-width: 200px; overflow: hidden;"

            if header == "Sources" and isinstance(col, str):
                cell = render_sources(col)
            elif isinstance(col, str):
                cell = escape(col).replace("\n", "<br>")
            else:
                cell = escape(str(col))

            chunks.append(f"<td style='{style}'>{cell}</td>")
        chunks.append("</tr>")

    chunks.append("</tbody></table></div></div>")
    return "".join(chunks)
589
+
590
+
591
+ # def reset_fields():
592
+ # global_stop_flag.value = False # πŸ’‘ Add this to reset the flag
593
+ # return (
594
+ # #gr.update(value=""), # single_accession
595
+ # gr.update(value=""), # raw_text
596
+ # gr.update(value=None), # file_upload
597
+ # #gr.update(value=None), # resume_file
598
+ # #gr.update(value="Single Accession"), # inputMode
599
+ # gr.update(value=[], visible=True), # output_table
600
+ # # gr.update(value="", visible=True), # output_summary
601
+ # # gr.update(value="", visible=True), # output_flag
602
+ # gr.update(visible=False), # status
603
+ # gr.update(visible=False), # results_group
604
+ # gr.update(value="", visible=False), # usage_display
605
+ # gr.update(value="", visible=False), # progress_box
606
+ # )
607
def reset_fields():
    """Clear the stop flag and return updates that reset the UI.

    The ten updates are wired, in order, to: raw_text, file_upload,
    output_table, status, results_group, usage_display, progress_box,
    report_textbox, submit_report_button and status_report.
    """
    global_stop_flag.value = False  # Reset the stop flag

    return (
        gr.update(value=""),                 # raw_text
        gr.update(value=None),               # file_upload
        # output_table is an HTML component: clear it with an empty string,
        # the same type every other handler writes to it, not a list.
        gr.update(value="", visible=True),   # output_table
        gr.update(value="", visible=True),   # status: cleared, kept visible
        gr.update(visible=False),            # results_group
        gr.update(value="", visible=True),   # usage_display: cleared, kept visible
        gr.update(value="", visible=True),   # progress_box: cleared, kept visible
        # report-related reset below
        gr.update(value="", visible=False),  # report_textbox
        gr.update(visible=False),            # submit_report_button
        gr.update(value="", visible=False),  # status_report
    )
623
+ #inputMode.change(fn=toggle_input_mode, inputs=inputMode, outputs=[single_input_group, batch_input_group])
624
+ #run_button.click(fn=classify_with_loading, inputs=[], outputs=[status])
625
+ # run_button.click(
626
+ # fn=classify_dynamic,
627
+ # inputs=[single_accession, file_upload, raw_text, resume_file,user_email,inputMode],
628
+ # outputs=[output_table,
629
+ # #output_summary, output_flag,
630
+ # results_group, download_file, usage_display,status, progress_box]
631
+ # )
632
+
633
+ # run_button.click(
634
+ # fn=threaded_batch_runner,
635
+ # #inputs=[file_upload, raw_text, resume_file, user_email],
636
+ # inputs=[file_upload, raw_text, user_email],
637
+ # outputs=[output_table, results_group, download_file, usage_display, status, progress_box]
638
+ # )
639
+ # run_button.click(
640
+ # fn=threaded_batch_runner,
641
+ # inputs=[file_upload, raw_text, user_email],
642
+ # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
643
+ # every=0.5 # <-- this tells Gradio to expect streaming
644
+ # )
645
+ # output_table = gr.HTML()
646
+ # results_group = gr.Group(visible=False)
647
+ # download_file = gr.File(visible=False)
648
+ # usage_display = gr.Markdown(visible=False)
649
+ # status = gr.Markdown(visible=False)
650
+ # progress_box = gr.Textbox(visible=False)
651
+
652
+ # run_button.click(
653
+ # fn=threaded_batch_runner,
654
+ # inputs=[file_upload, raw_text, user_email],
655
+ # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
656
+ # every=0.5, # streaming enabled
657
+ # show_progress="full"
658
+ # )
659
+
660
+ # interface.stream(
661
+ # fn=threaded_batch_runner,
662
+ # inputs=[file_upload, raw_text, user_email],
663
+ # outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
664
+ # trigger=run_button,
665
+ # every=0.5,
666
+ # show_progress="full",
667
+ # )
668
+ interface.queue() # No arguments here!
669
+
670
+ run_button.click(
671
+ fn=threaded_batch_runner,
672
+ inputs=[file_upload, raw_text, user_email],
673
+ outputs=[output_table, results_group, download_file, usage_display, status, progress_box],
674
+ concurrency_limit=1, # βœ… correct in Gradio 5.x
675
+ queue=True, # βœ… ensure the queue is used
676
+ #every=0.5
677
+ )
678
+
679
+
680
+
681
+
682
+ stop_button.click(fn=stop_batch, inputs=[], outputs=[status])
683
+
684
+ # reset_button.click(
685
+ # #fn=reset_fields,
686
+ # fn=lambda: (
687
+ # gr.update(value=""), gr.update(value=""), gr.update(value=None), gr.update(value=None), gr.update(value="Single Accession"),
688
+ # gr.update(value=[], visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value="", visible=False), gr.update(value="", visible=False)
689
+ # ),
690
+ # inputs=[],
691
+ # outputs=[
692
+ # single_accession, raw_text, file_upload, resume_file,inputMode,
693
+ # output_table,# output_summary, output_flag,
694
+ # status, results_group, usage_display, progress_box
695
+ # ]
696
+ # )
697
+ #stop_button.click(fn=lambda sf: (gr.update(value="❌ Stopping...", visible=True), setattr(sf, "value", True) or sf), inputs=[gr.State(stop_flag)], outputs=[status, gr.State(stop_flag)])
698
+
699
+ reset_button.click(
700
+ fn=reset_fields,
701
+ inputs=[],
702
+ #outputs=[raw_text, file_upload, resume_file, output_table, status, results_group, usage_display, progress_box]
703
+ outputs=[raw_text, file_upload, output_table, status, results_group, usage_display, progress_box,
704
+ report_textbox,
705
+ submit_report_button,
706
+ status_report]
707
+ )
708
+
709
+ # download_button.click(
710
+ # fn=mtdna_backend.save_batch_output,
711
+ # #inputs=[output_table, output_summary, output_flag, output_type],
712
+ # inputs=[output_table, output_type],
713
+ # outputs=[download_file])
714
+
715
+ # submit_feedback.click(
716
+ # fn=mtdna_backend.store_feedback_to_google_sheets,
717
+ # inputs=[single_accession, q1, q2, contact], outputs=feedback_status
718
+ # )
719
+ report_button.click(fn=show_report_ui, outputs=[report_textbox, submit_report_button, status_report])
720
+ submit_report_button.click(fn=handle_submission, inputs=[report_textbox, user_email], outputs=[status_report, report_textbox, submit_report_button])
721
+
722
+ submit_feedback.click(
723
+ fn=mtdna_backend.store_feedback_to_google_sheets,
724
+ inputs=[raw_text, q1, q2, contact],
725
+ outputs=[feedback_status]
726
+ )
727
+ gr.HTML("""
728
+ <style>
729
+ body, html {
730
+ background-color: #121212 !important;
731
+ color: #ffffff !important;
732
+ }
733
+
734
+ .gradio-container, .gr-block, .gr-box, textarea, input, select, .prose, .prose * {
735
+ background-color: #1e1e1e !important;
736
+ color: #ffffff !important;
737
+ border-color: #333 !important;
738
+ }
739
+
740
+ textarea::placeholder,
741
+ input::placeholder {
742
+ color: #aaa !important;
743
+ }
744
+
745
+ button {
746
+ background-color: #2d2d2d !important;
747
+ color: #fff !important;
748
+ border: 1px solid #444 !important;
749
+ }
750
+
751
+ a {
752
+ color: #4ea1f3 !important;
753
+ }
754
+ </style>
755
+ """)
756
+
757
+ # # Custom CSS styles
758
+ # gr.HTML("""
759
+ # <style>
760
+ # /* Ensures both sections are equally spaced with the same background size */
761
+ # #output-summary, #output-flag {
762
+ # background-color: #f0f4f8; /* Light Grey for both */
763
+ # padding: 20px;
764
+ # border-radius: 10px;
765
+ # margin-top: 10px;
766
+ # width: 100%; /* Ensure full width */
767
+ # min-height: 150px; /* Ensures both have a minimum height */
768
+ # box-sizing: border-box; /* Prevents padding from increasing size */
769
+ # display: flex;
770
+ # flex-direction: column;
771
+ # justify-content: space-between;
772
+ # }
773
+
774
+ # /* Specific background colors */
775
+ # #output-summary {
776
+ # background-color: #434a4b;
777
+ # }
778
+
779
+ # #output-flag {
780
+ # background-color: #141616;
781
+ # }
782
+
783
+ # /* Ensuring they are in a row and evenly spaced */
784
+ # .gradio-row {
785
+ # display: flex;
786
+ # justify-content: space-between;
787
+ # width: 100%;
788
+ # }
789
+ # </style>
790
+ # """)
791
+
792
+
793
  interface.launch(share=True,debug=True)