VyLala committed on
Commit
4e7035b
·
verified ·
1 Parent(s): 4ed3064

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -270
app.py CHANGED
@@ -5,8 +5,6 @@ import data_preprocess, model, pipeline
5
  import os
6
  import hashlib
7
  import threading
8
- import multiprocessing as mp
9
- mp.set_start_method("spawn", force=True) # ensures workers aren't daemonic
10
 
11
  # Gradio UI
12
  #stop_flag = gr.State(value=False)
@@ -460,28 +458,6 @@ with gr.Blocks() as interface:
460
 
461
  return False, None
462
 
463
- from multiprocessing import Pool, cpu_count
464
-
465
- def process_accession_worker(args):
466
- acc, stop_flag = args
467
- if stop_flag.value: # Early exit if stop requested before starting
468
- return None
469
- try:
470
- # You can still use run_with_timeout if you want per-accession timeout
471
- success, rows = run_with_timeout(
472
- mtdna_backend.summarize_results,
473
- args=(acc,),
474
- timeout=None, # or set max seconds per sample if needed
475
- stop_value=stop_flag
476
- )
477
- if success:
478
- return rows
479
- else:
480
- return None
481
- except Exception as e:
482
- print(f"⚠️ Error processing {acc}: {e}")
483
- return None
484
- from concurrent.futures import ProcessPoolExecutor, as_completed
485
  def threaded_batch_runner(file=None, text="", email=""):
486
  print("📧 EMAIL RECEIVED:", repr(email))
487
  import tempfile
@@ -629,271 +605,207 @@ with gr.Blocks() as interface:
629
  # "\n".join(log_lines)
630
  # )
631
  # return
632
-
633
- from multiprocessing import get_context
634
-
635
- num_workers = min(cpu_count(), 4) # HF free tier usually has 2 CPUs
636
- ctx = get_context("spawn") # ensures workers are not daemonic
637
- pool = ctx.Pool(processes=num_workers)
638
-
639
-
640
- try:
641
- for result in pool.imap_unordered(
642
- process_accession_worker, [(acc, global_stop_flag) for acc in accessions]
643
- ):
644
- # if global_stop_flag.value:
645
- # log_lines.append("🛑 Stop requested — terminating workers...")
646
- # pool.terminate()
647
- # break
648
  if global_stop_flag.value:
649
- log_lines.append("🛑 Stopped mid-batch")
650
  usage_text = ""
 
651
  if email.strip() and not email_tracked:
 
652
  usage_count, max_allowed = increment_usage(email, processed_accessions)
653
  email_tracked = True
654
- usage_text = f"**{usage_count}**/{max_allowed} allowed samples used."
 
655
  else:
656
  usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
657
-
658
- pool.terminate()
 
 
 
 
 
 
 
 
659
  yield (
660
  make_html_table(all_rows),
661
- gr.update(visible=True),
662
- gr.update(value=output_file_path, visible=bool(output_file_path)),
663
- gr.update(value=usage_text, visible=True),
664
- "🛑 Stopped",
665
  "\n".join(log_lines),
666
- gr.update(visible=False), # run_button
667
- gr.update(visible=False), # stop_button
668
- gr.update(visible=True), # reset_button
669
- gr.update(visible=False), # raw_text
670
- gr.update(visible=False), # file_upload
671
- gr.update(value=processed_info, visible=True),
672
- gr.update(visible=True) # NPS modal
673
  )
 
674
  return
675
-
676
- if result:
677
- all_rows.extend(result)
678
- processed_accessions += 1
679
- log_lines.append(f"✅ Processed {processed_accessions}/{total}")
680
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  yield (
682
  make_html_table(all_rows),
683
- gr.update(visible=True), # results_group
684
- gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
685
- gr.update(value=usage_text, visible=True), # usage_display
686
- f"⏳ Processed {processed_accessions}/{total}",
687
- "\n".join(log_lines),
688
- gr.update(visible=False), # run_button
689
- gr.update(visible=True), # stop_button
690
- gr.update(visible=True), # reset_button
691
- gr.update(visible=False), # raw_text
692
- gr.update(visible=False), # file_upload
693
  gr.update(value=processed_info, visible=True), # processed_info
694
- gr.update(visible=False) # NPS modal
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
  )
696
-
697
- finally:
698
- pool.close()
699
- pool.join()
700
-
701
- if not global_stop_flag.value:
702
- log_lines.append("✅ All accessions processed")
703
 
704
- # # Step 2: Loop through accessions
705
- # for i, acc in enumerate(accessions):
706
- # if global_stop_flag.value:
707
- # log_lines.append(f"🛑 Stopped at {acc} ({i+1}/{total})")
708
- # usage_text = ""
709
-
710
- # if email.strip() and not email_tracked:
711
- # print(f"🧪 increment_usage at STOP: {email=} {processed_accessions=}")
712
- # usage_count, max_allowed = increment_usage(email, processed_accessions)
713
- # email_tracked = True
714
- # usage_text = f"**{usage_count}**/{max_allowed} allowed samples used by this email."
715
- # #Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
716
- # else:
717
- # usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
718
-
719
- # # yield (
720
- # # make_html_table(all_rows),
721
- # # gr.update(visible=True),
722
- # # #gr.update(value=output_file_path, visible=True),
723
- # # gr.update(value=output_file_path, visible=bool(output_file_path)),
724
- # # gr.update(value=usage_text, visible=True),
725
- # # "🛑 Stopped",
726
- # # "\n".join(log_lines)
727
- # # )
728
- # yield (
729
- # make_html_table(all_rows),
730
- # gr.update(visible=True), # results_group
731
- # gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
732
- # gr.update(value=usage_text, visible=True), # usage_display
733
- # "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
734
- # "\n".join(log_lines),
735
- # gr.update(visible=False), # run_button
736
- # gr.update(visible=False), # stop_button
737
- # gr.update(visible=True), # reset_button
738
- # gr.update(visible=False), # raw_text
739
- # gr.update(visible=False), # file_upload
740
- # gr.update(value=processed_info, visible=False), # processed_info
741
- # gr.update(visible=True) # NPS modal now visible
742
- # )
743
-
744
- # return
745
-
746
- # log_lines.append(f"[{i+1}/{total}] Processing {acc}")
747
- # # yield (
748
- # # make_html_table(all_rows),
749
- # # gr.update(visible=True),
750
- # # gr.update(visible=False),
751
- # # "",
752
- # # "⏳ Processing...",
753
- # # "\n".join(log_lines)
754
- # # )
755
- # # Hide inputs, show processed_info at start
756
- # yield (
757
- # make_html_table(all_rows), # output_table
758
- # gr.update(visible=True), # results_group
759
- # gr.update(visible=False), # download_file
760
- # "", # usage_display
761
- # "⏳ Processing...", # status
762
- # "\n".join(log_lines), # progress_box
763
- # gr.update(visible=False), # run_button
764
- # gr.update(visible=True), # stop_button
765
- # gr.update(visible=True), # reset_button
766
- # gr.update(visible=False), # hide raw_text
767
- # gr.update(visible=False), # hide file_upload
768
- # gr.update(value=processed_info, visible=True), # processed_info
769
- # gr.update(visible=False) # hide NPS modal at start
770
- # )
771
-
772
-
773
- # # try:
774
- # # print("📄 Processing accession:", acc)
775
- # # rows = summarize_results(acc)
776
- # # all_rows.extend(rows)
777
- # # processed_accessions += 1 # ✅ only count success
778
- # # if email.strip():
779
- # # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
780
- # # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
781
- # print("📄 Processing accession:", acc)
782
- # # --- Before calling summarize_results ---
783
- # samples_left = total - i # including current one
784
- # estimated_seconds_left = samples_left * 100 # your observed average per sample
785
-
786
- # log_lines.append(
787
- # f"Running... usually ~100s per sample"
788
- # )
789
- # log_lines.append(
790
- # f"⏳ Estimated time left: ~{estimated_seconds_left} seconds ({samples_left} sample{'s' if samples_left > 1 else ''} remaining)"
791
- # )
792
-
793
- # # Yield update to UI before the heavy pipeline call
794
- # yield (
795
- # make_html_table(all_rows),
796
- # gr.update(visible=True), # results_group
797
- # gr.update(visible=False), # download_file
798
- # "", # usage_display
799
- # "⏳ Processing...", # status
800
- # "\n".join(log_lines), # progress_box
801
- # gr.update(visible=False), # run_button
802
- # gr.update(visible=True), # stop_button
803
- # gr.update(visible=True), # reset_button
804
- # gr.update(visible=False), # raw_text
805
- # gr.update(visible=False), # file_upload
806
- # gr.update(value=processed_info, visible=True), # processed_info
807
- # gr.update(visible=False) # hide NPS modal
808
- # )
809
-
810
- # # Run summarize_results in a separate process with stop flag support
811
- # success, rows = run_with_timeout(
812
- # summarize_results,
813
- # args=(acc,),
814
- # timeout=None, # or set max seconds per sample if you want
815
- # stop_value=global_stop_flag
816
- # )
817
-
818
- # # If stop was pressed during this accession
819
- # if not success and global_stop_flag.value:
820
- # log_lines.append(f"🛑 Cancelled {acc} before completion")
821
- # # yield (
822
- # # make_html_table(all_rows),
823
- # # gr.update(visible=True),
824
- # # gr.update(visible=False),
825
- # # "",
826
- # # "🛑 Stopped",
827
- # # "\n".join(log_lines)
828
- # # )
829
- # yield (
830
- # make_html_table(all_rows),
831
- # gr.update(visible=True), # results_group
832
- # gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
833
- # gr.update(value=usage_text, visible=True), # usage_display
834
- # "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
835
- # "\n".join(log_lines),
836
- # gr.update(visible=False), # run_button
837
- # gr.update(visible=False), # stop_button
838
- # gr.update(visible=True), # reset_button
839
- # gr.update(visible=False), # raw_text
840
- # gr.update(visible=False), # file_upload
841
- # gr.update(value="", visible=False), # processed_info
842
- # gr.update(visible=True) # NPS modal now visible
843
- # )
844
 
845
- # break # stop processing entirely
846
-
847
- # # If it finished normally
848
- # if success and rows:
849
- # all_rows.extend(rows)
850
- # processed_accessions += 1
851
- # if email.strip():
852
- # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
853
- # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
854
- # else:
855
- # # If it failed due to timeout or other error
856
- # if not global_stop_flag.value:
857
- # log_lines.append(f"⚠️ Skipped {acc} due to timeout or error")
 
 
 
 
858
 
859
- # # Always yield updated logs after each attempt
860
- # # yield (
861
- # # make_html_table(all_rows),
862
- # # gr.update(visible=True),
863
- # # gr.update(visible=False),
864
- # # "",
865
- # # "⏳ Processing...",
866
- # # "\n".join(log_lines)
867
- # # )
868
- # yield (
869
- # make_html_table(all_rows), # output_table
870
- # gr.update(visible=True), # results_group
871
- # gr.update(visible=False), # download_file
872
- # "", # usage_display
873
- # "⏳ Processing...", # status
874
- # "\n".join(log_lines), # progress_box
875
- # gr.update(visible=True), # run_button
876
- # gr.update(visible=True), # stop_button
877
- # gr.update(visible=True), # reset_button
878
- # gr.update(visible=False), # hide raw_text
879
- # gr.update(visible=False), # hide file_upload
880
- # gr.update(value=processed_info, visible=True), # processed_info
881
- # gr.update(visible=False) # hide NPS modal at start
882
- # )
883
-
884
-
885
- # except Exception as e:
886
- # log_lines.append(f"❌ Failed to process {acc}: {e}")
887
-
888
- # yield (
889
- # make_html_table(all_rows),
890
- # gr.update(visible=True),
891
- # gr.update(visible=False),
892
- # "",
893
- # "⏳ Processing...",
894
- # "\n".join(log_lines)
895
- # )
896
-
897
  # Step 3: Final usage update
898
  usage_text = ""
899
  if email.strip() and not email_tracked:
 
5
  import os
6
  import hashlib
7
  import threading
 
 
8
 
9
  # Gradio UI
10
  #stop_flag = gr.State(value=False)
 
458
 
459
  return False, None
460
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  def threaded_batch_runner(file=None, text="", email=""):
462
  print("📧 EMAIL RECEIVED:", repr(email))
463
  import tempfile
 
605
  # "\n".join(log_lines)
606
  # )
607
  # return
608
+
609
+ # Step 2: Loop through accessions
610
+ for i, acc in enumerate(accessions):
611
+ try:
 
 
 
 
 
 
 
 
 
 
 
 
612
  if global_stop_flag.value:
613
+ log_lines.append(f"🛑 Stopped at {acc} ({i+1}/{total})")
614
  usage_text = ""
615
+
616
  if email.strip() and not email_tracked:
617
+ print(f"🧪 increment_usage at STOP: {email=} {processed_accessions=}")
618
  usage_count, max_allowed = increment_usage(email, processed_accessions)
619
  email_tracked = True
620
+ usage_text = f"**{usage_count}**/{max_allowed} allowed samples used by this email."
621
+ #Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
622
  else:
623
  usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
624
+
625
+ # yield (
626
+ # make_html_table(all_rows),
627
+ # gr.update(visible=True),
628
+ # #gr.update(value=output_file_path, visible=True),
629
+ # gr.update(value=output_file_path, visible=bool(output_file_path)),
630
+ # gr.update(value=usage_text, visible=True),
631
+ # "🛑 Stopped",
632
+ # "\n".join(log_lines)
633
+ # )
634
  yield (
635
  make_html_table(all_rows),
636
+ gr.update(visible=True), # results_group
637
+ gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
638
+ gr.update(value=usage_text, visible=True), # usage_display
639
+ "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
640
  "\n".join(log_lines),
641
+ gr.update(visible=False), # run_button
642
+ gr.update(visible=False), # stop_button
643
+ gr.update(visible=True), # reset_button
644
+ gr.update(visible=False), # raw_text
645
+ gr.update(visible=False), # file_upload
646
+ gr.update(value=processed_info, visible=False), # processed_info
647
+ gr.update(visible=True) # NPS modal now visible
648
  )
649
+
650
  return
651
+
652
+ log_lines.append(f"[{i+1}/{total}] Processing {acc}")
653
+ # yield (
654
+ # make_html_table(all_rows),
655
+ # gr.update(visible=True),
656
+ # gr.update(visible=False),
657
+ # "",
658
+ # "⏳ Processing...",
659
+ # "\n".join(log_lines)
660
+ # )
661
+ # Hide inputs, show processed_info at start
662
+ yield (
663
+ make_html_table(all_rows), # output_table
664
+ gr.update(visible=True), # results_group
665
+ gr.update(visible=False), # download_file
666
+ "", # usage_display
667
+ "⏳ Processing...", # status
668
+ "\n".join(log_lines), # progress_box
669
+ gr.update(visible=False), # run_button
670
+ gr.update(visible=True), # stop_button
671
+ gr.update(visible=True), # reset_button
672
+ gr.update(visible=False), # hide raw_text
673
+ gr.update(visible=False), # hide file_upload
674
+ gr.update(value=processed_info, visible=True), # processed_info
675
+ gr.update(visible=False) # hide NPS modal at start
676
+ )
677
+
678
+
679
+ # try:
680
+ # print("📄 Processing accession:", acc)
681
+ # rows = summarize_results(acc)
682
+ # all_rows.extend(rows)
683
+ # processed_accessions += 1 # ✅ only count success
684
+ # if email.strip():
685
+ # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
686
+ # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
687
+ print("📄 Processing accession:", acc)
688
+ # --- Before calling summarize_results ---
689
+ samples_left = total - i # including current one
690
+ estimated_seconds_left = samples_left * 100 # your observed average per sample
691
+
692
+ log_lines.append(
693
+ f"Running... usually ~100s per sample"
694
+ )
695
+ log_lines.append(
696
+ f"⏳ Estimated time left: ~{estimated_seconds_left} seconds ({samples_left} sample{'s' if samples_left > 1 else ''} remaining)"
697
+ )
698
+
699
+ # Yield update to UI before the heavy pipeline call
700
  yield (
701
  make_html_table(all_rows),
702
+ gr.update(visible=True), # results_group
703
+ gr.update(visible=False), # download_file
704
+ "", # usage_display
705
+ "⏳ Processing...", # status
706
+ "\n".join(log_lines), # progress_box
707
+ gr.update(visible=False), # run_button
708
+ gr.update(visible=True), # stop_button
709
+ gr.update(visible=True), # reset_button
710
+ gr.update(visible=False), # raw_text
711
+ gr.update(visible=False), # file_upload
712
  gr.update(value=processed_info, visible=True), # processed_info
713
+ gr.update(visible=False) # hide NPS modal
714
+ )
715
+
716
+ # Run summarize_results in a separate process with stop flag support
717
+ success, rows = run_with_timeout(
718
+ summarize_results,
719
+ args=(acc,),
720
+ timeout=None, # or set max seconds per sample if you want
721
+ stop_value=global_stop_flag
722
+ )
723
+
724
+ # If stop was pressed during this accession
725
+ if not success and global_stop_flag.value:
726
+ log_lines.append(f"🛑 Cancelled {acc} before completion")
727
+ # yield (
728
+ # make_html_table(all_rows),
729
+ # gr.update(visible=True),
730
+ # gr.update(visible=False),
731
+ # "",
732
+ # "🛑 Stopped",
733
+ # "\n".join(log_lines)
734
+ # )
735
+ yield (
736
+ make_html_table(all_rows),
737
+ gr.update(visible=True), # results_group
738
+ gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
739
+ gr.update(value=usage_text, visible=True), # usage_display
740
+ "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
741
+ "\n".join(log_lines),
742
+ gr.update(visible=False), # run_button
743
+ gr.update(visible=False), # stop_button
744
+ gr.update(visible=True), # reset_button
745
+ gr.update(visible=False), # raw_text
746
+ gr.update(visible=False), # file_upload
747
+ gr.update(value="", visible=False), # processed_info
748
+ gr.update(visible=True) # NPS modal now visible
749
+ )
750
+
751
+ break # stop processing entirely
752
+
753
+ # If it finished normally
754
+ if success and rows:
755
+ all_rows.extend(rows)
756
+ processed_accessions += 1
757
+ if email.strip():
758
+ save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
759
+ log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
760
+ else:
761
+ # If it failed due to timeout or other error
762
+ if not global_stop_flag.value:
763
+ log_lines.append(f"⚠️ Skipped {acc} due to timeout or error")
764
+
765
+ # Always yield updated logs after each attempt
766
+ # yield (
767
+ # make_html_table(all_rows),
768
+ # gr.update(visible=True),
769
+ # gr.update(visible=False),
770
+ # "",
771
+ # "⏳ Processing...",
772
+ # "\n".join(log_lines)
773
+ # )
774
+ yield (
775
+ make_html_table(all_rows), # output_table
776
+ gr.update(visible=True), # results_group
777
+ gr.update(visible=False), # download_file
778
+ "", # usage_display
779
+ "⏳ Processing...", # status
780
+ "\n".join(log_lines), # progress_box
781
+ gr.update(visible=True), # run_button
782
+ gr.update(visible=True), # stop_button
783
+ gr.update(visible=True), # reset_button
784
+ gr.update(visible=False), # hide raw_text
785
+ gr.update(visible=False), # hide file_upload
786
+ gr.update(value=processed_info, visible=True), # processed_info
787
+ gr.update(visible=False) # hide NPS modal at start
788
  )
 
 
 
 
 
 
 
789
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
790
 
791
+ except Exception as e:
792
+ log_lines.append(f"❌ Failed to process {acc}: {e}. Report on the box above so that we won't count this bad one for you (email required).")
793
+ yield (
794
+ make_html_table(all_rows), # output_table
795
+ gr.update(visible=True), # results_group
796
+ gr.update(visible=False), # download_file
797
+ "", # usage_display
798
+ " Processing...", # status
799
+ "\n".join(log_lines), # progress_box
800
+ gr.update(visible=True), # run_button
801
+ gr.update(visible=True), # stop_button
802
+ gr.update(visible=True), # reset_button
803
+ gr.update(visible=False), # hide raw_text
804
+ gr.update(visible=False), # hide file_upload
805
+ gr.update(value=processed_info, visible=True), # processed_info
806
+ gr.update(visible=False) # hide NPS modal at start
807
+ )
808
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  # Step 3: Final usage update
810
  usage_text = ""
811
  if email.strip() and not email_tracked: