VyLala committed on
Commit
aeddffc
·
verified ·
1 Parent(s): 993f2e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -165
app.py CHANGED
@@ -5,6 +5,7 @@ import data_preprocess, model, pipeline
5
  import os
6
  import hashlib
7
  import threading
 
8
  # Gradio UI
9
  #stop_flag = gr.State(value=False)
10
  class StopFlag:
@@ -40,9 +41,6 @@ custom_css = """
40
  }
41
  """
42
 
43
-
44
-
45
-
46
  with gr.Blocks() as interface:
47
  # with gr.Tab("CURIOUS ABOUT THIS PRODUCT?"):
48
  # gr.HTML(value=pricing_html)
@@ -459,6 +457,28 @@ with gr.Blocks() as interface:
459
  raise result
460
 
461
  return False, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
 
463
  def threaded_batch_runner(file=None, text="", email=""):
464
  print("📧 EMAIL RECEIVED:", repr(email))
@@ -608,186 +628,253 @@ with gr.Blocks() as interface:
608
  # )
609
  # return
610
 
611
-
612
- # Step 2: Loop through accessions
613
- for i, acc in enumerate(accessions):
614
- if global_stop_flag.value:
615
- log_lines.append(f"🛑 Stopped at {acc} ({i+1}/{total})")
616
- usage_text = ""
617
 
618
- if email.strip() and not email_tracked:
619
- print(f"🧪 increment_usage at STOP: {email=} {processed_accessions=}")
620
- usage_count, max_allowed = increment_usage(email, processed_accessions)
621
- email_tracked = True
622
- usage_text = f"**{usage_count}**/{max_allowed} allowed samples used by this email."
623
- #Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
624
- else:
625
- usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
626
 
627
- # yield (
628
- # make_html_table(all_rows),
629
- # gr.update(visible=True),
630
- # #gr.update(value=output_file_path, visible=True),
631
- # gr.update(value=output_file_path, visible=bool(output_file_path)),
632
- # gr.update(value=usage_text, visible=True),
633
- # "🛑 Stopped",
634
- # "\n".join(log_lines)
635
- # )
636
  yield (
637
  make_html_table(all_rows),
638
- gr.update(visible=True), # results_group
639
  gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
640
- gr.update(value=usage_text, visible=True), # usage_display
641
- "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
642
  "\n".join(log_lines),
643
- gr.update(visible=False), # run_button
644
- gr.update(visible=False), # stop_button
645
- gr.update(visible=True), # reset_button
646
- gr.update(visible=False), # raw_text
647
- gr.update(visible=False), # file_upload
648
- gr.update(value=processed_info, visible=False), # processed_info
649
- gr.update(visible=True) # NPS modal now visible
650
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
 
652
- return
653
 
654
- log_lines.append(f"[{i+1}/{total}] Processing {acc}")
655
- # yield (
656
- # make_html_table(all_rows),
657
- # gr.update(visible=True),
658
- # gr.update(visible=False),
659
- # "",
660
- # "⏳ Processing...",
661
- # "\n".join(log_lines)
662
- # )
663
- # Hide inputs, show processed_info at start
664
- yield (
665
- make_html_table(all_rows), # output_table
666
- gr.update(visible=True), # results_group
667
- gr.update(visible=False), # download_file
668
- "", # usage_display
669
- "⏳ Processing...", # status
670
- "\n".join(log_lines), # progress_box
671
- gr.update(visible=False), # run_button
672
- gr.update(visible=True), # stop_button
673
- gr.update(visible=True), # reset_button
674
- gr.update(visible=False), # hide raw_text
675
- gr.update(visible=False), # hide file_upload
676
- gr.update(value=processed_info, visible=True), # processed_info
677
- gr.update(visible=False) # hide NPS modal at start
678
- )
679
 
680
 
681
- # try:
682
- # print("📄 Processing accession:", acc)
683
- # rows = summarize_results(acc)
684
- # all_rows.extend(rows)
685
- # processed_accessions += 1 # ✅ only count success
686
- # if email.strip():
687
- # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
688
- # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
689
- print("📄 Processing accession:", acc)
690
- # --- Before calling summarize_results ---
691
- samples_left = total - i # including current one
692
- estimated_seconds_left = samples_left * 100 # your observed average per sample
693
 
694
- log_lines.append(
695
- f"Running... usually ~100s per sample"
696
- )
697
- log_lines.append(
698
- f"⏳ Estimated time left: ~{estimated_seconds_left} seconds ({samples_left} sample{'s' if samples_left > 1 else ''} remaining)"
699
- )
700
 
701
- # Yield update to UI before the heavy pipeline call
702
- yield (
703
- make_html_table(all_rows),
704
- gr.update(visible=True), # results_group
705
- gr.update(visible=False), # download_file
706
- "", # usage_display
707
- "⏳ Processing...", # status
708
- "\n".join(log_lines), # progress_box
709
- gr.update(visible=False), # run_button
710
- gr.update(visible=True), # stop_button
711
- gr.update(visible=True), # reset_button
712
- gr.update(visible=False), # raw_text
713
- gr.update(visible=False), # file_upload
714
- gr.update(value=processed_info, visible=True), # processed_info
715
- gr.update(visible=False) # hide NPS modal
716
- )
717
 
718
- # Run summarize_results in a separate process with stop flag support
719
- success, rows = run_with_timeout(
720
- summarize_results,
721
- args=(acc,),
722
- timeout=None, # or set max seconds per sample if you want
723
- stop_value=global_stop_flag
724
- )
725
 
726
- # If stop was pressed during this accession
727
- if not success and global_stop_flag.value:
728
- log_lines.append(f"🛑 Cancelled {acc} before completion")
729
- # yield (
730
- # make_html_table(all_rows),
731
- # gr.update(visible=True),
732
- # gr.update(visible=False),
733
- # "",
734
- # "🛑 Stopped",
735
- # "\n".join(log_lines)
736
- # )
737
- yield (
738
- make_html_table(all_rows),
739
- gr.update(visible=True), # results_group
740
- gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
741
- gr.update(value=usage_text, visible=True), # usage_display
742
- "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
743
- "\n".join(log_lines),
744
- gr.update(visible=False), # run_button
745
- gr.update(visible=False), # stop_button
746
- gr.update(visible=True), # reset_button
747
- gr.update(visible=False), # raw_text
748
- gr.update(visible=False), # file_upload
749
- gr.update(value="", visible=False), # processed_info
750
- gr.update(visible=True) # NPS modal now visible
751
- )
752
 
753
- break # stop processing entirely
754
 
755
- # If it finished normally
756
- if success and rows:
757
- all_rows.extend(rows)
758
- processed_accessions += 1
759
- if email.strip():
760
- save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
761
- log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
762
- else:
763
- # If it failed due to timeout or other error
764
- if not global_stop_flag.value:
765
- log_lines.append(f"⚠️ Skipped {acc} due to timeout or error")
766
 
767
- # Always yield updated logs after each attempt
768
- # yield (
769
- # make_html_table(all_rows),
770
- # gr.update(visible=True),
771
- # gr.update(visible=False),
772
- # "",
773
- # "⏳ Processing...",
774
- # "\n".join(log_lines)
775
- # )
776
- yield (
777
- make_html_table(all_rows), # output_table
778
- gr.update(visible=True), # results_group
779
- gr.update(visible=False), # download_file
780
- "", # usage_display
781
- "⏳ Processing...", # status
782
- "\n".join(log_lines), # progress_box
783
- gr.update(visible=True), # run_button
784
- gr.update(visible=True), # stop_button
785
- gr.update(visible=True), # reset_button
786
- gr.update(visible=False), # hide raw_text
787
- gr.update(visible=False), # hide file_upload
788
- gr.update(value=processed_info, visible=True), # processed_info
789
- gr.update(visible=False) # hide NPS modal at start
790
- )
 
791
 
792
  # except Exception as e:
793
  # log_lines.append(f"❌ Failed to process {acc}: {e}")
 
5
  import os
6
  import hashlib
7
  import threading
8
+
9
  # Gradio UI
10
  #stop_flag = gr.State(value=False)
11
  class StopFlag:
 
41
  }
42
  """
43
 
 
 
 
44
  with gr.Blocks() as interface:
45
  # with gr.Tab("CURIOUS ABOUT THIS PRODUCT?"):
46
  # gr.HTML(value=pricing_html)
 
457
  raise result
458
 
459
  return False, None
460
+
461
+ from multiprocessing import Pool, cpu_count
462
+
463
+ def process_accession_worker(args):
464
+ acc, stop_flag = args
465
+ if stop_flag.value: # Early exit if stop requested before starting
466
+ return None
467
+ try:
468
+ # You can still use run_with_timeout if you want per-accession timeout
469
+ success, rows = run_with_timeout(
470
+ summarize_results,
471
+ args=(acc,),
472
+ timeout=None, # or set max seconds per sample if needed
473
+ stop_value=stop_flag
474
+ )
475
+ if success:
476
+ return rows
477
+ else:
478
+ return None
479
+ except Exception as e:
480
+ print(f"⚠️ Error processing {acc}: {e}")
481
+ return None
482
 
483
  def threaded_batch_runner(file=None, text="", email=""):
484
  print("📧 EMAIL RECEIVED:", repr(email))
 
628
  # )
629
  # return
630
 
631
+ num_workers = min(cpu_count(), 4) # HF free tier usually has 2 CPUs
632
+ pool = Pool(processes=num_workers)
 
 
 
 
633
 
634
+ try:
635
+ for result in pool.imap_unordered(
636
+ process_accession_worker, [(acc, global_stop_flag) for acc in accessions]
637
+ ):
638
+ # if global_stop_flag.value:
639
+ # log_lines.append("🛑 Stop requested — terminating workers...")
640
+ # pool.terminate()
641
+ # break
642
+ if global_stop_flag.value:
643
+ log_lines.append("🛑 Stopped mid-batch")
644
+ usage_text = ""
645
+ if email.strip() and not email_tracked:
646
+ usage_count, max_allowed = increment_usage(email, processed_accessions)
647
+ email_tracked = True
648
+ usage_text = f"**{usage_count}**/{max_allowed} allowed samples used."
649
+ else:
650
+ usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
651
+
652
+ pool.terminate()
653
+ yield (
654
+ make_html_table(all_rows),
655
+ gr.update(visible=True),
656
+ gr.update(value=output_file_path, visible=bool(output_file_path)),
657
+ gr.update(value=usage_text, visible=True),
658
+ "🛑 Stopped",
659
+ "\n".join(log_lines),
660
+ gr.update(visible=False), # run_button
661
+ gr.update(visible=False), # stop_button
662
+ gr.update(visible=True), # reset_button
663
+ gr.update(visible=False), # raw_text
664
+ gr.update(visible=False), # file_upload
665
+ gr.update(value=processed_info, visible=False),
666
+ gr.update(visible=True) # NPS modal
667
+ )
668
+ return
669
+
670
+ if result:
671
+ all_rows.extend(result)
672
+ processed_count += 1
673
+ log_lines.append(f"✅ Processed {processed_count}/{total}")
674
 
 
 
 
 
 
 
 
 
 
675
  yield (
676
  make_html_table(all_rows),
677
+ gr.update(visible=True), # results_group
678
  gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
679
+ gr.update(value=usage_text, visible=True), # usage_display
680
+ f"⏳ Processed {processed_count}/{total}",
681
  "\n".join(log_lines),
682
+ gr.update(visible=False), # run_button
683
+ gr.update(visible=True), # stop_button
684
+ gr.update(visible=True), # reset_button
685
+ gr.update(visible=False), # raw_text
686
+ gr.update(visible=False), # file_upload
687
+ gr.update(value="", visible=False), # processed_info
688
+ gr.update(visible=False) # NPS modal
689
  )
690
+
691
+ finally:
692
+ pool.close()
693
+ pool.join()
694
+
695
+ if not global_stop_flag.value:
696
+ log_lines.append("✅ All accessions processed")
697
+
698
+ # # Step 2: Loop through accessions
699
+ # for i, acc in enumerate(accessions):
700
+ # if global_stop_flag.value:
701
+ # log_lines.append(f"🛑 Stopped at {acc} ({i+1}/{total})")
702
+ # usage_text = ""
703
+
704
+ # if email.strip() and not email_tracked:
705
+ # print(f"🧪 increment_usage at STOP: {email=} {processed_accessions=}")
706
+ # usage_count, max_allowed = increment_usage(email, processed_accessions)
707
+ # email_tracked = True
708
+ # usage_text = f"**{usage_count}**/{max_allowed} allowed samples used by this email."
709
+ # #Ten more samples are added first (you now have 60 limited accessions), then wait we will contact you via this email."
710
+ # else:
711
+ # usage_text = f"The limited accession is 30. The user has used {processed_accessions}, and only {30 - processed_accessions} left."
712
+
713
+ # # yield (
714
+ # # make_html_table(all_rows),
715
+ # # gr.update(visible=True),
716
+ # # #gr.update(value=output_file_path, visible=True),
717
+ # # gr.update(value=output_file_path, visible=bool(output_file_path)),
718
+ # # gr.update(value=usage_text, visible=True),
719
+ # # "🛑 Stopped",
720
+ # # "\n".join(log_lines)
721
+ # # )
722
+ # yield (
723
+ # make_html_table(all_rows),
724
+ # gr.update(visible=True), # results_group
725
+ # gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
726
+ # gr.update(value=usage_text, visible=True), # usage_display
727
+ # "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
728
+ # "\n".join(log_lines),
729
+ # gr.update(visible=False), # run_button
730
+ # gr.update(visible=False), # stop_button
731
+ # gr.update(visible=True), # reset_button
732
+ # gr.update(visible=False), # raw_text
733
+ # gr.update(visible=False), # file_upload
734
+ # gr.update(value=processed_info, visible=False), # processed_info
735
+ # gr.update(visible=True) # NPS modal now visible
736
+ # )
737
 
738
+ # return
739
 
740
+ # log_lines.append(f"[{i+1}/{total}] Processing {acc}")
741
+ # # yield (
742
+ # # make_html_table(all_rows),
743
+ # # gr.update(visible=True),
744
+ # # gr.update(visible=False),
745
+ # # "",
746
+ # # "⏳ Processing...",
747
+ # # "\n".join(log_lines)
748
+ # # )
749
+ # # Hide inputs, show processed_info at start
750
+ # yield (
751
+ # make_html_table(all_rows), # output_table
752
+ # gr.update(visible=True), # results_group
753
+ # gr.update(visible=False), # download_file
754
+ # "", # usage_display
755
+ # "⏳ Processing...", # status
756
+ # "\n".join(log_lines), # progress_box
757
+ # gr.update(visible=False), # run_button
758
+ # gr.update(visible=True), # stop_button
759
+ # gr.update(visible=True), # reset_button
760
+ # gr.update(visible=False), # hide raw_text
761
+ # gr.update(visible=False), # hide file_upload
762
+ # gr.update(value=processed_info, visible=True), # processed_info
763
+ # gr.update(visible=False) # hide NPS modal at start
764
+ # )
765
 
766
 
767
+ # # try:
768
+ # # print("📄 Processing accession:", acc)
769
+ # # rows = summarize_results(acc)
770
+ # # all_rows.extend(rows)
771
+ # # processed_accessions += 1 # ✅ only count success
772
+ # # if email.strip():
773
+ # # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
774
+ # # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
775
+ # print("📄 Processing accession:", acc)
776
+ # # --- Before calling summarize_results ---
777
+ # samples_left = total - i # including current one
778
+ # estimated_seconds_left = samples_left * 100 # your observed average per sample
779
 
780
+ # log_lines.append(
781
+ # f"Running... usually ~100s per sample"
782
+ # )
783
+ # log_lines.append(
784
+ # f"⏳ Estimated time left: ~{estimated_seconds_left} seconds ({samples_left} sample{'s' if samples_left > 1 else ''} remaining)"
785
+ # )
786
 
787
+ # # Yield update to UI before the heavy pipeline call
788
+ # yield (
789
+ # make_html_table(all_rows),
790
+ # gr.update(visible=True), # results_group
791
+ # gr.update(visible=False), # download_file
792
+ # "", # usage_display
793
+ # "⏳ Processing...", # status
794
+ # "\n".join(log_lines), # progress_box
795
+ # gr.update(visible=False), # run_button
796
+ # gr.update(visible=True), # stop_button
797
+ # gr.update(visible=True), # reset_button
798
+ # gr.update(visible=False), # raw_text
799
+ # gr.update(visible=False), # file_upload
800
+ # gr.update(value=processed_info, visible=True), # processed_info
801
+ # gr.update(visible=False) # hide NPS modal
802
+ # )
803
 
804
+ # # Run summarize_results in a separate process with stop flag support
805
+ # success, rows = run_with_timeout(
806
+ # summarize_results,
807
+ # args=(acc,),
808
+ # timeout=None, # or set max seconds per sample if you want
809
+ # stop_value=global_stop_flag
810
+ # )
811
 
812
+ # # If stop was pressed during this accession
813
+ # if not success and global_stop_flag.value:
814
+ # log_lines.append(f"🛑 Cancelled {acc} before completion")
815
+ # # yield (
816
+ # # make_html_table(all_rows),
817
+ # # gr.update(visible=True),
818
+ # # gr.update(visible=False),
819
+ # # "",
820
+ # # "🛑 Stopped",
821
+ # # "\n".join(log_lines)
822
+ # # )
823
+ # yield (
824
+ # make_html_table(all_rows),
825
+ # gr.update(visible=True), # results_group
826
+ # gr.update(value=output_file_path, visible=bool(output_file_path)), # download_file
827
+ # gr.update(value=usage_text, visible=True), # usage_display
828
+ # "🛑 Stopped", # "✅ Done" or "🛑 Stopped"
829
+ # "\n".join(log_lines),
830
+ # gr.update(visible=False), # run_button
831
+ # gr.update(visible=False), # stop_button
832
+ # gr.update(visible=True), # reset_button
833
+ # gr.update(visible=False), # raw_text
834
+ # gr.update(visible=False), # file_upload
835
+ # gr.update(value="", visible=False), # processed_info
836
+ # gr.update(visible=True) # NPS modal now visible
837
+ # )
838
 
839
+ # break # stop processing entirely
840
 
841
+ # # If it finished normally
842
+ # if success and rows:
843
+ # all_rows.extend(rows)
844
+ # processed_accessions += 1
845
+ # if email.strip():
846
+ # save_to_excel(all_rows, "", "", output_file_path, is_resume=False)
847
+ # log_lines.append(f"✅ Processed {acc} ({i+1}/{total})")
848
+ # else:
849
+ # # If it failed due to timeout or other error
850
+ # if not global_stop_flag.value:
851
+ # log_lines.append(f"⚠️ Skipped {acc} due to timeout or error")
852
 
853
+ # # Always yield updated logs after each attempt
854
+ # # yield (
855
+ # # make_html_table(all_rows),
856
+ # # gr.update(visible=True),
857
+ # # gr.update(visible=False),
858
+ # # "",
859
+ # # "⏳ Processing...",
860
+ # # "\n".join(log_lines)
861
+ # # )
862
+ # yield (
863
+ # make_html_table(all_rows), # output_table
864
+ # gr.update(visible=True), # results_group
865
+ # gr.update(visible=False), # download_file
866
+ # "", # usage_display
867
+ # "⏳ Processing...", # status
868
+ # "\n".join(log_lines), # progress_box
869
+ # gr.update(visible=True), # run_button
870
+ # gr.update(visible=True), # stop_button
871
+ # gr.update(visible=True), # reset_button
872
+ # gr.update(visible=False), # hide raw_text
873
+ # gr.update(visible=False), # hide file_upload
874
+ # gr.update(value=processed_info, visible=True), # processed_info
875
+ # gr.update(visible=False) # hide NPS modal at start
876
+ # )
877
+
878
 
879
  # except Exception as e:
880
  # log_lines.append(f"❌ Failed to process {acc}: {e}")