euler314 committed on
Commit
2ee6b58
·
verified ·
1 Parent(s): 6b8a747

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -47
app.py CHANGED
@@ -54,29 +54,56 @@ def load_models():
54
  st.info("Downloading spaCy model...")
55
  spacy.cli.download("en_core_web_sm")
56
  nlp = spacy.load("en_core_web_sm")
57
-
58
- # Load other models
59
- from sentence_transformers import SentenceTransformer
60
- semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
61
-
62
- from transformers import pipeline
63
- summarizer = pipeline("summarization")
64
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  return nlp, semantic_model, summarizer
 
66
  except Exception as e:
67
  st.error(f"Error loading models: {e}")
68
  return None, None, None
69
 
70
- # Initialize dependencies and models
71
- with st.spinner("Setting up dependencies..."):
72
- install_playwright_dependencies()
73
-
74
  with st.spinner("Loading models..."):
75
  nlp_model, semantic_model, summarizer = load_models()
76
 
77
- if not all([nlp_model, semantic_model, summarizer]):
78
- st.error("Failed to load required models. Please check the error messages above.")
79
  st.stop()
 
 
 
 
 
 
80
 
81
  # Rest of your imports and code here...
82
 
@@ -625,11 +652,16 @@ def main():
625
  if files:
626
  st.success(f"Found {len(files)} files!")
627
 
628
- # Display files
629
- for file in files:
630
- st.write(f"- {file['filename']} ({file['size']})")
 
 
 
 
631
 
632
  # Download section
 
633
  selected_files = st.multiselect(
634
  "Select files to download",
635
  range(len(files)),
@@ -637,13 +669,22 @@ def main():
637
  )
638
 
639
  if selected_files:
640
- download_dir = st.text_input("Download Directory", value="./downloads")
641
- if st.button("Download Selected"):
642
- async def download_files():
643
- async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
644
- paths = []
645
- for idx in selected_files:
646
- with st.spinner(f"Downloading {files[idx]['filename']}..."):
 
 
 
 
 
 
 
 
 
647
  path = await dm.download_file(
648
  files[idx],
649
  download_dir,
@@ -651,11 +692,21 @@ def main():
651
  )
652
  if path:
653
  paths.append(path)
654
- return paths
 
 
 
655
 
656
- downloaded = asyncio.run(download_files())
657
- if downloaded:
658
- st.success(f"Successfully downloaded {len(downloaded)} files to {download_dir}")
 
 
 
 
 
 
 
659
  else:
660
  st.warning("No files found.")
661
 
@@ -689,9 +740,13 @@ def main():
689
  st.session_state.current_url = url
690
  st.success(f"Found {len(files)} files!")
691
 
692
- # Display and download section
693
- for file in files:
694
- st.write(f"- {file['filename']} ({file['size']})")
 
 
 
 
695
 
696
  selected_files = st.multiselect(
697
  "Select files to download",
@@ -700,11 +755,20 @@ def main():
700
  )
701
 
702
  if selected_files:
703
- download_dir = st.text_input("Download Directory", value="./downloads")
704
- if st.button("Download Selected Files"):
705
- paths = []
706
- for idx in selected_files:
707
- with st.spinner(f"Downloading {files[idx]['filename']}..."):
 
 
 
 
 
 
 
 
 
708
  path = await dm.download_file(
709
  files[idx],
710
  download_dir,
@@ -712,8 +776,18 @@ def main():
712
  )
713
  if path:
714
  paths.append(path)
715
- if paths:
716
- st.success(f"Successfully downloaded {len(paths)} files to {download_dir}")
 
 
 
 
 
 
 
 
 
 
717
  else:
718
  st.warning("No files found on this page.")
719
  else:
@@ -722,15 +796,18 @@ def main():
722
  asyncio.run(run_search())
723
 
724
  else: # PDF Summarizer mode
725
- st.header("PDF Summarizer")
726
- pdf_url = st.text_input("Enter PDF URL")
727
-
728
- if st.button("Summarize"):
729
- if pdf_url:
730
- with st.spinner("Generating summary..."):
731
- summary = summarize_pdf_url(pdf_url)
732
- st.write("Summary:")
733
- st.write(summary)
 
 
 
734
 
735
  if __name__ == "__main__":
736
  try:
 
54
  st.info("Downloading spaCy model...")
55
  spacy.cli.download("en_core_web_sm")
56
  nlp = spacy.load("en_core_web_sm")
57
+
58
+ # Load SentenceTransformer with offline handling
59
+ try:
60
+ from sentence_transformers import SentenceTransformer
61
+ model_name = 'all-MiniLM-L6-v2'
62
+ cache_dir = os.path.expanduser('~/.cache/torch/sentence_transformers')
63
+ if os.path.exists(os.path.join(cache_dir, model_name)):
64
+ semantic_model = SentenceTransformer(os.path.join(cache_dir, model_name))
65
+ else:
66
+ st.warning(f"Downloading SentenceTransformer model {model_name}...")
67
+ semantic_model = SentenceTransformer(model_name)
68
+ except Exception as e:
69
+ st.error(f"Error loading SentenceTransformer: {e}")
70
+ st.info("Continuing without semantic search capability...")
71
+ semantic_model = None
72
+
73
+ # Load Transformers pipeline with offline handling
74
+ try:
75
+ from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
76
+ model_name = "facebook/bart-large-cnn"
77
+ cache_dir = os.path.expanduser('~/.cache/huggingface/transformers')
78
+ if os.path.exists(os.path.join(cache_dir, model_name)):
79
+ summarizer = pipeline("summarization", model=model_name)
80
+ else:
81
+ st.warning(f"Downloading Transformer model {model_name}...")
82
+ summarizer = pipeline("summarization")
83
+ except Exception as e:
84
+ st.error(f"Error loading Transformers: {e}")
85
+ st.info("Continuing without summarization capability...")
86
+ summarizer = None
87
+
88
  return nlp, semantic_model, summarizer
89
+
90
  except Exception as e:
91
  st.error(f"Error loading models: {e}")
92
  return None, None, None
93
 
94
+ # Initialize models with better error handling
 
 
 
95
  with st.spinner("Loading models..."):
96
  nlp_model, semantic_model, summarizer = load_models()
97
 
98
+ if nlp_model is None:
99
+ st.error("Failed to load essential NLP model. The application cannot continue.")
100
  st.stop()
101
+ else:
102
+ # Continue with available features based on which models loaded successfully
103
+ if semantic_model is None:
104
+ st.warning("Semantic search features will be disabled.")
105
+ if summarizer is None:
106
+ st.warning("PDF summarization features will be disabled.")
107
 
108
  # Rest of your imports and code here...
109
 
 
652
  if files:
653
  st.success(f"Found {len(files)} files!")
654
 
655
+ with st.expander("Found Files", expanded=True):
656
+ for i, file in enumerate(files):
657
+ col1, col2 = st.columns([3, 1])
658
+ with col1:
659
+ st.write(f"{i+1}. {file['filename']}")
660
+ with col2:
661
+ st.write(f"Size: {file['size']}")
662
 
663
  # Download section
664
+ st.subheader("Download Files")
665
  selected_files = st.multiselect(
666
  "Select files to download",
667
  range(len(files)),
 
669
  )
670
 
671
  if selected_files:
672
+ col1, col2 = st.columns([3, 1])
673
+ with col1:
674
+ download_dir = st.text_input("Download Directory", value="./downloads")
675
+ with col2:
676
+ if st.button("Download Selected", use_container_width=True):
677
+ async def download_files():
678
+ async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
679
+ paths = []
680
+ progress_text = st.empty()
681
+ progress_bar = st.progress(0)
682
+
683
+ for i, idx in enumerate(selected_files):
684
+ progress = (i + 1) / len(selected_files)
685
+ progress_text.text(f"Downloading {files[idx]['filename']}...")
686
+ progress_bar.progress(progress)
687
+
688
  path = await dm.download_file(
689
  files[idx],
690
  download_dir,
 
692
  )
693
  if path:
694
  paths.append(path)
695
+
696
+ progress_text.empty()
697
+ progress_bar.empty()
698
+ return paths
699
 
700
+ downloaded = asyncio.run(download_files())
701
+ if downloaded:
702
+ st.success(f"Successfully downloaded {len(downloaded)} files to {download_dir}")
703
+ # Create zip file if multiple files were downloaded
704
+ if len(downloaded) > 1:
705
+ zip_path = os.path.join(download_dir, "downloads.zip")
706
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
707
+ for file in downloaded:
708
+ zipf.write(file, os.path.basename(file))
709
+ st.success(f"Created zip file: {zip_path}")
710
  else:
711
  st.warning("No files found.")
712
 
 
740
  st.session_state.current_url = url
741
  st.success(f"Found {len(files)} files!")
742
 
743
+ with st.expander("Found Files", expanded=True):
744
+ for j, file in enumerate(files):
745
+ col1, col2 = st.columns([3, 1])
746
+ with col1:
747
+ st.write(f"{j+1}. {file['filename']}")
748
+ with col2:
749
+ st.write(f"Size: {file['size']}")
750
 
751
  selected_files = st.multiselect(
752
  "Select files to download",
 
755
  )
756
 
757
  if selected_files:
758
+ col1, col2 = st.columns([3, 1])
759
+ with col1:
760
+ download_dir = st.text_input("Download Directory", value="./downloads")
761
+ with col2:
762
+ if st.button("Download Selected Files"):
763
+ progress_text = st.empty()
764
+ progress_bar = st.progress(0)
765
+
766
+ paths = []
767
+ for k, idx in enumerate(selected_files):
768
+ progress = (k + 1) / len(selected_files)
769
+ progress_text.text(f"Downloading {files[idx]['filename']}...")
770
+ progress_bar.progress(progress)
771
+
772
  path = await dm.download_file(
773
  files[idx],
774
  download_dir,
 
776
  )
777
  if path:
778
  paths.append(path)
779
+
780
+ progress_text.empty()
781
+ progress_bar.empty()
782
+
783
+ if paths:
784
+ st.success(f"Successfully downloaded {len(paths)} files to {download_dir}")
785
+ if len(paths) > 1:
786
+ zip_path = os.path.join(download_dir, "downloads.zip")
787
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
788
+ for file in paths:
789
+ zipf.write(file, os.path.basename(file))
790
+ st.success(f"Created zip file: {zip_path}")
791
  else:
792
  st.warning("No files found on this page.")
793
  else:
 
796
  asyncio.run(run_search())
797
 
798
  else: # PDF Summarizer mode
799
+ if summarizer is None:
800
+ st.error("PDF summarization is not available due to model loading errors.")
801
+ else:
802
+ st.header("PDF Summarizer")
803
+ pdf_url = st.text_input("Enter PDF URL")
804
+
805
+ if st.button("Summarize"):
806
+ if pdf_url:
807
+ with st.spinner("Generating summary..."):
808
+ summary = summarize_pdf_url(pdf_url)
809
+ st.write("Summary:")
810
+ st.write(summary)
811
 
812
  if __name__ == "__main__":
813
  try: