euler314 committed
Commit 6b8a747 · verified · 1 Parent(s): d17208b

Update app.py

Files changed (1): app.py (+143 −237)

app.py CHANGED
@@ -8,10 +8,10 @@ import sys
 
 def install_playwright_dependencies():
     try:
-        # Use apt-get directly without sudo
+        # Update package list
         os.system('apt-get update -y')
 
-        # Install required dependencies
+        # Install required dependencies including GTK
         dependencies = [
             'libnss3',
             'libnspr4',
@@ -20,7 +20,9 @@ def install_playwright_dependencies():
             'libcups2',
             'libxcomposite1',
             'libxdamage1',
-            'libatspi2.0-0'
+            'libatspi2.0-0',
+            'libgtk-3-0',  # Add GTK dependencies
+            'libgdk-3-0'
         ]
 
         dependency_command = f"apt-get install -y {' '.join(dependencies)}"
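Note: the hunk above installs the whole dependency list through a single os.system call, which silently ignores a non-zero exit status. A minimal alternative sketch (not part of this commit) runs the same apt-get steps through subprocess so failures raise; the package names come verbatim from the diff, and libgdk-3-0 may not resolve as a separate package on every distribution.

import subprocess

def install_packages(packages):
    # Sketch only: mirror the apt-get update/install flow from the hunk above,
    # but let a failing command raise CalledProcessError instead of passing silently.
    subprocess.run(['apt-get', 'update', '-y'], check=True)
    subprocess.run(['apt-get', 'install', '-y', *packages], check=True)

# Hypothetical usage with names taken from the diff (list truncated here):
# install_packages(['libnss3', 'libnspr4', 'libcups2', 'libgtk-3-0'])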
@@ -281,7 +283,7 @@ class DownloadManager:
         self.browser = None
         self.context = None
         self.page = None
-
+
     async def __aenter__(self):
         self.playwright = await async_playwright().start()
         opts = {"headless": True}
@@ -337,25 +339,12 @@ class DownloadManager:
         try:
             async with self.context.new_page() as page:
                 response = await page.goto(url, wait_until='networkidle', timeout=30000)
-
-                # Check if the response is a redirect
                 if response and response.headers.get('location'):
                     return response.headers['location']
-
-                # Check if response is a file
                 content_type = response.headers.get('content-type', '')
                 if 'text/html' not in content_type.lower():
                     return url
-
-                # Look for meta refresh
                 content = await page.content()
-                soup = BeautifulSoup(content, 'html.parser')
-                meta_refresh = soup.find('meta', {'http-equiv': 'refresh'})
-                if meta_refresh:
-                    content = meta_refresh.get('content', '')
-                    if 'url=' in content.lower():
-                        return content.split('url=')[-1].strip()
-
                 return page.url
         except Exception as e:
             logger.error(f"Error extracting real download URL: {e}")
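The hunk above trims extract_real_download_url down to the redirect-header and content-type checks, dropping the BeautifulSoup meta-refresh fallback. For reference, a hedged sketch of the same idea done outside the browser with requests (not part of the commit; requests is assumed to be available):

import requests

def resolve_download_url(url, timeout=30):
    # Sketch only: follow HTTP redirects and treat any non-HTML response
    # as the final downloadable URL, mirroring the checks kept above.
    resp = requests.head(url, allow_redirects=True, timeout=timeout)
    content_type = resp.headers.get('content-type', '')
    if 'text/html' not in content_type.lower():
        return resp.url
    return url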
@@ -364,82 +353,53 @@ class DownloadManager:
     async def extract_downloadable_files(self, url, custom_ext_list):
         found_files = []
         try:
-            # First try to load the page
             response = await self.page.goto(url, timeout=30000, wait_until='networkidle')
             if not response:
                 return []
 
             final_url = self.page.url
-
-            # Handle redirects and download scripts
-            if '.php' in final_url or 'download' in final_url or 'get' in final_url:
+            if '.php' in final_url or 'download' in final_url:
                 real_url = await self.extract_real_download_url(final_url)
                 if real_url != final_url:
-                    content_type = (await self.page.request.head(real_url)).headers.get('content-type', '')
-                    if content_type and 'text/html' not in content_type.lower():
-                        found_files.append({
-                            'url': real_url,
-                            'filename': os.path.basename(urlparse(real_url).path) or 'downloaded_file',
-                            'size': await self.get_file_size(real_url),
-                            'metadata': {}
-                        })
-                    return found_files
+                    found_files.append({
+                        'url': real_url,
+                        'filename': os.path.basename(urlparse(real_url).path) or 'downloaded_file',
+                        'size': await self.get_file_size(real_url),
+                        'metadata': {}
+                    })
+                    return found_files
 
             await self.page.wait_for_load_state('networkidle', timeout=30000)
-            await human_like_interactions(self.page)
-
             content = await self.page.content()
             soup = BeautifulSoup(content, 'html.parser')
 
-            # Define extensions to look for
-            default_exts = ['.pdf', '.docx', '.doc', '.zip', '.rar', '.mp3', '.mp4', '.avi', '.mkv',
-                          '.png', '.jpg', '.jpeg', '.gif', '.xlsx', '.xls', '.ppt', '.pptx', '.txt']
+            default_exts = ['.pdf', '.docx', '.doc', '.zip', '.rar', '.mp3', '.mp4',
+                          '.avi', '.mkv', '.png', '.jpg', '.jpeg', '.gif']
             all_exts = set(default_exts + [ext.strip().lower() for ext in custom_ext_list if ext.strip()])
 
-            # Parse base URL for relative links
             parsed_base = urlparse(final_url)
             base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
 
-            # Find all links
             for a in soup.find_all('a', href=True):
                 href = a['href'].strip()
-
-                # Skip empty or javascript links
-                if not href or href.startswith('javascript:') or href == '#':
-                    continue
-
-                # Handle special cases (PHP scripts, download handlers)
-                if '.php' in href.lower() or 'download' in href.lower() or 'get' in href.lower():
-                    full_url = href if href.startswith('http') else urljoin(base_url, href)
-                    real_url = await self.extract_real_download_url(full_url)
-                    if real_url and real_url != full_url:
-                        size_str = await self.get_file_size(real_url)
-                        found_files.append({
-                            'url': real_url,
-                            'filename': os.path.basename(urlparse(real_url).path) or 'downloaded_file',
-                            'size': size_str,
-                            'metadata': {}
-                        })
-                    continue
-
-                # Handle direct file links
                 if any(href.lower().endswith(ext) for ext in all_exts):
-                    file_url = href if href.startswith('http') else urljoin(base_url, href)
+                    file_url = href if href.startswith('http') else (
+                        f"{base_url}{href}" if href.startswith('/') else f"{base_url}/{href}"
+                    )
+
                     size_str = await self.get_file_size(file_url)
                     meta = {}
-
                     if file_url.lower().endswith('.pdf'):
                         meta = await self.get_pdf_metadata(file_url)
-
+
                     found_files.append({
                         'url': file_url,
-                        'filename': os.path.basename(urlparse(file_url).path),
+                        'filename': os.path.basename(file_url.split('?')[0]),
                         'size': size_str,
                         'metadata': meta
                     })
-
-                # Handle Google Drive links
-                elif any(x in href for x in ['drive.google.com', 'docs.google.com']):
+
+                elif ("drive.google.com" in href) or ("docs.google.com" in href):
                     file_id = None
                     for pattern in [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']:
                         match = re.search(pattern, href)
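One design note on the hunk above: the old code resolved relative hrefs with urljoin, while the new code concatenates against the site root, which works for '/path/file.pdf'-style links but not for '../file.pdf' or './file.pdf'. A minimal sketch of the extraction loop using urljoin (illustrative only, not the committed code):

from urllib.parse import urljoin
from bs4 import BeautifulSoup

def collect_file_links(html, page_url, extensions):
    # Sketch only: gather absolute URLs for links whose path ends in a known extension.
    soup = BeautifulSoup(html, 'html.parser')
    links = []
    for a in soup.find_all('a', href=True):
        href = a['href'].strip()
        if any(href.lower().endswith(ext) for ext in extensions):
            links.append(urljoin(page_url, href))
    return links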
@@ -449,35 +409,25 @@ class DownloadManager:
 
             if file_id:
                 direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"
-                async with self.context.new_page() as page:
-                    try:
-                        response = await page.request.head(direct_url, timeout=15000)
-                        filename = file_id
-                        content_disposition = response.headers.get('content-disposition', '')
-                        if content_disposition:
-                            filename_match = re.findall('filename="(.+?)"', content_disposition)
-                            if filename_match:
-                                filename = filename_match[0]
-
-                        found_files.append({
-                            'url': direct_url,
-                            'filename': filename,
-                            'size': await self.get_file_size(direct_url),
-                            'metadata': {}
-                        })
-                    except Exception as e:
-                        logger.error(f"Error processing Google Drive link: {e}")
-
-            # Make list unique based on URLs
-            seen_urls = set()
-            unique_files = []
-            for f in found_files:
-                if f['url'] not in seen_urls:
-                    seen_urls.add(f['url'])
-                    unique_files.append(f)
-
-            return unique_files
+                filename = file_id
+                try:
+                    response = await self.page.request.head(direct_url, timeout=15000)
+                    cd = response.headers.get("Content-Disposition", "")
+                    if cd:
+                        mt = re.search(r'filename\*?="?([^";]+)', cd)
+                        if mt:
+                            filename = mt.group(1).strip('"').strip()
+
+                    found_files.append({
+                        'url': direct_url,
+                        'filename': filename,
+                        'size': await self.get_file_size(direct_url),
+                        'metadata': {}
+                    })
+                except Exception as e:
+                    logger.error(f"Error processing Google Drive link: {e}")
 
+            return found_files
         except Exception as e:
             logger.error(f"Error extracting files from {url}: {e}")
             return []
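The Google Drive handling above now issues the HEAD request on self.page instead of a throwaway page and reads the filename from Content-Disposition. The id-extraction step can be shown in isolation; this sketch reuses the same three patterns from the diff and is not itself part of the commit:

import re

DRIVE_ID_PATTERNS = [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']

def drive_direct_url(href):
    # Sketch only: pull the file id out of a Drive/Docs link and build the
    # uc?export=download form used by the hunk above.
    for pattern in DRIVE_ID_PATTERNS:
        match = re.search(pattern, href)
        if match:
            return f"https://drive.google.com/uc?export=download&id={match.group(1)}"
    return None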
@@ -486,8 +436,6 @@ class DownloadManager:
         file_url = file_info['url']
         fname = file_info['filename']
         path = os.path.join(save_dir, fname)
-
-        # Handle duplicate filenames
         base, ext = os.path.splitext(fname)
         counter = 1
         while os.path.exists(path):
@@ -497,8 +445,7 @@ class DownloadManager:
         os.makedirs(save_dir, exist_ok=True)
 
         try:
-            # Special handling for Google Drive
-            if 'drive.google.com' in file_url:
+            if "drive.google.com" in file_url:
                 import gdown
                 try:
                     st.write(f"Downloading from Google Drive: {fname}")
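The Drive branch above imports gdown; the actual download call sits outside the visible hunk. For orientation, a typical gdown invocation looks like the sketch below (the URL and output path are placeholders, not values from the commit):

import gdown

# Sketch only: download a Drive file by its direct-download URL.
direct_url = "https://drive.google.com/uc?export=download&id=FILE_ID"
output_path = "./downloads/file.bin"
gdown.download(direct_url, output_path, quiet=False)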
@@ -510,7 +457,6 @@ class DownloadManager:
                     logger.error(f"Google Drive download error: {e}")
                     return None
 
-            # Handle normal downloads
             async with self.context.new_page() as page:
                 st.write(f"Downloading: {fname}")
 
@@ -637,7 +583,6 @@ class DownloadManager:
             logger.error(f"Deep search error: {e}")
             return []
 
-# ---------- Main Streamlit UI Implementation -------------
 def main():
     if 'initialized' not in st.session_state:
         st.session_state.initialized = True
@@ -647,7 +592,6 @@ def main():
 
     st.title("Advanced File Downloader")
 
-    # Sidebar for settings
     with st.sidebar:
         st.header("Settings")
         mode = st.radio("Select Mode", ["Manual URL", "Bing Search", "PDF Summarizer"])
@@ -657,123 +601,63 @@ def main():
             "Custom File Extensions",
             placeholder=".csv, .txt, .epub"
         )
-        max_concurrency = st.slider(
-            "Max Concurrency",
-            min_value=1,
-            max_value=1000,
-            value=200
-        )
         use_proxy = st.checkbox("Use Proxy")
         proxy = st.text_input("Proxy URL", placeholder="http://proxy:port")
 
-        # Google OAuth Section
-        with st.expander("Google Drive Integration"):
-            if st.button("Start Google Sign-In"):
-                auth_url = get_google_auth_url()
-                st.markdown(f"[Click here to authorize]({auth_url})")
-
-            auth_code = st.text_input("Enter authorization code")
-            if st.button("Complete Sign-In") and auth_code:
-                creds, msg = exchange_code_for_credentials(auth_code)
-                st.session_state.google_creds = creds
-                st.write(msg)
-
-    # Main content area
     if mode == "Manual URL":
         st.header("Manual URL Mode")
         url = st.text_input("Enter URL", placeholder="https://example.com")
 
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("Deep Search", use_container_width=True):
-                if url:
-                    async def run_deep_search():
-                        async with DownloadManager(
-                            use_proxy=use_proxy,
-                            proxy=proxy
-                        ) as dm:
-                            with st.spinner("Searching for files..."):
-                                files = await dm.deep_search(
-                                    url=url,
-                                    custom_ext_list=custom_extensions.split(',') if custom_extensions else [],
-                                    max_concurrency=max_concurrency
-                                )
-                                st.session_state.discovered_files = files
-                                st.session_state.current_url = url
-                                return files
-
-                    files = asyncio.run(run_deep_search())
-                    if files:
-                        st.success(f"Found {len(files)} files!")
-                    else:
-                        st.warning("No files found.")
-
-        with col2:
-            if st.button("Preview Page", use_container_width=True):
-                if url:
-                    async def preview():
-                        async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
-                            with st.spinner("Loading preview..."):
-                                return await dm.preview_page(url)
-
-                    preview_html = asyncio.run(preview())
-                    st.markdown(preview_html, unsafe_allow_html=True)
-
-        # File selection and download section
-        if st.session_state.discovered_files:
-            with st.expander("Download Options", expanded=True):
-                file_options = [f"{f['filename']} ({f['size']})" for f in st.session_state.discovered_files]
-                selected_indices = st.multiselect(
-                    "Select files to download",
-                    range(len(file_options)),
-                    format_func=lambda x: file_options[x]
-                )
-
-                if selected_indices:
-                    download_dir = st.text_input("Download Directory", value="./downloads")
-                    delete_after = st.checkbox("Delete after creating ZIP?")
-                    upload_drive = st.checkbox("Upload to Google Drive?")
-
-                    if st.button("Download Selected"):
-                        selected_files = [st.session_state.discovered_files[i] for i in selected_indices]
-                        async def download_files():
-                            async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
-                                paths = []
-                                for file_info in selected_files:
-                                    with st.spinner(f"Downloading {file_info['filename']}..."):
-                                        path = await dm.download_file(
-                                            file_info,
-                                            download_dir,
-                                            st.session_state.current_url
-                                        )
-                                        if path:
-                                            paths.append(path)
-                                return paths
-
-                        downloaded_paths = asyncio.run(download_files())
-                        if downloaded_paths:
-                            st.success(f"Successfully downloaded {len(downloaded_paths)} files!")
-
-                            # Create ZIP if needed
-                            if len(downloaded_paths) > 1 or delete_after or upload_drive:
-                                with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as tmp:
-                                    with zipfile.ZipFile(tmp.name, 'w') as zf:
-                                        for p in downloaded_paths:
-                                            zf.write(p, arcname=os.path.basename(p))
-
-                                if upload_drive and st.session_state.google_creds:
-                                    file_id = google_drive_upload(tmp.name, st.session_state.google_creds)
-                                    if file_id and not isinstance(file_id, str):
-                                        st.success(f"Uploaded to Google Drive! File ID: {file_id}")
-                                    else:
-                                        st.error("Failed to upload to Google Drive")
-
-                                if delete_after:
-                                    for p in downloaded_paths:
-                                        try:
-                                            os.remove(p)
-                                        except:
-                                            pass
+        if st.button("Deep Search", use_container_width=True):
+            if url:
+                async def run_deep_search():
+                    async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
+                        with st.spinner("Searching for files..."):
+                            files = await dm.deep_search(
+                                url=url,
+                                custom_ext_list=custom_extensions.split(',') if custom_extensions else []
+                            )
+                            st.session_state.discovered_files = files
+                            st.session_state.current_url = url
+                            return files
+
+                files = asyncio.run(run_deep_search())
+                if files:
+                    st.success(f"Found {len(files)} files!")
+
+                    # Display files
+                    for file in files:
+                        st.write(f"- {file['filename']} ({file['size']})")
+
+                    # Download section
+                    selected_files = st.multiselect(
+                        "Select files to download",
+                        range(len(files)),
+                        format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})"
+                    )
+
+                    if selected_files:
+                        download_dir = st.text_input("Download Directory", value="./downloads")
+                        if st.button("Download Selected"):
+                            async def download_files():
+                                async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
+                                    paths = []
+                                    for idx in selected_files:
+                                        with st.spinner(f"Downloading {files[idx]['filename']}..."):
+                                            path = await dm.download_file(
+                                                files[idx],
+                                                download_dir,
+                                                url
+                                            )
+                                            if path:
+                                                paths.append(path)
+                                    return paths
+
+                            downloaded = asyncio.run(download_files())
+                            if downloaded:
+                                st.success(f"Successfully downloaded {len(downloaded)} files to {download_dir}")
+                else:
+                    st.warning("No files found.")
 
     elif mode == "Bing Search":
         st.header("Bing Search Mode")
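A caveat on the rewritten Manual URL flow above: Streamlit re-executes the script on every widget interaction, so the file list and multiselect rendered inside the "Deep Search" branch disappear on the rerun triggered by the nested "Download Selected" button unless the results are also read back from st.session_state (the removed code rebuilt its download section from st.session_state.discovered_files for this reason). A minimal sketch of that persistence pattern, independent of this commit:

import streamlit as st

# Sketch only: keep search results across Streamlit reruns.
if 'discovered_files' not in st.session_state:
    st.session_state.discovered_files = []

if st.button("Deep Search"):
    # Placeholder results; the real app would call DownloadManager.deep_search here.
    st.session_state.discovered_files = ["report.pdf", "data.zip"]

for name in st.session_state.discovered_files:
    st.write(f"- {name}")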
@@ -790,35 +674,52 @@ def main():
                 num_results=num_results
             ) as dm:
                 with st.spinner("Searching..."):
-                    return await dm.search_bing()
+                    urls = await dm.search_bing()
+                    if urls:
+                        st.success(f"Found {len(urls)} results!")
+                        for i, url in enumerate(urls, 1):
+                            with st.expander(f"Result {i}: {url}", expanded=i==1):
+                                if st.button(f"Deep Search This Result {i}"):
+                                    files = await dm.deep_search(
+                                        url=url,
+                                        custom_ext_list=custom_extensions.split(',') if custom_extensions else []
+                                    )
+                                    if files:
+                                        st.session_state.discovered_files = files
+                                        st.session_state.current_url = url
+                                        st.success(f"Found {len(files)} files!")
+
+                                        # Display and download section
+                                        for file in files:
+                                            st.write(f"- {file['filename']} ({file['size']})")
+
+                                        selected_files = st.multiselect(
+                                            "Select files to download",
+                                            range(len(files)),
+                                            format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})"
+                                        )
+
+                                        if selected_files:
+                                            download_dir = st.text_input("Download Directory", value="./downloads")
+                                            if st.button("Download Selected Files"):
+                                                paths = []
+                                                for idx in selected_files:
+                                                    with st.spinner(f"Downloading {files[idx]['filename']}..."):
+                                                        path = await dm.download_file(
+                                                            files[idx],
+                                                            download_dir,
+                                                            url
+                                                        )
+                                                        if path:
+                                                            paths.append(path)
+                                                if paths:
+                                                    st.success(f"Successfully downloaded {len(paths)} files to {download_dir}")
+                                    else:
+                                        st.warning("No files found on this page.")
+                    else:
+                        st.warning("No search results found.")
 
-            urls, info = asyncio.run(run_search())
-            if urls:
-                st.success(f"Found {len(urls)} results!")
-                for i, (url, info) in enumerate(zip(urls, info), 1):
-                    with st.expander(f"Result {i}: {url}", expanded=i==1):
-                        st.write(f"Snippet: {info['snippet']}")
-                        if info['entities']:
-                            st.write("Entities:", ', '.join(f"{e[0]} ({e[1]})" for e in info['entities']))
-
-                        if st.button(f"Deep Search This Result {i}"):
-                            st.session_state.current_url = url
-                            async def search_result():
-                                async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
-                                    return await dm.deep_search(
-                                        url=url,
-                                        custom_ext_list=custom_extensions.split(',') if custom_extensions else [],
-                                        max_concurrency=max_concurrency
-                                    )
-
-                            files = asyncio.run(search_result())
-                            if files:
-                                st.session_state.discovered_files = files
-                                st.success(f"Found {len(files)} files!")
-                            else:
-                                st.warning("No files found.")
-            else:
-                st.warning("No results found.")
+            asyncio.run(run_search())
 
     else: # PDF Summarizer mode
         st.header("PDF Summarizer")
@@ -826,9 +727,14 @@ def main():
 
     if st.button("Summarize"):
         if pdf_url:
-            summary = summarize_pdf_url(pdf_url)
-            st.write("Summary:")
-            st.write(summary)
+            with st.spinner("Generating summary..."):
+                summary = summarize_pdf_url(pdf_url)
+                st.write("Summary:")
+                st.write(summary)
 
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        logger.error(f"Application error: {str(e)}", exc_info=True)
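The new entry point wraps main() so unexpected failures are surfaced both in the UI (st.error) and in the log (logger.error with exc_info). The logger itself is defined outside this diff; a minimal configuration consistent with that usage might look like this (an assumption, not code from app.py):

import logging

# Sketch only: module-level logger setup compatible with logger.error(..., exc_info=True).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)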
 
 
 
 
 