C2MV committed
Commit a1cb529 · verified · 1 Parent(s): 5b5829a

Update app.py

Files changed (1):
  1. app.py +38 -71
app.py CHANGED
@@ -436,17 +436,17 @@ class PaperDownloader:
                with open(filepath, 'wb') as f:
                    f.write(pdf_content)
                logger.info(f"Successfully downloaded: {filename}")
-                return filepath, f'<a href="https://doi.org/{doi}">{doi}</a>', ""
+                return filepath, f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>', ""
            else:
                logger.warning(f"Could not download: {doi}")
-                return None, f"Could not download {doi}", f'<a href="https://doi.org/{doi}">{doi}</a>'
+                return None, f"Could not download {doi}", f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>'

        except Exception as e:
            logger.error(f"Error processing {doi}: {e}")
            return None, f"Error processing {doi}: {e}", f"Error processing {doi}: {e}"

-    def download_multiple_dois(self, dois_text, progress_callback=None):
-        """Downloads multiple papers from a list of DOIs with progress updates and single copy button"""
+    def download_multiple_dois(self, dois_text):
+        """Downloads multiple papers from a list of DOIs"""
        if not dois_text:
            return None, "Error: No DOIs provided", "Error: No DOIs provided"

@@ -457,9 +457,7 @@ class PaperDownloader:
        downloaded_files = []
        failed_dois = []
        downloaded_links = []
-        total_dois = len(dois)
-
-        for i, doi in enumerate(dois):
+        for i, doi in enumerate(tqdm(dois, desc="Downloading papers")):
            filepath, success_message, fail_message = self.download_single_doi(doi)
            if filepath:
                # Unique filename for zip
@@ -467,14 +465,10 @@ class PaperDownloader:
                filepath_unique = os.path.join(self.output_dir, filename)
                os.rename(filepath, filepath_unique)
                downloaded_files.append(filepath_unique)
-                downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-            else:
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
-
-            if progress_callback:
-                progress = int(((i + 1) / total_dois) * 100)
-                progress_callback(progress)
+                downloaded_links.append(f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')

+            else:
+                failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')

        if downloaded_files:
            zip_filename = 'papers.zip'
@@ -483,17 +477,9 @@ class PaperDownloader:
                    zipf.write(file_path, arcname=os.path.basename(file_path))
            logger.info(f"ZIP file created: {zip_filename}")

-
-
-
-        # Combine all links into a single string
-        all_links_html = "<br>".join(downloaded_links)
-        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""
+        return zip_filename if downloaded_files else None, "\n".join(downloaded_links), "\n".join(failed_dois)

-
-        return zip_filename if downloaded_files else None, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois)
-
-    def process_bibtex(self, bib_file, progress_callback=None):
+    def process_bibtex(self, bib_file):
        """Process BibTeX file and download papers with multiple strategies"""
        # Read BibTeX file content from the uploaded object
        try:
@@ -518,10 +504,9 @@ class PaperDownloader:
        downloaded_files = []
        failed_dois = []
        downloaded_links = []
-        total_dois = len(dois)

        # Download PDFs
-        for i, doi in enumerate(dois):
+        for doi in tqdm(dois, desc="Downloading papers"):
            try:
                # Try to download with multiple methods with retries
                pdf_content = self.download_with_retry(doi)
@@ -537,17 +522,14 @@ class PaperDownloader:
                        f.write(pdf_content)

                    downloaded_files.append(filepath)
-                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                    downloaded_links.append(f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
                    logger.info(f"Successfully downloaded: {filename}")
                else:
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                    failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
+
            except Exception as e:
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
                logger.error(f"Error processing {doi}: {e}")
-
-                if progress_callback:
-                    progress = int(((i + 1) / total_dois) * 100)
-                    progress_callback(progress)

        # Create ZIP of downloaded papers
        if downloaded_files:
@@ -556,16 +538,10 @@ class PaperDownloader:
                for file_path in downloaded_files:
                    zipf.write(file_path, arcname=os.path.basename(file_path))
            logger.info(f"ZIP file created: {zip_filename}")
-
-
-        # Combine all links into a single string
-        all_links_html = "<br>".join(downloaded_links)
-        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""

+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None

-        return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
-
-    async def process_bibtex_async(self, bib_file, progress_callback=None):
+    async def process_bibtex_async(self, bib_file):
        """Process BibTeX file and download papers with multiple strategies"""
        # Read BibTeX file content from the uploaded object
        try:
@@ -590,10 +566,9 @@ class PaperDownloader:
        downloaded_files = []
        failed_dois = []
        downloaded_links = []
-        total_dois = len(dois)

        # Download PDFs
-        for i, doi in enumerate(dois):
+        for doi in tqdm(dois, desc="Downloading papers"):
            try:
                # Try to download with multiple methods with retries
                pdf_content = await self.download_with_retry_async(doi)
@@ -609,17 +584,14 @@ class PaperDownloader:
                        f.write(pdf_content)

                    downloaded_files.append(filepath)
-                    downloaded_links.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                    downloaded_links.append(f'<div style="display: flex; align-items: center;">✓ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
                    logger.info(f"Successfully downloaded: {filename}")
                else:
-                    failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                    failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
+
            except Exception as e:
-                failed_dois.append(f'<a href="https://doi.org/{doi}">{doi}</a>')
+                failed_dois.append(f'<div style="display: flex; align-items: center;">❌ <a href="https://doi.org/{doi}">{doi}</a> <button onclick="copyLink(this)">Copy</button></div>')
                logger.error(f"Error processing {doi}: {e}")
-
-                if progress_callback:
-                    progress = int(((i + 1) / total_dois) * 100)
-                    progress_callback(progress)

        # Create ZIP of downloaded papers
        if downloaded_files:
@@ -628,34 +600,26 @@ class PaperDownloader:
                for file_path in downloaded_files:
                    zipf.write(file_path, arcname=os.path.basename(file_path))
            logger.info(f"ZIP file created: {zip_filename}")
-
-
-        # Combine all links into a single string
-        all_links_html = "<br>".join(downloaded_links)
-        copy_button_html = f'<button onclick="copyAllLinks(\'{all_links_html}\')">Copy All Links</button>' if all_links_html else ""

-
-        return zip_filename, f"{all_links_html} {copy_button_html}", "\n".join(failed_dois), None
+        return zip_filename, "\n".join(downloaded_links), "\n".join(failed_dois), None

def create_gradio_interface():
    """Create Gradio interface for Paper Downloader"""
    downloader = PaperDownloader()

-    async def download_papers(bib_file, doi_input, dois_input, progress=gr.Progress()):
-        progress_callback = lambda p: progress(p, desc="Downloading Papers")
-
+    async def download_papers(bib_file, doi_input, dois_input):
        if bib_file:
            # Check file type
            if not bib_file.name.lower().endswith('.bib'):
                return None, "Error: Please upload a .bib file", "Error: Please upload a .bib file", None

-            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file, progress_callback)
+            zip_path, downloaded_dois, failed_dois, _ = await downloader.process_bibtex_async(bib_file)
            return zip_path, downloaded_dois, failed_dois, None
        elif doi_input:
            filepath, message, failed_doi = downloader.download_single_doi(doi_input)
            return None, message, failed_doi, filepath
        elif dois_input:
-            zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input, progress_callback)
+            zip_path, downloaded_dois, failed_dois = downloader.download_multiple_dois(dois_input)
            return zip_path, downloaded_dois, failed_dois, None
        else:
            return None, "Please provide a .bib file, a single DOI, or a list of DOIs", "Please provide a .bib file, a single DOI, or a list of DOIs", None
@@ -686,7 +650,7 @@ def create_gradio_interface():
            <div id="failed-dois"></div>
            </div>
            """),
-            gr.File(label="Downloaded Single PDF")
+            gr.File(label="Downloaded Single PDF")
        ],
        title="🔬 Academic Paper Batch Downloader",
        description="Upload a BibTeX file or enter DOIs to download PDFs. We'll attempt to fetch PDFs from multiple sources like Sci-Hub, Libgen, Google Scholar and Crossref. You can use any of the three inputs at any moment.",
@@ -736,17 +700,20 @@ def create_gradio_interface():

    interface.head = """
    <script>
-    function copyAllLinks(linksHTML) {
-        const tempElement = document.createElement('div');
-        tempElement.innerHTML = linksHTML;
-        const links = Array.from(tempElement.querySelectorAll('a')).map(a => a.href).join('\\n');
-
-        navigator.clipboard.writeText(links)
+    function copyLink(button) {
+        const linkElement = button.previousElementSibling;
+        const link = linkElement.href;
+        navigator.clipboard.writeText(link)
            .then(() => {
-                alert('All links copied to clipboard!');
+                button.innerText = '✓ Copied';
+                button.style.color = 'green';
+                setTimeout(() => {
+                    button.innerText = 'Copy';
+                    button.style.color = '';
+                }, 2000);
            })
            .catch(err => {
-                console.error('Failed to copy links: ', err);
+                console.error('Failed to copy link: ', err);
            });
    }
    </script>
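
The new per-DOI status rows and the copyLink script work as a pair: each row renders the doi.org link immediately followed by its own Copy button, and copyLink(this) resolves the URL through button.previousElementSibling, so the <a> element must stay directly before the <button>. A minimal sketch of that row markup follows; format_doi_row is an illustrative helper name, not a function in app.py.

# Illustrative sketch, not part of app.py: rebuilds the per-DOI row that the
# commit emits inline via f-strings. The <button> must directly follow the <a>,
# because copyLink(this) looks up the link with button.previousElementSibling.
def format_doi_row(doi: str, ok: bool) -> str:
    mark = "✓" if ok else "❌"
    return (
        '<div style="display: flex; align-items: center;">'
        f'{mark} <a href="https://doi.org/{doi}">{doi}</a> '
        '<button onclick="copyLink(this)">Copy</button></div>'
    )

print(format_doi_row("10.1000/xyz123", ok=True))  # 10.1000/xyz123 is the DOI handbook's example DOI

These rows replace the old single "Copy All Links" button, and progress reporting moves from the gr.Progress callback to a server-side tqdm(dois, desc="Downloading papers") loop.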