Update app.py
Browse files
app.py
CHANGED
@@ -538,7 +538,13 @@ class DownloadManager:
|
|
538 |
logger.error(f"Error getting sublinks: {e}")
|
539 |
return []
|
540 |
|
|
|
|
|
|
|
|
|
|
|
541 |
def main():
|
|
|
542 |
if 'initialized' not in st.session_state:
|
543 |
st.session_state.initialized = True
|
544 |
st.session_state.discovered_files = []
|
@@ -548,18 +554,11 @@ def main():
|
|
548 |
|
549 |
st.title("Advanced File Downloader")
|
550 |
|
551 |
-
# Sidebar
|
552 |
with st.sidebar:
|
553 |
-
# Mode Selection
|
554 |
mode = st.radio("Select Mode", ["Manual URL", "Bing Search", "PDF Summarizer"], key="mode_select")
|
555 |
-
|
556 |
-
# Advanced Options
|
557 |
with st.expander("Advanced Options", expanded=True):
|
558 |
-
custom_extensions = st.text_input(
|
559 |
-
"Custom File Extensions",
|
560 |
-
placeholder=".csv, .txt, .epub",
|
561 |
-
key="custom_ext_input"
|
562 |
-
)
|
563 |
max_sublinks = st.number_input(
|
564 |
"Maximum Sublinks to Process",
|
565 |
min_value=1,
|
@@ -580,19 +579,16 @@ def main():
|
|
580 |
)
|
581 |
use_proxy = st.checkbox("Use Proxy", key="proxy_checkbox")
|
582 |
proxy = st.text_input("Proxy URL", placeholder="http://proxy:port", key="proxy_input")
|
583 |
-
|
584 |
-
# Google Drive Integration
|
585 |
with st.expander("Google Drive Integration", expanded=False):
|
586 |
if st.button("Start Google Sign-In", key="google_signin_btn"):
|
587 |
auth_url = get_google_auth_url()
|
588 |
st.markdown(f"[Click here to authorize]({auth_url})")
|
589 |
-
|
590 |
auth_code = st.text_input("Enter authorization code", key="auth_code_input")
|
591 |
if st.button("Complete Sign-In", key="complete_signin_btn") and auth_code:
|
592 |
creds, msg = exchange_code_for_credentials(auth_code)
|
593 |
st.session_state.google_creds = creds
|
594 |
st.write(msg)
|
595 |
-
|
596 |
# Main content area
|
597 |
if mode == "Manual URL":
|
598 |
st.header("Manual URL Mode")
|
@@ -623,17 +619,16 @@ def main():
|
|
623 |
if files:
|
624 |
st.success(f"Found {len(files)} files!")
|
625 |
|
626 |
-
#
|
627 |
col1, col2 = st.columns([1, 4])
|
628 |
with col1:
|
629 |
if st.button("Select All", key="select_all_btn"):
|
630 |
st.session_state.selected_files = list(range(len(files)))
|
631 |
-
|
632 |
if st.button("Clear Selection", key="clear_selection_btn"):
|
633 |
st.session_state.selected_files = []
|
634 |
-
|
635 |
|
636 |
-
# File selection
|
637 |
selected_files = st.multiselect(
|
638 |
"Select files to download",
|
639 |
options=list(range(len(files))),
|
@@ -641,8 +636,6 @@ def main():
|
|
641 |
format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})",
|
642 |
key="file_multiselect"
|
643 |
)
|
644 |
-
|
645 |
-
# Update session state
|
646 |
st.session_state.selected_files = selected_files
|
647 |
|
648 |
if selected_files:
|
@@ -659,28 +652,20 @@ def main():
|
|
659 |
if st.button("Download Selected", key="download_btn"):
|
660 |
if not os.path.exists(download_dir):
|
661 |
os.makedirs(download_dir)
|
662 |
-
|
663 |
async def download_files():
|
664 |
downloaded_paths = []
|
665 |
progress_bar = st.progress(0)
|
666 |
status_text = st.empty()
|
667 |
-
|
668 |
async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
|
669 |
for i, idx in enumerate(selected_files):
|
670 |
progress = (i + 1) / len(selected_files)
|
671 |
file_info = files[idx]
|
672 |
-
|
673 |
status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_files)})")
|
674 |
progress_bar.progress(progress)
|
675 |
-
|
676 |
-
path = await dm.download_file(
|
677 |
-
file_info,
|
678 |
-
download_dir,
|
679 |
-
url
|
680 |
-
)
|
681 |
if path:
|
682 |
downloaded_paths.append(path)
|
683 |
-
|
684 |
status_text.empty()
|
685 |
progress_bar.empty()
|
686 |
return downloaded_paths
|
@@ -689,11 +674,9 @@ def main():
|
|
689 |
|
690 |
if downloaded:
|
691 |
st.success(f"Successfully downloaded {len(downloaded)} files")
|
692 |
-
|
693 |
if create_zip or upload_to_drive:
|
694 |
zip_path = create_zip_file(downloaded, download_dir)
|
695 |
st.success(f"Created ZIP file: {zip_path}")
|
696 |
-
|
697 |
if upload_to_drive and st.session_state.get('google_creds'):
|
698 |
with st.spinner("Uploading to Google Drive..."):
|
699 |
drive_id = google_drive_upload(zip_path, st.session_state.google_creds)
|
@@ -701,7 +684,6 @@ def main():
|
|
701 |
st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
|
702 |
else:
|
703 |
st.error(drive_id)
|
704 |
-
|
705 |
if delete_after:
|
706 |
for path in downloaded:
|
707 |
try:
|
@@ -711,23 +693,19 @@ def main():
|
|
711 |
st.info("Deleted original files after ZIP creation")
|
712 |
else:
|
713 |
st.warning("No files found.")
|
714 |
-
|
715 |
-
#
|
716 |
if st.session_state.discovered_files:
|
717 |
files = st.session_state.discovered_files
|
718 |
st.success(f"Found {len(files)} files!")
|
719 |
-
|
720 |
-
# Select All/Clear Selection buttons
|
721 |
col1, col2 = st.columns([1, 4])
|
722 |
with col1:
|
723 |
if st.button("Select All", key="select_all_btn2"):
|
724 |
st.session_state.selected_files = list(range(len(files)))
|
725 |
-
|
726 |
if st.button("Clear Selection", key="clear_selection_btn2"):
|
727 |
st.session_state.selected_files = []
|
728 |
-
|
729 |
-
|
730 |
-
# File selection
|
731 |
selected_files = st.multiselect(
|
732 |
"Select files to download",
|
733 |
options=list(range(len(files))),
|
@@ -735,10 +713,7 @@ def main():
|
|
735 |
format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})",
|
736 |
key="file_multiselect2"
|
737 |
)
|
738 |
-
|
739 |
-
# Update session state
|
740 |
st.session_state.selected_files = selected_files
|
741 |
-
|
742 |
if selected_files:
|
743 |
col1, col2, col3, col4 = st.columns(4)
|
744 |
with col1:
|
@@ -749,45 +724,32 @@ def main():
|
|
749 |
delete_after = st.checkbox("Delete after creating ZIP", key="delete_after_checkbox2")
|
750 |
with col4:
|
751 |
upload_to_drive = st.checkbox("Upload to Google Drive", key="upload_drive_checkbox2")
|
752 |
-
|
753 |
if st.button("Download Selected", key="download_btn2"):
|
754 |
if not os.path.exists(download_dir):
|
755 |
os.makedirs(download_dir)
|
756 |
-
|
757 |
async def download_files():
|
758 |
downloaded_paths = []
|
759 |
progress_bar = st.progress(0)
|
760 |
status_text = st.empty()
|
761 |
-
|
762 |
async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
|
763 |
for i, idx in enumerate(selected_files):
|
764 |
progress = (i + 1) / len(selected_files)
|
765 |
file_info = files[idx]
|
766 |
-
|
767 |
status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_files)})")
|
768 |
progress_bar.progress(progress)
|
769 |
-
|
770 |
-
path = await dm.download_file(
|
771 |
-
file_info,
|
772 |
-
download_dir,
|
773 |
-
st.session_state.current_url
|
774 |
-
)
|
775 |
if path:
|
776 |
downloaded_paths.append(path)
|
777 |
-
|
778 |
status_text.empty()
|
779 |
progress_bar.empty()
|
780 |
return downloaded_paths
|
781 |
-
|
782 |
downloaded = asyncio.run(download_files())
|
783 |
-
|
784 |
if downloaded:
|
785 |
st.success(f"Successfully downloaded {len(downloaded)} files")
|
786 |
-
|
787 |
if create_zip or upload_to_drive:
|
788 |
zip_path = create_zip_file(downloaded, download_dir)
|
789 |
st.success(f"Created ZIP file: {zip_path}")
|
790 |
-
|
791 |
if upload_to_drive and st.session_state.get('google_creds'):
|
792 |
with st.spinner("Uploading to Google Drive..."):
|
793 |
drive_id = google_drive_upload(zip_path, st.session_state.google_creds)
|
@@ -795,7 +757,6 @@ def main():
|
|
795 |
st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
|
796 |
else:
|
797 |
st.error(drive_id)
|
798 |
-
|
799 |
if delete_after:
|
800 |
for path in downloaded:
|
801 |
try:
|
@@ -803,12 +764,11 @@ def main():
|
|
803 |
except Exception as e:
|
804 |
st.warning(f"Could not delete {path}: {e}")
|
805 |
st.info("Deleted original files after ZIP creation")
|
806 |
-
|
807 |
elif mode == "Bing Search":
|
808 |
st.header("Bing Search Mode")
|
809 |
query = st.text_input("Enter search query", key="search_query_input")
|
810 |
num_results = st.slider("Number of results", 1, 50, 5, key="num_results_slider")
|
811 |
-
|
812 |
if st.button("Search", key="search_btn"):
|
813 |
if query:
|
814 |
async def run_search():
|
@@ -835,21 +795,19 @@ def main():
|
|
835 |
st.session_state.discovered_files = files
|
836 |
st.session_state.current_url = url
|
837 |
st.session_state.selected_files = []
|
838 |
-
|
839 |
else:
|
840 |
st.warning("No files found on this page.")
|
841 |
else:
|
842 |
st.warning("No search results found.")
|
843 |
-
|
844 |
asyncio.run(run_search())
|
845 |
-
|
846 |
else: # PDF Summarizer mode
|
847 |
if summarizer is None:
|
848 |
st.error("PDF summarization is not available due to model loading errors.")
|
849 |
else:
|
850 |
st.header("PDF Summarizer")
|
851 |
pdf_url = st.text_input("Enter PDF URL", key="pdf_url_input")
|
852 |
-
|
853 |
if st.button("Summarize", key="summarize_btn"):
|
854 |
if pdf_url:
|
855 |
with st.spinner("Generating summary..."):
|
@@ -873,4 +831,3 @@ if __name__ == "__main__":
|
|
873 |
main()
|
874 |
except Exception as e:
|
875 |
st.error(f"An error occurred: {str(e)}")
|
876 |
-
logger.error(f"Application error: {str(e)}", exc_info=True)
|
|
|
538 |
logger.error(f"Error getting sublinks: {e}")
|
539 |
return []
|
540 |
|
541 |
+
def safe_rerun():
    """Rerun the Streamlit script with whichever rerun API is available.

    Checking only for ``st.experimental_rerun`` makes this helper a silent
    no-op on Streamlit versions that expose the rerun call as ``st.rerun``
    and no longer ship the experimental name. Prefer ``st.rerun`` and fall
    back to ``st.experimental_rerun`` for older versions. If neither
    attribute exists, do nothing, as the original helper did.
    """
    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
    if rerun is not None:
        # Streamlit stops the current script run here and starts a fresh one,
        # so nothing after this call executes in the current run.
        rerun()
|
545 |
+
|
546 |
def main():
|
547 |
+
# Initialize session state on first run
|
548 |
if 'initialized' not in st.session_state:
|
549 |
st.session_state.initialized = True
|
550 |
st.session_state.discovered_files = []
|
|
|
554 |
|
555 |
st.title("Advanced File Downloader")
|
556 |
|
557 |
+
# Sidebar configuration
|
558 |
with st.sidebar:
|
|
|
559 |
mode = st.radio("Select Mode", ["Manual URL", "Bing Search", "PDF Summarizer"], key="mode_select")
|
|
|
|
|
560 |
with st.expander("Advanced Options", expanded=True):
|
561 |
+
custom_extensions = st.text_input("Custom File Extensions", placeholder=".csv, .txt, .epub", key="custom_ext_input")
|
|
|
|
|
|
|
|
|
562 |
max_sublinks = st.number_input(
|
563 |
"Maximum Sublinks to Process",
|
564 |
min_value=1,
|
|
|
579 |
)
|
580 |
use_proxy = st.checkbox("Use Proxy", key="proxy_checkbox")
|
581 |
proxy = st.text_input("Proxy URL", placeholder="http://proxy:port", key="proxy_input")
|
|
|
|
|
582 |
with st.expander("Google Drive Integration", expanded=False):
|
583 |
if st.button("Start Google Sign-In", key="google_signin_btn"):
|
584 |
auth_url = get_google_auth_url()
|
585 |
st.markdown(f"[Click here to authorize]({auth_url})")
|
|
|
586 |
auth_code = st.text_input("Enter authorization code", key="auth_code_input")
|
587 |
if st.button("Complete Sign-In", key="complete_signin_btn") and auth_code:
|
588 |
creds, msg = exchange_code_for_credentials(auth_code)
|
589 |
st.session_state.google_creds = creds
|
590 |
st.write(msg)
|
591 |
+
|
592 |
# Main content area
|
593 |
if mode == "Manual URL":
|
594 |
st.header("Manual URL Mode")
|
|
|
619 |
if files:
|
620 |
st.success(f"Found {len(files)} files!")
|
621 |
|
622 |
+
# File selection block 1
|
623 |
col1, col2 = st.columns([1, 4])
|
624 |
with col1:
|
625 |
if st.button("Select All", key="select_all_btn"):
|
626 |
st.session_state.selected_files = list(range(len(files)))
|
627 |
+
safe_rerun()
|
628 |
if st.button("Clear Selection", key="clear_selection_btn"):
|
629 |
st.session_state.selected_files = []
|
630 |
+
safe_rerun()
|
631 |
|
|
|
632 |
selected_files = st.multiselect(
|
633 |
"Select files to download",
|
634 |
options=list(range(len(files))),
|
|
|
636 |
format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})",
|
637 |
key="file_multiselect"
|
638 |
)
|
|
|
|
|
639 |
st.session_state.selected_files = selected_files
|
640 |
|
641 |
if selected_files:
|
|
|
652 |
if st.button("Download Selected", key="download_btn"):
|
653 |
if not os.path.exists(download_dir):
|
654 |
os.makedirs(download_dir)
|
655 |
+
|
656 |
async def download_files():
|
657 |
downloaded_paths = []
|
658 |
progress_bar = st.progress(0)
|
659 |
status_text = st.empty()
|
|
|
660 |
async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
|
661 |
for i, idx in enumerate(selected_files):
|
662 |
progress = (i + 1) / len(selected_files)
|
663 |
file_info = files[idx]
|
|
|
664 |
status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_files)})")
|
665 |
progress_bar.progress(progress)
|
666 |
+
path = await dm.download_file(file_info, download_dir, url)
|
|
|
|
|
|
|
|
|
|
|
667 |
if path:
|
668 |
downloaded_paths.append(path)
|
|
|
669 |
status_text.empty()
|
670 |
progress_bar.empty()
|
671 |
return downloaded_paths
|
|
|
674 |
|
675 |
if downloaded:
|
676 |
st.success(f"Successfully downloaded {len(downloaded)} files")
|
|
|
677 |
if create_zip or upload_to_drive:
|
678 |
zip_path = create_zip_file(downloaded, download_dir)
|
679 |
st.success(f"Created ZIP file: {zip_path}")
|
|
|
680 |
if upload_to_drive and st.session_state.get('google_creds'):
|
681 |
with st.spinner("Uploading to Google Drive..."):
|
682 |
drive_id = google_drive_upload(zip_path, st.session_state.google_creds)
|
|
|
684 |
st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
|
685 |
else:
|
686 |
st.error(drive_id)
|
|
|
687 |
if delete_after:
|
688 |
for path in downloaded:
|
689 |
try:
|
|
|
693 |
st.info("Deleted original files after ZIP creation")
|
694 |
else:
|
695 |
st.warning("No files found.")
|
696 |
+
|
697 |
+
# File selection block 2 (if files are already discovered)
|
698 |
if st.session_state.discovered_files:
|
699 |
files = st.session_state.discovered_files
|
700 |
st.success(f"Found {len(files)} files!")
|
|
|
|
|
701 |
col1, col2 = st.columns([1, 4])
|
702 |
with col1:
|
703 |
if st.button("Select All", key="select_all_btn2"):
|
704 |
st.session_state.selected_files = list(range(len(files)))
|
705 |
+
safe_rerun()
|
706 |
if st.button("Clear Selection", key="clear_selection_btn2"):
|
707 |
st.session_state.selected_files = []
|
708 |
+
safe_rerun()
|
|
|
|
|
709 |
selected_files = st.multiselect(
|
710 |
"Select files to download",
|
711 |
options=list(range(len(files))),
|
|
|
713 |
format_func=lambda x: f"{files[x]['filename']} ({files[x]['size']})",
|
714 |
key="file_multiselect2"
|
715 |
)
|
|
|
|
|
716 |
st.session_state.selected_files = selected_files
|
|
|
717 |
if selected_files:
|
718 |
col1, col2, col3, col4 = st.columns(4)
|
719 |
with col1:
|
|
|
724 |
delete_after = st.checkbox("Delete after creating ZIP", key="delete_after_checkbox2")
|
725 |
with col4:
|
726 |
upload_to_drive = st.checkbox("Upload to Google Drive", key="upload_drive_checkbox2")
|
|
|
727 |
if st.button("Download Selected", key="download_btn2"):
|
728 |
if not os.path.exists(download_dir):
|
729 |
os.makedirs(download_dir)
|
730 |
+
|
731 |
async def download_files():
|
732 |
downloaded_paths = []
|
733 |
progress_bar = st.progress(0)
|
734 |
status_text = st.empty()
|
|
|
735 |
async with DownloadManager(use_proxy=use_proxy, proxy=proxy) as dm:
|
736 |
for i, idx in enumerate(selected_files):
|
737 |
progress = (i + 1) / len(selected_files)
|
738 |
file_info = files[idx]
|
|
|
739 |
status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_files)})")
|
740 |
progress_bar.progress(progress)
|
741 |
+
path = await dm.download_file(file_info, download_dir, st.session_state.current_url)
|
|
|
|
|
|
|
|
|
|
|
742 |
if path:
|
743 |
downloaded_paths.append(path)
|
|
|
744 |
status_text.empty()
|
745 |
progress_bar.empty()
|
746 |
return downloaded_paths
|
|
|
747 |
downloaded = asyncio.run(download_files())
|
|
|
748 |
if downloaded:
|
749 |
st.success(f"Successfully downloaded {len(downloaded)} files")
|
|
|
750 |
if create_zip or upload_to_drive:
|
751 |
zip_path = create_zip_file(downloaded, download_dir)
|
752 |
st.success(f"Created ZIP file: {zip_path}")
|
|
|
753 |
if upload_to_drive and st.session_state.get('google_creds'):
|
754 |
with st.spinner("Uploading to Google Drive..."):
|
755 |
drive_id = google_drive_upload(zip_path, st.session_state.google_creds)
|
|
|
757 |
st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
|
758 |
else:
|
759 |
st.error(drive_id)
|
|
|
760 |
if delete_after:
|
761 |
for path in downloaded:
|
762 |
try:
|
|
|
764 |
except Exception as e:
|
765 |
st.warning(f"Could not delete {path}: {e}")
|
766 |
st.info("Deleted original files after ZIP creation")
|
767 |
+
|
768 |
elif mode == "Bing Search":
|
769 |
st.header("Bing Search Mode")
|
770 |
query = st.text_input("Enter search query", key="search_query_input")
|
771 |
num_results = st.slider("Number of results", 1, 50, 5, key="num_results_slider")
|
|
|
772 |
if st.button("Search", key="search_btn"):
|
773 |
if query:
|
774 |
async def run_search():
|
|
|
795 |
st.session_state.discovered_files = files
|
796 |
st.session_state.current_url = url
|
797 |
st.session_state.selected_files = []
|
798 |
+
safe_rerun()
|
799 |
else:
|
800 |
st.warning("No files found on this page.")
|
801 |
else:
|
802 |
st.warning("No search results found.")
|
|
|
803 |
asyncio.run(run_search())
|
804 |
+
|
805 |
else: # PDF Summarizer mode
|
806 |
if summarizer is None:
|
807 |
st.error("PDF summarization is not available due to model loading errors.")
|
808 |
else:
|
809 |
st.header("PDF Summarizer")
|
810 |
pdf_url = st.text_input("Enter PDF URL", key="pdf_url_input")
|
|
|
811 |
if st.button("Summarize", key="summarize_btn"):
|
812 |
if pdf_url:
|
813 |
with st.spinner("Generating summary..."):
|
|
|
831 |
main()
|
832 |
except Exception as e:
|
833 |
st.error(f"An error occurred: {str(e)}")
|
|