Spaces:

awacke1
/

TorchTransformers-CV-SFT

Running

App Files Community

awacke1 committed on Mar 26

Commit

e54ce94

verified ·

1 Parent(s): 4e9816f

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -12

app.py CHANGED Viewed

@@ -267,22 +267,36 @@ with tab_download:
         example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls)  # 📚 Examples loaded into session!
     url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)  # 📝 Text area: Paste your PDF URLs here—no commas needed!
-    if st.button("Robo-Download 🤖"):  # 🤖 Button: Let Robo-Download flex its digital muscles!
-        urls = url_input.strip().split("\n"); progress_bar = st.progress(0); status_text = st.empty(); total_urls = len(urls); existing_pdfs = get_pdf_files()  # 🚀 Setup: Preparing to download and track progress!
         for idx, url in enumerate(urls):
             if url:
-                output_path = pdf_url_to_filename(url); status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")  # 🔍 Fetching PDF: Checking out file name!
                 if output_path not in existing_pdfs:
                     if download_pdf(url, output_path):
-                        file_size = os.path.getsize(output_path)  # 📏 File size: Measured in bytes for bragging rights!
-                        st.session_state['downloaded_pdfs'][url] = output_path; logger.info(f"Downloaded PDF from {url} to {output_path}")  # 💾 Download success: File saved and logged!
-                        entry = f"Downloaded PDF: {output_path} ({file_size} bytes)";
-                        if entry not in st.session_state['history']: st.session_state['history'].append(entry)  # 📝 History: Recording download details with file size!
-                        st.session_state['asset_checkboxes'][output_path] = True  # ✅ Marking the asset for further magic!
-                    else: st.error(f"Failed to nab {url} 😿")  # ❌ Oops: Download failed, no kitten cuddles here!
-                else: st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾"); st.session_state['downloaded_pdfs'][url] = output_path  # 📂 Already downloaded: Avoiding duplicate work!
-                progress_bar.progress((idx + 1) / total_urls)  # 📊 Progress: Moving the download needle forward!
-        status_text.text("Robo-Download complete! 🚀"); update_gallery()  # 🎉 Finished: All PDFs downloaded and gallery refreshed!
     mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")  # 🎛️ Selectbox: Choose your snapshot resolution!
     if st.button("Snapshot Selected 📸"):  # 📸 Button: Time to snap some PDF snapshots!

         example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls)  # 📚 Examples loaded into session!
     url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200)  # 📝 Text area: Paste your PDF URLs here—no commas needed!
+    # --- Download PDFs Tab (modified section) ---
+    if st.button("Robo-Download 🤖"):
+        urls = url_input.strip().split("\n")
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        total_urls = len(urls)
+        existing_pdfs = get_pdf_files()
         for idx, url in enumerate(urls):
             if url:
+                output_path = pdf_url_to_filename(url)
+                status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")
                 if output_path not in existing_pdfs:
                     if download_pdf(url, output_path):
+                        st.session_state['downloaded_pdfs'][url] = output_path
+                        logger.info(f"Downloaded PDF from {url} to {output_path}")
+                        # Removed file size from history entry
+                        entry = f"Downloaded PDF: {output_path}"
+                        if entry not in st.session_state['history']:
+                            st.session_state['history'].append(entry)
+                        st.session_state['asset_checkboxes'][output_path] = True
+                    else:
+                        st.error(f"Failed to nab {url} 😿")
+                else:
+                    st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
+                    st.session_state['downloaded_pdfs'][url] = output_path
+                progress_bar.progress((idx + 1) / total_urls)
+        status_text.text("Robo-Download complete! 🚀")
+        update_gallery()
     mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode")  # 🎛️ Selectbox: Choose your snapshot resolution!
     if st.button("Snapshot Selected 📸"):  # 📸 Button: Time to snap some PDF snapshots!