awacke1 committed on
Commit
e54ce94
·
verified ·
1 Parent(s): 4e9816f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -12
app.py CHANGED
@@ -267,22 +267,36 @@ with tab_download:
267
  example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls) # 📚 Examples loaded into session!
268
 
269
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200) # 📝 Text area: Paste your PDF URLs here—no commas needed!
270
- if st.button("Robo-Download 🤖"): # 🤖 Button: Let Robo-Download flex its digital muscles!
271
- urls = url_input.strip().split("\n"); progress_bar = st.progress(0); status_text = st.empty(); total_urls = len(urls); existing_pdfs = get_pdf_files() # 🚀 Setup: Preparing to download and track progress!
 
 
 
 
 
 
272
  for idx, url in enumerate(urls):
273
  if url:
274
- output_path = pdf_url_to_filename(url); status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...") # 🔍 Fetching PDF: Checking out file name!
 
275
  if output_path not in existing_pdfs:
276
  if download_pdf(url, output_path):
277
- file_size = os.path.getsize(output_path) # 📏 File size: Measured in bytes for bragging rights!
278
- st.session_state['downloaded_pdfs'][url] = output_path; logger.info(f"Downloaded PDF from {url} to {output_path}") # 💾 Download success: File saved and logged!
279
- entry = f"Downloaded PDF: {output_path} ({file_size} bytes)";
280
- if entry not in st.session_state['history']: st.session_state['history'].append(entry) # 📝 History: Recording download details with file size!
281
- st.session_state['asset_checkboxes'][output_path] = True # ✅ Marking the asset for further magic!
282
- else: st.error(f"Failed to nab {url} 😿") # ❌ Oops: Download failed, no kitten cuddles here!
283
- else: st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾"); st.session_state['downloaded_pdfs'][url] = output_path # 📂 Already downloaded: Avoiding duplicate work!
284
- progress_bar.progress((idx + 1) / total_urls) # 📊 Progress: Moving the download needle forward!
285
- status_text.text("Robo-Download complete! 🚀"); update_gallery() # 🎉 Finished: All PDFs downloaded and gallery refreshed!
 
 
 
 
 
 
 
286
 
287
  mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode") # 🎛️ Selectbox: Choose your snapshot resolution!
288
  if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!
 
267
  example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls) # 📚 Examples loaded into session!
268
 
269
  url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200) # 📝 Text area: Paste your PDF URLs here—no commas needed!
270
+
271
+ # --- Download PDFs Tab (modified section) ---
272
+ if st.button("Robo-Download 🤖"):
273
+ urls = url_input.strip().split("\n")
274
+ progress_bar = st.progress(0)
275
+ status_text = st.empty()
276
+ total_urls = len(urls)
277
+ existing_pdfs = get_pdf_files()
278
  for idx, url in enumerate(urls):
279
  if url:
280
+ output_path = pdf_url_to_filename(url)
281
+ status_text.text(f"Fetching {idx + 1}/{total_urls}: {os.path.basename(output_path)}...")
282
  if output_path not in existing_pdfs:
283
  if download_pdf(url, output_path):
284
+ st.session_state['downloaded_pdfs'][url] = output_path
285
+ logger.info(f"Downloaded PDF from {url} to {output_path}")
286
+ # Removed file size from history entry
287
+ entry = f"Downloaded PDF: {output_path}"
288
+ if entry not in st.session_state['history']:
289
+ st.session_state['history'].append(entry)
290
+ st.session_state['asset_checkboxes'][output_path] = True
291
+ else:
292
+ st.error(f"Failed to nab {url} 😿")
293
+ else:
294
+ st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
295
+ st.session_state['downloaded_pdfs'][url] = output_path
296
+ progress_bar.progress((idx + 1) / total_urls)
297
+ status_text.text("Robo-Download complete! 🚀")
298
+ update_gallery()
299
+
300
 
301
  mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode") # 🎛️ Selectbox: Choose your snapshot resolution!
302
  if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!