Update app.py
Browse files
app.py
CHANGED
@@ -267,22 +267,36 @@ with tab_download:
|
|
267 |
example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls) # 📚 Examples loaded into session!
|
268 |
|
269 |
url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200) # 📝 Text area: Paste your PDF URLs here—no commas needed!
|
270 |
-
|
271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
for idx, url in enumerate(urls):
|
273 |
if url:
|
274 |
-
output_path = pdf_url_to_filename(url)
|
|
|
275 |
if output_path not in existing_pdfs:
|
276 |
if download_pdf(url, output_path):
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
st.session_state['
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
|
287 |
mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode") # 🎛️ Selectbox: Choose your snapshot resolution!
|
288 |
if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!
|
|
|
267 |
example_urls = ["https://arxiv.org/pdf/2308.03892", "https://arxiv.org/pdf/1912.01703", "https://arxiv.org/pdf/2408.11039", "https://arxiv.org/pdf/2109.10282", "https://arxiv.org/pdf/2112.10752", "https://arxiv.org/pdf/2308.11236", "https://arxiv.org/pdf/1706.03762", "https://arxiv.org/pdf/2006.11239", "https://arxiv.org/pdf/2305.11207", "https://arxiv.org/pdf/2106.09685", "https://arxiv.org/pdf/2005.11401", "https://arxiv.org/pdf/2106.10504"]; st.session_state['pdf_urls'] = "\n".join(example_urls) # 📚 Examples loaded into session!
|
268 |
|
269 |
url_input = st.text_area("Enter PDF URLs (one per line)", value=st.session_state.get('pdf_urls', ""), height=200) # 📝 Text area: Paste your PDF URLs here—no commas needed!
|
270 |
+
|
271 |
+
# --- Download PDFs Tab: "Robo-Download" handler ---
# When the button is clicked, try to download every URL listed in the text
# area, reporting progress as it goes and recording results in session state.
if st.button("Robo-Download 🤖"):
    # splitlines() copes with "\r\n" endings; stripping each line and dropping
    # blanks keeps whitespace-only rows out of the download loop and out of the
    # progress total. dict.fromkeys() removes repeated URLs while preserving
    # order, so the same URL is not fetched twice within one run (the
    # existing-files snapshot below is taken only once, before the loop).
    urls = list(dict.fromkeys(
        line.strip() for line in url_input.splitlines() if line.strip()
    ))
    progress_bar = st.progress(0)
    status_text = st.empty()
    total_urls = len(urls)
    existing_pdfs = get_pdf_files()
    for position, url in enumerate(urls, start=1):
        output_path = pdf_url_to_filename(url)
        status_text.text(f"Fetching {position}/{total_urls}: {os.path.basename(output_path)}...")
        if output_path in existing_pdfs:
            # Already present: skip the fetch but still map the URL to its file.
            st.info(f"Already got {os.path.basename(output_path)}! Skipping... 🐾")
            st.session_state['downloaded_pdfs'][url] = output_path
        elif download_pdf(url, output_path):
            st.session_state['downloaded_pdfs'][url] = output_path
            # Lazy %-style arguments: same message, formatted only if emitted.
            logger.info("Downloaded PDF from %s to %s", url, output_path)
            # The history entry intentionally omits the file size.
            entry = f"Downloaded PDF: {output_path}"
            if entry not in st.session_state['history']:
                st.session_state['history'].append(entry)
            # Mark the newly downloaded file as selected in the asset checkboxes.
            st.session_state['asset_checkboxes'][output_path] = True
        else:
            st.error(f"Failed to nab {url} 😿")
        # The loop never runs for an empty list, so total_urls is non-zero here.
        progress_bar.progress(position / total_urls)
    status_text.text("Robo-Download complete! 🚀")
    update_gallery()
|
299 |
+
|
300 |
|
301 |
mode = st.selectbox("Snapshot Mode", ["Single Page (High-Res)", "Two Pages (High-Res)", "All Pages (High-Res)"], key="download_mode") # 🎛️ Selectbox: Choose your snapshot resolution!
|
302 |
if st.button("Snapshot Selected 📸"): # 📸 Button: Time to snap some PDF snapshots!
|