Update app.py
app.py
CHANGED
@@ -251,6 +251,7 @@ class DownloadManager:
             logger.error(f"Error extracting real download URL: {e}")
             return url

+    # IMPROVED: Enhanced exam links extraction method
     async def get_edu_exam_links(self, url):
         """Specialized method for educational exam websites that follows a common pattern."""
         try:
@@ -259,37 +260,72 @@ class DownloadManager:

             # Use requests for a faster initial scan
             headers = {"User-Agent": get_random_user_agent()}
-
-
-            if response.status_code != 200:
-                logger.warning(f"Failed to fetch page: {response.status_code}")
-                return []
-
-            # Parse with BeautifulSoup first for efficiency
-            soup = BeautifulSoup(response.text, "html.parser")
-            parsed_base = urlparse(url)
-            base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
-
-            # Look for all links
-            for a in soup.find_all("a", href=True):
-                href = a["href"]
-                full_url = urljoin(url, href)
+            try:
+                response = requests.get(url, headers=headers, timeout=30)

-
-
-
-
-
-
+                if response.status_code == 200:
+                    # Parse with BeautifulSoup first for efficiency
+                    soup = BeautifulSoup(response.text, "html.parser")
+                    parsed_base = urlparse(url)
+                    base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
+
+                    # Look for all links
+                    for a in soup.find_all("a", href=True):
+                        href = a["href"]
+                        full_url = urljoin(url, href)
+
+                        # Special patterns for exam sites
+                        for pattern in ["/eduexp/docs/", "/exam/", "/pastexam/", "/papers/",
+                                        "/test/", "/download/", "/files/", "/assignments/",
+                                        "paper_", "question_", "exam_", "test_", "past_"]:
+                            if pattern in full_url.lower():
+                                links.add(full_url)
+                                break
+            except Exception as e:
+                logger.warning(f"Request-based extraction failed: {e}")

             # If we didn't find many links with direct approach, use Playwright for more thorough extraction
             if len(links) < 5:
                 logger.info("Using browser for enhanced link extraction")
-
-
-
-
-
+                try:
+                    await self.page.goto(url, timeout=30000, wait_until='networkidle')
+
+                    # Extract all links with Playwright
+                    page_links = await self.page.evaluate("""
+                        () => {
+                            const links = [];
+                            const anchors = document.querySelectorAll('a[href]');
+                            for (const a of anchors) {
+                                if (a.href) {
+                                    links.push({
+                                        href: a.href,
+                                        text: a.innerText || a.textContent || ''
+                                    });
+                                }
+                            }
+                            return links;
+                        }
+                    """)
+
+                    # Process extracted links
+                    for link_info in page_links:
+                        href = link_info.get('href', '')
+                        text = link_info.get('text', '').lower()
+
+                        if href:
+                            # Check for exam-related patterns in URL or link text
+                            url_patterns = ["/eduexp/docs/", "/exam/", "/pastexam/", "/papers/",
+                                            "/test/", "/download/", "/files/", "/assignments/",
+                                            "paper_", "question_", "exam_", "test_", "past_"]
+
+                            text_patterns = ["exam", "paper", "test", "question", "past", "download"]
+
+                            if any(pattern in href.lower() for pattern in url_patterns) or \
+                               any(pattern in text for pattern in text_patterns):
+                                links.add(href)
+
+                    # Check for ASP.NET specific elements that might contain exam links
+                    grid_elements = await self.page.query_selector_all('table.grid, .GridView, #GridView1, .rgMasterTable')
                     for grid in grid_elements:
                         grid_links = await grid.query_selector_all('a[href]')
                         for a in grid_links:
@@ -297,28 +333,30 @@ class DownloadManager:
                             if href:
                                 full_url = href if href.startswith('http') else urljoin(url, href)
                                 links.add(full_url)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                    # Try clicking any controls that might reveal more exam links
+                    buttons = await self.page.query_selector_all('input[type="button"], button')
+                    for button in buttons:
+                        button_text = await button.text_content() or ""
+                        button_value = await button.get_attribute("value") or ""
+                        if any(keyword in (button_text + button_value).lower() for keyword in
+                               ["show", "view", "display", "list", "exam", "paper", "test"]):
+                            try:
+                                await button.click()
+                                await self.page.wait_for_timeout(1000)
+                                await self.page.wait_for_load_state('networkidle', timeout=5000)
+
+                                # Get any new links that appeared
+                                new_links = await self.page.query_selector_all('a[href]')
+                                for a in new_links:
+                                    href = await a.get_attribute('href')
+                                    if href:
+                                        full_url = href if href.startswith('http') else urljoin(url, href)
+                                        links.add(full_url)
+                            except Exception as e:
+                                logger.warning(f"Error clicking button: {e}")
+                except Exception as e:
+                    logger.error(f"Browser-based extraction failed: {e}")

             # Filter links to likely contain exam documents
             filtered_links = []
@@ -579,6 +617,7 @@ class DownloadManager:
             logger.error(f"Error downloading {file_url}: {e}")
             return None

+    # IMPROVED: Enhanced view-only document download method
     async def force_download_viewonly(self, file_info, save_path):
         """Completely rewritten method to handle view-only files reliably, especially multi-page PDFs"""
         try:
@@ -620,7 +659,8 @@ class DownloadManager:
                 context = await browser.new_context(
                     viewport={'width': 1600, 'height': 1200},
                     user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
-                    device_scale_factor=2.0
+                    device_scale_factor=2.0,
+                    accept_downloads=True  # Critical for the download workflow
                 )

                 page = await context.new_page()
@@ -637,11 +677,10 @@ class DownloadManager:

                 # Special handling for PDFs
                 if file_type.lower() == 'pdf':
-                    #
-                    pagination_exists = await page.query_selector('div[role="toolbar"] div[role="presentation"] div[role="presentation"]:has-text("/")')
+                    # Use the improved scrolling and detection approach

-                    #
-
+                    # Check if there's a pagination control to estimate pages
+                    estimated_pages = await page.evaluate("""
                         () => {
                             // Method 1: Check page counter text
                             const pageCounters = Array.from(document.querySelectorAll('*')).filter(el => {
@@ -663,205 +702,119 @@ class DownloadManager:
                             const thumbnails = document.querySelectorAll('.drive-viewer-paginated-thumb');
                             if (thumbnails.length > 0) return thumbnails.length;

-                            // Fallback: conservative guess
-                            return 50;
+                            // Fallback: conservative guess
+                            return 50;
                         }
                     """)

-                    logger.info(f"
+                    logger.info(f"Estimated {estimated_pages} pages in PDF")

-
-
-                    # Let's double-check by looking for next/previous buttons
-                    next_button = await page.query_selector('button[aria-label="Next page"]')
-                    if next_button:
-                        disabled = await next_button.get_attribute('disabled')
-                        if not disabled:
-                            logger.info("Found next button that's not disabled, document has multiple pages")
-                            total_pages = 100  # Set a high number, we'll stop when we can't go further
+                    # Scroll to ensure all pages are loaded
+                    logger.info("Scrolling to load all PDF pages...")

-                    #
-
-
-                        logger.info("Using single-page capture approach")
-
-                        # Take a screenshot of the current view (should be the full document or first page)
-                        screenshot_path = os.path.join(temp_dir, "page.png")
-
-                        # Try to screenshot just the document area if we can find it
-                        document_area = await page.query_selector('.drive-viewer-paginated-page')
-                        if document_area:
-                            await document_area.screenshot(path=screenshot_path)
-                        else:
-                            # Otherwise take a full screenshot
-                            await page.screenshot(path=screenshot_path)
-
-                        # Convert to PDF
-                        img = Image.open(screenshot_path)
-                        width, height = img.size
-                        c = canvas.Canvas(save_path, pagesize=(width, height))
-                        c.drawImage(screenshot_path, 0, 0, width, height)
-                        c.save()
-
-                        os.remove(screenshot_path)
-                        os.rmdir(temp_dir)
-
-                        if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
-                            return save_path
-                        return None
+                    # Initial scroll to bottom to trigger lazy loading
+                    await page.keyboard.press("End")
+                    await page.wait_for_timeout(3000)

-                    #
-
+                    # Scroll page by page to ensure all pages are loaded
+                    max_attempts = min(estimated_pages * 3, 300)
+                    attempt = 0
+                    prev_blob_count = 0

-
-
-
-
-
-
-                                return /\\d+\\s*\\/\\s*\\d+/.test(text);
-                            });
+                    while attempt < max_attempts:
+                        blob_count = await page.evaluate("""
+                            Array.from(document.getElementsByTagName('img'))
+                                .filter(img => img.src.startsWith('blob:') && img.width > 100)
+                                .length
+                        """)

-
-
-
-
-
-
+                        logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images")
+
+                        if blob_count >= estimated_pages or (blob_count > 0 and blob_count == prev_blob_count and attempt > 10):
+                            logger.info("All pages appear to be loaded.")
+                            break
+
+                        await page.keyboard.press("PageDown")
+                        await page.wait_for_timeout(2000)
+                        prev_blob_count = blob_count
+                        attempt += 1

-
-
-                            match = re.search(r'(\d+)\s*\/\s*\d+', current_page_text)
-                            if match:
-                                current_page = int(match.group(1))
+                    # Extra wait to ensure everything is loaded
+                    await page.wait_for_timeout(5000)

-                    #
-
-                        logger.info(f"Currently on page {current_page}, navigating back to page 1")
-
-                        # Look for an input field where we can directly set the page number
-                        page_input = await page.query_selector('input[aria-label="Page"]')
-                        if page_input:
-                            await page_input.fill("1")
-                            await page_input.press("Enter")
-                            await page.wait_for_timeout(1000)
-                        else:
-                            # Use prev button to go back to first page
-                            prev_button = await page.query_selector('button[aria-label="Previous page"]')
-                            if prev_button:
-                                # Keep clicking until we can't anymore
-                                for _ in range(current_page - 1):
-                                    try:
-                                        await prev_button.click()
-                                        await page.wait_for_timeout(500)
-                                    except Exception as e:
-                                        logger.warning(f"Error clicking prev button: {e}")
-                                        break
+                    # Set up download event listener for the PDF
+                    download_promise = page.wait_for_event("download")

-                    #
-
-
-
-
+                    # Use jsPDF to generate PDF from loaded pages
+                    logger.info("Generating PDF from loaded pages...")
+                    result = await page.evaluate(r'''
+                        (function() {
+                            return new Promise((resolve, reject) => {
+                                let script = document.createElement("script");
+                                script.onload = function () {
+                                    try {
+                                        let pdf = new jsPDF();
+                                        let imgs = Array.from(document.getElementsByTagName("img"))
+                                            .filter(img => img.src.startsWith('blob:') && img.width > 100)
+                                            .sort((a, b) => {
+                                                const rectA = a.getBoundingClientRect();
+                                                const rectB = b.getBoundingClientRect();
+                                                return rectA.top - rectB.top;
+                                            });
+
+                                        console.log(`Found ${imgs.length} valid page images to add to PDF`);
+
+                                        let added = 0;
+                                        for (let i = 0; i < imgs.length; i++) {
+                                            let img = imgs[i];
+                                            let canvas = document.createElement("canvas");
+                                            let ctx = canvas.getContext("2d");
+                                            canvas.width = img.width;
+                                            canvas.height = img.height;
+                                            ctx.drawImage(img, 0, 0, img.width, img.height);
+                                            let imgData = canvas.toDataURL("image/jpeg", 1.0);
+
+                                            if (added > 0) {
+                                                pdf.addPage();
+                                            }
+
+                                            pdf.addImage(imgData, 'JPEG', 0, 0);
+                                            added++;
+                                        }
+
+                                        pdf.save("download.pdf");
+                                        resolve({success: true, pageCount: added});
+                                    } catch (error) {
+                                        reject({success: false, error: error.toString()});
+                                    }
+                                };
+
+                                script.onerror = function() {
+                                    reject({success: false, error: "Failed to load jsPDF library"});
+                                };
+
+                                script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jspdf/1.5.3/jspdf.debug.js';
+                                document.body.appendChild(script);
+                            });
+                        })();
+                    ''')

-
-
-
-                        // Try to find and click any "full page" or "maximize" buttons
-                        const fullViewButtons = Array.from(document.querySelectorAll('button'))
-                            .filter(b => b.textContent?.includes('Full') ||
-                                         b.getAttribute('aria-label')?.includes('Full') ||
-                                         b.getAttribute('aria-label')?.includes('fit page'));
-                        if (fullViewButtons.length > 0) {
-                            fullViewButtons[0].click();
-                        }
-                    }
-                    """)
+                    if not result.get('success', False):
+                        logger.error(f"Error in PDF generation: {result.get('error', 'Unknown error')}")
+                        return None

-
+                    logger.info(f"PDF generation triggered with {result.get('pageCount')} pages")

-
-
-
-
-                        # Take a screenshot of the current page
-                        screenshot_path = os.path.join(temp_dir, f"page_{page_num}.png")
-
-                        # Try different methods to identify and capture just the page content
-                        page_content = await page.query_selector('.drive-viewer-paginated-page')
-                        if page_content:
-                            # Found the specific page element
-                            await page_content.screenshot(path=screenshot_path)
-                        else:
-                            # Fall back to screenshot of visible viewport
-                            await page.screenshot(path=screenshot_path)
-
-                        screenshots.append(screenshot_path)
-                        logger.info(f"Captured page {page_num}")
-
-                        # Check if we have a disabled next button (reached the end)
-                        if next_button:
-                            is_disabled = await next_button.get_attribute('disabled')
-                            if is_disabled == 'true' or is_disabled == 'disabled' or is_disabled is True:
-                                logger.info(f"Reached end of document after {page_num} pages")
-                                break
-
-                            # Click the next button
-                            try:
-                                await next_button.click()
-                                await page.wait_for_timeout(800)  # Wait for page transition
-                                page_num += 1
-                            except Exception as e:
-                                logger.error(f"Error clicking next button: {e}")
-                                # Try to get a fresh reference to the button
-                                next_button = await page.query_selector('button[aria-label="Next page"]')
-                                if not next_button:
-                                    logger.warning("Next button disappeared, assuming end of document")
-                                    break
-                        else:
-                            # Try to find the next button again
-                            next_button = await page.query_selector('button[aria-label="Next page"]')
-                            if not next_button:
-                                logger.warning("Could not find next button, stopping navigation")
-                                break
-
-                        # Double-check if we've reached the expected total
-                        if page_num >= total_pages:
-                            logger.info(f"Reached expected total of {total_pages} pages")
-                            break
+                    # Wait for the download and save it
+                    download = await download_promise
+                    await download.save_as(save_path)

-                    #
-
+                    # Clean up temp directory
+                    try:
+                        os.rmdir(temp_dir)
+                    except:
+                        pass

-                    # Use the size of the first screenshot to set PDF dimensions
-                    if screenshots:
-                        try:
-                            img = Image.open(screenshots[0])
-                            width, height = img.size
-
-                            c = canvas.Canvas(save_path, pagesize=(width, height))
-
-                            for screenshot in screenshots:
-                                try:
-                                    if os.path.exists(screenshot) and os.path.getsize(screenshot) > 100:
-                                        img = Image.open(screenshot)
-                                        c.drawImage(screenshot, 0, 0, width, height)
-                                        c.showPage()
-                                except Exception as e:
-                                    logger.error(f"Error adding page to PDF: {e}")
-
-                            c.save()
-
-                            # Clean up screenshots
-                            for screenshot in screenshots:
-                                if os.path.exists(screenshot):
-                                    os.remove(screenshot)
-
-                            logger.info(f"Successfully created PDF with {len(screenshots)} pages")
-                        except Exception as e:
-                            logger.error(f"Error creating PDF: {e}")
-                    else:
-                        logger.error("No screenshots captured to create PDF")
                 else:
                     # Non-PDF file handling
                     screenshot_path = os.path.join(temp_dir, "file.png")
@@ -876,12 +829,6 @@ class DownloadManager:

                     os.remove(screenshot_path)

-                # Clean up temp directory
-                try:
-                    os.rmdir(temp_dir)
-                except:
-                    pass
-
                 # Close browser
                 await browser.close()

@@ -1064,6 +1011,7 @@ class DownloadManager:
            logger.info("Waiting for all pages to load...")
            max_attempts = min(estimated_pages * 3, 300)  # Adjust based on document size
            attempt = 0
+           prev_blob_count = 0

            while attempt < max_attempts:
                # Count blob images (which are the PDF pages)
@@ -1076,13 +1024,14 @@ class DownloadManager:
                logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images")

                # If we've loaded enough pages or reached estimated count
-               if blob_count >= estimated_pages:
+               if blob_count >= estimated_pages or (blob_count > 0 and blob_count == prev_blob_count and attempt > 10):
                    logger.info("All pages appear to be loaded.")
                    break

                # Press PageDown to scroll further and trigger more loading
                await page.keyboard.press("PageDown")
                await page.wait_for_timeout(2000)  # Wait for content to load
+               prev_blob_count = blob_count
                attempt += 1

            # Extra wait to ensure everything is fully loaded
@@ -1415,6 +1364,7 @@ class DownloadManager:

        return file_type, is_view_only

+    # IMPROVED: Enhanced sublink extraction method
    async def get_sublinks(self, url, limit=10000):
        """Enhanced method to extract sublinks from a website, including dynamic content and interactive elements"""
        links = set()
@@ -1979,9 +1929,49 @@ def main():
        else:
            st.warning("No files found.")

+    # Add a special section for direct Google Drive file download
+    st.markdown("---")
+    with st.expander("Download View-Only Google Drive Document", expanded=False):
+        st.write("Download protected/view-only Google Drive documents - just enter the file ID")
+        file_id = st.text_input("Google Drive File ID",
+                                placeholder="Example: 139CTPrz7jOuJRW6pL6eupH-7B4fnNRku",
+                                help="Enter the ID from the Google Drive URL (e.g., from 'drive.google.com/file/d/THIS_IS_THE_ID/view')")
+
+        if st.button("Download Document") and file_id:
+            download_dir = "./downloads"
+            os.makedirs(download_dir, exist_ok=True)
+            output_path = os.path.join(download_dir, f"gdrive_{file_id}.pdf")
+
+            with st.spinner("Downloading view-only document... (this may take a minute)"):
+                async def download_viewonly():
+                    async with DownloadManager() as dm:
+                        file_info = {
+                            'url': f"https://drive.google.com/file/d/{file_id}/view",
+                            'filename': f"gdrive_{file_id}.pdf",
+                            'metadata': {'file_id': file_id, 'file_type': 'pdf', 'view_only': True}
+                        }
+                        result_path = await dm.force_download_viewonly(file_info, output_path)
+                        return result_path
+
+                result = asyncio.run(download_viewonly())
+
+                if result:
+                    st.success("Document downloaded successfully!")
+                    with open(result, "rb") as f:
+                        file_bytes = f.read()
+
+                    st.download_button(
+                        label="Download PDF",
+                        data=file_bytes,
+                        file_name=os.path.basename(result),
+                        mime="application/pdf"
+                    )
+                else:
+                    st.error("Failed to download the document. Please check the file ID and try again.")
+
    # Add footer with attribution
    st.markdown('---')
-   st.markdown('Created by [Euler314](https://github.com/
+   st.markdown('Created by [Euler314](https://github.com/yu314-coder)')

if __name__ == "__main__":
    main()
|