diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,51 +1,45 @@ import streamlit as st +st.set_page_config(page_title="Advanced File Downloader", layout="wide") + +# Core imports import os -import asyncio import subprocess -import tempfile +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError +import asyncio import logging -import time -import json -import base64 +from urllib.parse import urlparse, urljoin, unquote import re +from pathlib import Path +from io import BytesIO import random +from bs4 import BeautifulSoup +from PyPDF2 import PdfReader import zipfile +import tempfile +import mimetypes +import requests import datetime import traceback +import base64 import shutil -import mimetypes -from pathlib import Path -from urllib.parse import urlparse, urljoin, unquote -from io import BytesIO +import json +import time from PIL import Image from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas - -# Advanced imports -from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError -from bs4 import BeautifulSoup -from PyPDF2 import PdfReader import google_auth_oauthlib.flow import googleapiclient.discovery import google.auth.transport.requests import googleapiclient.http -import requests -import celery -from celery import Celery -import splash -import pyppeteer -import mitmproxy -from mitmproxy import http -# Configure page and logging -st.set_page_config(page_title="Advanced File Downloader", layout="wide") -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +# -------------------- Logging Setup -------------------- +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) logger = logging.getLogger(__name__) -# Initialize Celery for distributed task processing -celery_app = Celery('file_downloader', broker='redis://localhost:6379/0') - -# Configure Google OAuth +# -------------------- Google OAuth Config -------------------- GOOGLE_OAUTH_CONFIG = { "web": { "client_id": "90798824947-u25obg1q844qeikjoh4jdmi579kn9p1c.apps.googleusercontent.com", @@ -58,7 +52,8 @@ GOOGLE_OAUTH_CONFIG = { } } -# -------------------- User Agent Settings -------------------- +# -------------------- Stealth and UA Settings -------------------- +# Extended user agent list for better variety USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15', @@ -67,18 +62,30 @@ USER_AGENTS = [ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.54', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1', 'Mozilla/5.0 (iPad; CPU OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0' ] -# -------------------- Proxy Management -------------------- -PROXY_POOL = [] -CURRENT_PROXY_INDEX = 0 +# Stealth browser settings +STEALTH_SETTINGS = { + # Hardware features to modify/disable + "hardware_concurrency": 
4, + "device_memory": 8, + # Browser features to enable/disable + "webgl_vendor": "Google Inc. (Intel)", + "webgl_renderer": "Intel Iris OpenGL Engine", + "languages": ["en-US", "en"], + "disable_webrtc": True, + # Additional timing randomization + "navigator_platform": "Win32", + "touch_support": False +} -# -------------------- Network Interception Configuration -------------------- -NETWORK_INTERCEPTOR_CONFIG = { - "enabled": False, - "intercept_types": ["xhr", "fetch", "document", "media"], - "save_intercepted": True, - "intercept_folder": "./intercepted_data" +# Proxy rotation configuration (if using multiple proxies) +PROXY_ROTATION_CONFIG = { + "enabled": False, # Set to True to enable rotation + "rotation_interval": 10, # Rotate every 10 requests + "proxies": [] # Will be populated from the UI if needed } # -------------------- Utility Functions -------------------- @@ -108,6 +115,16 @@ def get_file_extension(url, default='.pdf'): return default return ext +def humanize_file_size(size_bytes): + """Format file size in human-readable format""" + if size_bytes < 1024: + return f"{size_bytes} bytes" + for unit in ['KB', 'MB', 'GB', 'TB']: + size_bytes /= 1024.0 + if size_bytes < 1024.0: + return f"{size_bytes:.1f} {unit}" + return f"{size_bytes:.1f} PB" + def get_domain(url): """Extract domain from URL""" parsed = urlparse(url) @@ -117,6 +134,15 @@ def is_valid_file_url(url, extensions): """Check if URL is a valid file URL based on extension""" return any(url.lower().endswith(ext) for ext in extensions) +def detect_captcha(html_content): + """Detect common captcha patterns in HTML content""" + captcha_patterns = [ + 'captcha', 'recaptcha', 'g-recaptcha', 'hcaptcha', 'cf-turnstile', + 'challenge', 'solve the following', 'verify you are human' + ] + html_lower = html_content.lower() + return any(pattern in html_lower for pattern in captcha_patterns) + # -------------------- Google Drive Functions -------------------- def get_google_auth_url(): client_config = GOOGLE_OAUTH_CONFIG["web"] @@ -167,507 +193,394 @@ def create_drive_folder(drive_service, name): folder = drive_service.files().create(body=folder_metadata, fields='id').execute() return folder.get('id') -# -------------------- Setup Functions -------------------- -def setup_dependencies(): - """Install required system dependencies""" +# -------------------- Playwright Setup -------------------- +def install_playwright_dependencies(): try: - # Check if browsers are already installed instead of installing them - if os.path.exists(os.path.join(os.environ.get('PLAYWRIGHT_BROWSERS_PATH', ''), 'chromium-1045')): - logger.info("Playwright browsers already installed, skipping installation") - installed = True - else: - # Only try to install browsers if they don't exist already - try: - subprocess.run(['python3', '-m', 'playwright', 'install', 'chromium'], - check=True, env=os.environ) - installed = True - except subprocess.CalledProcessError as e: - logger.warning(f"Could not install browsers: {e}") - installed = False + # Set environment variable for Playwright browsers path + os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright") - # Skip system dependency installation in container environment - if os.path.exists('/.dockerenv'): - return installed - - # System packages installation + # Install system dependencies subprocess.run(['apt-get', 'update', '-y'], check=True) packages = [ 'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0', 'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1', - 
'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0', - 'redis-server', 'python3-dev', 'build-essential' + 'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0' ] subprocess.run(['apt-get', 'install', '-y', '--no-install-recommends'] + packages, check=True) - # Install Python packages - only if not in Docker - if not os.path.exists('/.dockerenv'): - subprocess.run(['pip', 'install', 'playwright', 'pyppeteer', 'splash', 'celery[redis]', 'mitmproxy'], check=True) + # Install Playwright and dependencies + subprocess.run(['pip', 'install', 'playwright'], check=True) + subprocess.run(['python3', '-m', 'playwright', 'install', 'chromium'], check=True) - st.success("Dependencies installed successfully!") - return True + st.success("Playwright dependencies installed successfully!") except Exception as e: - st.error(f"Error installing dependencies: {e}") + st.error(f"Error installing Playwright dependencies: {e}") st.info("You may need to manually install dependencies. Check console for details.") - logger.error(f"Setup error: {e}") + logger.error(f"Playwright setup error: {e}") traceback.print_exc() - return False - -def check_services(): - """Check if required services are running""" - try: - # Check Redis for Celery - redis_running = False - try: - redis_running = subprocess.run(['redis-cli', 'ping'], - capture_output=True, - text=True).stdout.strip() == 'PONG' - except Exception: - pass - - if not redis_running: - # Try to start Redis as a non-root user - try: - if os.path.exists('/etc/redis/redis.conf'): - # Custom Redis config for non-root - subprocess.run(['redis-server', '/etc/redis/redis.conf'], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - else: - # Fallback to basic Redis without custom config - subprocess.run(['redis-server', '--daemonize', 'yes'], - check=False, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - except Exception as e: - logger.warning(f"Could not start Redis: {e}") - st.warning("Redis service could not be started. 
Celery tasks will not work properly.") - - # Create directories for intercepted data - os.makedirs(NETWORK_INTERCEPTOR_CONFIG['intercept_folder'], exist_ok=True) - - return True - except Exception as e: - logger.error(f"Service check error: {e}") - return False -# -------------------- Network Interception Classes -------------------- -class NetworkInterceptor: - """Class to intercept network traffic using mitmproxy""" - - def __init__(self, intercept_types=None, save_path=None): - self.intercept_types = intercept_types or ["xhr", "fetch", "document"] - self.save_path = save_path or "./intercepted_data" - os.makedirs(self.save_path, exist_ok=True) - self.captured_data = [] - - def intercept_request(self, flow): - """Process intercepted requests""" - try: - url = flow.request.url - method = flow.request.method - content_type = flow.request.headers.get("Content-Type", "") - - # Log the request - self.captured_data.append({ - "type": "request", - "url": url, - "method": method, - "headers": dict(flow.request.headers), - "timestamp": time.time() - }) - - logger.info(f"Intercepted {method} request to {url}") - except Exception as e: - logger.error(f"Error intercepting request: {e}") - - def intercept_response(self, flow): - """Process intercepted responses""" - try: - url = flow.request.url - status_code = flow.response.status_code - content_type = flow.response.headers.get("Content-Type", "") - - # Only process responses of interest based on content type - if any(t in content_type.lower() for t in ["application/pdf", "application/msword", - "application/vnd.openxmlformats", - "application/zip"]): - # Save the file - filename = os.path.basename(urlparse(url).path) - if not filename or filename == '/': - filename = f"file_{int(time.time())}" - - # Try to add extension based on content type - if "pdf" in content_type: - filename += ".pdf" - elif "msword" in content_type: - filename += ".doc" - elif "openxmlformats" in content_type and "wordprocessingml" in content_type: - filename += ".docx" - elif "zip" in content_type: - filename += ".zip" - - file_path = os.path.join(self.save_path, filename) - with open(file_path, "wb") as f: - f.write(flow.response.content) - - logger.info(f"Saved intercepted file: {file_path}") - - # Record metadata about the captured file - self.captured_data.append({ - "type": "file", - "url": url, - "content_type": content_type, - "size": len(flow.response.content), - "path": file_path, - "timestamp": time.time() - }) - except Exception as e: - logger.error(f"Error intercepting response: {e}") - - def get_captured_files(self): - """Return list of captured files""" - return [item for item in self.captured_data if item["type"] == "file"] - -# -------------------- Browser Automation Classes -------------------- -class MultiEngineBrowser: - """Class that supports multiple browser engines (Playwright, Pyppeteer, Splash)""" - - def __init__(self, engine="playwright", use_proxy=False, proxy=None, stealth=True): - self.engine = engine +# -------------------- Download Manager Class -------------------- +class DownloadManager: + def __init__(self, use_proxy=False, proxy=None, query=None, num_results=5, use_stealth=True, proxy_rotation=False): self.use_proxy = use_proxy self.proxy = proxy - self.stealth = stealth + self.query = query + self.num_results = num_results + self.playwright = None self.browser = None self.context = None self.page = None - - async def setup(self): - """Initialize browser based on selected engine""" - if self.engine == "playwright": - return await 
self.setup_playwright() - elif self.engine == "pyppeteer": - return await self.setup_pyppeteer() - elif self.engine == "splash": - return await self.setup_splash() - else: - raise ValueError(f"Unsupported browser engine: {self.engine}") - - async def setup_playwright(self): - """Setup Playwright browser""" - from playwright.async_api import async_playwright - + self.use_stealth = use_stealth + self.proxy_rotation = proxy_rotation + self.request_count = 0 + self.captcha_detected = False + self.download_timeout = 300 # 5 minutes timeout for downloads + + async def __aenter__(self): self.playwright = await async_playwright().start() + + # Prepare browser args with stealth settings browser_args = [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', + '--disable-gpu', + '--no-zygote', + '--single-process', '--disable-web-security', - '--disable-features=IsolateOrigins,site-per-process', + '--disable-features=IsolateOrigins', + '--disable-site-isolation-trials' ] - if self.stealth: + # Add stealth-specific args + if self.use_stealth: browser_args.extend([ '--disable-blink-features=AutomationControlled', - '--disable-features=IsolateOrigins' + '--disable-features=IsolateOrigins,site-per-process', + '--disable-webgl', + '--disable-webrtc' ]) - launch_options = { + # Setup browser options + opts = { "headless": True, "args": browser_args } + # Configure proxy if specified if self.use_proxy and self.proxy: - launch_options["proxy"] = {"server": self.proxy} + opts["proxy"] = {"server": self.proxy} - self.browser = await self.playwright.chromium.launch(**launch_options) + # Launch browser with options + self.browser = await self.playwright.chromium.launch(**opts) - context_options = { - "viewport": {"width": 1920, "height": 1080}, + # Setup browser context with enhanced settings + context_opts = { "user_agent": get_random_user_agent(), - "bypass_csp": True, + "viewport": {"width": 1920, "height": 1080}, + "device_scale_factor": 1, + "has_touch": False, + "is_mobile": False, "ignore_https_errors": True, "accept_downloads": True } - self.context = await self.browser.new_context(**context_options) - - # Apply stealth features - if self.stealth: + # Apply stealth-specific settings to the context + if self.use_stealth: + # Apply JS-injection for enhanced stealth + context_opts["bypass_csp"] = True + self.context = await self.browser.new_context(**context_opts) + + # Execute stealth JS to avoid detection await self.context.add_init_script(""" - Object.defineProperty(navigator, 'webdriver', { get: () => false }); - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5].map(() => ({ length: 1 })) + () => { + Object.defineProperty(navigator, 'webdriver', { + get: () => false, }); - Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); - window.chrome = { runtime: {} }; - """) - - self.page = await self.context.new_page() - return self.page - - async def setup_pyppeteer(self): - """Setup Pyppeteer browser""" - from pyppeteer import launch - - browser_args = [ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-web-security', - ] - - if self.stealth: - browser_args.extend([ - '--disable-blink-features=AutomationControlled', - '--disable-features=IsolateOrigins' - ]) - - launch_options = { - "headless": True, - "args": browser_args, - "ignoreHTTPSErrors": True, - "userDataDir": tempfile.mkdtemp() - } - - if self.use_proxy and self.proxy: - browser_args.append(f'--proxy-server={self.proxy}') - - self.browser = 
await launch(launch_options) - self.page = await self.browser.newPage() - - # Set user agent - await self.page.setUserAgent(get_random_user_agent()) - - # Set viewport - await self.page.setViewport({"width": 1920, "height": 1080}) - - # Apply stealth features - if self.stealth: - await self.page.evaluateOnNewDocument(""" - Object.defineProperty(navigator, 'webdriver', { get: () => false }); - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5].map(() => ({ length: 1 })) + + // Change navigator properties + const newProto = navigator.__proto__; + delete newProto.webdriver; + + // Overwrite the plugins + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5].map(() => ({ + lengthComputable: true, + loaded: 100, + total: 100 + })) }); - Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); - window.chrome = { runtime: {} }; - """) - - return self.page - - async def setup_splash(self): - """Setup Splash browser through API""" - # Splash is typically used via HTTP API - # We'll use requests for this - self.splash_url = "http://localhost:8050/render.html" - return None # No actual page object for Splash - - async def goto(self, url, wait_until=None, timeout=30000): - """Navigate to a URL""" - if self.engine == "playwright": - return await self.page.goto(url, wait_until=wait_until or 'networkidle', timeout=timeout) - elif self.engine == "pyppeteer": - return await self.page.goto(url, waitUntil=wait_until or 'networkidle0', timeout=timeout) - elif self.engine == "splash": - # Use Splash HTTP API - params = { - "url": url, - "wait": min(timeout/1000, 30), # Splash uses seconds - "timeout": min(timeout/1000, 60), - "resource_timeout": min(timeout/1000, 30), - "html": 1, - "png": 0, - "render_all": 1 + + // Handle languages more naturally + Object.defineProperty(navigator, 'languages', { + get: () => ['en-US', 'en', 'es'] + }); + + // Modify hardware concurrency + Object.defineProperty(navigator, 'hardwareConcurrency', { + get: () => 4 + }); + + // Modify deviceMemory + Object.defineProperty(navigator, 'deviceMemory', { + get: () => 8 + }); + + // WebGL modifications + const getParameter = WebGLRenderingContext.prototype.getParameter; + WebGLRenderingContext.prototype.getParameter = function(parameter) { + if (parameter === 37445) { + return 'Intel Inc.'; + } + if (parameter === 37446) { + return 'Intel Iris OpenGL Engine'; + } + return getParameter.apply(this, arguments); + }; } - - if self.use_proxy and self.proxy: - params["proxy"] = self.proxy - - headers = {"User-Agent": get_random_user_agent()} - response = requests.get(self.splash_url, params=params, headers=headers) - self.last_html = response.text - return response - - async def content(self): - """Get page content""" - if self.engine == "playwright": - return await self.page.content() - elif self.engine == "pyppeteer": - return await self.page.content() - elif self.engine == "splash": - return self.last_html - - async def close(self): - """Close browser""" - if self.engine == "playwright": - if self.browser: - await self.browser.close() - if self.playwright: - await self.playwright.stop() - elif self.engine == "pyppeteer": - if self.browser: - await self.browser.close() - # No cleanup needed for Splash as it's stateless - -# -------------------- Download Manager Class -------------------- -class DownloadManager: - def __init__(self, browser_engine="playwright", use_proxy=False, proxy=None, query=None, num_results=5, use_stealth=True): - self.browser_engine = browser_engine 
- self.use_proxy = use_proxy - self.proxy = proxy - self.query = query - self.num_results = num_results - self.use_stealth = use_stealth - self.browser = None - self.network_interceptor = None + """) + else: + # Regular context without stealth + self.context = await self.browser.new_context(**context_opts) - # Configure network interception if enabled - if NETWORK_INTERCEPTOR_CONFIG["enabled"]: - self.network_interceptor = NetworkInterceptor( - intercept_types=NETWORK_INTERCEPTOR_CONFIG["intercept_types"], - save_path=NETWORK_INTERCEPTOR_CONFIG["intercept_folder"] - ) - - async def __aenter__(self): - # Initialize multi-engine browser - self.browser = MultiEngineBrowser( - engine=self.browser_engine, - use_proxy=self.use_proxy, - proxy=self.proxy, - stealth=self.use_stealth - ) - self.page = await self.browser.setup() + # Create page with enhanced headers + self.page = await self.context.new_page() + await self.page.set_extra_http_headers({ + 'Accept-Language': 'en-US,en;q=0.9,es;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', + 'Cache-Control': 'max-age=0', + 'DNT': '1', # Do Not Track + 'Referer': 'https://www.google.com/', + 'Sec-Fetch-Dest': 'document', + 'Sec-Fetch-Mode': 'navigate', + 'Sec-Fetch-Site': 'cross-site', + 'Sec-Fetch-User': '?1', + 'Upgrade-Insecure-Requests': '1' + }) - # Set headers for better stealth - if self.browser_engine == "playwright": - await self.page.set_extra_http_headers({ - 'Accept-Language': 'en-US,en;q=0.9', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Referer': 'https://www.google.com/', - 'Sec-Fetch-Dest': 'document', - 'Sec-Fetch-Mode': 'navigate', - 'Sec-Fetch-Site': 'cross-site', - 'Sec-Fetch-User': '?1', - 'Upgrade-Insecure-Requests': '1' - }) + # Add delay for mouse movements to simulate human behavior + if self.use_stealth: + await self.page.mouse.move(x=random.randint(100, 500), y=random.randint(100, 500)) + await self.page.wait_for_timeout(random.randint(200, 500)) return self async def __aexit__(self, exc_type, exc_val, exc_tb): - await self.browser.close() + if self.browser: + await self.browser.close() + if self.playwright: + await self.playwright.stop() + + async def rotate_proxy_if_needed(self): + """Rotate proxy if proxy rotation is enabled and threshold is reached""" + if self.proxy_rotation and PROXY_ROTATION_CONFIG["enabled"]: + self.request_count += 1 + if self.request_count >= PROXY_ROTATION_CONFIG["rotation_interval"] and PROXY_ROTATION_CONFIG["proxies"]: + # Get next proxy from the pool + next_proxy = PROXY_ROTATION_CONFIG["proxies"].pop(0) + PROXY_ROTATION_CONFIG["proxies"].append(next_proxy) # Move to end of list + + # Close existing context and create new one with the new proxy + if self.context: + await self.context.close() + + # Create new context with the new proxy + context_opts = { + "user_agent": get_random_user_agent(), + "proxy": {"server": next_proxy}, + "accept_downloads": True + } + self.context = await self.browser.new_context(**context_opts) + self.page = await self.context.new_page() + + # Reset counter + self.request_count = 0 + logger.info(f"Rotated to new proxy: {next_proxy}") + + async def handle_captcha(self, page): + """Detect and handle captchas if possible""" + # Check for common captcha patterns + content = await page.content() + if detect_captcha(content): + self.captcha_detected = True + logger.warning("Captcha detected on page") + + # Strategies for handling captchas: + # 1. 
For simple captchas, try to extract the image and solve it + captcha_img = await page.query_selector('img[alt*="captcha" i], img[src*="captcha" i]') + if captcha_img: + logger.info("Found captcha image, attempting to capture") + + # Take screenshot of the captcha + captcha_path = os.path.join(tempfile.gettempdir(), "captcha.png") + await captcha_img.screenshot(path=captcha_path) + + # In a real implementation, you would send this to a captcha solving service + # For now, just log the detection + logger.info(f"Captcha image saved to {captcha_path}") + + # For demonstration, we'll notify the user but not actually solve it + return False + + # 2. For reCAPTCHA, special handling would be required + recaptcha = await page.query_selector('iframe[src*="recaptcha"]') + if recaptcha: + logger.warning("reCAPTCHA detected, would require external solving service") + return False + + # 3. Try to perform human-like actions that might bypass simple bot checks + await self.perform_human_actions(page) + + # Check if captcha is still present + content = await page.content() + if detect_captcha(content): + logger.warning("Captcha still present after human-like actions") + return False + else: + logger.info("Captcha appears to be resolved") + return True + + return True # No captcha detected + + async def perform_human_actions(self, page): + """Perform human-like actions on the page to possibly bypass simple bot checks""" + try: + # 1. Slowly scroll down the page + for i in range(3): + await page.evaluate(f"window.scrollTo(0, {i * 300})") + await page.wait_for_timeout(random.randint(300, 700)) + + # 2. Random mouse movements + for _ in range(3): + x = random.randint(100, 800) + y = random.randint(100, 600) + await page.mouse.move(x=x, y=y) + await page.wait_for_timeout(random.randint(200, 500)) + + # 3. Click on a non-essential part of the page + try: + await page.click("body", position={"x": 50, "y": 50}) + except: + pass + + # 4. 
Wait a bit before continuing + await page.wait_for_timeout(1000) + + except Exception as e: + logger.warning(f"Error during human-like actions: {e}") - async def search_web(self, search_engine="bing"): - """Search web using specified search engine""" + async def search_bing(self): urls = [] try: - if search_engine == "bing": - search_url = f"https://www.bing.com/search?q={self.query}" - elif search_engine == "google": - search_url = f"https://www.google.com/search?q={self.query}" - else: - raise ValueError(f"Unsupported search engine: {search_engine}") - - await self.browser.goto(search_url, timeout=30000) - - if self.browser_engine == "playwright": - if search_engine == "bing": - links = await self.page.query_selector_all("li.b_algo h2 a") - for link in links[:self.num_results]: - href = await link.get_attribute('href') - if href: - urls.append(href) - elif search_engine == "google": - links = await self.page.query_selector_all("div.g a[href^='http']") - for link in links[:self.num_results]: - href = await link.get_attribute('href') - if href: - urls.append(href) - elif self.browser_engine == "pyppeteer": - if search_engine == "bing": - links = await self.page.querySelectorAll("li.b_algo h2 a") - for link in links[:self.num_results]: - href = await self.page.evaluate('el => el.getAttribute("href")', link) - if href: - urls.append(href) - elif search_engine == "google": - links = await self.page.querySelectorAll("div.g a[href^='http']") - for link in links[:self.num_results]: - href = await self.page.evaluate('el => el.getAttribute("href")', link) - if href: - urls.append(href) - elif self.browser_engine == "splash": - # Parse the HTML with BeautifulSoup - soup = BeautifulSoup(self.browser.last_html, 'html.parser') - if search_engine == "bing": - links = soup.select("li.b_algo h2 a") - for link in links[:self.num_results]: - href = link.get("href") - if href: - urls.append(href) - elif search_engine == "google": - links = soup.select("div.g a[href^='http']") - for link in links[:self.num_results]: - href = link.get("href") - if href: - urls.append(href) + # Rotate proxy if needed + await self.rotate_proxy_if_needed() + + search_url = f"https://www.bing.com/search?q={self.query}" + await self.page.goto(search_url, timeout=30000) + await self.page.wait_for_load_state('networkidle') + + # Check for captchas + if not await self.handle_captcha(self.page): + logger.warning("Captcha detected during search, results may be limited") + + # More natural scrolling behavior + for i in range(3): + await self.page.evaluate(f"window.scrollTo(0, {i * 400})") + await self.page.wait_for_timeout(random.randint(300, 800)) + + # Extract search results + links = await self.page.query_selector_all("li.b_algo h2 a") + for link in links[:self.num_results]: + href = await link.get_attribute('href') + if href: + urls.append(href) + + # If we didn't find enough results, try an alternative selector + if len(urls) < self.num_results: + alt_links = await self.page.query_selector_all(".b_caption a") + for link in alt_links: + href = await link.get_attribute('href') + if href and href not in urls: + urls.append(href) + if len(urls) >= self.num_results: + break return urls except Exception as e: - logger.error(f"Error searching web: {e}") + logger.error(f"Error searching Bing: {e}") return [] async def get_file_size(self, url): try: - headers = {'User-Agent': get_random_user_agent()} - response = requests.head(url, headers=headers, timeout=15) - length = response.headers.get('Content-Length', None) - if length: - return 
sizeof_fmt(int(length)) - else: - return "Unknown Size" - except Exception: + await self.rotate_proxy_if_needed() + + async with self.context.new_page() as page: + response = await page.request.head(url, timeout=15000) + length = response.headers.get('Content-Length', None) + if length: + return sizeof_fmt(int(length)) + else: + return "Unknown Size" + except Exception as e: + logger.warning(f"Error getting file size: {e}") return "Unknown Size" async def get_pdf_metadata(self, url): try: - headers = {'User-Agent': get_random_user_agent()} - response = requests.get(url, headers=headers, timeout=15, stream=True) - if response.status_code == 200: - content = BytesIO(response.content) - reader = PdfReader(content) - return { - 'Title': reader.metadata.get('/Title', 'N/A') if reader.metadata else 'N/A', - 'Author': reader.metadata.get('/Author', 'N/A') if reader.metadata else 'N/A', - 'Pages': len(reader.pages), - } - else: - return {} - except Exception: + await self.rotate_proxy_if_needed() + + async with self.context.new_page() as page: + resp = await page.request.get(url, timeout=15000) + if resp.ok: + content = await resp.body() + pdf = BytesIO(content) + reader = PdfReader(pdf) + return { + 'Title': reader.metadata.get('/Title', 'N/A') if reader.metadata else 'N/A', + 'Author': reader.metadata.get('/Author', 'N/A') if reader.metadata else 'N/A', + 'Pages': len(reader.pages), + } + else: + return {} + except Exception as e: + logger.warning(f"Error reading PDF metadata: {e}") return {} async def extract_real_download_url(self, url): try: - headers = {'User-Agent': get_random_user_agent()} - response = requests.head(url, headers=headers, timeout=15, allow_redirects=True) - return response.url + await self.rotate_proxy_if_needed() + + async with self.context.new_page() as page: + response = await page.goto(url, wait_until='networkidle', timeout=30000) + if response and response.headers.get('location'): + return response.headers['location'] + return page.url except Exception as e: logger.error(f"Error extracting real download URL: {e}") return url + # IMPROVED: Enhanced exam links extraction method async def get_edu_exam_links(self, url): """Specialized method for educational exam websites that follows a common pattern.""" try: logger.info(f"Fetching exam links from {url}") links = set() - # First try with direct requests for speed - headers = {"User-Agent": get_random_user_agent()} + # First try with direct requests for speed (but with proper headers) + headers = { + "User-Agent": get_random_user_agent(), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + "Referer": "https://www.google.com/", + "DNT": "1" + } + try: response = requests.get(url, headers=headers, timeout=30) if response.status_code == 200: - # Parse with BeautifulSoup for efficiency + # Parse with BeautifulSoup first for efficiency soup = BeautifulSoup(response.text, "html.parser") parsed_base = urlparse(url) base_url = f"{parsed_base.scheme}://{parsed_base.netloc}" @@ -696,63 +609,239 @@ class DownloadManager: "view", "open", "get", "solution", "answer" ] - # Check URL and text patterns - if any(pattern in full_url.lower() for pattern in url_patterns) or \ - any(pattern in link_text for pattern in text_patterns) or \ - any(full_url.lower().endswith(ext) for ext in ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): + # Check URL for patterns + if any(pattern in full_url.lower() for pattern in url_patterns): + links.add(full_url) 
+ continue + + # Check link text for patterns + if any(pattern in link_text for pattern in text_patterns): + links.add(full_url) + continue + + # Check for common file extensions + if any(full_url.lower().endswith(ext) for ext in + ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): links.add(full_url) except Exception as e: logger.warning(f"Request-based extraction failed: {e}") - # Use browser-based approach if needed - if len(links) < 5 or "phsms.cloud.ncnu.edu.tw" in url: - logger.info("Using browser for enhanced link extraction") - - # Navigate to the page - await self.browser.goto(url, timeout=45000) - - # Get page content and parse with BeautifulSoup - content = await self.browser.content() - soup = BeautifulSoup(content, "html.parser") - parsed_base = urlparse(url) - base_url = f"{parsed_base.scheme}://{parsed_base.netloc}" - - # Process all links on the page - for a in soup.find_all("a", href=True): - href = a["href"] - full_url = urljoin(url, href) - link_text = a.get_text().lower() - - # Apply the same filtering criteria - url_patterns = [ - "/eduexp/docs/", "/exam/", "/pastexam/", "/papers/", - "/test/", "/download/", "/files/", "/assignments/", - "paper_", "question_", "exam_", "test_", "past_", - "assignment_", "sample_", "study_material", "notes_", - "/resource/", "/subject/", "/course/", "/material/" - ] - - text_patterns = [ - "exam", "paper", "test", "question", "past", "download", - "assignment", "sample", "study", "material", "notes", - "subject", "course", "resource", "pdf", "document", - "view", "open", "get", "solution", "answer" - ] - - # Check URL and text patterns - if any(pattern in full_url.lower() for pattern in url_patterns) or \ - any(pattern in link_text for pattern in text_patterns) or \ - any(full_url.lower().endswith(ext) for ext in ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): - links.add(full_url) + # Browser-based approach for more thorough extraction or if initial approach was inadequate + try: + # Check if we need to proceed with browser-based extraction + if len(links) < 5 or "phsms.cloud.ncnu.edu.tw" in url: + logger.info("Using browser for enhanced link extraction") + + # Rotate proxy if needed + await self.rotate_proxy_if_needed() + + # Navigate to the page with more natural timing + await self.page.goto(url, timeout=45000, wait_until='networkidle') + await self.page.wait_for_timeout(random.randint(1000, 2000)) + + # Handle captchas if present + if not await self.handle_captcha(self.page): + logger.warning("Captcha detected, extraction may be limited") + + # Get base URL for resolving relative links + parsed_base = urlparse(url) + base_url = f"{parsed_base.scheme}://{parsed_base.netloc}" + + # Perform natural scrolling to trigger lazy-loaded content + page_height = await self.page.evaluate("document.body.scrollHeight") + viewport_height = await self.page.evaluate("window.innerHeight") + + for scroll_pos in range(0, page_height, viewport_height // 2): + await self.page.evaluate(f"window.scrollTo(0, {scroll_pos})") + await self.page.wait_for_timeout(random.randint(300, 800)) + + # Scroll back to top + await self.page.evaluate("window.scrollTo(0, 0)") + await self.page.wait_for_timeout(500) + + # Extract all links with Playwright (better than just anchor tags) + all_links = await self.page.evaluate(""" + () => { + const results = []; + + // Get all anchor tags + const anchors = document.querySelectorAll('a[href]'); + for (const a of anchors) { + if (a.href) { + results.push({ + href: a.href, + text: 
a.innerText || a.textContent || '', + isButton: a.classList.contains('btn') || a.role === 'button' + }); + } + } + + // Get buttons that might contain links + const buttons = document.querySelectorAll('button'); + for (const btn of buttons) { + const onclick = btn.getAttribute('onclick') || ''; + if (onclick.includes('window.location') || onclick.includes('download')) { + results.push({ + href: '#button', + text: btn.innerText || btn.textContent || '', + isButton: true, + onclick: onclick + }); + } + } + + return results; + } + """) + + # Process the extracted links + for link_info in all_links: + href = link_info.get('href', '') + text = link_info.get('text', '').lower() + + if href and href != '#button': + # Check URL patterns + url_patterns = [ + "/eduexp/docs/", "/exam/", "/pastexam/", "/papers/", + "/test/", "/download/", "/files/", "/assignments/", + "paper_", "question_", "exam_", "test_", "past_", + "assignment_", "sample_", "study_material", "notes_" + ] + + # Check text patterns + text_patterns = [ + "exam", "paper", "test", "question", "past", "download", + "assignment", "sample", "study", "material", "notes", + "pdf", "document", "view", "open", "solution" + ] + + if any(pattern in href.lower() for pattern in url_patterns) or \ + any(pattern in text for pattern in text_patterns) or \ + any(href.lower().endswith(ext) for ext in + ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): + links.add(href) + + # Check for ASP.NET specific elements that might contain exam links + grid_elements = await self.page.query_selector_all('table.grid, .GridView, #GridView1, .rgMasterTable, .table-responsive') + for grid in grid_elements: + grid_links = await grid.query_selector_all('a[href]') + for a in grid_links: + href = await a.get_attribute('href') + text = await a.text_content() + + if href: + full_url = href if href.startswith('http') else urljoin(url, href) + links.add(full_url) + + # Try clicking pagination controls to reveal more content + pagination_buttons = await self.page.query_selector_all('a[href*="page"], .pagination a, .pager a') + for i, button in enumerate(pagination_buttons[:5]): # Limit to first 5 pagination buttons + try: + # Check if this is a numeric pagination button (more likely to be useful) + button_text = await button.text_content() + if button_text and button_text.strip().isdigit(): + logger.info(f"Clicking pagination button: {button_text}") + await button.click() + await self.page.wait_for_timeout(2000) + await self.page.wait_for_load_state('networkidle', timeout=10000) + + # Extract links from this page + new_page_links = await self.page.evaluate(""" + () => { + return Array.from(document.querySelectorAll('a[href]')).map(a => a.href); + } + """) + + for href in new_page_links: + if href and not href.startswith('javascript:'): + if any(pattern in href.lower() for pattern in url_patterns) or \ + any(href.lower().endswith(ext) for ext in + ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): + links.add(href) + except Exception as e: + logger.warning(f"Error clicking pagination button: {e}") + + # Try clicking any controls that might reveal more exam links (more focused approach) + show_buttons = await self.page.query_selector_all('input[type="button"], button, a.btn') + for button in show_buttons: + button_text = (await button.text_content() or "").lower() + button_value = (await button.get_attribute("value") or "").lower() + button_id = (await button.get_attribute("id") or "").lower() + + # Look for buttons that seem likely to 
reveal file lists + promising_terms = ["show", "view", "display", "list", "exam", "paper", "test", + "download", "resource", "material", "browse", "file"] + + if any(term in button_text or term in button_value or term in button_id + for term in promising_terms): + try: + logger.info(f"Clicking button: {button_text or button_value}") + await button.click() + await self.page.wait_for_timeout(2000) + await self.page.wait_for_load_state('networkidle', timeout=10000) + + # Get any new links that appeared + new_links = await self.page.query_selector_all('a[href]') + for a in new_links: + href = await a.get_attribute('href') + if href: + full_url = href if href.startswith('http') else urljoin(url, href) + + # Focus on file extensions and patterns + if any(full_url.lower().endswith(ext) for ext in + ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']) or \ + any(pattern in full_url.lower() for pattern in url_patterns): + links.add(full_url) + except Exception as e: + logger.warning(f"Error clicking button: {e}") + + # Special handling for ASP.NET PostBack links + try: + # Find and interact with ASP.NET __doPostBack elements + postback_elements = await self.page.query_selector_all('[onclick*="__doPostBack"]') + for i, element in enumerate(postback_elements[:10]): # Limit to avoid too many clicks + try: + onclick = await element.get_attribute('onclick') + if onclick and '__doPostBack' in onclick: + element_text = await element.text_content() + + # Only interact with elements that seem likely to contain exam links + promising_terms = ["show", "view", "list", "exam", "paper", "test", + "download", "resource", "material"] + + if any(term in element_text.lower() for term in promising_terms): + logger.info(f"Clicking ASP.NET postback element: {element_text}") + + # Click the element + await element.click() + await self.page.wait_for_timeout(2000) + await self.page.wait_for_load_state('networkidle', timeout=10000) + + # Extract any new links + new_links = await self.page.query_selector_all('a[href]') + for a in new_links: + href = await a.get_attribute('href') + if href: + full_url = href if href.startswith('http') else urljoin(url, href) + if any(full_url.lower().endswith(ext) for ext in + ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): + links.add(full_url) + except Exception as e: + logger.warning(f"Error interacting with postback element: {e}") + except Exception as e: + logger.warning(f"Error during postback handling: {e}") + + except Exception as e: + logger.error(f"Browser-based extraction failed: {e}") - # Filter to likely exam documents + # Filter links to likely contain exam documents filtered_links = [] for link in links: - # Common file extensions + # Common file extensions for exam documents if any(ext in link.lower() for ext in ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']): filtered_links.append(link) continue - + # Common paths for exam documents if any(pattern in link.lower() for pattern in [ "/eduexp/docs/pastexam", "/exam/", "/pastexam/", "/papers/", @@ -771,9 +860,12 @@ class DownloadManager: async def extract_downloadable_files(self, url, custom_ext_list): found_files = [] try: + # Rotate proxy if needed + await self.rotate_proxy_if_needed() + # Special handling for educational exam sites if "phsms.cloud.ncnu.edu.tw" in url or any(keyword in url.lower() for keyword in - ["exam", "test", "pastpaper", "eduexp"]): + ["exam", "test", "pastpaper", "eduexp"]): logger.info("Using specialized handler for educational exam site") # Get 
direct links to exam files @@ -812,54 +904,102 @@ class DownloadManager: 'url': real_url, 'filename': filename, 'size': size_str, - 'metadata': meta, - 'source_url': url # Add source URL for better tracking + 'metadata': meta }) # If we found exam files with the specialized method, return them if found_files: return found_files - # Standard extraction method for all pages - await self.browser.goto(url, timeout=30000) + # Standard extraction method if specialized method didn't find files + response = await self.page.goto(url, timeout=30000, wait_until='networkidle') + if not response: + return [] - # Get page content - content = await self.browser.content() + # Check for captchas + if not await self.handle_captcha(self.page): + logger.warning("Captcha detected, file extraction may be limited") + + # Scroll through the page naturally to trigger lazy loading + await self.page.evaluate(""" + (async () => { + const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms)); + const height = document.body.scrollHeight; + const scrollStep = Math.floor(window.innerHeight / 2); + + for (let i = 0; i < height; i += scrollStep) { + window.scrollTo(0, i); + await delay(100); + } + + window.scrollTo(0, 0); + })() + """) + await self.page.wait_for_timeout(1000) + + final_url = self.page.url + if '.php' in final_url or 'download' in final_url: + real_url = await self.extract_real_download_url(final_url) + if real_url != final_url: + # Try to detect the filename from headers or URL + response = await self.page.request.head(real_url, timeout=15000) + filename = None + + # Try to get from Content-Disposition header + content_disposition = response.headers.get('Content-Disposition', '') + if 'filename=' in content_disposition: + filename_match = re.search(r'filename=["\'](.*?)["\']', content_disposition) + if filename_match: + filename = filename_match.group(1) + + # If not found in headers, use URL basename + if not filename: + filename = os.path.basename(urlparse(real_url).path) + if not filename or filename == '/': + # Generate a name based on domain + domain = get_domain(real_url) + ext = get_file_extension(real_url, '.pdf') + filename = f"file_from_{domain}{ext}" + + found_files.append({ + 'url': real_url, + 'filename': filename, + 'size': await self.get_file_size(real_url), + 'metadata': {} + }) + return found_files + + await self.page.wait_for_load_state('networkidle', timeout=30000) + content = await self.page.content() soup = BeautifulSoup(content, 'html.parser') - # Define file extensions to look for default_exts = ['.pdf', '.docx', '.doc', '.zip', '.rar', '.mp3', '.mp4', - '.avi', '.mkv', '.png', '.jpg', '.jpeg', '.gif', '.xlsx', - '.pptx', '.odt', '.txt'] + '.avi', '.mkv', '.png', '.jpg', '.jpeg', '.gif', '.xlsx', + '.pptx', '.odt', '.txt'] all_exts = set(default_exts + [ext.strip().lower() for ext in custom_ext_list if ext.strip()]) - # Get base URL for resolving relative links - parsed_base = urlparse(url) + parsed_base = urlparse(final_url) base_url = f"{parsed_base.scheme}://{parsed_base.netloc}" path_base = os.path.dirname(parsed_base.path) - # Process all anchor tags for file links + # Process all anchor tags for a in soup.find_all('a', href=True): href = a['href'].strip() - # Handle PHP and download links separately if '.php' in href.lower() or 'download' in href.lower(): - full_url = href if href.startswith('http') else urljoin(base_url, href) + full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base) real_url = await 
self.extract_real_download_url(full_url) if real_url and real_url != full_url: - filename = os.path.basename(urlparse(real_url).path) or 'downloaded_file' found_files.append({ 'url': real_url, - 'filename': filename, + 'filename': os.path.basename(urlparse(real_url).path) or 'downloaded_file', 'size': await self.get_file_size(real_url), - 'metadata': {}, - 'source_url': url + 'metadata': {} }) continue - - # Check for direct file extensions + if any(href.lower().endswith(ext) for ext in all_exts): - file_url = href if href.startswith('http') else urljoin(base_url, href) + file_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base) size_str = await self.get_file_size(file_url) meta = {} if file_url.lower().endswith('.pdf'): @@ -868,10 +1008,9 @@ class DownloadManager: 'url': file_url, 'filename': os.path.basename(file_url.split('?')[0]), 'size': size_str, - 'metadata': meta, - 'source_url': url + 'metadata': meta }) - + # Handle Google Drive links elif ("drive.google.com" in href) or ("docs.google.com" in href): file_id = None @@ -880,131 +1019,299 @@ class DownloadManager: if match: file_id = match.group(1) break - if file_id: - # Determine if it's a view-only file - is_view_only = "View-only" in (await self.get_file_size(f"https://drive.google.com/uc?export=download&id={file_id}")) + # Get file info to determine type and view-only status + file_type, is_view_only = await self.get_google_drive_file_info(file_id) + # Create a more informative filename based on info filename = f"gdrive_{file_id}" - ext = get_file_extension(href, '.pdf') - if ext != '.': - filename += ext + if file_type: + filename = f"{filename}.{file_type}" + + size_str = "View-only" if is_view_only else await self.get_file_size(f"https://drive.google.com/uc?export=download&id={file_id}") found_files.append({ - 'url': href, + 'url': href, # Use original URL 'filename': filename, - 'size': "View-only" if is_view_only else await self.get_file_size(f"https://drive.google.com/uc?export=download&id={file_id}"), + 'size': size_str, 'metadata': { 'view_only': is_view_only, + 'file_type': file_type, 'file_id': file_id - }, - 'source_url': url + } }) - # Check for embedded content (iframe, embed, object) - for elem_tag in ['iframe', 'embed', 'object', 'source']: - for elem in soup.find_all(elem_tag): - src = elem.get('src') or elem.get('data') - if src and any(src.lower().endswith(ext) for ext in all_exts): - file_url = src if src.startswith('http') else urljoin(base_url, src) + # Also check for files in other elements (iframe, embed, object, etc.) 
+ other_elements = soup.find_all(['iframe', 'embed', 'object', 'source']) + for elem in other_elements: + src = elem.get('src') or elem.get('data') + if src and any(src.lower().endswith(ext) for ext in all_exts): + file_url = src if src.startswith('http') else self.resolve_relative_url(src, base_url, path_base) + size_str = await self.get_file_size(file_url) + meta = {} + if file_url.lower().endswith('.pdf'): + meta = await self.get_pdf_metadata(file_url) + found_files.append({ + 'url': file_url, + 'filename': os.path.basename(file_url.split('?')[0]), + 'size': size_str, + 'metadata': meta + }) + + # Check for file links in onclick attributes + onclick_elements = await self.page.query_selector_all('*[onclick*="download"], *[onclick*="file"]') + for elem in onclick_elements: + onclick = await elem.get_attribute('onclick') + urls = re.findall(r'(https?://[^\'"]+)', onclick) + for url_match in urls: + if any(url_match.lower().endswith(ext) for ext in all_exts): + size_str = await self.get_file_size(url_match) + meta = {} + if url_match.lower().endswith('.pdf'): + meta = await self.get_pdf_metadata(url_match) found_files.append({ - 'url': file_url, - 'filename': os.path.basename(file_url.split('?')[0]), - 'size': await self.get_file_size(file_url), - 'metadata': {}, - 'source_url': url + 'url': url_match, + 'filename': os.path.basename(url_match.split('?')[0]), + 'size': size_str, + 'metadata': meta }) - # Deduplicate files + # Also check for data-src and data-url attributes (common in lazy-loaded sites) + data_elements = await self.page.query_selector_all('[data-src], [data-url], [data-href], [data-download]') + for elem in data_elements: + for attr in ['data-src', 'data-url', 'data-href', 'data-download']: + try: + value = await elem.get_attribute(attr) + if value and any(value.lower().endswith(ext) for ext in all_exts): + file_url = value if value.startswith('http') else self.resolve_relative_url(value, base_url, path_base) + found_files.append({ + 'url': file_url, + 'filename': os.path.basename(file_url.split('?')[0]), + 'size': await self.get_file_size(file_url), + 'metadata': {} + }) + except: + pass + + # Check script tags for JSON data that might contain file URLs + script_elements = soup.find_all('script', type='application/json') + for script in script_elements: + try: + json_data = json.loads(script.string) + # Look for URL patterns in the JSON data + def extract_urls_from_json(obj, urls_found=None): + if urls_found is None: + urls_found = [] + if isinstance(obj, dict): + for k, v in obj.items(): + # Check if any key contains url-like terms + url_keys = ['url', 'href', 'src', 'link', 'file', 'path', 'download'] + if any(url_key in k.lower() for url_key in url_keys) and isinstance(v, str) and v.startswith('http'): + urls_found.append(v) + else: + extract_urls_from_json(v, urls_found) + elif isinstance(obj, list): + for item in obj: + extract_urls_from_json(item, urls_found) + return urls_found + + json_urls = extract_urls_from_json(json_data) + for json_url in json_urls: + if any(json_url.lower().endswith(ext) for ext in all_exts): + found_files.append({ + 'url': json_url, + 'filename': os.path.basename(json_url.split('?')[0]), + 'size': await self.get_file_size(json_url), + 'metadata': {} + }) + except: + pass + + # Check for hidden download buttons or forms + hidden_elements = await self.page.evaluate(""" + () => { + const results = []; + + // Check for hidden forms with download actions + const forms = document.querySelectorAll('form[action*="download"], form[action*="file"]'); + 
for (const form of forms) { + const action = form.getAttribute('action') || ''; + results.push({ + type: 'form', + action: action, + inputs: Array.from(form.querySelectorAll('input[name]')).map(input => { + return {name: input.name, value: input.value}; + }) + }); + } + + // Check for hidden download links/buttons + const hiddenLinks = Array.from(document.querySelectorAll('a[href]')).filter(a => { + const style = window.getComputedStyle(a); + return (style.display === 'none' || style.visibility === 'hidden') && + (a.href.includes('download') || a.href.includes('file')); + }); + + for (const link of hiddenLinks) { + results.push({ + type: 'link', + href: link.href, + text: link.innerText || link.textContent + }); + } + + return results; + } + """) + + # Process hidden elements + for elem in hidden_elements: + if elem['type'] == 'link' and 'href' in elem: + href = elem['href'] + if any(href.lower().endswith(ext) for ext in all_exts): + found_files.append({ + 'url': href, + 'filename': os.path.basename(href.split('?')[0]), + 'size': await self.get_file_size(href), + 'metadata': {} + }) + + # Deduplicate files by URL seen_urls = set() unique_files = [] for f in found_files: if f['url'] not in seen_urls: seen_urls.add(f['url']) unique_files.append(f) - - return unique_files + return unique_files except Exception as e: logger.error(f"Error extracting files from {url}: {e}") + traceback.print_exc() return [] - async def download_file(self, file_info, save_dir, referer=None): - """Download a file and provide a direct download link""" + async def download_file(self, file_info, save_dir, referer): file_url = file_info['url'] fname = file_info['filename'] - referer = referer or file_info.get('source_url', 'https://www.google.com') - - # Create unique filename to avoid overwriting path = os.path.join(save_dir, fname) base, ext = os.path.splitext(fname) counter = 1 while os.path.exists(path): path = os.path.join(save_dir, f"{base}_{counter}{ext}") counter += 1 - os.makedirs(save_dir, exist_ok=True) try: # Special handling for Google Drive files if "drive.google.com" in file_url or "docs.google.com" in file_url: - # For view-only Google Drive files, use specialized method + # Check if it's marked as view-only in metadata is_view_only = file_info.get('metadata', {}).get('view_only', False) + + # For view-only files, try our most robust approach first if is_view_only: - result_path = await self.download_viewonly_google_drive(file_info, path) + logger.info(f"Attempting to download view-only file: {file_url}") + result_path = await self.force_download_viewonly(file_info, path) if result_path: return result_path + + # If that failed, try the regular download approach + logger.info("Primary method failed, trying fallback methods") - # Try standard Google Drive download - file_id = None - for pattern in [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']: - match = re.search(pattern, file_url) - if match: - file_id = match.group(1) - break + # Try regular download methods + success = await self.download_from_google_drive(file_url, path) + if success: + return path - if file_id: - # Try direct download - download_url = f"https://drive.google.com/uc?id={file_id}&export=download" - headers = { - 'User-Agent': get_random_user_agent(), - 'Referer': referer - } + # If all methods failed for Google Drive, try one last approach + logger.warning("All standard methods failed, attempting force download") + result_path = await self.force_download_viewonly(file_info, path) + return result_path if result_path else None 
+ + # Rotate proxy if needed + await self.rotate_proxy_if_needed() + + # Try with direct requests first (faster) + try: + headers = { + 'User-Agent': get_random_user_agent(), + 'Accept': '*/*', + 'Accept-Encoding': 'gzip, deflate, br', + 'Referer': referer, + 'DNT': '1' + } + + with requests.get(file_url, headers=headers, stream=True, timeout=30) as response: + if response.status_code == 200: + # Check content type to verify it's not HTML/error page + content_type = response.headers.get('Content-Type', '') + if 'text/html' in content_type and not file_url.endswith('.html'): + logger.warning(f"Received HTML instead of expected file: {file_url}") + else: + with open(path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + + # Verify file was downloaded correctly + if os.path.exists(path) and os.path.getsize(path) > 0: + return path + except Exception as e: + logger.warning(f"Direct download failed: {e}, trying browser approach") - with requests.get(download_url, headers=headers, stream=True) as r: - r.raise_for_status() + # Original code for non-Google Drive downloads using Playwright + async with self.context.new_page() as page: + headers = { + 'Accept': '*/*', + 'Accept-Encoding': 'gzip, deflate, br', + 'Referer': referer + } + + # Try to download with timeout protection + try: + response = await page.request.get(file_url, headers=headers, timeout=self.download_timeout * 1000) + if response.status == 200: + content = await response.body() with open(path, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) + f.write(content) + return path + else: + logger.error(f"Download failed with status {response.status}: {file_url}") + + # Try to extract error information + error_info = await response.text() + logger.debug(f"Error response: {error_info[:200]}...") + + # Check if this might be a captcha or login issue + if detect_captcha(error_info): + logger.warning("Captcha detected during download") + # For HF Spaces, we can't implement browser-based captcha solving here + # Just log the issue for now + except PlaywrightTimeoutError: + logger.error(f"Download timed out after {self.download_timeout} seconds: {file_url}") + + # Try an alternative approach - using the browser's download manager + try: + logger.info("Trying browser download manager approach") + download_promise = page.wait_for_event("download") + await page.goto(file_url, timeout=60000) + + # Wait for download to start (with timeout) + download = await download_promise + await download.save_as(path) if os.path.exists(path) and os.path.getsize(path) > 0: return path - - # Standard file download - headers = { - 'User-Agent': get_random_user_agent(), - 'Referer': referer, - 'Accept': '*/*', - 'Accept-Encoding': 'gzip, deflate, br' - } - - with requests.get(file_url, headers=headers, stream=True) as r: - r.raise_for_status() - with open(path, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - if os.path.exists(path) and os.path.getsize(path) > 0: - return path - else: - return None + except Exception as e: + logger.error(f"Browser download manager approach failed: {e}") + return None except Exception as e: logger.error(f"Error downloading {file_url}: {e}") return None - async def download_viewonly_google_drive(self, file_info, save_path): - """Download view-only Google Drive documents""" + # IMPROVED: Enhanced view-only document download method + async def force_download_viewonly(self, file_info, save_path): + """Completely rewritten method 
to handle view-only files reliably, especially multi-page PDFs""" try: # Extract file ID file_id = file_info.get('metadata', {}).get('file_id') @@ -1020,350 +1327,1353 @@ class DownloadManager: logger.error("Could not extract file ID") return None - # Determine file type - file_type = get_file_extension(file_info['url'], '.pdf').lstrip('.') - - # Ensure appropriate extension on save path + file_type = file_info.get('metadata', {}).get('file_type', 'pdf') base, ext = os.path.splitext(save_path) if not ext: save_path = f"{base}.{file_type}" - logger.info(f"Downloading view-only Google Drive file: {file_id}") - - # Create a dedicated browser session - if self.browser_engine == "playwright": - from playwright.async_api import async_playwright - - async with async_playwright() as p: - browser = await p.chromium.launch( - headless=True, - args=[ - '--no-sandbox', - '--disable-setuid-sandbox', - '--disable-dev-shm-usage', - '--disable-web-security', - '--disable-features=IsolateOrigins,site-per-process', - '--disable-site-isolation-trials', - '--disable-blink-features=AutomationControlled' - ] - ) + logger.info(f"Starting reliable download of Google Drive file {file_id} (type: {file_type})") + + # Create a dedicated browser instance with better resolution and stealth + browser_args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-web-security', + '--disable-features=IsolateOrigins,site-per-process', + '--disable-site-isolation-trials', + '--disable-blink-features=AutomationControlled' # Anti-detection + ] + + browser = await self.playwright.chromium.launch( + headless=True, + args=browser_args + ) + + # Use higher resolution for better quality + context = await browser.new_context( + viewport={'width': 1600, 'height': 1200}, + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + device_scale_factor=2.0, + accept_downloads=True # Critical for the download workflow + ) + + # Add anti-detection script + await context.add_init_script(""" + () => { + Object.defineProperty(navigator, 'webdriver', { + get: () => false, + }); - # Create context with options for better handling - context = await browser.new_context( - viewport={'width': 1600, 'height': 1200}, - user_agent=get_random_user_agent(), - accept_downloads=True, - ignore_https_errors=True - ) + // Change plugins + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5].map(() => ({ + lengthComputable: true, + loaded: 100, + total: 100 + })) + }); + + // Handle languages + Object.defineProperty(navigator, 'languages', { + get: () => ['en-US', 'en', 'es'] + }); + + // Modify hardware concurrency + Object.defineProperty(navigator, 'hardwareConcurrency', { + get: () => 4 + }); + } + """) + + page = await context.new_page() + + try: + # Go to the file view page + logger.info(f"Opening file view page: https://drive.google.com/file/d/{file_id}/view") + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=90000) + await page.wait_for_load_state('networkidle') + + # Check for any barriers or permissions issues + content = await page.content() + if "the owner has not granted you permission to" in content: + logger.warning("Permission denied error detected") + + # Randomized wait to appear more human-like + await page.wait_for_timeout(random.randint(3000, 7000)) + + # Create temp directory + temp_dir = tempfile.mkdtemp() + + # Special handling for PDFs + if file_type.lower() == 'pdf': + # Use the 
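+                    # Drive's view-only viewer renders each PDF page lazily as a "blob:" image
+                    # element only once it has been scrolled into view. The loop below keeps
+                    # scrolling and counting those blob images until the count stops growing,
+                    # then a client-side jsPDF pass stitches the captured images back into a
+                    # single PDF (this relies on the jsPDF CDN script being reachable from the page).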
improved scrolling and detection approach + + # Perform some natural mouse movements and scrolling + await page.mouse.move(x=random.randint(200, 400), y=random.randint(200, 400)) + await page.wait_for_timeout(random.randint(500, 1000)) - # Add stealth script - await context.add_init_script(""" - Object.defineProperty(navigator, 'webdriver', { get: () => false }); - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5].map(() => ({ length: 1 })) + # Estimate number of pages + estimated_pages = await page.evaluate(""" + () => { + // Method 1: Check page counter text + const pageCounters = Array.from(document.querySelectorAll('*')).filter(el => { + const text = el.textContent || ''; + return /\\d+\\s*\\/\\s*\\d+/.test(text); }); - Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); - window.chrome = { runtime: {} }; + + if (pageCounters.length > 0) { + const text = pageCounters[0].textContent || ''; + const match = text.match(/(\\d+)\\s*\\/\\s*(\\d+)/); + if (match && match[2]) return parseInt(match[2]); + } + + // Method 2: Check actual page elements + const pageElements = document.querySelectorAll('.drive-viewer-paginated-page'); + if (pageElements.length > 0) return pageElements.length; + + // Method 3: Look for page thumbnails + const thumbnails = document.querySelectorAll('.drive-viewer-paginated-thumb'); + if (thumbnails.length > 0) return thumbnails.length; + + // Fallback: conservative guess + return 50; + } """) - page = await context.new_page() + logger.info(f"Estimated {estimated_pages} pages in PDF") - try: - # Visit the file - await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=60000) - await page.wait_for_load_state('networkidle') + # Initial scroll to trigger lazy loading + logger.info("Initial scroll to bottom to trigger lazy loading...") + await page.keyboard.press("End") + await page.wait_for_timeout(3000) + + # Scroll page by page to ensure all pages are loaded + logger.info("Scrolling page by page...") + max_attempts = min(estimated_pages * 3, 300) + attempt = 0 + prev_blob_count = 0 + + while attempt < max_attempts: + blob_count = await page.evaluate(""" + Array.from(document.getElementsByTagName('img')) + .filter(img => img.src.startsWith('blob:') && img.width > 100) + .length + """) - # Wait for content to load - await page.wait_for_timeout(5000) + logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images") - # Create temporary directory for processing - temp_dir = tempfile.mkdtemp() + if blob_count >= estimated_pages or (blob_count > 0 and blob_count == prev_blob_count and attempt > 10): + logger.info("All pages appear to be loaded.") + break - # For PDF handling - if file_type == 'pdf': - # Create directory for screenshots - screenshots_dir = os.path.join(temp_dir, "screenshots") - os.makedirs(screenshots_dir, exist_ok=True) + # Alternate between PageDown and End keys for more natural scrolling + if attempt % 3 == 0: + await page.keyboard.press("End") + else: + await page.keyboard.press("PageDown") - # Get page count - total_pages = await page.evaluate(""" - () => { - // Look for page counters in the interface - const pageCounters = document.querySelectorAll('*'); - for (const el of pageCounters) { - const text = el.textContent || ''; - const match = text.match(/(\\d+)\\s*\\/\\s*(\\d+)/); - if (match && match[2]) { - return parseInt(match[2]); + # Randomized wait times + await page.wait_for_timeout(random.randint(1500, 3000)) + + # Move mouse randomly to appear more human-like + if attempt 
% 4 == 0: + await page.mouse.move(x=random.randint(200, 800), y=random.randint(200, 800)) + + prev_blob_count = blob_count + attempt += 1 + + # Extra wait to ensure everything is loaded + await page.wait_for_timeout(5000) + + # Set up download event listener for the PDF + download_promise = page.wait_for_event("download") + + # Use jsPDF to generate PDF from loaded pages + logger.info("Generating PDF from loaded pages...") + result = await page.evaluate(r''' + (function() { + return new Promise((resolve, reject) => { + let script = document.createElement("script"); + script.onload = function () { + try { + let pdf = new jsPDF(); + let imgs = Array.from(document.getElementsByTagName("img")) + .filter(img => img.src.startsWith('blob:') && img.width > 100) + .sort((a, b) => { + const rectA = a.getBoundingClientRect(); + const rectB = b.getBoundingClientRect(); + return rectA.top - rectB.top; + }); + + console.log(`Found ${imgs.length} valid page images to add to PDF`); + + let added = 0; + for (let i = 0; i < imgs.length; i++) { + let img = imgs[i]; + let canvas = document.createElement("canvas"); + let ctx = canvas.getContext("2d"); + canvas.width = img.width; + canvas.height = img.height; + ctx.drawImage(img, 0, 0, img.width, img.height); + let imgData = canvas.toDataURL("image/jpeg", 1.0); + + if (added > 0) { + pdf.addPage(); + } + + pdf.addImage(imgData, 'JPEG', 0, 0); + added++; } + + pdf.save("download.pdf"); + resolve({success: true, pageCount: added}); + } catch (error) { + reject({success: false, error: error.toString()}); } - - // Look for paginated pages - const pages = document.querySelectorAll('.drive-viewer-paginated-page'); - if (pages.length > 0) return pages.length; - - // Default if we can't determine - return 20; - } - """) + }; + + script.onerror = function() { + reject({success: false, error: "Failed to load jsPDF library"}); + }; + + script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jspdf/1.5.3/jspdf.debug.js'; + document.body.appendChild(script); + }); + })(); + ''') + + if not result.get('success', False): + logger.error(f"Error in PDF generation: {result.get('error', 'Unknown error')}") + + # Try fallback approach - screenshot method + logger.info("Trying fallback screenshot method...") + + # Navigate back to the first page + await page.evaluate(""" + () => { + // Find and click the "first page" button if available + const buttons = Array.from(document.querySelectorAll('button')); + const firstPageBtn = buttons.find(b => b.getAttribute('aria-label')?.includes('First page')); + if (firstPageBtn) firstPageBtn.click(); + } + """) + await page.wait_for_timeout(1000); + + # Create a PDF by taking screenshots of each page + screenshots = [] + current_page = 1 + max_pages = estimated_pages + + # Create a PDF using the reportlab package + while current_page <= max_pages: + screenshot_path = os.path.join(temp_dir, f"page_{current_page}.png") + + # Try to find the current page element + page_elem = await page.query_selector('.drive-viewer-paginated-page') + if page_elem: + await page_elem.screenshot(path=screenshot_path) + else: + # Fallback to full page screenshot + await page.screenshot(path=screenshot_path) + + screenshots.append(screenshot_path) + + # Try to navigate to next page + next_btn = await page.query_selector('button[aria-label="Next page"]') + if next_btn: + is_disabled = await next_btn.get_attribute('disabled') + if is_disabled: + logger.info(f"Reached end of document at page {current_page}") + break + + await next_btn.click() + await 
page.wait_for_timeout(1000) + current_page += 1 + else: + break + + # Create PDF from screenshots + if screenshots: + first_img = Image.open(screenshots[0]) + width, height = first_img.size - logger.info(f"PDF has approximately {total_pages} pages") + c = canvas.Canvas(save_path, pagesize=(width, height)) + for screenshot in screenshots: + img = Image.open(screenshot) + c.drawImage(screenshot, 0, 0, width, height) + c.showPage() + c.save() - # Take screenshots of each page - screenshots = [] + # Clean up screenshots + for screenshot in screenshots: + os.remove(screenshot) - # First try with the page element method - for i in range(min(total_pages, 100)): # Limit to 100 pages for safety - try: - # Navigate to specific page - if i > 0: - await page.evaluate(f"document.querySelector('.drive-viewer-paginated-page:nth-child({i+1})').scrollIntoView()") - await page.wait_for_timeout(500) + return save_path + + return None + + logger.info(f"PDF generation triggered with {result.get('pageCount')} pages") + + # Wait for the download and save it + download = await download_promise + await download.save_as(save_path) + + # Clean up temp directory + try: + os.rmdir(temp_dir) + except: + pass + + else: + # Non-PDF file handling + screenshot_path = os.path.join(temp_dir, "file.png") + await page.screenshot(path=screenshot_path) + + if file_type.lower() in ['doc', 'docx', 'xlsx', 'pptx']: + # For document types, try to export directly + await self.export_google_doc(file_id, file_type, save_path) + else: + # For other types, save the screenshot with appropriate extension + shutil.copy(screenshot_path, save_path) + + os.remove(screenshot_path) + + # Close browser + await browser.close() + + # Verify file exists and has content + if os.path.exists(save_path) and os.path.getsize(save_path) > 1000: + logger.info(f"Successfully downloaded file to {save_path}") + return save_path + else: + logger.error(f"Generated file is too small or missing: {save_path}") + return None + + except Exception as e: + logger.error(f"Error during force download: {e}") + if browser: + await browser.close() + return None + + except Exception as e: + logger.error(f"Force download preparation failed: {e}") + return None + + async def download_from_google_drive(self, url, save_path): + """Enhanced method to download from Google Drive with multiple fallback approaches""" + # Extract the file ID from different URL formats + file_id = None + url_patterns = [ + r'drive\.google\.com/file/d/([^/]+)', + r'drive\.google\.com/open\?id=([^&]+)', + r'docs\.google\.com/\w+/d/([^/]+)', + r'id=([^&]+)', + r'drive\.google\.com/uc\?id=([^&]+)', + ] + + for pattern in url_patterns: + match = re.search(pattern, url) + if match: + file_id = match.group(1) + break + + if not file_id: + logger.error(f"Could not extract file ID from URL: {url}") + return False + + # Determine file type first (important for handling different file types) + file_type, is_view_only = await self.get_google_drive_file_info(file_id) + logger.info(f"Google Drive file type: {file_type}, View-only: {is_view_only}") + + base, ext = os.path.splitext(save_path) + if not ext and file_type: + # Add the correct extension if missing + save_path = f"{base}.{file_type}" + + # For view-only files, use specialized approaches + if is_view_only: + # Approach 1: For PDFs, use the JS method + if file_type == 'pdf': + success = await self.download_viewonly_pdf_with_js(file_id, save_path) + if success: + return True + + # Approach 2: For Google Docs, Sheets, etc., use export API + if file_type in 
['doc', 'docx', 'sheet', 'ppt', 'xlsx', 'pptx']: + success = await self.export_google_doc(file_id, file_type, save_path) + if success: + return True + + # Approach 3: Try the direct screenshot method for any view-only file + success = await self.download_viewonly_with_screenshots(file_id, save_path, file_type) + if success: + return True + + # Try standard approaches for non-view-only files + try: + # Try direct download link first (fastest) + direct_url = f"https://drive.google.com/uc?id={file_id}&export=download&confirm=t" + + # Add anti-bot headers + headers = { + 'User-Agent': get_random_user_agent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US,en;q=0.9', + 'Referer': 'https://drive.google.com/', + 'DNT': '1' + } + + # Try with streaming to handle larger files + with requests.get(direct_url, headers=headers, stream=True, timeout=60) as r: + if r.status_code == 200: + # Check if we got HTML instead of the file + content_type = r.headers.get('Content-Type', '') + if 'text/html' in content_type and not file_id.endswith('.html'): + logger.warning("Received HTML instead of file, trying with session cookies") + else: + # Looks like we got the actual file + with open(save_path, 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + + # Verify file exists and has content + if os.path.exists(save_path) and os.path.getsize(save_path) > 0: + logger.info("Direct download successful") + return True + + # Try with requests and session cookies + session = requests.Session() + session.headers.update({'User-Agent': get_random_user_agent()}) + + # Visit the page first to get cookies + session.get(f"https://drive.google.com/file/d/{file_id}/view", timeout=30) + + # Try download + url = f"https://drive.google.com/uc?id={file_id}&export=download" + response = session.get(url, stream=True, timeout=30) + + # Check for confirmation token + confirmation_token = None + for k, v in response.cookies.items(): + if k.startswith('download_warning'): + confirmation_token = v + break + + # Use confirmation token if found + if confirmation_token: + url = f"{url}&confirm={confirmation_token}" + response = session.get(url, stream=True, timeout=60) + + # Check if we're getting HTML instead of the file + content_type = response.headers.get('Content-Type', '') + if 'text/html' in content_type: + logger.warning("Received HTML instead of file - likely download restriction") + else: + with open(save_path, 'wb') as f: + for chunk in response.iter_content(chunk_size=1024*1024): + if chunk: + f.write(chunk) + + if os.path.exists(save_path) and os.path.getsize(save_path) > 0: + with open(save_path, 'rb') as f: + content = f.read(100) + if b'' not in content: + logger.info("Successfully downloaded with requests session") + return True + except Exception as e: + logger.warning(f"Requests session download failed: {e}") + + # Try browser-based approach as last resort + try: + async with self.context.new_page() as page: + # Visit the file view page first to get cookies + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=30000) + await page.wait_for_timeout(3000) + + # Set up download event listener + download_promise = page.wait_for_event("download") + + # Try to trigger the download button click + download_button = await page.query_selector('button[aria-label*="Download"], [data-tooltip*="Download"]') + if download_button: + await download_button.click() + + # Wait for download to start + try: + 
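+                        # The "download" listener was registered before the button click above,
+                        # so awaiting it yields a Playwright Download object as soon as the
+                        # browser starts receiving the file; save_as() then moves the file from
+                        # Playwright's temporary download location to save_path.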
download = await download_promise + await download.save_as(save_path) + return os.path.exists(save_path) and os.path.getsize(save_path) > 0 + except Exception as e: + logger.error(f"Error during browser download: {e}") + return False + else: + # Try the export download URL + await page.goto(f"https://drive.google.com/uc?id={file_id}&export=download", timeout=30000) + + # Look for and click any download buttons or links + download_elements = await page.query_selector_all('a[href*="download"], a[href*="export"], form[action*="download"], button:has-text("Download")') + for elem in download_elements: + try: + await elem.click() + # Wait a bit to see if download starts + try: + download = await download_promise + await download.save_as(save_path) + return os.path.exists(save_path) and os.path.getsize(save_path) > 0 + except: + pass + except: + continue + except Exception as e: + logger.error(f"Browser-based download attempt failed: {e}") + + logger.warning("All standard download methods failed") + return False + + async def download_viewonly_pdf_with_js(self, file_id, save_path): + """Download view-only PDF using the enhanced blob image caching technique""" + try: + # Create a dedicated browser instance with stealth capabilities + browser_args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-web-security', + '--disable-blink-features=AutomationControlled' # Anti-detection + ] + + browser = await self.playwright.chromium.launch( + headless=True, + args=browser_args + ) + + # Setup stealth context + context = await browser.new_context( + viewport={'width': 1600, 'height': 1200}, + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + accept_downloads=True, # Critical for handling the download event + ignore_https_errors=True + ) + + # Add stealth script + await context.add_init_script(""" + () => { + Object.defineProperty(navigator, 'webdriver', { + get: () => false, + }); + + // Change plugins and languages to appear more human + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5].map(() => ({ + lengthComputable: true, + loaded: 100, + total: 100 + })) + }); + + Object.defineProperty(navigator, 'languages', { + get: () => ['en-US', 'en', 'es'] + }); + } + """) + + page = await context.new_page() + + try: + # Step 1: Navigate to the file with human-like behavior + logger.info(f"Opening view-only PDF: https://drive.google.com/file/d/{file_id}/view") + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=60000) + await page.wait_for_load_state('networkidle') + + # Perform human-like interactions + await page.mouse.move(x=random.randint(100, 500), y=random.randint(100, 300)) + await page.wait_for_timeout(random.randint(2000, 5000)) + + # Step 2: Estimate the number of pages + estimated_pages = await page.evaluate(""" + () => { + // Look for page counter in the interface + const pageCounters = Array.from(document.querySelectorAll('*')).filter(el => { + const text = el.textContent || ''; + return /\\d+\\s*\\/\\s*\\d+/.test(text); + }); + + if (pageCounters.length > 0) { + const text = pageCounters[0].textContent || ''; + const match = text.match(/(\\d+)\\s*\\/\\s*(\\d+)/); + if (match && match[2]) return parseInt(match[2]); + } + + // If we can't find a counter, check actual pages + const pages = document.querySelectorAll('.drive-viewer-paginated-page'); + if (pages.length > 0) return pages.length; + + // Default to a reasonable number if 
we can't determine + return 50; + } + """) + + logger.info(f"Estimated number of pages: {estimated_pages}") + + # Step 3: Initial scroll to trigger loading + logger.info("Initial scroll to bottom to trigger lazy loading...") + await page.keyboard.press("End") + await page.wait_for_timeout(3000) + + # Step 4: Wait for all pages to load with better feedback and randomization + logger.info("Scrolling through document to load all pages...") + max_attempts = min(estimated_pages * 3, 300) + attempt = 0 + prev_blob_count = 0 + consecutive_same_count = 0 + + while attempt < max_attempts: + # Count blob images (which are the PDF pages) + blob_count = await page.evaluate(""" + Array.from(document.getElementsByTagName('img')) + .filter(img => img.src.startsWith('blob:') && img.width > 100) + .length + """) + + logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images") + + # Check if we've loaded all pages or if we're stuck + if blob_count >= estimated_pages: + logger.info(f"All {estimated_pages} pages appear to be loaded.") + break + + if blob_count == prev_blob_count: + consecutive_same_count += 1 + if consecutive_same_count >= 5 and blob_count > 0: + logger.info(f"No new pages loaded after {consecutive_same_count} attempts. Assuming all available pages ({blob_count}) are loaded.") + break + else: + consecutive_same_count = 0 + + # Mix up the scrolling approach for more human-like behavior + scroll_action = random.choice(["PageDown", "End", "ArrowDown", "mouse"]) + + if scroll_action == "PageDown": + await page.keyboard.press("PageDown") + elif scroll_action == "End": + await page.keyboard.press("End") + elif scroll_action == "ArrowDown": + # Press arrow down multiple times + for _ in range(random.randint(5, 15)): + await page.keyboard.press("ArrowDown") + await page.wait_for_timeout(random.randint(50, 150)) + else: # mouse + # Scroll using mouse wheel + current_y = random.randint(300, 700) + await page.mouse.move(x=random.randint(300, 800), y=current_y) + await page.mouse.wheel(0, random.randint(300, 800)) + + # Random wait between scrolls + await page.wait_for_timeout(random.randint(1000, 3000)) + + prev_blob_count = blob_count + attempt += 1 + + # Extra wait to ensure everything is fully loaded + await page.wait_for_timeout(5000) + + # Step 5: Set up a download event listener + download_promise = page.wait_for_event("download") + + # Step 6: Inject the jsPDF script to generate PDF + logger.info("Generating PDF from loaded pages...") + result = await page.evaluate(r''' + (function() { + return new Promise((resolve, reject) => { + let script = document.createElement("script"); + script.onload = function () { + try { + let pdf = new jsPDF(); + let imgs = document.getElementsByTagName("img"); + let validImages = []; - # Wait for the page to render - await page.wait_for_timeout(500) + // First collect all valid blob images + for (let i = 0; i < imgs.length; i++) { + let img = imgs[i]; + if (!/^blob:/.test(img.src)) continue; + if (img.width < 100 || img.height < 100) continue; + validImages.push(img); + } - # Take screenshot - screenshot_path = os.path.join(screenshots_dir, f"page_{i+1}.png") + // Sort by position in the document + validImages.sort((a, b) => { + const rectA = a.getBoundingClientRect(); + const rectB = b.getBoundingClientRect(); + return rectA.top - rectB.top; + }); - # Try to find the page element - page_element = await page.query_selector(f'.drive-viewer-paginated-page:nth-child({i+1})') - if page_element: - await page_element.screenshot(path=screenshot_path) - else: - # 
Fallback to viewport screenshot - await page.screenshot(path=screenshot_path) + console.log(`Found ${validImages.length} valid page images to add to PDF`); - screenshots.append(screenshot_path) + let added = 0; + // Process each image as a page + for (let i = 0; i < validImages.length; i++) { + let img = validImages[i]; + let canvas = document.createElement("canvas"); + let ctx = canvas.getContext("2d"); + canvas.width = img.width; + canvas.height = img.height; + ctx.drawImage(img, 0, 0, img.width, img.height); + let imgData = canvas.toDataURL("image/jpeg", 1.0); + + if (added > 0) { + pdf.addPage(); + } + + pdf.addImage(imgData, 'JPEG', 0, 0); + added++; + } - # Check if we should continue to next page - if i < total_pages - 1: - next_button = await page.query_selector('button[aria-label="Next page"]') - if next_button: - # Check if button is disabled - is_disabled = await next_button.get_attribute('disabled') - if is_disabled: - logger.info(f"Reached last page at page {i+1}") - break - - # Click next page - await next_button.click() - await page.wait_for_timeout(1000) - else: - logger.info("Next page button not found") - break - except Exception as e: - logger.error(f"Error capturing page {i+1}: {e}") - continue + pdf.save("download.pdf"); + resolve({success: true, pageCount: added}); + } catch (error) { + reject({success: false, error: error.toString()}); + } + }; - # Create PDF from screenshots - if screenshots: - # Get dimensions from first screenshot - first_img = Image.open(screenshots[0]) - width, height = first_img.size - - # Create PDF - c = canvas.Canvas(save_path, pagesize=(width, height)) - for screenshot in screenshots: - c.drawImage(screenshot, 0, 0, width, height) - c.showPage() - c.save() - - # Clean up screenshots - for screenshot in screenshots: - os.remove(screenshot) - - # Clean up temp directory - shutil.rmtree(temp_dir, ignore_errors=True) + script.onerror = function() { + reject({success: false, error: "Failed to load jsPDF library"}); + }; + + // Use a reliable CDN + script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jspdf/1.5.3/jspdf.debug.js'; + document.body.appendChild(script); + }); + })(); + ''') + + if not result.get('success'): + logger.error(f"Error in PDF generation: {result.get('error')}") + return False + + logger.info(f"PDF generation triggered with {result.get('pageCount')} pages") + + # Step 7: Wait for the download to complete and save the file + download = await download_promise + + # Step 8: Save the downloaded file to the specified path + await download.save_as(save_path) + logger.info(f"Successfully saved PDF to {save_path}") + + return os.path.exists(save_path) and os.path.getsize(save_path) > 1000 + + finally: + await browser.close() + + except Exception as e: + logger.error(f"Error in viewonly PDF download process: {e}") + return False + + async def download_viewonly_with_screenshots(self, file_id, save_path, file_type): + """Download any view-only file by taking screenshots""" + try: + async with self.context.new_page() as page: + # Set high-resolution viewport + await page.set_viewport_size({"width": 1600, "height": 1200}) + + # Navigate to the file + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", wait_until='networkidle', timeout=60000) + + # Make sure the file is loaded + await page.wait_for_load_state('networkidle') + await page.wait_for_timeout(3000) # Extra time for rendering + + # Create directory for screenshots if multiple pages + base_dir = os.path.dirname(save_path) + base_name = 
os.path.splitext(os.path.basename(save_path))[0] + screenshots_dir = os.path.join(base_dir, f"{base_name}_screenshots") + os.makedirs(screenshots_dir, exist_ok=True) + + # Check if it's a multi-page document + is_multi_page = await page.evaluate(""" + () => { + const pages = document.querySelectorAll('.drive-viewer-paginated-page'); + return pages.length > 1; + } + """) + + if is_multi_page and file_type == 'pdf': + # For multi-page PDFs, take screenshots of each page + page_count = await page.evaluate(""" + async () => { + const delay = ms => new Promise(resolve => setTimeout(resolve, ms)); + const pages = document.querySelectorAll('.drive-viewer-paginated-page'); + const container = document.querySelector('.drive-viewer-paginated-scrollable'); + + if (!container || pages.length === 0) return 0; + + // Scroll through to make sure all pages are loaded + const scrollHeight = container.scrollHeight; + const viewportHeight = container.clientHeight; + const scrollStep = viewportHeight; + + for (let scrollPos = 0; scrollPos < scrollHeight; scrollPos += scrollStep) { + container.scrollTo(0, scrollPos); + await delay(300); + } + + // Scroll back to top + container.scrollTo(0, 0); + await delay(300); + + return pages.length; + } + """) + + logger.info(f"Found {page_count} pages in document") + + # Take screenshots of each page + screenshots = [] + for i in range(page_count): + # Scroll to page + await page.evaluate(f""" + async () => {{ + const delay = ms => new Promise(resolve => setTimeout(resolve, ms)); + const pages = document.querySelectorAll('.drive-viewer-paginated-page'); + if (pages.length <= {i}) return false; - return save_path + pages[{i}].scrollIntoView(); + await delay(500); + return true; + }} + """) + + # Take screenshot + screenshot_path = os.path.join(screenshots_dir, f"page_{i+1}.png") + await page.screenshot(path=screenshot_path, clip={ + 'x': 0, + 'y': 0, + 'width': 1600, + 'height': 1200 + }) + screenshots.append(screenshot_path) + + # Combine screenshots into PDF + c = canvas.Canvas(save_path) + for screenshot in screenshots: + img = Image.open(screenshot) + width, height = img.size + + # Add page to PDF + c.setPageSize((width, height)) + c.drawImage(screenshot, 0, 0, width, height) + c.showPage() + + c.save() + + # Clean up screenshots + for screenshot in screenshots: + os.remove(screenshot) + os.rmdir(screenshots_dir) + + return os.path.exists(save_path) and os.path.getsize(save_path) > 0 + else: + # For single-page or non-PDF files, just take one screenshot + screenshot_path = os.path.join(screenshots_dir, "screenshot.png") + await page.screenshot(path=screenshot_path, fullPage=True) + + # Convert to requested format if needed + if file_type == 'pdf': + # Create PDF from screenshot + img = Image.open(screenshot_path) + width, height = img.size + + c = canvas.Canvas(save_path, pagesize=(width, height)) + c.drawImage(screenshot_path, 0, 0, width, height) + c.save() + else: + # Just copy the screenshot to the destination with proper extension + shutil.copy(screenshot_path, save_path) + + # Clean up + os.remove(screenshot_path) + os.rmdir(screenshots_dir) + + return os.path.exists(save_path) and os.path.getsize(save_path) > 0 + + except Exception as e: + logger.error(f"Error taking screenshots: {e}") + return False + + async def export_google_doc(self, file_id, file_type, save_path): + """Export Google Docs/Sheets/Slides to downloadable formats""" + try: + # Map file types to export formats + export_formats = { + 'doc': 
'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # docx + 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'sheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # xlsx + 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'ppt': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', # pptx + 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'pdf': 'application/pdf', + } + + export_format = export_formats.get(file_type, 'application/pdf') + export_url = f"https://docs.google.com/document/d/{file_id}/export?format={file_type}" + + if 'sheet' in file_type or 'xlsx' in file_type: + export_url = f"https://docs.google.com/spreadsheets/d/{file_id}/export?format=xlsx" + elif 'ppt' in file_type or 'presentation' in file_type: + export_url = f"https://docs.google.com/presentation/d/{file_id}/export/pptx" + elif file_type == 'pdf': + export_url = f"https://docs.google.com/document/d/{file_id}/export?format=pdf" + + async with self.context.new_page() as page: + # Get cookies from the main view page first + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", wait_until='networkidle') + + # Now try the export + response = await page.goto(export_url, wait_until='networkidle') + + if response.status == 200: + content = await response.body() + with open(save_path, 'wb') as f: + f.write(content) + return os.path.exists(save_path) and os.path.getsize(save_path) > 0 + else: + logger.warning(f"Export failed with status {response.status}") + return False + + except Exception as e: + logger.error(f"Error exporting Google Doc: {e}") + return False + + async def get_google_drive_file_info(self, file_id): + """Get file type and view-only status from Google Drive""" + file_type = None + is_view_only = False + + try: + async with self.context.new_page() as page: + await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=30000) + + # Check if view-only + view_only_text = await page.query_selector('text="the owner has not granted you permission to download this file"') + is_view_only = view_only_text is not None + + # Check for Google Docs viewer + gdocs_viewer = await page.query_selector('iframe[src*="docs.google.com/document"]') + gsheets_viewer = await page.query_selector('iframe[src*="docs.google.com/spreadsheets"]') + gslides_viewer = await page.query_selector('iframe[src*="docs.google.com/presentation"]') + + if gdocs_viewer: + file_type = 'docx' + elif gsheets_viewer: + file_type = 'xlsx' + elif gslides_viewer: + file_type = 'pptx' + else: + # Check for PDF viewer + pdf_viewer = await page.query_selector('embed[type="application/pdf"]') + if pdf_viewer: + file_type = 'pdf' + else: + # Check for image viewer + img_viewer = await page.query_selector('img[src*="googleusercontent.com"]') + if img_viewer: + # Get image type from src + img_src = await img_viewer.get_attribute('src') + if 'jpg' in img_src or 'jpeg' in img_src: + file_type = 'jpg' + elif 'png' in img_src: + file_type = 'png' else: - logger.error("No screenshots captured") + file_type = 'jpg' # Default to jpg else: - # For non-PDF files, just take a screenshot - screenshot_path = os.path.join(temp_dir, "file.png") - await page.screenshot(path=screenshot_path) - - # Copy to destination - shutil.copy(screenshot_path, save_path) - - # Clean up - os.remove(screenshot_path) - shutil.rmtree(temp_dir, ignore_errors=True) - - return save_path - finally: - await 
browser.close() - elif self.browser_engine == "pyppeteer": - # Similar implementation for Pyppeteer - pass - - return None + # Generic file type fallback + file_type = 'pdf' # Default to PDF + + # If still no type, check filename + if not file_type: + title_element = await page.query_selector('div[role="heading"]') + if title_element: + title = await title_element.text_content() + if title: + ext_match = re.search(r'\.([a-zA-Z0-9]+)$', title) + if ext_match: + file_type = ext_match.group(1).lower() + except Exception as e: - logger.error(f"Error downloading view-only file: {e}") - return None + logger.error(f"Error getting Google Drive file info: {e}") + file_type = 'pdf' # Default to PDF if we can't determine + + return file_type, is_view_only + # IMPROVED: Enhanced sublink extraction method async def get_sublinks(self, url, limit=10000): - """Extract all sublinks from a website""" + """Enhanced method to extract sublinks from a website, including dynamic content and interactive elements""" links = set() try: - logger.info(f"Extracting sublinks from {url}") + logger.info(f"Fetching sublinks from: {url}") - # Special handling for educational sites + # Special handling for educational sites like phsms.cloud.ncnu.edu.tw if "phsms.cloud.ncnu.edu.tw" in url or any(keyword in url.lower() for keyword in - ["exam", "test", "pastpaper", "eduexp"]): + ["exam", "test", "pastpaper", "eduexp"]): + logger.info("Using specialized exam site sublink extraction") edu_links = await self.get_edu_exam_links(url) for link in edu_links: links.add(link) + # If we found a good number of links with the specialized method, return them if len(links) > 5: logger.info(f"Found {len(links)} sublinks with specialized method") return list(links)[:limit] - # Standard link extraction for all sites - await self.browser.goto(url, timeout=30000) + # Rotate proxy if needed + await self.rotate_proxy_if_needed() - # Get page content - content = await self.browser.content() - soup = BeautifulSoup(content, 'html.parser') + # Standard sublink extraction for all sites + await self.page.goto(url, timeout=30000, wait_until='networkidle') # Get base URL for resolving relative links parsed_base = urlparse(url) base_url = f"{parsed_base.scheme}://{parsed_base.netloc}" + path_base = os.path.dirname(parsed_base.path) - # Extract all links from the page - for a in soup.find_all('a', href=True): - href = a['href'] - if href and not href.startswith('javascript:') and not href.startswith('#'): - # Resolve relative URLs - if href.startswith('/'): - full_url = f"{base_url}{href}" - elif href.startswith('http'): - full_url = href - else: - full_url = urljoin(url, href) + # Perform initial scrolling to load lazy content + await self.page.evaluate(""" + async () => { + const delay = ms => new Promise(resolve => setTimeout(resolve, ms)); + const height = document.body.scrollHeight; + const step = Math.floor(window.innerHeight / 2); + + for (let i = 0; i < height; i += step) { + window.scrollTo(0, i); + await delay(150); + } - links.add(full_url) + window.scrollTo(0, 0); + } + """) + await self.page.wait_for_timeout(1000) - # Extract iframe sources - for iframe in soup.find_all('iframe', src=True): - src = iframe['src'] - if src and not src.startswith('javascript:') and not src.startswith('about:'): - full_url = src if src.startswith('http') else urljoin(url, src) - links.add(full_url) + # Check if page has ASP.NET elements which might need special handling + is_aspnet = await self.page.evaluate(''' + () => { + return 
document.querySelector('form#aspnetForm') !== null || + document.querySelector('input[name="__VIEWSTATE"]') !== null; + } + ''') - return list(links)[:limit] - except Exception as e: - logger.error(f"Error extracting sublinks: {e}") - return list(links)[:limit] - - @celery_app.task - def download_file_task(file_info, save_dir, referer=None): - """Celery task for downloading files asynchronously""" - # This function runs in a separate worker process - file_url = file_info['url'] - fname = file_info['filename'] - referer = referer or file_info.get('source_url', 'https://www.google.com') - - # Create unique filename - path = os.path.join(save_dir, fname) - base, ext = os.path.splitext(fname) - counter = 1 - while os.path.exists(path): - path = os.path.join(save_dir, f"{base}_{counter}{ext}") - counter += 1 - - os.makedirs(save_dir, exist_ok=True) - - try: - # Handle Google Drive files - if "drive.google.com" in file_url or "docs.google.com" in file_url: - # Extract file ID - file_id = None - for pattern in [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']: - match = re.search(pattern, file_url) - if match: - file_id = match.group(1) - break + if is_aspnet: + logger.info("Detected ASP.NET page, using enhanced extraction method") + + # Try to interact with ASP.NET controls that might reveal more links + # Look for dropdowns, buttons, and grid elements + dropdowns = await self.page.query_selector_all('select') + buttons = await self.page.query_selector_all('input[type="button"], input[type="submit"], button') + + # Try interacting with dropdowns first + for dropdown in dropdowns: + try: + # Get all options + options = await self.page.evaluate(''' + (dropdown) => { + return Array.from(dropdown.options).map(o => o.value); + } + ''', dropdown) + + # Try selecting each option + for option in options: + if option: + await dropdown.select_option(value=option) + await self.page.wait_for_timeout(1000) + await self.page.wait_for_load_state('networkidle', timeout=5000) + + # Extract any new links that appeared + await self.extract_all_link_types(links, base_url, path_base) + except Exception as e: + logger.warning(f"Error interacting with dropdown: {e}") + + # Try clicking buttons (but avoid dangerous ones like "delete") + safe_buttons = [] + for button in buttons: + button_text = await button.text_content() or "" + button_value = await button.get_attribute("value") or "" + button_id = await button.get_attribute("id") or "" + combined_text = (button_text + button_value + button_id).lower() + + # Skip potentially destructive buttons + if any(keyword in combined_text for keyword in ["delete", "remove", "cancel", "close", "logout"]): + continue + + # Prioritize buttons that might show more content + if any(keyword in combined_text for keyword in ["view", "show", "search", "browse", "list", "go", "display"]): + safe_buttons.append(button) - if file_id: - # Try direct download - download_url = f"https://drive.google.com/uc?id={file_id}&export=download" - headers = { - 'User-Agent': get_random_user_agent(), - 'Referer': referer + # Click the safe buttons + for button in safe_buttons[:5]: # Limit to first 5 to avoid too many clicks + try: + await button.click() + await self.page.wait_for_timeout(1000) + await self.page.wait_for_load_state('networkidle', timeout=5000) + + # Extract any new links that appeared + await self.extract_all_link_types(links, base_url, path_base) + except Exception as e: + logger.warning(f"Error clicking button: {e}") + + # Extract links from the initial page state + await 
self.extract_all_link_types(links, base_url, path_base) + + # Look specifically for links inside grid/table views which are common in ASP.NET applications + grid_cells = await self.page.query_selector_all('td a, tr.rgRow a, tr.rgAltRow a, .grid a, .table a') + for cell in grid_cells: + try: + href = await cell.get_attribute('href') + if href: + full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base) + links.add(full_url) + except Exception as e: + logger.warning(f"Error extracting grid link: {e}") + + # Extract links from onclick attributes and javascript:__doPostBack calls + postback_links = await self.page.evaluate(''' + () => { + const results = []; + // Find elements with onclick containing __doPostBack + const elements = document.querySelectorAll('*[onclick*="__doPostBack"]'); + for (const el of elements) { + // Extract the postback target + const onclick = el.getAttribute('onclick') || ''; + const match = onclick.match(/__doPostBack\\('([^']+)'.*?\\)/); + if (match && match[1]) { + // Get the visible text to use as description + const text = el.innerText || el.textContent || 'Link'; + results.push({ + id: match[1], + text: text.trim() + }); + } } + return results; + } + ''') + + # Try interacting with some of the postback links + for postback in postback_links[:10]: # Limit to first 10 to avoid too many interactions + try: + logger.info(f"Trying postback link: {postback['text']} ({postback['id']})") + await self.page.evaluate(f''' + () => {{ + if (typeof __doPostBack === 'function') {{ + __doPostBack('{postback["id"]}', ''); + }} + }} + ''') + await self.page.wait_for_timeout(1500) + await self.page.wait_for_load_state('networkidle', timeout=5000) - with requests.get(download_url, headers=headers, stream=True) as r: - if r.status_code == 200: - with open(path, 'wb') as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - # Check if this is HTML (common for Google Drive restrictions) - with open(path, 'rb') as f: - content_start = f.read(100).decode('utf-8', errors='ignore') - if '" == el_text.strip() or "→" == el_text.strip(): + logger.info(f"Clicking pagination control: {el_text}") + await el.click() + await self.page.wait_for_timeout(2000) + await self.page.wait_for_load_state('networkidle', timeout=5000) + + # Get new links from this page + await self.extract_all_link_types(links, base_url, path_base) + except Exception as e: + logger.warning(f"Error clicking pagination: {e}") + + # Check for hidden links that might be revealed by JavaScript + hidden_links = await self.page.evaluate(""" + () => { + // Try to execute common JavaScript patterns that reveal hidden content + try { + // Common patterns used in websites to initially hide content + const hiddenContainers = document.querySelectorAll( + '.hidden, .hide, [style*="display: none"], [style*="visibility: hidden"]' + ); + + // Attempt to make them visible + hiddenContainers.forEach(el => { + el.style.display = 'block'; + el.style.visibility = 'visible'; + el.classList.remove('hidden', 'hide'); + }); + + // Return any newly visible links + return Array.from(document.querySelectorAll('a[href]')).map(a => a.href); + } catch (e) { + return []; + } + } + """) + + # Add any newly discovered links + for href in hidden_links: + if href and not href.startswith('javascript:'): + links.add(href) + + logger.info(f"Found {len(links)} sublinks") + return list(links)[:limit] + except Exception as e: - return {'status': 'error', 'message': str(e)} + logger.error(f"Error getting 
sublinks from {url}: {e}") + return list(links)[:limit] # Return what we have so far + + async def extract_all_link_types(self, links_set, base_url, path_base): + """Extract all types of links from the current page""" + # Get all tag links + a_links = await self.page.query_selector_all('a[href]') + for a in a_links: + try: + href = await a.get_attribute('href') + if href and not href.startswith('javascript:') and not href.startswith('#'): + full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base) + links_set.add(full_url) + except Exception: + pass + + # Get iframe sources + iframes = await self.page.query_selector_all('iframe[src]') + for iframe in iframes: + try: + src = await iframe.get_attribute('src') + if src and not src.startswith('javascript:') and not src.startswith('about:'): + full_url = src if src.startswith('http') else self.resolve_relative_url(src, base_url, path_base) + links_set.add(full_url) + except Exception: + pass + + # Get links from onclick attributes that reference URLs + onclick_elements = await self.page.query_selector_all('*[onclick*="window.location"], *[onclick*="document.location"]') + for el in onclick_elements: + try: + onclick = await el.get_attribute('onclick') + urls = re.findall(r'(https?://[^\'"]+)', onclick) + for url in urls: + links_set.add(url) + except Exception: + pass + + # Look for URLs in data-* attributes + data_elements = await self.page.query_selector_all('*[data-url], *[data-href], *[data-src]') + for el in data_elements: + for attr in ['data-url', 'data-href', 'data-src']: + try: + value = await el.get_attribute(attr) + if value and not value.startswith('javascript:'): + full_url = value if value.startswith('http') else self.resolve_relative_url(value, base_url, path_base) + links_set.add(full_url) + except Exception: + pass + + # Look for special anchor links that might not have href attributes + special_anchors = await self.page.query_selector_all('.rgMasterTable a, .grid a, #GridView1 a, #gvResults a') + for anchor in special_anchors: + try: + href = await anchor.get_attribute('href') + if href and not href.startswith('javascript:') and not href.startswith('#'): + full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base) + links_set.add(full_url) + except Exception: + pass + + # Extract links from JSON data embedded in the page + script_elements = await self.page.query_selector_all('script[type="application/json"], script[type="text/json"]') + for script in script_elements: + try: + script_content = await script.text_content() + if script_content: + # Look for URLs in the JSON content + urls = re.findall(r'(https?://[^\'"]+)', script_content) + for url in urls: + links_set.add(url) + except Exception: + pass + + def resolve_relative_url(self, relative_url, base_url, path_base): + """Properly resolve relative URLs considering multiple formats""" + if relative_url.startswith('/'): + # Absolute path relative to domain + return f"{base_url}{relative_url}" + elif relative_url.startswith('./'): + # Explicit relative path + return f"{base_url}{path_base}/{relative_url[2:]}" + elif relative_url.startswith('../'): + # Parent directory + parent_path = '/'.join(path_base.split('/')[:-1]) + return f"{base_url}{parent_path}/{relative_url[3:]}" + else: + # Regular relative path + return f"{base_url}{path_base}/{relative_url}" async def deep_search(self, url, custom_ext_list=None, sublink_limit=10000, timeout=60): - """Perform deep search for files on a website and 
its subpages""" if not custom_ext_list: custom_ext_list = [] - - # Create progress indicators progress_text = st.empty() progress_bar = st.progress(0) file_count_text = st.empty() try: progress_text.text("Analyzing main page...") + # Special handling for ASP.NET pages + is_aspnet = False + try: + await self.page.goto(url, timeout=30000, wait_until='networkidle') + is_aspnet = await self.page.evaluate(''' + () => { + return document.querySelector('form#aspnetForm') !== null || + document.querySelector('input[name="__VIEWSTATE"]') !== null; + } + ''') + except Exception: + pass - # Extract files from main page first + # Extract files from main page main_files = await self.extract_downloadable_files(url, custom_ext_list) initial_count = len(main_files) file_count_text.text(f"Found {initial_count} files on main page") - # Get sublinks + # Get sublinks with enhanced method progress_text.text("Getting sublinks...") sublinks = await self.get_sublinks(url, sublink_limit) total_links = len(sublinks) progress_text.text(f"Found {total_links} sublinks to process") - # Initialize all_files with main_files to ensure they're included - all_files = main_files.copy() + # Always include files from the main page, regardless of sublinks + all_files = main_files + + if not sublinks: + progress_bar.progress(1.0) + return all_files # Process each sublink for i, sublink in enumerate(sublinks, 1): - progress = i / max(total_links, 1) # Avoid division by zero + progress = i / total_links progress_text.text(f"Processing sublink {i}/{total_links}: {sublink}") progress_bar.progress(progress) try: + # Use a longer timeout for ASP.NET pages which can be slower + sub_timeout = timeout * 2 if is_aspnet else timeout + # Extract files from sublink sub_files = await self.extract_downloadable_files(sublink, custom_ext_list) all_files.extend(sub_files) @@ -1379,13 +2689,12 @@ class DownloadManager: seen_urls.add(f['url']) unique_files.append(f) - # Complete progress + final_count = len(unique_files) progress_text.text(f"Deep search complete!") - file_count_text.text(f"Found {len(unique_files)} unique files") + file_count_text.text(f"Found {final_count} unique files") progress_bar.progress(1.0) - return unique_files - + except Exception as e: logger.error(f"Deep search error: {e}") progress_text.text(f"Error during deep search: {str(e)}") @@ -1401,7 +2710,12 @@ class DownloadManager: def main(): st.title("Advanced File Downloader") - # Initialize session state + # Initialize playwright if needed + if "playwright_installed" not in st.session_state: + with st.spinner("Setting up browser automation. This may take a minute..."): + install_playwright_dependencies() + st.session_state.playwright_installed = True + if "initialized" not in st.session_state: st.session_state.initialized = True st.session_state.discovered_files = [] @@ -1411,44 +2725,17 @@ def main(): st.session_state.do_deep_search = False st.session_state.deep_search_url = None st.session_state.search_results = [] - st.session_state.download_urls = {} # For direct download links - - # Install dependencies if needed - if "dependencies_installed" not in st.session_state: - with st.spinner("Setting up dependencies. 
This may take a minute..."): - st.session_state.dependencies_installed = setup_dependencies() - check_services() - - # Sidebar options + with st.sidebar: - mode = st.radio("Select Mode", ["Manual URL", "Web Search", "Single File"], key="mode_select") - - with st.expander("Search Options", expanded=True): - search_engine = st.selectbox("Search Engine", ["bing", "google"], index=0, key="search_engine") - browser_engine = st.selectbox("Browser Engine", ["playwright", "pyppeteer", "splash"], index=0, key="browser_engine") - custom_extensions = st.text_input("Custom File Extensions", placeholder=".csv, .txt, .epub", key="custom_ext_input", - help="Enter extensions like .csv, .txt") - max_sublinks = st.number_input("Maximum Sublinks", min_value=1, max_value=10000, value=100, step=10, key="max_sublinks") - sublink_timeout = st.number_input("Timeout (seconds)", min_value=1, max_value=300, value=30, step=5, key="timeout") - - with st.expander("Advanced Options", expanded=False): - use_proxy = st.checkbox("Use Proxy", key="use_proxy") + mode = st.radio("Select Mode", ["Manual URL", "Bing Search"], key="mode_select") + with st.expander("Advanced Options", expanded=True): + custom_extensions = st.text_input("Custom File Extensions", placeholder=".csv, .txt, .epub", key="custom_ext_input", help="Enter extensions like .csv, .txt") + max_sublinks = st.number_input("Maximum Sublinks to Process", min_value=1, max_value=100000, value=10000, step=50, key="max_sublinks_input", help="Max sublinks to scan from main page") + sublink_timeout = st.number_input("Search Timeout (seconds per sublink)", min_value=1, max_value=3000, value=30, step=5, key="timeout_input", help="Timeout for each sublink") + use_proxy = st.checkbox("Use Proxy", key="proxy_checkbox") proxy = st.text_input("Proxy URL", placeholder="http://proxy:port", key="proxy_input") - use_stealth = st.checkbox("Use Stealth Mode", value=True, key="use_stealth", - help="Makes browser harder to detect as automated") - enable_network_intercept = st.checkbox("Enable Network Interception", value=NETWORK_INTERCEPTOR_CONFIG["enabled"], - key="enable_intercept", - help="Intercept network traffic to find additional files") - if enable_network_intercept: - NETWORK_INTERCEPTOR_CONFIG["enabled"] = True - intercept_types = st.multiselect("Intercept Types", - ["xhr", "fetch", "document", "media", "stylesheet", "image", "font"], - default=["xhr", "fetch", "document", "media"], - key="intercept_types") - NETWORK_INTERCEPTOR_CONFIG["intercept_types"] = intercept_types - else: - NETWORK_INTERCEPTOR_CONFIG["enabled"] = False - + use_stealth = st.checkbox("Use Stealth Mode (harder to detect)", value=True, key="stealth_checkbox") + with st.expander("Google Drive Integration", expanded=False): if st.button("Start Google Sign-In", key="google_signin_btn"): auth_url = get_google_auth_url() @@ -1458,75 +2745,97 @@ def main(): creds, msg = exchange_code_for_credentials(auth_code) st.session_state.google_creds = creds st.write(msg) - - # Main content area + + with st.expander("Advanced Browser Settings", expanded=False): + # Captcha handling options + st.write("**Captcha Handling**") + captcha_option = st.radio( + "Captcha Detection:", + ["Auto-detect only", "Manual solve (shows captcha)"], + index=0, + key="captcha_option" + ) + + # Proxy rotation settings + st.write("**Proxy Rotation**") + enable_rotation = st.checkbox("Enable Proxy Rotation", value=False, key="enable_rotation") + if enable_rotation: + PROXY_ROTATION_CONFIG["enabled"] = True + proxy_list = st.text_area( + "Proxy 
List (one per line)", + placeholder="http://proxy1:port\nhttp://proxy2:port", + key="proxy_list" + ) + if proxy_list: + PROXY_ROTATION_CONFIG["proxies"] = [p.strip() for p in proxy_list.split("\n") if p.strip()] + rotation_interval = st.slider( + "Rotation Interval (# of requests)", + min_value=1, + max_value=50, + value=10, + key="rotation_interval" + ) + PROXY_ROTATION_CONFIG["rotation_interval"] = rotation_interval + if mode == "Manual URL": st.header("Manual URL Mode") - url = st.text_input("Enter URL", placeholder="https://example.com/downloads", key="url_input") - + url = st.text_input("Enter URL", placeholder="https://example.com", key="url_input") col1, col2 = st.columns([3, 1]) with col1: if st.button("Deep Search", use_container_width=True, key="deep_search_btn"): if url: - # Process custom extensions custom_ext_list = [ext.strip().lower() for ext in custom_extensions.split(',') if ext.strip()] + valid_ext_list = [ext for ext in custom_ext_list if re.match(r'^\.[a-zA-Z0-9]+$', ext)] + if custom_ext_list != valid_ext_list: + st.warning("Invalid extensions ignored. Use format like '.csv'.") - with st.spinner("Searching for files..."): - async def run_deep_search(): + @st.cache_resource + def run_deep_search(url, ext_list, max_links, timeout_val, use_proxy_val, proxy_val, use_stealth_val): + async def _run(): async with DownloadManager( - browser_engine=browser_engine, - use_proxy=use_proxy, - proxy=proxy, - use_stealth=use_stealth + use_proxy=use_proxy_val, + proxy=proxy_val, + use_stealth=use_stealth_val ) as dm: - files = await dm.deep_search(url, custom_ext_list, max_sublinks, sublink_timeout) + files = await dm.deep_search(url, ext_list, max_links, timeout_val) return files - - # Run the search - files = asyncio.run(run_deep_search()) - - if files: - st.session_state.discovered_files = files - st.session_state.current_url = url - st.success(f"Found {len(files)} files!") - else: - st.warning("No files found.") - - # Display and process discovered files + return asyncio.run(_run()) + + with st.spinner("Searching for files..."): + files = run_deep_search(url, valid_ext_list, max_sublinks, + sublink_timeout, use_proxy, proxy, use_stealth) + + if files: + st.session_state.discovered_files = files + st.session_state.current_url = url + st.success(f"Found {len(files)} files!") + else: + st.warning("No files found.") + if st.session_state.discovered_files: files = st.session_state.discovered_files - - # Select/deselect buttons - col1, col2 = st.columns([1, 1]) + col1, col2 = st.columns([1, 4]) with col1: if st.button("Select All", key="select_all_btn"): st.session_state.selected_files = list(range(len(files))) - with col2: if st.button("Clear Selection", key="clear_selection_btn"): st.session_state.selected_files = [] - # Display file list with metadata + # Create a formatted display of files with metadata file_options = [] for i, file in enumerate(files): filename = file['filename'] size = file['size'] meta = file.get('metadata', {}) - # Format display info + # Format display string with relevant metadata if meta and 'Pages' in meta: file_info = f"{filename} ({size}) - {meta.get('Pages', '')} pages" else: file_info = f"{filename} ({size})" - + file_options.append((i, file_info)) - - # Generate direct download URL for this file - if i not in st.session_state.download_urls: - # Generate a unique key for this file - file_key = base64.urlsafe_b64encode(f"{file['url']}_{time.time()}".encode()).decode() - st.session_state.download_urls[i] = file_key - # File selection multiselect 
            selected_indices = st.multiselect(
                "Select files to download",
                options=[i for i, _ in file_options],
@@ -1537,341 +2846,215 @@ def main():
            st.session_state.selected_files = selected_indices
-            # Display individual files with direct download links
-            if files:
-                st.subheader("Available Files")
-                for i, file in enumerate(files):
-                    with st.expander(f"{i+1}. {file['filename']} ({file['size']})"):
-                        st.write(f"Source: {file.get('source_url', 'Unknown')}")
-                        st.write(f"URL: {file['url']}")
-
-                        # Download button for this specific file
-                        if st.button(f"Download this file", key=f"download_single_{i}"):
-                            with st.spinner(f"Downloading {file['filename']}..."):
-                                # Create downloads directory
-                                download_dir = "./downloads"
-                                os.makedirs(download_dir, exist_ok=True)
-
-                                # Download the file
-                                async def download_single():
-                                    async with DownloadManager(
-                                        browser_engine=browser_engine,
-                                        use_proxy=use_proxy,
-                                        proxy=proxy,
-                                        use_stealth=use_stealth
-                                    ) as dm:
-                                        return await dm.download_file(file, download_dir)
-
-                                file_path = asyncio.run(download_single())
-
-                                if file_path:
-                                    # Create a download link
-                                    with open(file_path, "rb") as f:
-                                        file_bytes = f.read()
-
-                                    file_name = os.path.basename(file_path)
-                                    mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
-
-                                    st.download_button(
-                                        label=f"Download {file_name}",
-                                        data=file_bytes,
-                                        file_name=file_name,
-                                        mime=mime_type,
-                                        key=f"download_btn_{i}"
-                                    )
-
-                                    st.success(f"Downloaded successfully to {file_path}")
-                                else:
-                                    st.error(f"Failed to download {file['filename']}")
-
-            # Batch download options
            if selected_indices:
-                st.subheader("Batch Download Options")
-
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    download_dir = st.text_input("Download Directory", value="./downloads", key="download_dir_input")
                with col2:
                    create_zip = st.checkbox("Create ZIP file", value=True, key="create_zip_checkbox")
                with col3:
-                    delete_after = st.checkbox("Delete after ZIP", key="delete_after_checkbox")
+                    delete_after = st.checkbox("Delete after creating ZIP", key="delete_after_checkbox")
                with col4:
                    upload_to_drive = st.checkbox("Upload to Google Drive", key="upload_drive_checkbox")
-                if st.button("Download Selected Files", key="batch_download_btn"):
-                    with st.spinner(f"Downloading {len(selected_indices)} files..."):
-                        if not os.path.exists(download_dir):
-                            os.makedirs(download_dir)
-
-                        # Start download process
+                if st.button("Download Selected", key="download_btn"):
+                    if not os.path.exists(download_dir):
+                        os.makedirs(download_dir)
+
+                    async def download_files():
                        downloaded_paths = []
                        progress_bar = st.progress(0)
                        status_text = st.empty()
-                        async def download_batch():
-                            async with DownloadManager(
-                                browser_engine=browser_engine,
-                                use_proxy=use_proxy,
-                                proxy=proxy,
-                                use_stealth=use_stealth
-                            ) as dm:
-                                paths = []
-                                for i, idx in enumerate(selected_indices):
-                                    file_info = files[idx]
-                                    progress = (i + 1) / len(selected_indices)
-                                    status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_indices)})")
-                                    progress_bar.progress(progress)
-
-                                    path = await dm.download_file(file_info, download_dir)
-                                    if path:
-                                        paths.append(path)
+                        async with DownloadManager(
+                            use_proxy=use_proxy,
+                            proxy=proxy,
+                            use_stealth=use_stealth
+                        ) as dm:
+                            for i, idx in enumerate(selected_indices):
+                                progress = (i + 1) / len(selected_indices)
+                                file_info = files[idx]
+                                status_text.text(f"Downloading {file_info['filename']}... ({i+1}/{len(selected_indices)})")
+                                progress_bar.progress(progress)
-                            return paths
-
-                        downloaded_paths = asyncio.run(download_batch())
-                        status_text.empty()
-                        progress_bar.empty()
+                                path = await dm.download_file(file_info, download_dir, url)
+                                if path:
+                                    downloaded_paths.append(path)
+
+                        status_text.empty()
+                        progress_bar.empty()
+                        return downloaded_paths
+
+                    with st.spinner("Downloading files..."):
+                        downloaded = asyncio.run(download_files())
+
+                    if downloaded:
+                        st.success(f"Successfully downloaded {len(downloaded)} files")
-                        if downloaded_paths:
-                            st.success(f"Successfully downloaded {len(downloaded_paths)} files")
+                        if create_zip:
+                            zip_path = create_zip_file(downloaded, download_dir)
+                            st.success(f"Created ZIP file: {zip_path}")
-                            if create_zip:
-                                zip_path = create_zip_file(downloaded_paths, download_dir)
-                                st.success(f"Created ZIP file: {zip_path}")
+                            # Provide download link for the zip file
+                            with open(zip_path, "rb") as f:
+                                zip_data = f.read()
+
+                            st.download_button(
+                                label="Download ZIP",
+                                data=zip_data,
+                                file_name=os.path.basename(zip_path),
+                                mime="application/zip",
+                                key="download_zip_btn"
+                            )
+
+                            # Upload to Google Drive if requested
+                            if upload_to_drive and st.session_state.google_creds:
+                                drive_service = googleapiclient.discovery.build("drive", "v3", credentials=st.session_state.google_creds)
+                                folder_id = create_drive_folder(drive_service, f"Downloads_{urlparse(url).netloc}")
+                                drive_id = google_drive_upload(zip_path, st.session_state.google_creds, folder_id)
+                                if not isinstance(drive_id, str) or not drive_id.startswith("Error"):
+                                    st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
+                                else:
+                                    st.error(drive_id)
+
+                            # Delete original files if requested
+                            if delete_after:
+                                for path in downloaded:
+                                    try:
+                                        os.remove(path)
+                                    except Exception as e:
+                                        st.warning(f"Could not delete {path}: {e}")
+                                st.info("Deleted original files after ZIP creation")
+                        else:
+                            # Provide individual file downloads
+                            st.write("Download files individually:")
+                            for path in downloaded:
+                                with open(path, "rb") as f:
+                                    file_data = f.read()
-                                # Provide download link for the zip file
-                                with open(zip_path, "rb") as f:
-                                    zip_data = f.read()
+                                file_name = os.path.basename(path)
+                                mime_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
                                st.download_button(
-                                    label="Download ZIP",
-                                    data=zip_data,
-                                    file_name=os.path.basename(zip_path),
-                                    mime="application/zip",
-                                    key="download_zip_btn"
+                                    label=f"Download {file_name}",
+                                    data=file_data,
+                                    file_name=file_name,
+                                    mime=mime_type,
+                                    key=f"download_file_{path}"
                                )
-
-                                # Upload to Google Drive if requested
-                                if upload_to_drive and st.session_state.google_creds:
-                                    with st.spinner("Uploading to Google Drive..."):
-                                        drive_service = googleapiclient.discovery.build(
-                                            "drive", "v3", credentials=st.session_state.google_creds
-                                        )
-                                        folder_id = create_drive_folder(
-                                            drive_service, f"Downloads_{get_domain(url)}"
-                                        )
-                                        drive_id = google_drive_upload(
-                                            zip_path, st.session_state.google_creds, folder_id
-                                        )
-
-                                        if not isinstance(drive_id, str) or not drive_id.startswith("Error"):
-                                            st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
-                                        else:
-                                            st.error(drive_id)
-
-                                # Delete original files if requested
-                                if delete_after:
-                                    for path in downloaded_paths:
-                                        try:
-                                            os.remove(path)
-                                        except Exception as e:
-                                            st.warning(f"Could not delete {path}: {e}")
-                                    st.info("Deleted original files after ZIP creation")
-
-    elif mode == "Web Search":
-        st.header("Web Search Mode")
-
-        # Search query input
-        query = st.text_input("Enter search query", placeholder="example file type:pdf", key="search_query")
-        num_results = st.slider("Number of results", 1, 50, 10, key="num_results")
+
+    elif mode == "Bing Search":
+        st.header("Bing Search Mode")
+        query = st.text_input("Enter search query", key="search_query_input")
+        num_results = st.slider("Number of results", 1, 50, 5, key="num_results_slider")
-        if st.button("Search", key="web_search_btn"):
+        if st.button("Search", key="search_btn"):
            if query:
-                with st.spinner("Searching the web..."):
-                    async def run_search():
-                        async with DownloadManager(
-                            browser_engine=browser_engine,
-                            use_proxy=use_proxy,
-                            proxy=proxy,
-                            query=query,
-                            num_results=num_results,
-                            use_stealth=use_stealth
-                        ) as dm:
-                            urls = await dm.search_web(search_engine)
-                            return urls
-
-                    urls = asyncio.run(run_search())
-
-                    if urls:
-                        st.session_state.search_results = urls
-                        st.success(f"Found {len(urls)} results!")
-
-                        # Display search results with deep search option
-                        for i, url in enumerate(urls, 1):
-                            with st.expander(f"Result {i}: {url}", expanded=(i == 1)):
-                                st.write(f"URL: {url}")
-                                if st.button(f"Search for files", key=f"search_result_{i}"):
-                                    st.session_state.deep_search_url = url
-                                    st.session_state.do_deep_search = True
-                    else:
-                        st.warning("No search results found.")
-
-    # Handle deep search of a result if requested
-    if st.session_state.do_deep_search and st.session_state.deep_search_url:
-        url = st.session_state.deep_search_url
-        st.info(f"Searching for files on: {url}")
-
-        # Reset the search flag to avoid re-running
-        st.session_state.do_deep_search = False
-
-        # Process custom extensions
-        custom_ext_list = [ext.strip().lower() for ext in custom_extensions.split(',') if ext.strip()]
-
-        with st.spinner("Searching for files..."):
-            async def deep_search_result():
+                async def run_search():
                    async with DownloadManager(
-                        browser_engine=browser_engine,
-                        use_proxy=use_proxy,
-                        proxy=proxy,
+                        use_proxy=use_proxy,
+                        proxy=proxy,
+                        query=query,
+                        num_results=num_results,
                        use_stealth=use_stealth
                    ) as dm:
-                        return await dm.deep_search(url, custom_ext_list, max_sublinks, sublink_timeout)
+                        with st.spinner("Searching..."):
+                            urls = await dm.search_bing()
+                            if urls:
+                                st.session_state.search_results = urls
+                                st.success(f"Found {len(urls)} results!")
+
+                                # Create expanders for each result
+                                for i, url in enumerate(urls, 1):
+                                    with st.expander(f"Result {i}: {url}", expanded=(i == 1)):
+                                        if st.button(f"Deep Search Result {i}", key=f"deep_search_result_{i}"):
+                                            st.session_state.deep_search_url = url
+                                            st.session_state.do_deep_search = True
+                            else:
+                                st.warning("No search results found.")
+
+                asyncio.run(run_search())
+
+        # Handle deep search based on search results
+        if st.session_state.do_deep_search and st.session_state.deep_search_url:
+            url = st.session_state.deep_search_url
+            st.info(f"Deep searching: {url}")
+
+            # Reset the flag to avoid re-running
+            st.session_state.do_deep_search = False
-            files = asyncio.run(deep_search_result())
+            # Set up custom extensions
+            custom_ext_list = [ext.strip().lower() for ext in custom_extensions.split(',') if ext.strip()]
+            valid_ext_list = [ext for ext in custom_ext_list if re.match(r'^\.[a-zA-Z0-9]+$', ext)]
+
+            @st.cache_resource
+            def run_deep_search(url, ext_list, max_links, timeout_val, use_proxy_val, proxy_val, use_stealth_val):
+                async def _run():
+                    async with DownloadManager(
+                        use_proxy=use_proxy_val,
+                        proxy=proxy_val,
+                        use_stealth=use_stealth_val
+                    ) as dm:
+                        files = await dm.deep_search(url, ext_list, max_links, timeout_val)
+                        return files
+                return asyncio.run(_run())
+
+            with st.spinner("Searching for files..."):
+                files = run_deep_search(url, valid_ext_list, max_sublinks,
+                                        sublink_timeout, use_proxy, proxy, use_stealth)
            if files:
                st.session_state.discovered_files = files
                st.session_state.current_url = url
                st.success(f"Found {len(files)} files!")
            else:
-                st.warning("No files found on this page.")
-
-    elif mode == "Single File":
-        st.header("Single File Download")
-
-        # View-only Google Drive download
-        with st.expander("Download View-Only Google Drive Document", expanded=True):
-            st.write("Download protected/view-only Google Drive documents")
-
-            file_id = st.text_input(
-                "Google Drive File ID",
-                placeholder="Enter ID from drive.google.com/file/d/THIS_IS_THE_ID/view",
-                key="drive_file_id"
-            )
-
-            if st.button("Download Document", key="drive_download_btn") and file_id:
-                with st.spinner("Downloading view-only document... (this may take a minute)"):
-                    # Create download directory
-                    download_dir = "./downloads"
-                    os.makedirs(download_dir, exist_ok=True)
-
-                    # Set output path
-                    output_path = os.path.join(download_dir, f"gdrive_{file_id}.pdf")
-
-                    # Download the file
-                    async def download_drive_file():
-                        async with DownloadManager(
-                            browser_engine=browser_engine,
-                            use_proxy=use_proxy,
-                            proxy=proxy,
-                            use_stealth=use_stealth
-                        ) as dm:
-                            file_info = {
-                                'url': f"https://drive.google.com/file/d/{file_id}/view",
-                                'filename': f"gdrive_{file_id}.pdf",
-                                'metadata': {'file_id': file_id, 'view_only': True}
-                            }
-                            return await dm.download_viewonly_google_drive(file_info, output_path)
-
-                    result_path = asyncio.run(download_drive_file())
-
-                    if result_path:
-                        st.success("Document downloaded successfully!")
-
-                        # Provide download link
-                        with open(result_path, "rb") as f:
-                            file_bytes = f.read()
-
-                        st.download_button(
-                            label="Download PDF",
-                            data=file_bytes,
-                            file_name=os.path.basename(result_path),
-                            mime="application/pdf",
-                            key="drive_pdf_download"
-                        )
-                    else:
-                        st.error("Failed to download the document. Please check the file ID and try again.")
+                st.warning("No files found.")
+
+    # Add a special section for direct Google Drive file download
+    st.markdown("---")
+    with st.expander("Download View-Only Google Drive Document", expanded=False):
+        st.write("Download protected/view-only Google Drive documents - just enter the file ID")
+        file_id = st.text_input("Google Drive File ID",
+                                placeholder="Example: 139CTPrz7jOuJRW6pL6eupH-7B4fnNRku",
+                                help="Enter the ID from the Google Drive URL (e.g., from 'drive.google.com/file/d/THIS_IS_THE_ID/view')")
-        # Direct URL download
-        with st.expander("Download from Direct URL", expanded=True):
-            st.write("Download a file from a direct URL")
-
-            file_url = st.text_input(
-                "File URL",
-                placeholder="https://example.com/file.pdf",
-                key="direct_url"
-            )
+        if st.button("Download Document") and file_id:
+            download_dir = "./downloads"
+            os.makedirs(download_dir, exist_ok=True)
+            output_path = os.path.join(download_dir, f"gdrive_{file_id}.pdf")
-            file_name = st.text_input(
-                "Save as (optional)",
-                placeholder="Leave blank to use original filename",
-                key="save_filename"
-            )
-
-            if st.button("Download File", key="direct_download_btn") and file_url:
-                with st.spinner("Downloading file..."):
-                    # Create download directory
-                    download_dir = "./downloads"
-                    os.makedirs(download_dir, exist_ok=True)
-
-                    # Determine filename
-                    if not file_name:
-                        file_name = os.path.basename(urlparse(file_url).path)
-                        if not file_name or file_name == '/':
-                            file_name = f"downloaded_file_{int(time.time())}{get_file_extension(file_url)}"
-
-                    # Create file info
-                    file_info = {
-                        'url': file_url,
-                        'filename': file_name,
-                        'metadata': {}
-                    }
-
-                    # Download the file
-                    async def download_direct_file():
-                        async with DownloadManager(
-                            browser_engine=browser_engine,
-                            use_proxy=use_proxy,
-                            proxy=proxy,
-                            use_stealth=use_stealth
-                        ) as dm:
-                            return await dm.download_file(file_info, download_dir)
+            with st.spinner("Downloading view-only document... (this may take a minute)"):
+                async def download_viewonly():
+                    async with DownloadManager(use_stealth=use_stealth) as dm:
+                        file_info = {
+                            'url': f"https://drive.google.com/file/d/{file_id}/view",
+                            'filename': f"gdrive_{file_id}.pdf",
+                            'metadata': {'file_id': file_id, 'file_type': 'pdf', 'view_only': True}
+                        }
+                        result_path = await dm.force_download_viewonly(file_info, output_path)
+                        return result_path
+
+                result = asyncio.run(download_viewonly())
+
+                if result:
+                    st.success("Document downloaded successfully!")
-                    file_path = asyncio.run(download_direct_file())
+                    # Provide download button
+                    with open(result, "rb") as f:
+                        file_bytes = f.read()
-                    if file_path:
-                        st.success(f"File downloaded successfully to {file_path}")
-
-                        # Provide download link
-                        with open(file_path, "rb") as f:
-                            file_bytes = f.read()
-
-                        mime_type = mimetypes.guess_type(file_path)[0] or "application/octet-stream"
-
-                        st.download_button(
-                            label=f"Download {os.path.basename(file_path)}",
-                            data=file_bytes,
-                            file_name=os.path.basename(file_path),
-                            mime=mime_type,
-                            key="direct_file_download"
-                        )
-                    else:
-                        st.error("Failed to download the file. Please check the URL and try again.")
-
-    # Footer
-    st.markdown("---")
-    st.markdown("Created by [Euler314](https://github.com/euler314) | Enhanced with advanced scraping technologies")
+                    st.download_button(
+                        label="Download PDF",
+                        data=file_bytes,
+                        file_name=f"gdrive_{file_id}.pdf",
+                        mime="application/pdf"
+                    )
+                else:
+                    st.error("Failed to download the document. Please check the file ID and try again.")
+
+    # Add footer with attribution
+    st.markdown('---')
+    st.markdown('Created by [Euler314](https://github.com/euler314)')
-# Run the app
 if __name__ == "__main__":
     main()
\ No newline at end of file