diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,20 +1,79 @@
+import streamlit as st
+# This MUST be the first Streamlit command
+st.set_page_config(page_title="Advanced File Downloader", layout="wide", page_icon="📁")
+
+# Core imports
import os
-import json
+import subprocess
+from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
import asyncio
-import streamlit as st
import logging
+from urllib.parse import urlparse, urljoin, unquote, parse_qs, quote
+import re
+from pathlib import Path
+from io import BytesIO
+import random
+from bs4 import BeautifulSoup
+from PyPDF2 import PdfReader
+import zipfile
+import tempfile
+import mimetypes
+import requests
+import datetime
+import traceback
+import base64
+import shutil
+import json
+import time
+from PIL import Image
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+import google_auth_oauthlib.flow
+import googleapiclient.discovery
+import google.auth.transport.requests
+import googleapiclient.http
-# Configure logging
+# Enhanced RAG search imports
+import nltk
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+import docx2txt
+
+# Try to import sentence-transformers for better embeddings
+try:
+ from sentence_transformers import SentenceTransformer
+ HAVE_TRANSFORMERS = True
+except ImportError:
+ HAVE_TRANSFORMERS = False
+
+# Try to download NLTK data if not already present
+try:
+ nltk.data.find('tokenizers/punkt')
+except LookupError:
+ try:
+ nltk.download('punkt', quiet=True)
+    except Exception:
+ pass
+
+try:
+    nltk.data.find('corpora/stopwords')
+except LookupError:
+    try:
+        nltk.download('stopwords', quiet=True)
+    except Exception:
+        pass
+
+# Build the stopword set whether or not the corpus was already present, falling back
+# to a small built-in list if the NLTK data is unavailable.
+try:
+    from nltk.corpus import stopwords
+    STOPWORDS = set(stopwords.words('english'))
+except Exception:
+    STOPWORDS = set(['the', 'and', 'a', 'in', 'to', 'of', 'is', 'it', 'that', 'for', 'with', 'as', 'on', 'by'])
+
+# -------------------- Logging Setup --------------------
logging.basicConfig(
level=logging.INFO,
- format='%(asctime)s - %(levelname)s - %(message)s',
- handlers=[
- logging.FileHandler('app.log'),
- logging.StreamHandler()
- ]
+ format='%(asctime)s - %(levelname)s - %(message)s'
)
+logger = logging.getLogger(__name__)
-# Load Google OAuth config from environment variables
+# -------------------- Google OAuth Config --------------------
GOOGLE_OAUTH_CONFIG = {
"web": {
"client_id": os.environ.get("GOOGLE_CLIENT_ID"),
@@ -27,23 +86,3785 @@ GOOGLE_OAUTH_CONFIG = {
}
}
-# Setup the UI
-st.set_page_config(page_title="Advanced File Downloader", layout="wide", page_icon="๐")
+# -------------------- Stealth and UA Settings --------------------
+# Extended user agent list for better variety
+USER_AGENTS = [
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Edg/116.0.1938.54',
+ 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1',
+ 'Mozilla/5.0 (iPad; CPU OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0'
+]
-# Import the core components (still keeping modular organization)
-from utils import USER_AGENTS, STEALTH_SETTINGS, PROXY_ROTATION_CONFIG
-from utils import (
- get_random_user_agent, sizeof_fmt, create_zip_file, humanize_file_size, get_domain,
- is_download_link, normalize_download_url, detect_captcha, show_user_friendly_error
-)
-from google_drive import (
- get_google_auth_url, exchange_code_for_credentials, google_drive_upload, create_drive_folder
-)
-from download_manager import DownloadManager
-from rag_search import EnhancedRAGSearch
+# Stealth browser settings
+STEALTH_SETTINGS = {
+ # Hardware features to modify/disable
+ "hardware_concurrency": 4,
+ "device_memory": 8,
+ # Browser features to enable/disable
+ "webgl_vendor": "Google Inc. (Intel)",
+ "webgl_renderer": "Intel Iris OpenGL Engine",
+ "languages": ["en-US", "en"],
+ "disable_webrtc": True,
+ # Additional timing randomization
+ "navigator_platform": "Win32",
+ "touch_support": False
+}
+
+# Proxy rotation configuration (if using multiple proxies)
+PROXY_ROTATION_CONFIG = {
+ "enabled": False, # Set to True to enable rotation
+ "rotation_interval": 10, # Rotate every 10 requests
+ "proxies": [] # Will be populated from the UI if needed
+}
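+# Illustrative sketch of how this config might be populated at runtime (the proxy
+# addresses are placeholders only):
+#   PROXY_ROTATION_CONFIG["enabled"] = True
+#   PROXY_ROTATION_CONFIG["proxies"] = ["http://proxy1.example.com:8080", "http://proxy2.example.com:3128"]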
+
+# -------------------- Enhanced RAG Search with Small LLM --------------------
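+# Illustrative end-to-end usage of the class below (a sketch; the file bytes and the
+# metadata dict come from the download pipeline, which uses the same 'url'/'filename' keys):
+#   rag = EnhancedRAGSearch()
+#   rag.add_file(pdf_bytes, {'filename': 'past_exam.pdf', 'url': 'https://example.com/past_exam.pdf'})
+#   if rag.build_index():
+#       for hit in rag.search("past exam papers", top_k=5):
+#           print(hit['rank'], hit['score'], hit['file_info']['filename'])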
+class EnhancedRAGSearch:
+ def __init__(self):
+ self.file_texts = []
+ self.chunks = [] # Document chunks for more targeted search
+ self.chunk_metadata = [] # Metadata for each chunk
+ self.file_metadata = []
+ self.languages = []
+ self.model = None
+
+ # Try to load the sentence transformer model if available
+ if HAVE_TRANSFORMERS:
+ try:
+ # Use a small, efficient model
+ self.model = SentenceTransformer('all-MiniLM-L6-v2')
+ self.use_transformer = True
+ logger.info("Using sentence-transformers for RAG")
+ except Exception as e:
+ logger.warning(f"Error loading sentence-transformer: {e}")
+ self.use_transformer = False
+ else:
+ self.use_transformer = False
+
+ # Fallback to TF-IDF if transformers not available
+ if not self.use_transformer:
+ self.vectorizer = TfidfVectorizer(
+ stop_words='english',
+ ngram_range=(1, 2), # Use bigrams for better context
+ max_features=15000, # Use more features for better representation
+ min_df=1 # Include rare terms
+ )
+
+ self.vectors = None
+ self.chunk_vectors = None
+
+ def add_file(self, file_data, file_info):
+ """Add a file to the search index with improved processing"""
+ file_ext = os.path.splitext(file_info['filename'])[1].lower()
+ text = self.extract_text(file_data, file_ext)
+
+ if text:
+ # Store the whole document text
+ self.file_texts.append(text)
+ self.file_metadata.append(file_info)
+
+ # Try to detect language
+ try:
+ # Simple language detection based on stopwords
+ words = re.findall(r'\b\w+\b', text.lower())
+ english_stopwords_ratio = len([w for w in words[:100] if w in STOPWORDS]) / max(1, len(words[:100]))
+ lang = 'en' if english_stopwords_ratio > 0.2 else 'unknown'
+ self.languages.append(lang)
+            except Exception:
+ self.languages.append('en') # Default to English
+
+ # Create chunks for more granular search
+ chunks = self.create_chunks(text)
+ for chunk in chunks:
+ self.chunks.append(chunk)
+ self.chunk_metadata.append({
+ 'file_info': file_info,
+ 'chunk_size': len(chunk),
+ 'file_index': len(self.file_texts) - 1
+ })
+
+ return True
+ return False
+
+ def create_chunks(self, text, chunk_size=1000, overlap=200):
+ """Split text into overlapping chunks for better search precision"""
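+        # With the defaults (chunk_size=1000, overlap=200) the fallback splitter below
+        # advances 800 characters per step, so a 2,500-character document would yield
+        # chunks starting at offsets 0, 800, 1600 and 2400 (illustrative figures).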
+ # Try to use NLTK for sentence-aware chunking
+ try:
+ sentences = nltk.sent_tokenize(text)
+ chunks = []
+ current_chunk = ""
+
+ for sentence in sentences:
+ if len(current_chunk) + len(sentence) <= chunk_size:
+ current_chunk += sentence + " "
+ else:
+ # Add current chunk if it has content
+ if current_chunk:
+ chunks.append(current_chunk.strip())
+
+ # Start new chunk with overlap from previous chunk
+ if len(current_chunk) > overlap:
+ # Find the last space within the overlap region
+ overlap_text = current_chunk[-overlap:]
+ last_space = overlap_text.rfind(' ')
+ if last_space != -1:
+ current_chunk = current_chunk[-(overlap-last_space):] + sentence + " "
+ else:
+ current_chunk = sentence + " "
+ else:
+ current_chunk = sentence + " "
+
+ # Add the last chunk if it has content
+ if current_chunk:
+ chunks.append(current_chunk.strip())
+
+ return chunks
+        except Exception:
+ # Fallback to simpler chunking approach
+ chunks = []
+ for i in range(0, len(text), chunk_size - overlap):
+ chunk = text[i:i + chunk_size]
+ if chunk:
+ chunks.append(chunk)
+ return chunks
+
+ def extract_text(self, file_data, file_ext):
+ """Extract text from different file types with enhanced support"""
+ try:
+ if file_ext.lower() == '.pdf':
+                reader = PdfReader(BytesIO(file_data))
+ text = ""
+ for page in reader.pages:
+ extracted = page.extract_text()
+ if extracted:
+ text += extracted + "\n"
+                # Note: scanned PDFs with no extractable text would need OCR (extra libraries, e.g. pytesseract)
+ return text
+ elif file_ext.lower() in ['.docx', '.doc']:
+ return docx2txt.process(BytesIO(file_data))
+ elif file_ext.lower() in ['.txt', '.csv', '.json', '.html', '.htm']:
+ # Handle both UTF-8 and other common encodings
+                try:
+                    return file_data.decode('utf-8')
+                except UnicodeDecodeError:
+                    for enc in ('latin-1', 'iso-8859-1', 'windows-1252'):
+                        try:
+                            return file_data.decode(enc)
+                        except UnicodeDecodeError:
+                            pass
+                    # Last resort: ignore undecodable bytes
+                    return file_data.decode('utf-8', errors='ignore')
+ elif file_ext.lower() in ['.pptx', '.ppt', '.xlsx', '.xls']:
+ # For these types, we would need additional libraries
+ # For now, return a placeholder message
+ return f"[Content of {file_ext} file - install additional libraries for full text extraction]"
+ else:
+ return ""
+ except Exception as e:
+ logger.error(f"Error extracting text: {e}")
+ return ""
+
+ def build_index(self):
+ """Build both document and chunk search indices"""
+ if not self.file_texts:
+ return False
+
+ try:
+ if self.use_transformer:
+ # Use sentence transformer models for embeddings
+ logger.info("Building document and chunk embeddings with transformer model...")
+ self.vectors = self.model.encode(self.file_texts, show_progress_bar=False)
+
+ # Build chunk-level index if we have chunks
+ if self.chunks:
+ # Process in batches to avoid memory issues
+ batch_size = 32
+ chunk_vectors = []
+ for i in range(0, len(self.chunks), batch_size):
+ batch = self.chunks[i:i+batch_size]
+ batch_vectors = self.model.encode(batch, show_progress_bar=False)
+ chunk_vectors.append(batch_vectors)
+ self.chunk_vectors = np.vstack(chunk_vectors)
+ else:
+ # Build document-level index
+ self.vectors = self.vectorizer.fit_transform(self.file_texts)
+
+ # Build chunk-level index if we have chunks
+ if self.chunks:
+ self.chunk_vectors = self.vectorizer.transform(self.chunks)
+
+ return True
+ except Exception as e:
+ logger.error(f"Error building search index: {e}")
+ return False
+
+ def expand_query(self, query):
+        """Add related terms to the query for better recall (heuristic, dictionary-based stand-in for a small LLM)"""
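+        # Example (illustrative): expand_query("exam") returns the original query plus
+        # related terms from the dictionary below such as "test", "assessment", "quiz"
+        # and "past paper" (order varies because the terms are collected in a set).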
+ # Dictionary of related terms for common keywords
+ expansions = {
+ "exam": ["test", "assessment", "quiz", "paper", "exam paper", "past paper", "past exam"],
+ "test": ["exam", "quiz", "assessment", "paper"],
+ "document": ["file", "paper", "report", "doc", "documentation"],
+ "manual": ["guide", "instruction", "documentation", "handbook"],
+ "tutorial": ["guide", "instructions", "how-to", "lesson"],
+ "article": ["paper", "publication", "journal", "research"],
+ "research": ["study", "investigation", "paper", "analysis"],
+ "book": ["textbook", "publication", "volume", "edition"],
+ "thesis": ["dissertation", "paper", "research", "study"],
+ "report": ["document", "paper", "analysis", "summary"],
+ "assignment": ["homework", "task", "project", "work"],
+ "lecture": ["class", "presentation", "talk", "lesson"],
+ "notes": ["annotations", "summary", "outline", "study material"],
+ "syllabus": ["curriculum", "course outline", "program", "plan"],
+ "paper": ["document", "article", "publication", "exam", "test"],
+ "question": ["problem", "query", "exercise", "inquiry"],
+ "solution": ["answer", "resolution", "explanation", "result"],
+ "reference": ["source", "citation", "bibliography", "resource"],
+ "analysis": ["examination", "study", "evaluation", "assessment"],
+ "guide": ["manual", "instruction", "handbook", "tutorial"],
+ "worksheet": ["exercise", "activity", "handout", "practice"],
+ "review": ["evaluation", "assessment", "critique", "feedback"],
+ "material": ["resource", "content", "document", "information"],
+ "data": ["information", "statistics", "figures", "numbers"]
+ }
+
+        # Heuristic query expansion based on the dictionary above (stands in for a mini-LLM)
+ query_words = re.findall(r'\b\w+\b', query.lower())
+ expanded_terms = set()
+
+ # Directly add expansions from our dictionary
+ for word in query_words:
+ if word in expansions:
+ expanded_terms.update(expansions[word])
+
+ # Add common academic file formats if not already included
+ if any(term in query.lower() for term in ["file", "document", "download", "paper"]):
+ if not any(ext in query.lower() for ext in ["pdf", "docx", "ppt", "excel"]):
+ expanded_terms.update(["pdf", "docx", "pptx", "xlsx"])
+
+ # Add special academic terms when the query seems related to education
+ if any(term in query.lower() for term in ["course", "university", "college", "school", "class"]):
+ expanded_terms.update(["syllabus", "lecture", "notes", "textbook"])
+
+ # Return original query plus expanded terms
+ if expanded_terms:
+ expanded_query = f"{query} {' '.join(expanded_terms)}"
+ logger.info(f"Expanded query: '{query}' -> '{expanded_query}'")
+ return expanded_query
+ return query
+
+ def search(self, query, top_k=5, search_chunks=True):
+ """Enhanced search with both document and chunk-level search"""
+ if self.vectors is None:
+ return []
+
+        # Expand the query with related terms (dictionary-based stand-in for a small LLM)
+ expanded_query = self.expand_query(query)
+
+ try:
+ results = []
+
+ if self.use_transformer:
+ # Transform the query to embedding
+ query_vector = self.model.encode([expanded_query])[0]
+
+ # First search at document level for higher-level matches
+ if self.vectors is not None:
+ # Compute similarities between query and documents
+ doc_similarities = cosine_similarity(
+ query_vector.reshape(1, -1),
+ self.vectors
+ ).flatten()
+
+ top_doc_indices = doc_similarities.argsort()[-top_k:][::-1]
+
+ for i, idx in enumerate(top_doc_indices):
+ if doc_similarities[idx] > 0.2: # Threshold to exclude irrelevant results
+ results.append({
+ 'file_info': self.file_metadata[idx],
+ 'score': float(doc_similarities[idx]),
+ 'rank': i+1,
+ 'match_type': 'document',
+ 'language': self.languages[idx] if idx < len(self.languages) else 'unknown'
+ })
+
+ # Then search at chunk level for more specific matches if enabled
+ if search_chunks and self.chunk_vectors is not None:
+ # Compute similarities between query and chunks
+ chunk_similarities = cosine_similarity(
+ query_vector.reshape(1, -1),
+ self.chunk_vectors
+ ).flatten()
+
+ top_chunk_indices = chunk_similarities.argsort()[-top_k*2:][::-1] # Get more chunk results
+
+ # Use a set to avoid duplicate file results
+ seen_files = set(r['file_info']['url'] for r in results)
+
+ for i, idx in enumerate(top_chunk_indices):
+ if chunk_similarities[idx] > 0.25: # Higher threshold for chunks
+ file_index = self.chunk_metadata[idx]['file_index']
+ file_info = self.file_metadata[file_index]
+
+ # Only add if we haven't already included this file
+ if file_info['url'] not in seen_files:
+ seen_files.add(file_info['url'])
+ results.append({
+ 'file_info': file_info,
+ 'score': float(chunk_similarities[idx]),
+ 'rank': len(results) + 1,
+ 'match_type': 'chunk',
+ 'language': self.languages[file_index] if file_index < len(self.languages) else 'unknown',
+ 'chunk_preview': self.chunks[idx][:200] + "..." if len(self.chunks[idx]) > 200 else self.chunks[idx]
+ })
+
+ # Stop after we've found enough results
+ if len(results) >= top_k*1.5:
+ break
+ else:
+ # Fallback to TF-IDF if transformers not available
+ query_vector = self.vectorizer.transform([expanded_query])
+
+ # First search at document level
+ if self.vectors is not None:
+ doc_similarities = cosine_similarity(query_vector, self.vectors).flatten()
+ top_doc_indices = doc_similarities.argsort()[-top_k:][::-1]
+
+ for i, idx in enumerate(top_doc_indices):
+ if doc_similarities[idx] > 0.1: # Threshold to exclude irrelevant results
+ results.append({
+ 'file_info': self.file_metadata[idx],
+ 'score': float(doc_similarities[idx]),
+ 'rank': i+1,
+ 'match_type': 'document',
+ 'language': self.languages[idx] if idx < len(self.languages) else 'unknown'
+ })
+
+ # Then search at chunk level if enabled
+ if search_chunks and self.chunk_vectors is not None:
+ chunk_similarities = cosine_similarity(query_vector, self.chunk_vectors).flatten()
+ top_chunk_indices = chunk_similarities.argsort()[-top_k*2:][::-1]
+
+ # Avoid duplicates
+ seen_files = set(r['file_info']['url'] for r in results)
+
+ for i, idx in enumerate(top_chunk_indices):
+ if chunk_similarities[idx] > 0.15:
+ file_index = self.chunk_metadata[idx]['file_index']
+ file_info = self.file_metadata[file_index]
+
+ if file_info['url'] not in seen_files:
+ seen_files.add(file_info['url'])
+ results.append({
+ 'file_info': file_info,
+ 'score': float(chunk_similarities[idx]),
+ 'rank': len(results) + 1,
+ 'match_type': 'chunk',
+ 'language': self.languages[file_index] if file_index < len(self.languages) else 'unknown',
+ 'chunk_preview': self.chunks[idx][:200] + "..." if len(self.chunks[idx]) > 200 else self.chunks[idx]
+ })
+
+ if len(results) >= top_k*1.5:
+ break
+
+ # Sort combined results by score
+ results.sort(key=lambda x: x['score'], reverse=True)
+
+ # Re-rank and truncate
+ for i, result in enumerate(results[:top_k]):
+ result['rank'] = i+1
+
+ return results[:top_k]
+ except Exception as e:
+ logger.error(f"Error during search: {e}")
+ return []
+
+# -------------------- Utility Functions --------------------
+def get_random_user_agent():
+ return random.choice(USER_AGENTS)
+
+def sizeof_fmt(num, suffix='B'):
+ for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
+ if abs(num) < 1024.0:
+ return f"{num:3.1f}{unit}{suffix}"
+ num /= 1024.0
+ return f"{num:.1f}Y{suffix}"
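+# Sample values for reference (illustrative): sizeof_fmt(1536) -> "1.5KB",
+# sizeof_fmt(10_485_760) -> "10.0MB".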
+
+def create_zip_file(file_paths, output_dir):
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+ zip_path = os.path.join(output_dir, f"downloads_{timestamp}.zip")
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
+ for file_path in file_paths:
+ zipf.write(file_path, os.path.basename(file_path))
+ return zip_path
+
+def get_file_extension(url, default='.pdf'):
+ """Extract file extension from URL or filename"""
+ path = urlparse(url).path
+ ext = os.path.splitext(path)[1].lower()
+ if not ext:
+ return default
+ return ext
+
+def humanize_file_size(size_bytes):
+ """Format file size in human-readable format"""
+ if size_bytes < 1024:
+ return f"{size_bytes} bytes"
+ for unit in ['KB', 'MB', 'GB', 'TB']:
+ size_bytes /= 1024.0
+ if size_bytes < 1024.0:
+ return f"{size_bytes:.1f} {unit}"
+ return f"{size_bytes:.1f} PB"
+
+def get_domain(url):
+ """Extract domain from URL"""
+ parsed = urlparse(url)
+ return parsed.netloc
+
+def is_valid_file_url(url, extensions):
+ """Check if URL is a valid file URL based on extension"""
+ return any(url.lower().endswith(ext) for ext in extensions)
+
+def detect_captcha(html_content):
+ """Detect common captcha patterns in HTML content"""
+ captcha_patterns = [
+ 'captcha', 'recaptcha', 'g-recaptcha', 'hcaptcha', 'cf-turnstile',
+ 'challenge', 'solve the following', 'verify you are human'
+ ]
+ html_lower = html_content.lower()
+ return any(pattern in html_lower for pattern in captcha_patterns)
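+# e.g. detect_captcha('<div class="g-recaptcha" data-sitekey="x"></div>') returns True,
+# while pages without any of these markers return False (illustrative).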
+
+def is_download_link(url):
+ """Enhanced function to detect if a URL is likely a download link"""
+ # Check for obvious download indicators in URL
+ url_lower = url.lower()
+
+ # Check for common download-related terms in the URL
+ download_terms = [
+ 'download', 'dl', 'get', 'file', 'attachment', 'export', 'view',
+ 'retrieve', 'fetch', 'load', 'open', 'access', 'doc', 'document'
+ ]
+ if any(term in url_lower for term in download_terms):
+ return True
+
+ # Check for common download script patterns
+ script_patterns = [
+ 'download.php', 'getfile.php', 'fetch.php', 'view.php', 'dl.php',
+ 'download.aspx', 'getfile.aspx', 'file.aspx',
+ 'downloadhandler', 'filehandler', 'filedownload',
+ 'download.jsp', 'download.cgi', 'download.do',
+ 'download-file', 'get-file',
+ 'downloadfile', 'getfile', 'viewfile',
+ 'Action=downloadfile', 'action=download', 'action=view',
+ 'download?', 'file?', 'get?', 'view?'
+ ]
+ if any(pattern in url_lower for pattern in script_patterns):
+ return True
+
+ # Check for common file extensions in the URL path or parameters
+ path = urlparse(url).path
+ common_extensions = ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
+ '.zip', '.rar', '.txt', '.csv', '.json', '.xml', '.jpg',
+ '.png', '.gif', '.mp3', '.mp4', '.avi', '.mov']
+
+ if any(ext in path.lower() for ext in common_extensions):
+ return True
+
+ # Check for file ID or file parameters in URL
+ params = parse_qs(urlparse(url).query)
+ param_keys = params.keys()
+ file_param_indicators = ['file', 'id', 'key', 'filename', 'name', 'fileid', 'attachment', 'attid']
+ if any(key.lower() in file_param_indicators for key in param_keys):
+ return True
+
+    # Check for download-handler query patterns such as Action=downloadfile or fname=
+ if 'Action=downloadfile' in url or 'fname=' in url:
+ return True
+
+ return False
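+# Illustrative examples of the heuristics above:
+#   is_download_link("https://example.com/download.php?file=42")  -> True
+#   is_download_link("https://example.com/docs/report.pdf")       -> True
+#   is_download_link("https://example.com/about")                 -> False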
+
+def normalize_download_url(url):
+ """Normalize download URLs to handle various formats and encodings"""
+ try:
+ # Handle common URL shorteners and redirections
+ parsed = urlparse(url)
+
+ # Handle phpMyAdmin-style encoded URLs
+ if 'Action=downloadfile' in url and 'file=' in url:
+ # Extract the encoded file parameter
+ params = parse_qs(parsed.query)
+ if 'file' in params:
+ # This is just a placeholder - in a real implementation,
+ # you would need to handle the specific encoding used
+ encoded_file = params['file'][0]
+ # Keep the URL as is for now, since we'll handle it during download
+ return url
+
+ # Handle URLs with fname parameter (like in the example)
+        # Handle URLs with an fname parameter
+ # Keep as is - we'll handle this specially during download
+ return url
+
+ # For other URLs, make sure they are properly quoted
+ path = parsed.path
+ # Only quote the path portion if needed
+ if '%' not in path and ' ' in path:
+ path = quote(path)
+
+ # Reconstruct the URL
+ normalized = parsed._replace(path=path).geturl()
+ return normalized
+ except Exception as e:
+ logger.error(f"Error normalizing URL {url}: {e}")
+ return url
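+# e.g. normalize_download_url("https://example.com/files/my report.pdf")
+#   -> "https://example.com/files/my%20report.pdf" (unescaped spaces are percent-encoded;
+#      already-encoded and parameterised URLs are passed through unchanged).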
+
+# -------------------- Google Drive Functions --------------------
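+# Rough flow (sketch): the user opens get_google_auth_url(), authorises access and
+# supplies the resulting code, which is exchanged and used for uploads:
+#   creds, message = exchange_code_for_credentials(auth_code)
+#   if creds:
+#       file_id = google_drive_upload(zip_path, creds)  # zip_path: local file to upload (placeholder)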
+def get_google_auth_url():
+ client_config = GOOGLE_OAUTH_CONFIG["web"]
+ flow = google_auth_oauthlib.flow.Flow.from_client_config(
+ {"web": client_config},
+ scopes=["https://www.googleapis.com/auth/drive.file"]
+ )
+ flow.redirect_uri = client_config["redirect_uris"][0]
+ authorization_url, _ = flow.authorization_url(
+ access_type="offline",
+ include_granted_scopes="true",
+ prompt="consent"
+ )
+ return authorization_url
+
+def exchange_code_for_credentials(auth_code):
+ if not auth_code.strip():
+ return None, "No code provided."
+ try:
+ client_config = GOOGLE_OAUTH_CONFIG["web"]
+ flow = google_auth_oauthlib.flow.Flow.from_client_config(
+ {"web": client_config},
+ scopes=["https://www.googleapis.com/auth/drive.file"]
+ )
+ flow.redirect_uri = client_config["redirect_uris"][0]
+ flow.fetch_token(code=auth_code.strip())
+ creds = flow.credentials
+ if not creds or not creds.valid:
+ return None, "Could not validate credentials. Check code and try again."
+ return creds, "Google Sign-In successful!"
+ except Exception as e:
+ return None, f"Error during token exchange: {e}"
+
+def google_drive_upload(file_path, credentials, folder_id=None):
+ try:
+ drive_service = googleapiclient.discovery.build("drive", "v3", credentials=credentials)
+ file_metadata = {'name': os.path.basename(file_path)}
+ if folder_id:
+ file_metadata['parents'] = [folder_id]
+ media = googleapiclient.http.MediaFileUpload(file_path, resumable=True)
+ created = drive_service.files().create(body=file_metadata, media_body=media, fields='id').execute()
+ return created.get("id", "")
+ except Exception as e:
+ return f"Error uploading to Drive: {str(e)}"
+
+def create_drive_folder(drive_service, name):
+ folder_metadata = {'name': name, 'mimeType': 'application/vnd.google-apps.folder'}
+ folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
+ return folder.get('id')
+
+# -------------------- Playwright Setup --------------------
+def install_playwright_dependencies():
+ try:
+ # Set environment variable for Playwright browsers path
+ os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")
+
+ # Install system dependencies
+ subprocess.run(['apt-get', 'update', '-y'], check=True)
+ packages = [
+ 'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
+ 'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
+ 'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0'
+ ]
+ subprocess.run(['apt-get', 'install', '-y', '--no-install-recommends'] + packages, check=True)
+
+ # Install Playwright and dependencies
+ subprocess.run(['pip', 'install', 'playwright'], check=True)
+ subprocess.run(['python3', '-m', 'playwright', 'install', 'chromium'], check=True)
+
+ st.success("Playwright dependencies installed successfully!")
+ except Exception as e:
+ st.error(f"Error installing Playwright dependencies: {e}")
+ st.info("You may need to manually install dependencies. Check console for details.")
+ logger.error(f"Playwright setup error: {e}")
+ traceback.print_exc()
+
+# -------------------- Download Manager Class --------------------
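+# Illustrative usage (a sketch; must run inside an asyncio event loop):
+#   async with DownloadManager(query="past exam papers", num_results=5) as dm:
+#       for result_url in await dm.search_bing():
+#           files = await dm.extract_downloadable_files(result_url, custom_ext_list=[])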
+class DownloadManager:
+ def __init__(self, use_proxy=False, proxy=None, query=None, num_results=5, use_stealth=True, proxy_rotation=False):
+ self.use_proxy = use_proxy
+ self.proxy = proxy
+ self.query = query
+ self.num_results = num_results
+ self.playwright = None
+ self.browser = None
+ self.context = None
+ self.page = None
+ self.use_stealth = use_stealth
+ self.proxy_rotation = proxy_rotation
+ self.request_count = 0
+ self.captcha_detected = False
+ self.download_timeout = 300 # 5 minutes timeout for downloads
+ # Track visited URLs to avoid revisiting the same URL multiple times
+ self.visited_urls = set()
+ # Track successfully downloaded files to avoid redownloading
+ self.downloaded_files = set()
+
+ async def __aenter__(self):
+ self.playwright = await async_playwright().start()
+
+ # Prepare browser args with stealth settings
+ browser_args = [
+ '--no-sandbox',
+ '--disable-setuid-sandbox',
+ '--disable-dev-shm-usage',
+ '--disable-gpu',
+ '--no-zygote',
+ '--single-process',
+ '--disable-web-security',
+ '--disable-features=IsolateOrigins',
+ '--disable-site-isolation-trials'
+ ]
+
+ # Add stealth-specific args
+ if self.use_stealth:
+ browser_args.extend([
+ '--disable-blink-features=AutomationControlled',
+ '--disable-features=IsolateOrigins,site-per-process',
+ '--disable-webgl',
+ '--disable-webrtc'
+ ])
+
+ # Setup browser options
+ opts = {
+ "headless": True,
+ "args": browser_args
+ }
+
+ # Configure proxy if specified
+ if self.use_proxy and self.proxy:
+ opts["proxy"] = {"server": self.proxy}
+
+ # Launch browser with options
+ self.browser = await self.playwright.chromium.launch(**opts)
+
+ # Setup browser context with enhanced settings
+ context_opts = {
+ "user_agent": get_random_user_agent(),
+ "viewport": {"width": 1920, "height": 1080},
+ "device_scale_factor": 1,
+ "has_touch": False,
+ "is_mobile": False,
+ "ignore_https_errors": True,
+ "accept_downloads": True
+ }
+
+ # Apply stealth-specific settings to the context
+ if self.use_stealth:
+ # Apply JS-injection for enhanced stealth
+ context_opts["bypass_csp"] = True
+ self.context = await self.browser.new_context(**context_opts)
+
+ # Execute stealth JS to avoid detection
+ await self.context.add_init_script("""
+ () => {
+ Object.defineProperty(navigator, 'webdriver', {
+ get: () => false,
+ });
+
+ // Change navigator properties
+ const newProto = navigator.__proto__;
+ delete newProto.webdriver;
+
+ // Overwrite the plugins
+ Object.defineProperty(navigator, 'plugins', {
+ get: () => [1, 2, 3, 4, 5].map(() => ({
+ lengthComputable: true,
+ loaded: 100,
+ total: 100
+ }))
+ });
+
+ // Handle languages more naturally
+ Object.defineProperty(navigator, 'languages', {
+ get: () => ['en-US', 'en', 'es']
+ });
+
+ // Modify hardware concurrency
+ Object.defineProperty(navigator, 'hardwareConcurrency', {
+ get: () => 4
+ });
+
+ // Modify deviceMemory
+ Object.defineProperty(navigator, 'deviceMemory', {
+ get: () => 8
+ });
+
+ // WebGL modifications
+ const getParameter = WebGLRenderingContext.prototype.getParameter;
+ WebGLRenderingContext.prototype.getParameter = function(parameter) {
+ if (parameter === 37445) {
+ return 'Intel Inc.';
+ }
+ if (parameter === 37446) {
+ return 'Intel Iris OpenGL Engine';
+ }
+ return getParameter.apply(this, arguments);
+ };
+ }
+ """)
+ else:
+ # Regular context without stealth
+ self.context = await self.browser.new_context(**context_opts)
+
+ # Create page with enhanced headers
+ self.page = await self.context.new_page()
+ await self.page.set_extra_http_headers({
+ 'Accept-Language': 'en-US,en;q=0.9,es;q=0.8',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
+ 'Cache-Control': 'max-age=0',
+ 'DNT': '1', # Do Not Track
+ 'Referer': 'https://www.google.com/',
+ 'Sec-Fetch-Dest': 'document',
+ 'Sec-Fetch-Mode': 'navigate',
+ 'Sec-Fetch-Site': 'cross-site',
+ 'Sec-Fetch-User': '?1',
+ 'Upgrade-Insecure-Requests': '1'
+ })
+
+ # Add delay for mouse movements to simulate human behavior
+ if self.use_stealth:
+ await self.page.mouse.move(x=random.randint(100, 500), y=random.randint(100, 500))
+ await self.page.wait_for_timeout(random.randint(200, 500))
+
+ return self
+
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
+ if self.browser:
+ await self.browser.close()
+ if self.playwright:
+ await self.playwright.stop()
+
+ async def rotate_proxy_if_needed(self):
+ """Rotate proxy if proxy rotation is enabled and threshold is reached"""
+ if self.proxy_rotation and PROXY_ROTATION_CONFIG["enabled"]:
+ self.request_count += 1
+ if self.request_count >= PROXY_ROTATION_CONFIG["rotation_interval"] and PROXY_ROTATION_CONFIG["proxies"]:
+ # Get next proxy from the pool
+ next_proxy = PROXY_ROTATION_CONFIG["proxies"].pop(0)
+ PROXY_ROTATION_CONFIG["proxies"].append(next_proxy) # Move to end of list
+
+ # Close existing context and create new one with the new proxy
+ if self.context:
+ await self.context.close()
+
+ # Create new context with the new proxy
+ context_opts = {
+ "user_agent": get_random_user_agent(),
+ "proxy": {"server": next_proxy},
+ "accept_downloads": True
+ }
+ self.context = await self.browser.new_context(**context_opts)
+ self.page = await self.context.new_page()
+
+ # Reset counter
+ self.request_count = 0
+ logger.info(f"Rotated to new proxy: {next_proxy}")
-# Initialize session state variables
-def initialize_session_state():
+ async def handle_captcha(self, page):
+ """Detect and handle captchas if possible"""
+ # Check for common captcha patterns
+ content = await page.content()
+ if detect_captcha(content):
+ self.captcha_detected = True
+ logger.warning("Captcha detected on page")
+
+ # Strategies for handling captchas:
+ # 1. For simple captchas, try to extract the image and solve it
+ captcha_img = await page.query_selector('img[alt*="captcha" i], img[src*="captcha" i]')
+ if captcha_img:
+ logger.info("Found captcha image, attempting to capture")
+
+ # Take screenshot of the captcha
+ captcha_path = os.path.join(tempfile.gettempdir(), "captcha.png")
+ await captcha_img.screenshot(path=captcha_path)
+
+ # In a real implementation, you would send this to a captcha solving service
+ # For now, just log the detection
+ logger.info(f"Captcha image saved to {captcha_path}")
+
+                # Without an external captcha-solving service, report failure so the caller can proceed accordingly
+ return False
+
+ # 2. For reCAPTCHA, special handling would be required
+ recaptcha = await page.query_selector('iframe[src*="recaptcha"]')
+ if recaptcha:
+ logger.warning("reCAPTCHA detected, would require external solving service")
+ return False
+
+ # 3. Try to perform human-like actions that might bypass simple bot checks
+ await self.perform_human_actions(page)
+
+ # Check if captcha is still present
+ content = await page.content()
+ if detect_captcha(content):
+ logger.warning("Captcha still present after human-like actions")
+ return False
+ else:
+ logger.info("Captcha appears to be resolved")
+ return True
+
+ return True # No captcha detected
+
+ async def perform_human_actions(self, page):
+ """Perform human-like actions on the page to possibly bypass simple bot checks"""
+ try:
+ # 1. Slowly scroll down the page
+ for i in range(3):
+ await page.evaluate(f"window.scrollTo(0, {i * 300})")
+ await page.wait_for_timeout(random.randint(300, 700))
+
+ # 2. Random mouse movements
+ for _ in range(3):
+ x = random.randint(100, 800)
+ y = random.randint(100, 600)
+ await page.mouse.move(x=x, y=y)
+ await page.wait_for_timeout(random.randint(200, 500))
+
+ # 3. Click on a non-essential part of the page
+ try:
+ await page.click("body", position={"x": 50, "y": 50})
+            except Exception:
+ pass
+
+ # 4. Wait a bit before continuing
+ await page.wait_for_timeout(1000)
+
+ except Exception as e:
+ logger.warning(f"Error during human-like actions: {e}")
+
+ async def search_bing(self):
+ urls = []
+ try:
+ # Rotate proxy if needed
+ await self.rotate_proxy_if_needed()
+
+            search_url = f"https://www.bing.com/search?q={quote(self.query)}"
+ await self.page.goto(search_url, timeout=30000)
+ await self.page.wait_for_load_state('networkidle')
+
+ # Check for captchas
+ if not await self.handle_captcha(self.page):
+ logger.warning("Captcha detected during search, results may be limited")
+
+ # More natural scrolling behavior
+ for i in range(3):
+ await self.page.evaluate(f"window.scrollTo(0, {i * 400})")
+ await self.page.wait_for_timeout(random.randint(300, 800))
+
+ # Extract search results
+ links = await self.page.query_selector_all("li.b_algo h2 a")
+ for link in links[:self.num_results]:
+ href = await link.get_attribute('href')
+ if href:
+ urls.append(href)
+
+ # If we didn't find enough results, try an alternative selector
+ if len(urls) < self.num_results:
+ alt_links = await self.page.query_selector_all(".b_caption a")
+ for link in alt_links:
+ href = await link.get_attribute('href')
+ if href and href not in urls:
+ urls.append(href)
+ if len(urls) >= self.num_results:
+ break
+
+ return urls
+ except Exception as e:
+ logger.error(f"Error searching Bing: {e}")
+ return []
+
+ async def get_file_size(self, url):
+ try:
+ await self.rotate_proxy_if_needed()
+
+ # For complex download URLs, we need to be careful with HEAD requests
+ if '?' in url or 'Action=downloadfile' in url or 'fname=' in url:
+ # For these URLs, we'll try a more reliable approach using range headers
+ headers = {
+ 'User-Agent': get_random_user_agent(),
+ 'Range': 'bytes=0-0' # Just request the first byte to check headers
+ }
+
+ try:
+ with requests.get(url, headers=headers, stream=True, timeout=10) as r:
+ if 'Content-Range' in r.headers:
+ content_range = r.headers['Content-Range']
+ match = re.search(r'bytes 0-0/(\d+)', content_range)
+ if match:
+ size = int(match.group(1))
+ return sizeof_fmt(size)
+
+ if 'Content-Length' in r.headers:
+ size = int(r.headers['Content-Length'])
+ # If size is 1, it's likely just our single requested byte
+ if size > 1:
+ return sizeof_fmt(size)
+ except Exception as e:
+ logger.warning(f"Error getting file size with Range request: {e}")
+
+ # Fallback to browser approach
+ try:
+ async with self.context.new_page() as page:
+ response = await page.request.head(url, timeout=15000)
+ length = response.headers.get('Content-Length', None)
+ if length:
+ return sizeof_fmt(int(length))
+ except Exception as e:
+ logger.warning(f"Error getting file size with browser: {e}")
+
+ return "Unknown Size"
+ else:
+ # Standard approach for normal URLs
+ async with self.context.new_page() as page:
+ response = await page.request.head(url, timeout=15000)
+ length = response.headers.get('Content-Length', None)
+ if length:
+ return sizeof_fmt(int(length))
+ else:
+ return "Unknown Size"
+ except Exception as e:
+ logger.warning(f"Error getting file size: {e}")
+ return "Unknown Size"
+
+ async def get_pdf_metadata(self, url):
+ try:
+ await self.rotate_proxy_if_needed()
+
+ async with self.context.new_page() as page:
+ resp = await page.request.get(url, timeout=15000)
+ if resp.ok:
+ content = await resp.body()
+ pdf = BytesIO(content)
+ reader = PdfReader(pdf)
+ return {
+ 'Title': reader.metadata.get('/Title', 'N/A') if reader.metadata else 'N/A',
+ 'Author': reader.metadata.get('/Author', 'N/A') if reader.metadata else 'N/A',
+ 'Pages': len(reader.pages),
+ }
+ else:
+ return {}
+ except Exception as e:
+ logger.warning(f"Error reading PDF metadata: {e}")
+ return {}
+
+ async def extract_real_download_url(self, url):
+ """Enhanced method to extract real download URL, handling complex URLs"""
+ try:
+ # Check if this is a complex download URL that needs special handling
+ if 'Action=downloadfile' in url or 'fname=' in url:
+ logger.info(f"Complex download URL detected: {url}")
+
+ # For these special cases, we'll use the browser to navigate and intercept redirects
+ await self.rotate_proxy_if_needed()
+
+ async with self.context.new_page() as page:
+ # Set up request interception to capture redirects
+ await page.route('**', lambda route: route.continue_())
+
+ # Listen for all responses
+ responses = []
+ page.on('response', lambda response: responses.append(response))
+
+ try:
+ # Go to the URL
+ await page.goto(url, wait_until='networkidle', timeout=30000)
+
+ # Check all responses for potential downloads
+ for response in responses:
+ # Look for content-disposition headers indicating a download
+                            content_disposition = response.headers.get('content-disposition', '')  # Playwright lower-cases header names
+ if 'attachment' in content_disposition or 'filename=' in content_disposition:
+ return response.url
+
+ # Look for content-type headers indicating a file
+                            content_type = response.headers.get('content-type', '')
+ if content_type and content_type != 'text/html' and not content_type.startswith('text/'):
+ return response.url
+
+ # If no clear download was detected, return the final URL
+ return page.url
+ except Exception as e:
+ logger.warning(f"Error extracting real download URL: {e}")
+ return url
+ else:
+ # Standard approach for normal URLs
+ await self.rotate_proxy_if_needed()
+
+ async with self.context.new_page() as page:
+ response = await page.goto(url, wait_until='networkidle', timeout=30000)
+ if response and response.headers.get('location'):
+ return response.headers['location']
+ return page.url
+ except Exception as e:
+ logger.error(f"Error extracting real download URL: {e}")
+ return url
+
+    # Enhanced extraction of exam document links from educational sites
+ async def get_edu_exam_links(self, url):
+ """Specialized method for educational exam websites that follows a common pattern."""
+ try:
+ logger.info(f"Fetching exam links from {url}")
+ links = set()
+
+ # First try with direct requests for speed (but with proper headers)
+ headers = {
+ "User-Agent": get_random_user_agent(),
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+ "Accept-Language": "en-US,en;q=0.9",
+ "Referer": "https://www.google.com/",
+ "DNT": "1"
+ }
+
+ try:
+ response = requests.get(url, headers=headers, timeout=30)
+
+ if response.status_code == 200:
+ # Parse with BeautifulSoup first for efficiency
+ soup = BeautifulSoup(response.text, "html.parser")
+ parsed_base = urlparse(url)
+ base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
+
+ # Look for all links
+ for a in soup.find_all("a", href=True):
+ href = a["href"]
+ full_url = urljoin(url, href)
+
+ # Look for text clues
+ link_text = a.get_text().lower()
+
+ # Special patterns for exam sites (expanded list)
+ url_patterns = [
+ "/eduexp/docs/", "/exam/", "/pastexam/", "/papers/",
+ "/test/", "/download/", "/files/", "/assignments/",
+ "paper_", "question_", "exam_", "test_", "past_",
+ "assignment_", "sample_", "study_material", "notes_",
+ "/resource/", "/subject/", "/course/", "/material/"
+ ]
+
+ text_patterns = [
+ "exam", "paper", "test", "question", "past", "download",
+ "assignment", "sample", "study", "material", "notes",
+ "subject", "course", "resource", "pdf", "document",
+ "view", "open", "get", "solution", "answer"
+ ]
+
+ # Check URL for patterns
+ if any(pattern in full_url.lower() for pattern in url_patterns):
+ links.add(full_url)
+ continue
+
+ # Check link text for patterns
+ if any(pattern in link_text for pattern in text_patterns):
+ links.add(full_url)
+ continue
+
+ # Check for common file extensions
+ if any(full_url.lower().endswith(ext) for ext in
+ ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']):
+ links.add(full_url)
+
+ # Check for download script parameters
+ if "Action=downloadfile" in url or "fname=" in url:
+ links.add(url) # Add the URL itself as it's a download link
+ except Exception as e:
+ logger.warning(f"Request-based extraction failed: {e}")
+
+ # Browser-based approach for more thorough extraction or if initial approach was inadequate
+ try:
+ # Check if we need to proceed with browser-based extraction
+ if len(links) < 5 or "phsms.cloud.ncnu.edu.tw" in url or "Action=downloadfile" in url:
+ logger.info("Using browser for enhanced link extraction")
+
+ # Rotate proxy if needed
+ await self.rotate_proxy_if_needed()
+
+ # Navigate to the page with more natural timing
+ await self.page.goto(url, timeout=45000, wait_until='networkidle')
+ await self.page.wait_for_timeout(random.randint(1000, 2000))
+
+ # Handle captchas if present
+ if not await self.handle_captcha(self.page):
+ logger.warning("Captcha detected, extraction may be limited")
+
+ # Get base URL for resolving relative links
+ parsed_base = urlparse(url)
+ base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
+
+ # Perform natural scrolling to trigger lazy-loaded content
+ page_height = await self.page.evaluate("document.body.scrollHeight")
+ viewport_height = await self.page.evaluate("window.innerHeight")
+
+ for scroll_pos in range(0, page_height, viewport_height // 2):
+ await self.page.evaluate(f"window.scrollTo(0, {scroll_pos})")
+ await self.page.wait_for_timeout(random.randint(300, 800))
+
+ # Scroll back to top
+ await self.page.evaluate("window.scrollTo(0, 0)")
+ await self.page.wait_for_timeout(500)
+
+ # Extract all links with Playwright (better than just anchor tags)
+ all_links = await self.page.evaluate("""
+ () => {
+ const results = [];
+
+ // Get all anchor tags
+ const anchors = document.querySelectorAll('a[href]');
+ for (const a of anchors) {
+ if (a.href) {
+ results.push({
+ href: a.href,
+ text: a.innerText || a.textContent || '',
+ isButton: a.classList.contains('btn') || a.role === 'button'
+ });
+ }
+ }
+
+ // Get buttons that might contain links
+ const buttons = document.querySelectorAll('button');
+ for (const btn of buttons) {
+ const onclick = btn.getAttribute('onclick') || '';
+ if (onclick.includes('window.location') || onclick.includes('download')) {
+ results.push({
+ href: '#button',
+ text: btn.innerText || btn.textContent || '',
+ isButton: true,
+ onclick: onclick
+ });
+ }
+ }
+
+ return results;
+ }
+ """)
+
+ # Process the extracted links
+ for link_info in all_links:
+ href = link_info.get('href', '')
+ text = link_info.get('text', '').lower()
+
+ if href and href != '#button':
+ # Check URL patterns
+ url_patterns = [
+ "/eduexp/docs/", "/exam/", "/pastexam/", "/papers/",
+ "/test/", "/download/", "/files/", "/assignments/",
+ "paper_", "question_", "exam_", "test_", "past_",
+ "assignment_", "sample_", "study_material", "notes_"
+ ]
+
+ # Check text patterns
+ text_patterns = [
+ "exam", "paper", "test", "question", "past", "download",
+ "assignment", "sample", "study", "material", "notes",
+ "pdf", "document", "view", "open", "solution"
+ ]
+
+ if any(pattern in href.lower() for pattern in url_patterns) or \
+ any(pattern in text for pattern in text_patterns) or \
+ any(href.lower().endswith(ext) for ext in
+ ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']):
+ links.add(href)
+
+ # Check for download links in the page
+ download_links = await self.page.evaluate("""
+ () => {
+ // Find all links that might be download links
+ const links = Array.from(document.querySelectorAll('a[href]'));
+ return links
+ .filter(a => {
+ const href = a.href.toLowerCase();
+ return href.includes('download') ||
+ href.includes('getfile') ||
+ href.includes('view.php') ||
+ href.includes('action=downloadfile') ||
+ href.includes('fname=');
+ })
+ .map(a => a.href);
+ }
+ """)
+
+ for dl_link in download_links:
+ links.add(dl_link)
+
+ # Check for ASP.NET specific elements that might contain exam links
+ grid_elements = await self.page.query_selector_all('table.grid, .GridView, #GridView1, .rgMasterTable, .table-responsive')
+ for grid in grid_elements:
+ grid_links = await grid.query_selector_all('a[href]')
+ for a in grid_links:
+ href = await a.get_attribute('href')
+ text = await a.text_content()
+
+ if href:
+ full_url = href if href.startswith('http') else urljoin(url, href)
+ links.add(full_url)
+
+ # Try clicking pagination controls to reveal more content
+ pagination_buttons = await self.page.query_selector_all('a[href*="page"], .pagination a, .pager a')
+ for i, button in enumerate(pagination_buttons[:5]): # Limit to first 5 pagination buttons
+ try:
+ # Check if this is a numeric pagination button (more likely to be useful)
+ button_text = await button.text_content()
+ if button_text and button_text.strip().isdigit():
+ logger.info(f"Clicking pagination button: {button_text}")
+ await button.click()
+ await self.page.wait_for_timeout(2000)
+ await self.page.wait_for_load_state('networkidle', timeout=10000)
+
+ # Extract links from this page
+ new_page_links = await self.page.evaluate("""
+ () => {
+ return Array.from(document.querySelectorAll('a[href]')).map(a => a.href);
+ }
+ """)
+
+ for href in new_page_links:
+ if href and not href.startswith('javascript:'):
+ if any(pattern in href.lower() for pattern in url_patterns) or \
+ any(href.lower().endswith(ext) for ext in
+ ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']):
+ links.add(href)
+ except Exception as e:
+ logger.warning(f"Error clicking pagination button: {e}")
+
+ # Try clicking any controls that might reveal more exam links (more focused approach)
+ show_buttons = await self.page.query_selector_all('input[type="button"], button, a.btn')
+ for button in show_buttons:
+ button_text = (await button.text_content() or "").lower()
+ button_value = (await button.get_attribute("value") or "").lower()
+ button_id = (await button.get_attribute("id") or "").lower()
+
+ # Look for buttons that seem likely to reveal file lists
+ promising_terms = ["show", "view", "display", "list", "exam", "paper", "test",
+ "download", "resource", "material", "browse", "file"]
+
+ if any(term in button_text or term in button_value or term in button_id
+ for term in promising_terms):
+ try:
+ logger.info(f"Clicking button: {button_text or button_value}")
+ await button.click()
+ await self.page.wait_for_timeout(2000)
+ await self.page.wait_for_load_state('networkidle', timeout=10000)
+
+ # Get any new links that appeared
+ new_links = await self.page.query_selector_all('a[href]')
+ for a in new_links:
+ href = await a.get_attribute('href')
+ if href:
+ full_url = href if href.startswith('http') else urljoin(url, href)
+
+ # Focus on file extensions and patterns
+ if any(full_url.lower().endswith(ext) for ext in
+ ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']) or \
+ any(pattern in full_url.lower() for pattern in url_patterns):
+ links.add(full_url)
+ except Exception as e:
+ logger.warning(f"Error clicking button: {e}")
+
+ # Special handling for ASP.NET PostBack links
+ try:
+ # Find and interact with ASP.NET __doPostBack elements
+ postback_elements = await self.page.query_selector_all('[onclick*="__doPostBack"]')
+ for i, element in enumerate(postback_elements[:10]): # Limit to avoid too many clicks
+ try:
+ onclick = await element.get_attribute('onclick')
+ if onclick and '__doPostBack' in onclick:
+ element_text = await element.text_content()
+
+ # Only interact with elements that seem likely to contain exam links
+ promising_terms = ["show", "view", "list", "exam", "paper", "test",
+ "download", "resource", "material"]
+
+ if any(term in element_text.lower() for term in promising_terms):
+ logger.info(f"Clicking ASP.NET postback element: {element_text}")
+
+ # Click the element
+ await element.click()
+ await self.page.wait_for_timeout(2000)
+ await self.page.wait_for_load_state('networkidle', timeout=10000)
+
+ # Extract any new links
+ new_links = await self.page.query_selector_all('a[href]')
+ for a in new_links:
+ href = await a.get_attribute('href')
+ if href:
+ full_url = href if href.startswith('http') else urljoin(url, href)
+ if any(full_url.lower().endswith(ext) for ext in
+ ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']):
+ links.add(full_url)
+ except Exception as e:
+ logger.warning(f"Error interacting with postback element: {e}")
+ except Exception as e:
+ logger.warning(f"Error during postback handling: {e}")
+
+ except Exception as e:
+ logger.error(f"Browser-based extraction failed: {e}")
+
+ # Filter links to likely contain exam documents
+ filtered_links = []
+ for link in links:
+ # Common file extensions for exam documents
+ if any(ext in link.lower() for ext in ['.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.zip']):
+ filtered_links.append(link)
+ continue
+
+ # Common paths for exam documents
+ if any(pattern in link.lower() for pattern in [
+ "/eduexp/docs/pastexam", "/exam/", "/pastexam/", "/papers/",
+ "/pastpapers/", "/questionpapers/", "/tests/", "/assignments/",
+ "/resource/", "/material/", "/notes/", "/subjectmaterial/"
+ ]):
+ filtered_links.append(link)
+ continue
+
+ # Check for download links (these may not have obvious extensions)
+ if is_download_link(link):
+ filtered_links.append(link)
+
+ logger.info(f"Found {len(filtered_links)} potential exam document links")
+ return filtered_links
+
+ except Exception as e:
+ logger.error(f"Error getting exam links: {e}")
+ return []
+
+ async def discover_hidden_links(self, page):
+ """Discover hidden links that might be in JavaScript, iframes, or dynamic content"""
+ hidden_links = set()
+
+ # Execute JavaScript to find links in script tags and data attributes
+ js_links = await page.evaluate("""
+ () => {
+ const links = new Set();
+
+ // Extract URLs from script tags
+ const scripts = document.querySelectorAll('script');
+ for (const script of scripts) {
+ const content = script.textContent || '';
+ const urlMatches = content.match(/["'](https?:\/\/[^"']+)["']/g) || [];
+ for (let match of urlMatches) {
+ links.add(match.replace(/["']/g, ''));
+ }
+ }
+
+ // Look for download-related variables in scripts
+ for (const script of scripts) {
+ const content = script.textContent || '';
+ // Look for common patterns for file URLs in JavaScript
+ if (content.includes('downloadURL') || content.includes('fileURL') ||
+ content.includes('pdfURL') || content.includes('documentURL')) {
+
+ // Extract potential URLs
+ const potentialUrls = content.match(/["']([^"']+\.(pdf|doc|docx|xls|xlsx|zip|ppt|pptx))["']/gi) || [];
+ for (let match of potentialUrls) {
+ const url = match.replace(/["']/g, '');
+ // Try to resolve relative URLs
+ if (url.startsWith('/') || !url.includes('://')) {
+ if (url.startsWith('/')) {
+ links.add(window.location.origin + url);
+ } else {
+ // Handle relative paths more carefully
+ const base = window.location.href.substring(0, window.location.href.lastIndexOf('/') + 1);
+ links.add(base + url);
+ }
+ } else if (url.startsWith('http')) {
+ links.add(url);
+ }
+ }
+ }
+ }
+
+ // Check for links in data attributes
+ const elements = document.querySelectorAll('*[data-url], *[data-href], *[data-src], *[data-link], *[data-file], *[data-download]');
+ for (const el of elements) {
+ for (const attr of ['data-url', 'data-href', 'data-src', 'data-link', 'data-file', 'data-download']) {
+ const val = el.getAttribute(attr);
+ if (val) {
+ // Try to resolve relative URLs
+ if (val.startsWith('/')) {
+ links.add(window.location.origin + val);
+ } else if (val.startsWith('http')) {
+ links.add(val);
+ } else if (!val.startsWith('javascript:') && !val.startsWith('#')) {
+ // Handle relative paths
+ const base = window.location.href.substring(0, window.location.href.lastIndexOf('/') + 1);
+ links.add(base + val);
+ }
+ }
+ }
+ }
+
+ // Look for URLs in inline event handlers
+ const clickableElements = document.querySelectorAll('*[onclick], *[onmousedown], *[onmouseup], *[href]');
+ for (const el of clickableElements) {
+ for (const attr of ['onclick', 'onmousedown', 'onmouseup', 'href']) {
+ const val = el.getAttribute(attr);
+ if (val) {
+ // Check for JavaScript URLs with window.location
+ if (val.includes('window.location') || val.includes('document.location')) {
+ const urlMatch = val.match(/location(?:.*)=\s*["']([^"']+)["']/);
+ if (urlMatch && urlMatch[1]) {
+ const url = urlMatch[1];
+ if (url.startsWith('/')) {
+ links.add(window.location.origin + url);
+ } else if (url.startsWith('http')) {
+ links.add(url);
+ } else if (!url.startsWith('javascript:') && !url.startsWith('#')) {
+ const base = window.location.href.substring(0, window.location.href.lastIndexOf('/') + 1);
+ links.add(base + url);
+ }
+ }
+ }
+
+ // Check for direct URLs in attributes
+ const urlMatches = val.match(/["'](https?:\/\/[^"']+)["']/g) || [];
+ for (let match of urlMatches) {
+ links.add(match.replace(/["']/g, ''));
+ }
+
+ // Check for download.php and similar patterns
+ if (val.includes('download.php') || val.includes('getfile.php') ||
+ val.includes('Action=downloadfile') || val.includes('viewfile.php')) {
+
+ // Handle both onclick handlers and direct hrefs
+ let url = '';
+ if (attr === 'href') {
+ url = val;
+ } else {
+ // Extract URL from JavaScript
+ const jsUrlMatch = val.match(/["']([^"']+(?:download|getfile|viewfile|downloadfile)[^"']*)["']/i);
+ if (jsUrlMatch) {
+ url = jsUrlMatch[1];
+ }
+ }
+
+ // Resolve URL if needed
+ if (url) {
+ if (url.startsWith('/')) {
+ links.add(window.location.origin + url);
+ } else if (url.startsWith('http')) {
+ links.add(url);
+ } else if (!url.startsWith('javascript:') && !url.startsWith('#')) {
+ const base = window.location.href.substring(0, window.location.href.lastIndexOf('/') + 1);
+ links.add(base + url);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Find PHP/ASP file download links
+ const fileLinks = document.querySelectorAll('a[href*="download.php"], a[href*="getfile.php"], a[href*="viewfile.php"], a[href*="file.aspx"], a[href*="download.aspx"], a[href*="Action=downloadfile"]');
+ for (const link of fileLinks) {
+ links.add(link.href);
+ }
+
+ return Array.from(links);
+ }
+ """)
+
+ for link in js_links:
+ hidden_links.add(link)
+
+ # Extract links from iframes
+ iframes = await page.query_selector_all('iframe')
+ for iframe in iframes:
+ try:
+ frame = await iframe.content_frame()
+ if frame:
+ iframe_links = await frame.evaluate("""
+ () => {
+ return Array.from(document.querySelectorAll('a[href]'))
+ .map(a => a.href)
+ .filter(href => href.startsWith('http'));
+ }
+ """)
+ for link in iframe_links:
+ hidden_links.add(link)
+ except Exception as e:
+ logger.warning(f"Could not extract links from iframe: {e}")
+
+ # Look for links in shadow DOM (used in modern web components)
+ shadow_links = await page.evaluate("""
+ () => {
+ const links = new Set();
+
+ // Helper function to recursively process shadow roots
+ function processShadowRoot(root) {
+ if (!root) return;
+
+ // Get links in this shadow root
+ const shadowLinks = root.querySelectorAll('a[href]');
+ for (const link of shadowLinks) {
+ if (link.href && link.href.startsWith('http')) {
+ links.add(link.href);
+ }
+ }
+
+ // Process nested shadow roots
+ const elements = root.querySelectorAll('*');
+ for (const el of elements) {
+ if (el.shadowRoot) {
+ processShadowRoot(el.shadowRoot);
+ }
+ }
+ }
+
+ // Find all shadow roots in the document
+ const elements = document.querySelectorAll('*');
+ for (const el of elements) {
+ if (el.shadowRoot) {
+ processShadowRoot(el.shadowRoot);
+ }
+ }
+
+ return Array.from(links);
+ }
+ """)
+
+ for link in shadow_links:
+ hidden_links.add(link)
+
+ # Look for download links in forms
+ form_links = await page.evaluate("""
+ () => {
+ const links = new Set();
+
+ // Check for form actions that might be download endpoints
+ const forms = document.querySelectorAll('form');
+ for (const form of forms) {
+ const action = form.action || '';
+ if (action && (
+ action.includes('download') ||
+ action.includes('getfile') ||
+ action.includes('viewfile') ||
+ action.includes('Action=downloadfile')
+ )) {
+ // Collect input values that might be needed for the download
+ const inputs = {};
+ const formInputs = form.querySelectorAll('input[name]');
+ for (const input of formInputs) {
+ inputs[input.name] = input.value;
+ }
+
+ // Record the form action as a candidate download endpoint
+ // (input values are collected above but not currently returned)
+ links.add(action);
+ }
+ }
+
+ return Array.from(links);
+ }
+ """)
+
+ for link in form_links:
+ hidden_links.add(link)
+
+ return hidden_links
+
+ async def extract_downloadable_files(self, url, custom_ext_list):
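+ """Scan a page for downloadable files and return a list of dicts with url, filename, size, metadata and download_url keys."""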
+ found_files = []
+ try:
+ # Normalize the URL to handle special cases
+ normalized_url = normalize_download_url(url)
+
+ # Skip if we've already visited this URL
+ if normalized_url in self.visited_urls:
+ logger.info(f"Skipping already visited URL: {normalized_url}")
+ return []
+
+ # Mark this URL as visited
+ self.visited_urls.add(normalized_url)
+
+ # Rotate proxy if needed
+ await self.rotate_proxy_if_needed()
+
+ # First check if this is a direct download link (Action=downloadfile or fname parameter)
+ if is_download_link(normalized_url):
+ logger.info(f"Processing potential direct download link: {normalized_url}")
+
+ # Try to extract the real download URL if needed
+ real_url = await self.extract_real_download_url(normalized_url)
+
+ # Determine filename - for complex URLs this can be tricky
+ filename = os.path.basename(urlparse(real_url).path)
+
+ # Handle URL-encoded filenames
+ if '%' in filename:
+ try:
+ filename = unquote(filename)
+ except Exception:
+ pass
+
+ # For URLs with download parameters, try to extract filename from query
+ if not filename or filename == '/' or filename.endswith('.php') or filename.endswith('.aspx'):
+ # Look for file parameter
+ params = parse_qs(urlparse(normalized_url).query)
+
+ # Check common filename parameters
+ for param in ['file', 'filename', 'name', 'fname', 'f']:
+ if param in params and params[param]:
+ potential_filename = params[param][0]
+ if potential_filename and '/' not in potential_filename and '\\' not in potential_filename:
+ filename = os.path.basename(potential_filename)
+ break
+
+ # If still no valid filename, use domain-based fallback
+ if not filename or filename == '/' or filename.endswith('.php') or filename.endswith('.aspx'):
+ domain = get_domain(real_url)
+ # Try to determine file type from content-type or extension hints in URL
+ ext = '.pdf' # Default
+ for common_ext in ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip']:
+ if common_ext in normalized_url.lower():
+ ext = common_ext
+ break
+ filename = f"file_from_{domain}{ext}"
+
+ # Get file size
+ size_str = await self.get_file_size(real_url)
+
+ # Add to found files
+ found_files.append({
+ 'url': real_url,
+ 'filename': filename,
+ 'size': size_str,
+ 'metadata': {},
+ 'download_url': normalized_url # Keep original URL for downloading
+ })
+
+ # For direct download links, we can return early
+ if len(found_files) > 0 and (normalized_url.startswith(url) or real_url.startswith(url)):
+ return found_files
+
+ # Special handling for educational exam sites
+ if "phsms.cloud.ncnu.edu.tw" in url or any(keyword in url.lower() for keyword in
+ ["exam", "test", "pastpaper", "eduexp"]):
+ logger.info("Using specialized handler for educational exam site")
+
+ # Get direct links to exam files
+ exam_links = await self.get_edu_exam_links(url)
+
+ for link in exam_links:
+ # Try to resolve any redirection
+ real_url = await self.extract_real_download_url(link)
+ filename = os.path.basename(urlparse(real_url).path)
+
+ # If filename is URL encoded (common with Chinese/international sites)
+ if '%' in filename:
+ try:
+ filename = unquote(filename)
+ except Exception:
+ pass
+
+ # If filename is empty or invalid, create a sensible one
+ if not filename or filename == '/':
+ domain = get_domain(real_url)
+ ext = '.pdf' # Default
+ for common_ext in ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip']:
+ if common_ext in link.lower():
+ ext = common_ext
+ break
+ filename = f"file_from_{domain}{ext}"
+
+ # Get file size
+ size_str = await self.get_file_size(real_url)
+
+ # Get metadata for PDFs
+ meta = {}
+ if real_url.lower().endswith('.pdf'):
+ try:
+ meta = await self.get_pdf_metadata(real_url)
+ except Exception:
+ pass
+
+ found_files.append({
+ 'url': real_url,
+ 'filename': filename,
+ 'size': size_str,
+ 'metadata': meta,
+ 'download_url': link # Store original link for downloading
+ })
+
+ # If we found exam files with the specialized method, return them
+ if found_files:
+ return found_files
+
+ # Standard extraction method if specialized method didn't find files
+ response = await self.page.goto(url, timeout=30000, wait_until='networkidle')
+ if not response:
+ return []
+
+ # Check for captchas
+ if not await self.handle_captcha(self.page):
+ logger.warning("Captcha detected, file extraction may be limited")
+
+ # Scroll through the page naturally to trigger lazy loading
+ await self.page.evaluate("""
+ (async () => {
+ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
+ const height = document.body.scrollHeight;
+ const scrollStep = Math.floor(window.innerHeight / 2);
+
+ for (let i = 0; i < height; i += scrollStep) {
+ window.scrollTo(0, i);
+ await delay(100);
+ }
+
+ window.scrollTo(0, 0);
+ })()
+ """)
+ await self.page.wait_for_timeout(1000)
+
+ final_url = self.page.url
+ if '.php' in final_url or 'download' in final_url:
+ real_url = await self.extract_real_download_url(final_url)
+ if real_url != final_url:
+ # Try to detect the filename from headers or URL
+ response = await self.page.request.head(real_url, timeout=15000)
+ filename = None
+
+ # Try to get from Content-Disposition header
+ content_disposition = response.headers.get('Content-Disposition', '')
+ if 'filename=' in content_disposition:
+ filename_match = re.search(r'filename=["\']?([^"\';\r\n]+)["\']?', content_disposition)
+ if filename_match:
+ filename = filename_match.group(1)
+
+ # If not found in headers, use URL basename
+ if not filename:
+ filename = os.path.basename(urlparse(real_url).path)
+ if not filename or filename == '/':
+ # Generate a name based on domain
+ domain = get_domain(real_url)
+ ext = '.pdf' # Default
+ for common_ext in ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.zip']:
+ if common_ext in real_url.lower():
+ ext = common_ext
+ break
+ filename = f"file_from_{domain}{ext}"
+
+ found_files.append({
+ 'url': real_url,
+ 'filename': filename,
+ 'size': await self.get_file_size(real_url),
+ 'metadata': {},
+ 'download_url': final_url # Keep original URL for downloading
+ })
+ return found_files
+
+ await self.page.wait_for_load_state('networkidle', timeout=30000)
+ content = await self.page.content()
+ soup = BeautifulSoup(content, 'html.parser')
+
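+ # Merge the default extension list with any user-supplied custom extensions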
+ default_exts = ['.pdf', '.docx', '.doc', '.zip', '.rar', '.mp3', '.mp4',
+ '.avi', '.mkv', '.png', '.jpg', '.jpeg', '.gif', '.xlsx',
+ '.pptx', '.odt', '.txt']
+ all_exts = set(default_exts + [ext.strip().lower() for ext in custom_ext_list if ext.strip()])
+
+ parsed_base = urlparse(final_url)
+ base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
+ path_base = os.path.dirname(parsed_base.path)
+
+ # Process all anchor tags
+ for a in soup.find_all('a', href=True):
+ href = a['href'].strip()
+
+ if '.php' in href.lower() or 'download' in href.lower() or 'action=' in href.lower():
+ full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base)
+ real_url = await self.extract_real_download_url(full_url)
+ if real_url and real_url != full_url:
+ found_files.append({
+ 'url': real_url,
+ 'filename': os.path.basename(urlparse(real_url).path) or 'downloaded_file',
+ 'size': await self.get_file_size(real_url),
+ 'metadata': {},
+ 'download_url': full_url # Original URL for download
+ })
+ continue
+
+ if any(href.lower().endswith(ext) for ext in all_exts):
+ file_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base)
+ size_str = await self.get_file_size(file_url)
+ meta = {}
+ if file_url.lower().endswith('.pdf'):
+ meta = await self.get_pdf_metadata(file_url)
+ found_files.append({
+ 'url': file_url,
+ 'filename': os.path.basename(file_url.split('?')[0]),
+ 'size': size_str,
+ 'metadata': meta,
+ 'download_url': file_url # Same as URL for direct links
+ })
+
+ # Handle Google Drive links
+ elif ("drive.google.com" in href) or ("docs.google.com" in href):
+ file_id = None
+ for pattern in [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']:
+ match = re.search(pattern, href)
+ if match:
+ file_id = match.group(1)
+ break
+ if file_id:
+ # Get file info to determine type and view-only status
+ file_type, is_view_only = await self.get_google_drive_file_info(file_id)
+
+ # Create a more informative filename based on info
+ filename = f"gdrive_{file_id}"
+ if file_type:
+ filename = f"{filename}.{file_type}"
+
+ size_str = "View-only" if is_view_only else await self.get_file_size(f"https://drive.google.com/uc?export=download&id={file_id}")
+
+ found_files.append({
+ 'url': href, # Use original URL
+ 'filename': filename,
+ 'size': size_str,
+ 'metadata': {
+ 'view_only': is_view_only,
+ 'file_type': file_type,
+ 'file_id': file_id
+ },
+ 'download_url': href # Same as URL for Google Drive
+ })
+
+ # Also check for files in other elements (iframe, embed, object, etc.)
+ other_elements = soup.find_all(['iframe', 'embed', 'object', 'source'])
+ for elem in other_elements:
+ src = elem.get('src') or elem.get('data')
+ if src and any(src.lower().endswith(ext) for ext in all_exts):
+ file_url = src if src.startswith('http') else self.resolve_relative_url(src, base_url, path_base)
+ size_str = await self.get_file_size(file_url)
+ meta = {}
+ if file_url.lower().endswith('.pdf'):
+ meta = await self.get_pdf_metadata(file_url)
+ found_files.append({
+ 'url': file_url,
+ 'filename': os.path.basename(file_url.split('?')[0]),
+ 'size': size_str,
+ 'metadata': meta,
+ 'download_url': file_url
+ })
+
+ # Check for file links in onclick attributes
+ onclick_elements = await self.page.query_selector_all('*[onclick*="download"], *[onclick*="file"]')
+ for elem in onclick_elements:
+ onclick = await elem.get_attribute('onclick') or ''
+ urls = re.findall(r'(https?://[^\'"]+)', onclick)
+ for url_match in urls:
+ if any(url_match.lower().endswith(ext) for ext in all_exts):
+ size_str = await self.get_file_size(url_match)
+ meta = {}
+ if url_match.lower().endswith('.pdf'):
+ meta = await self.get_pdf_metadata(url_match)
+ found_files.append({
+ 'url': url_match,
+ 'filename': os.path.basename(url_match.split('?')[0]),
+ 'size': size_str,
+ 'metadata': meta,
+ 'download_url': url_match
+ })
+
+ # Also check for data-src and data-url attributes (common in lazy-loaded sites)
+ data_elements = await self.page.query_selector_all('[data-src], [data-url], [data-href], [data-download]')
+ for elem in data_elements:
+ for attr in ['data-src', 'data-url', 'data-href', 'data-download']:
+ try:
+ value = await elem.get_attribute(attr)
+ if value and any(value.lower().endswith(ext) for ext in all_exts):
+ file_url = value if value.startswith('http') else self.resolve_relative_url(value, base_url, path_base)
+ found_files.append({
+ 'url': file_url,
+ 'filename': os.path.basename(file_url.split('?')[0]),
+ 'size': await self.get_file_size(file_url),
+ 'metadata': {},
+ 'download_url': file_url
+ })
+ except:
+ pass
+
+ # Check script tags for JSON data that might contain file URLs
+ script_elements = soup.find_all('script', type='application/json')
+ for script in script_elements:
+ try:
+ json_data = json.loads(script.string)
+ # Look for URL patterns in the JSON data
+ def extract_urls_from_json(obj, urls_found=None):
+ if urls_found is None:
+ urls_found = []
+ if isinstance(obj, dict):
+ for k, v in obj.items():
+ # Check if any key contains url-like terms
+ url_keys = ['url', 'href', 'src', 'link', 'file', 'path', 'download']
+ if any(url_key in k.lower() for url_key in url_keys) and isinstance(v, str) and v.startswith('http'):
+ urls_found.append(v)
+ else:
+ extract_urls_from_json(v, urls_found)
+ elif isinstance(obj, list):
+ for item in obj:
+ extract_urls_from_json(item, urls_found)
+ return urls_found
+
+ json_urls = extract_urls_from_json(json_data)
+ for json_url in json_urls:
+ if any(json_url.lower().endswith(ext) for ext in all_exts):
+ found_files.append({
+ 'url': json_url,
+ 'filename': os.path.basename(json_url.split('?')[0]),
+ 'size': await self.get_file_size(json_url),
+ 'metadata': {},
+ 'download_url': json_url
+ })
+ except:
+ pass
+
+ # Check for hidden download buttons or forms
+ hidden_elements = await self.page.evaluate("""
+ () => {
+ const results = [];
+
+ // Check for hidden forms with download actions
+ const forms = document.querySelectorAll('form[action*="download"], form[action*="file"]');
+ for (const form of forms) {
+ const action = form.getAttribute('action') || '';
+ results.push({
+ type: 'form',
+ action: action,
+ inputs: Array.from(form.querySelectorAll('input[name]')).map(input => {
+ return {name: input.name, value: input.value};
+ })
+ });
+ }
+
+ // Check for hidden download links/buttons
+ const hiddenLinks = Array.from(document.querySelectorAll('a[href]')).filter(a => {
+ const style = window.getComputedStyle(a);
+ return (style.display === 'none' || style.visibility === 'hidden') &&
+ (a.href.includes('download') || a.href.includes('file'));
+ });
+
+ for (const link of hiddenLinks) {
+ results.push({
+ type: 'link',
+ href: link.href,
+ text: link.innerText || link.textContent
+ });
+ }
+
+ return results;
+ }
+ """)
+
+ # Process hidden elements
+ for elem in hidden_elements:
+ if elem['type'] == 'link' and 'href' in elem:
+ href = elem['href']
+ if any(href.lower().endswith(ext) for ext in all_exts):
+ found_files.append({
+ 'url': href,
+ 'filename': os.path.basename(href.split('?')[0]),
+ 'size': await self.get_file_size(href),
+ 'metadata': {},
+ 'download_url': href
+ })
+
+ # Check for hidden links that might be in JavaScript, iframes, or dynamic content
+ hidden_links = await self.discover_hidden_links(self.page)
+ for link in hidden_links:
+ if any(link.lower().endswith(ext) for ext in all_exts):
+ found_files.append({
+ 'url': link,
+ 'filename': os.path.basename(link.split('?')[0]),
+ 'size': await self.get_file_size(link),
+ 'metadata': {},
+ 'download_url': link
+ })
+
+ # Deduplicate files by URL
+ seen_urls = set()
+ unique_files = []
+ for f in found_files:
+ if f['url'] not in seen_urls:
+ seen_urls.add(f['url'])
+ unique_files.append(f)
+
+ return unique_files
+ except Exception as e:
+ logger.error(f"Error extracting files from {url}: {e}")
+ traceback.print_exc()
+ return []
+
+ async def download_file(self, file_info, save_dir, referer):
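+ """Download a single file described by file_info into save_dir, returning the saved path or None on failure."""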
+ file_url = file_info.get('download_url', file_info['url']) # Use download_url if available
+ fname = file_info['filename']
+ path = os.path.join(save_dir, fname)
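+ # Avoid overwriting an existing file by appending a numeric suffix to the name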
+ base, ext = os.path.splitext(fname)
+ counter = 1
+ while os.path.exists(path):
+ path = os.path.join(save_dir, f"{base}_{counter}{ext}")
+ counter += 1
+ os.makedirs(save_dir, exist_ok=True)
+
+ # Check if we've already downloaded this file
+ if file_url in self.downloaded_files:
+ logger.info(f"File already downloaded: {file_url}")
+ return None
+
+ try:
+ # Special handling for Google Drive files
+ if "drive.google.com" in file_url or "docs.google.com" in file_url:
+ # Check if it's marked as view-only in metadata
+ is_view_only = file_info.get('metadata', {}).get('view_only', False)
+
+ # For view-only files, try our most robust approach first
+ if is_view_only:
+ logger.info(f"Attempting to download view-only file: {file_url}")
+ result_path = await self.force_download_viewonly(file_info, path)
+ if result_path:
+ self.downloaded_files.add(file_url)
+ return result_path
+
+ # If that failed, try the regular download approach
+ logger.info("Primary method failed, trying fallback methods")
+
+ # Try regular download methods
+ success = await self.download_from_google_drive(file_url, path)
+ if success:
+ self.downloaded_files.add(file_url)
+ return path
+
+ # If all methods failed for Google Drive, try one last approach
+ logger.warning("All standard methods failed, attempting force download")
+ result_path = await self.force_download_viewonly(file_info, path)
+ if result_path:
+ self.downloaded_files.add(file_url)
+ return result_path
+
+ # Special handling for complex download URLs
+ if 'Action=downloadfile' in file_url or 'fname=' in file_url:
+ logger.info(f"Using browser download approach for complex URL: {file_url}")
+
+ # For these URLs, we'll need to navigate to the page and handle the download
+ await self.rotate_proxy_if_needed()
+
+ async with self.context.new_page() as page:
+ # Set up download event listener
+ download_promise = page.wait_for_event("download")
+
+ # Navigate to the URL
+ await page.goto(file_url, timeout=60000)
+
+ # Wait for the download to start
+ try:
+ download = await download_promise
+ await download.save_as(path)
+
+ if os.path.exists(path) and os.path.getsize(path) > 0:
+ self.downloaded_files.add(file_url)
+ return path
+ except Exception as e:
+ logger.error(f"Browser download failed: {e}")
+
+ # If download didn't start automatically, try to find and click download buttons
+ download_buttons = await page.query_selector_all('input[type="submit"], button[type="submit"], a.btn, a[href*="download"]')
+ for button in download_buttons:
+ try:
+ await button.click()
+ try:
+ # Wait for a fresh download event triggered by this click
+ download = await page.wait_for_event("download", timeout=10000)
+ await download.save_as(path)
+ if os.path.exists(path) and os.path.getsize(path) > 0:
+ self.downloaded_files.add(file_url)
+ return path
+ except:
+ pass
+ except:
+ continue
+
+ # If browser approach failed, try direct request as last resort
+ logger.info("Browser approach failed, trying direct request")
+
+ # Rotate proxy if needed
+ await self.rotate_proxy_if_needed()
+
+ # Try with direct requests first (faster)
+ try:
+ headers = {
+ 'User-Agent': get_random_user_agent(),
+ 'Accept': '*/*',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'Referer': referer,
+ 'DNT': '1'
+ }
+
+ with requests.get(file_url, headers=headers, stream=True, timeout=30) as response:
+ if response.status_code == 200:
+ # Check content type to verify it's not HTML/error page
+ content_type = response.headers.get('Content-Type', '')
+ if 'text/html' in content_type and not file_url.endswith('.html'):
+ logger.warning(f"Received HTML instead of expected file: {file_url}")
+ else:
+ with open(path, 'wb') as f:
+ for chunk in response.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+
+ # Verify file was downloaded correctly
+ if os.path.exists(path) and os.path.getsize(path) > 0:
+ self.downloaded_files.add(file_url)
+ return path
+ except Exception as e:
+ logger.warning(f"Direct download failed: {e}, trying browser approach")
+
+ # Original code for non-Google Drive downloads using Playwright
+ async with self.context.new_page() as page:
+ headers = {
+ 'Accept': '*/*',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'Referer': referer
+ }
+
+ # Try to download with timeout protection
+ try:
+ response = await page.request.get(file_url, headers=headers, timeout=self.download_timeout * 1000)
+ if response.status == 200:
+ content = await response.body()
+ with open(path, 'wb') as f:
+ f.write(content)
+ if os.path.exists(path) and os.path.getsize(path) > 0:
+ self.downloaded_files.add(file_url)
+ return path
+ else:
+ logger.error(f"Download failed with status {response.status}: {file_url}")
+
+ # Try to extract error information
+ error_info = await response.text()
+ logger.debug(f"Error response: {error_info[:200]}...")
+
+ # Check if this might be a captcha or login issue
+ if detect_captcha(error_info):
+ logger.warning("Captcha detected during download")
+ # For HF Spaces, we can't implement browser-based captcha solving here
+ # Just log the issue for now
+ except PlaywrightTimeoutError:
+ logger.error(f"Download timed out after {self.download_timeout} seconds: {file_url}")
+
+ # Try an alternative approach - using the browser's download manager
+ try:
+ logger.info("Trying browser download manager approach")
+ download_promise = page.wait_for_event("download")
+ await page.goto(file_url, timeout=60000)
+
+ # Wait for download to start (with timeout)
+ download = await download_promise
+ await download.save_as(path)
+
+ if os.path.exists(path) and os.path.getsize(path) > 0:
+ self.downloaded_files.add(file_url)
+ return path
+ except Exception as e:
+ logger.error(f"Browser download manager approach failed: {e}")
+
+ return None
+ except Exception as e:
+ logger.error(f"Error downloading {file_url}: {e}")
+ return None
+
+ async def force_download_viewonly(self, file_info, save_path):
+ """Completely rewritten method to handle view-only files reliably, especially multi-page PDFs"""
+ try:
+ # Extract file ID
+ file_id = file_info.get('metadata', {}).get('file_id')
+ if not file_id:
+ url = file_info['url']
+ for pattern in [r'/file/d/([^/]+)', r'id=([^&]+)', r'open\?id=([^&]+)']:
+ match = re.search(pattern, url)
+ if match:
+ file_id = match.group(1)
+ break
+
+ if not file_id:
+ logger.error("Could not extract file ID")
+ return None
+
+ file_type = file_info.get('metadata', {}).get('file_type', 'pdf')
+ base, ext = os.path.splitext(save_path)
+ if not ext:
+ save_path = f"{base}.{file_type}"
+
+ logger.info(f"Starting reliable download of Google Drive file {file_id} (type: {file_type})")
+
+ # Create a dedicated browser instance with better resolution and stealth
+ browser_args = [
+ '--no-sandbox',
+ '--disable-setuid-sandbox',
+ '--disable-dev-shm-usage',
+ '--disable-web-security',
+ '--disable-features=IsolateOrigins,site-per-process',
+ '--disable-site-isolation-trials',
+ '--disable-blink-features=AutomationControlled' # Anti-detection
+ ]
+
+ browser = await self.playwright.chromium.launch(
+ headless=True,
+ args=browser_args
+ )
+
+ # Use higher resolution for better quality
+ context = await browser.new_context(
+ viewport={'width': 1600, 'height': 1200},
+ user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ device_scale_factor=2.0,
+ accept_downloads=True # Critical for the download workflow
+ )
+
+ # Add anti-detection script
+ await context.add_init_script("""
+ () => {
+ Object.defineProperty(navigator, 'webdriver', {
+ get: () => false,
+ });
+
+ // Change plugins
+ Object.defineProperty(navigator, 'plugins', {
+ get: () => [1, 2, 3, 4, 5].map(() => ({
+ lengthComputable: true,
+ loaded: 100,
+ total: 100
+ }))
+ });
+
+ // Handle languages
+ Object.defineProperty(navigator, 'languages', {
+ get: () => ['en-US', 'en', 'es']
+ });
+
+ // Modify hardware concurrency
+ Object.defineProperty(navigator, 'hardwareConcurrency', {
+ get: () => 4
+ });
+ }
+ """)
+
+ page = await context.new_page()
+
+ try:
+ # Go to the file view page
+ logger.info(f"Opening file view page: https://drive.google.com/file/d/{file_id}/view")
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=90000)
+ await page.wait_for_load_state('networkidle')
+
+ # Check for any barriers or permissions issues
+ content = await page.content()
+ if "the owner has not granted you permission to" in content:
+ logger.warning("Permission denied error detected")
+
+ # Randomized wait to appear more human-like
+ await page.wait_for_timeout(random.randint(3000, 7000))
+
+ # Create temp directory
+ temp_dir = tempfile.mkdtemp()
+
+ # Special handling for PDFs
+ if file_type.lower() == 'pdf':
+ # Use the improved scrolling and detection approach
+
+ # Perform some natural mouse movements and scrolling
+ await page.mouse.move(x=random.randint(200, 400), y=random.randint(200, 400))
+ await page.wait_for_timeout(random.randint(500, 1000))
+
+ # Estimate number of pages
+ estimated_pages = await page.evaluate("""
+ () => {
+ // Method 1: Check page counter text
+ const pageCounters = Array.from(document.querySelectorAll('*')).filter(el => {
+ const text = el.textContent || '';
+ return /\\d+\\s*\\/\\s*\\d+/.test(text);
+ });
+
+ if (pageCounters.length > 0) {
+ const text = pageCounters[0].textContent || '';
+ const match = text.match(/(\\d+)\\s*\\/\\s*(\\d+)/);
+ if (match && match[2]) return parseInt(match[2]);
+ }
+
+ // Method 2: Check actual page elements
+ const pageElements = document.querySelectorAll('.drive-viewer-paginated-page');
+ if (pageElements.length > 0) return pageElements.length;
+
+ // Method 3: Look for page thumbnails
+ const thumbnails = document.querySelectorAll('.drive-viewer-paginated-thumb');
+ if (thumbnails.length > 0) return thumbnails.length;
+
+ // Fallback: conservative guess
+ return 50;
+ }
+ """)
+
+ logger.info(f"Estimated {estimated_pages} pages in PDF")
+
+ # Initial scroll to trigger lazy loading
+ logger.info("Initial scroll to bottom to trigger lazy loading...")
+ await page.keyboard.press("End")
+ await page.wait_for_timeout(3000)
+
+ # Scroll page by page to ensure all pages are loaded
+ logger.info("Scrolling page by page...")
+ max_attempts = min(estimated_pages * 3, 300)
+ attempt = 0
+ prev_blob_count = 0
+
+ while attempt < max_attempts:
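+ # Count the page images rendered so far (Drive serves them as blob: URLs)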
+ blob_count = await page.evaluate("""
+ Array.from(document.getElementsByTagName('img'))
+ .filter(img => img.src.startsWith('blob:') && img.width > 100)
+ .length
+ """)
+
+ logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images")
+
+ if blob_count >= estimated_pages or (blob_count > 0 and blob_count == prev_blob_count and attempt > 10):
+ logger.info("All pages appear to be loaded.")
+ break
+
+ # Alternate between PageDown and End keys for more natural scrolling
+ if attempt % 3 == 0:
+ await page.keyboard.press("End")
+ else:
+ await page.keyboard.press("PageDown")
+
+ # Randomized wait times
+ await page.wait_for_timeout(random.randint(1500, 3000))
+
+ # Move mouse randomly to appear more human-like
+ if attempt % 4 == 0:
+ await page.mouse.move(x=random.randint(200, 800), y=random.randint(200, 800))
+
+ prev_blob_count = blob_count
+ attempt += 1
+
+ # Extra wait to ensure everything is loaded
+ await page.wait_for_timeout(5000)
+
+ # Set up download event listener for the PDF
+ download_promise = page.wait_for_event("download")
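+ # jsPDF's save() call in the injected script triggers a client-side download,
+ # which surfaces here as a "download" event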
+
+ # Use jsPDF to generate PDF from loaded pages
+ logger.info("Generating PDF from loaded pages...")
+ result = await page.evaluate(r'''
+ (function() {
+ return new Promise((resolve, reject) => {
+ let script = document.createElement("script");
+ script.onload = function () {
+ try {
+ let pdf = new jsPDF();
+ let imgs = Array.from(document.getElementsByTagName("img"))
+ .filter(img => img.src.startsWith('blob:') && img.width > 100)
+ .sort((a, b) => {
+ const rectA = a.getBoundingClientRect();
+ const rectB = b.getBoundingClientRect();
+ return rectA.top - rectB.top;
+ });
+
+ console.log(`Found ${imgs.length} valid page images to add to PDF`);
+
+ let added = 0;
+ for (let i = 0; i < imgs.length; i++) {
+ let img = imgs[i];
+ let canvas = document.createElement("canvas");
+ let ctx = canvas.getContext("2d");
+ canvas.width = img.width;
+ canvas.height = img.height;
+ ctx.drawImage(img, 0, 0, img.width, img.height);
+ let imgData = canvas.toDataURL("image/jpeg", 1.0);
+
+ if (added > 0) {
+ pdf.addPage();
+ }
+
+ pdf.addImage(imgData, 'JPEG', 0, 0);
+ added++;
+ }
+
+ pdf.save("download.pdf");
+ resolve({success: true, pageCount: added});
+ } catch (error) {
+ reject({success: false, error: error.toString()});
+ }
+ };
+
+ script.onerror = function() {
+ reject({success: false, error: "Failed to load jsPDF library"});
+ };
+
+ script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jspdf/1.5.3/jspdf.debug.js';
+ document.body.appendChild(script);
+ });
+ })();
+ ''')
+
+ if not result.get('success', False):
+ logger.error(f"Error in PDF generation: {result.get('error', 'Unknown error')}")
+
+ # Try fallback approach - screenshot method
+ logger.info("Trying fallback screenshot method...")
+
+ # Navigate back to the first page
+ await page.evaluate("""
+ () => {
+ // Find and click the "first page" button if available
+ const buttons = Array.from(document.querySelectorAll('button'));
+ const firstPageBtn = buttons.find(b => b.getAttribute('aria-label')?.includes('First page'));
+ if (firstPageBtn) firstPageBtn.click();
+ }
+ """)
+ await page.wait_for_timeout(1000)
+
+ # Create a PDF by taking screenshots of each page
+ screenshots = []
+ current_page = 1
+ max_pages = estimated_pages
+
+ # Create a PDF using the reportlab package
+ while current_page <= max_pages:
+ screenshot_path = os.path.join(temp_dir, f"page_{current_page}.png")
+
+ # Try to find the current page element
+ page_elem = await page.query_selector('.drive-viewer-paginated-page')
+ if page_elem:
+ await page_elem.screenshot(path=screenshot_path)
+ else:
+ # Fallback to full page screenshot
+ await page.screenshot(path=screenshot_path)
+
+ screenshots.append(screenshot_path)
+
+ # Try to navigate to next page
+ next_btn = await page.query_selector('button[aria-label="Next page"]')
+ if next_btn:
+ is_disabled = await next_btn.get_attribute('disabled')
+ if is_disabled:
+ logger.info(f"Reached end of document at page {current_page}")
+ break
+
+ await next_btn.click()
+ await page.wait_for_timeout(1000)
+ current_page += 1
+ else:
+ break
+
+ # Create PDF from screenshots
+ if screenshots:
+ # Build the PDF page by page so each screenshot keeps its own dimensions
+ c = canvas.Canvas(save_path)
+ for screenshot in screenshots:
+ img = Image.open(screenshot)
+ width, height = img.size
+ c.setPageSize((width, height))
+ c.drawImage(screenshot, 0, 0, width, height)
+ c.showPage()
+ c.save()
+
+ # Clean up screenshots and close the dedicated browser before returning
+ for screenshot in screenshots:
+ os.remove(screenshot)
+
+ await browser.close()
+ return save_path
+
+ await browser.close()
+ return None
+
+ logger.info(f"PDF generation triggered with {result.get('pageCount')} pages")
+
+ # Wait for the download and save it
+ download = await download_promise
+ await download.save_as(save_path)
+
+ # Clean up temp directory
+ try:
+ os.rmdir(temp_dir)
+ except:
+ pass
+
+ else:
+ # Non-PDF file handling
+ screenshot_path = os.path.join(temp_dir, "file.png")
+ await page.screenshot(path=screenshot_path)
+
+ if file_type.lower() in ['doc', 'docx', 'xlsx', 'pptx']:
+ # For document types, try to export directly
+ await self.export_google_doc(file_id, file_type, save_path)
+ else:
+ # For other types, save the screenshot with appropriate extension
+ shutil.copy(screenshot_path, save_path)
+
+ os.remove(screenshot_path)
+
+ # Close browser
+ await browser.close()
+
+ # Verify file exists and has content
+ if os.path.exists(save_path) and os.path.getsize(save_path) > 1000:
+ logger.info(f"Successfully downloaded file to {save_path}")
+ return save_path
+ else:
+ logger.error(f"Generated file is too small or missing: {save_path}")
+ return None
+
+ except Exception as e:
+ logger.error(f"Error during force download: {e}")
+ if browser:
+ await browser.close()
+ return None
+
+ except Exception as e:
+ logger.error(f"Force download preparation failed: {e}")
+ return None
+
+ async def download_from_google_drive(self, url, save_path):
+ """Enhanced method to download from Google Drive with multiple fallback approaches"""
+ # Extract the file ID from different URL formats
+ file_id = None
+ url_patterns = [
+ r'drive\.google\.com/file/d/([^/]+)',
+ r'drive\.google\.com/open\?id=([^&]+)',
+ r'docs\.google\.com/\w+/d/([^/]+)',
+ r'id=([^&]+)',
+ r'drive\.google\.com/uc\?id=([^&]+)',
+ ]
+
+ for pattern in url_patterns:
+ match = re.search(pattern, url)
+ if match:
+ file_id = match.group(1)
+ break
+
+ if not file_id:
+ logger.error(f"Could not extract file ID from URL: {url}")
+ return False
+
+ # Determine file type first (important for handling different file types)
+ file_type, is_view_only = await self.get_google_drive_file_info(file_id)
+ logger.info(f"Google Drive file type: {file_type}, View-only: {is_view_only}")
+
+ base, ext = os.path.splitext(save_path)
+ if not ext and file_type:
+ # Add the correct extension if missing
+ save_path = f"{base}.{file_type}"
+
+ # For view-only files, use specialized approaches
+ if is_view_only:
+ # Approach 1: For PDFs, use the JS method
+ if file_type == 'pdf':
+ success = await self.download_viewonly_pdf_with_js(file_id, save_path)
+ if success:
+ return True
+
+ # Approach 2: For Google Docs, Sheets, etc., use export API
+ if file_type in ['doc', 'docx', 'sheet', 'ppt', 'xlsx', 'pptx']:
+ success = await self.export_google_doc(file_id, file_type, save_path)
+ if success:
+ return True
+
+ # Approach 3: Try the direct screenshot method for any view-only file
+ success = await self.download_viewonly_with_screenshots(file_id, save_path, file_type)
+ if success:
+ return True
+
+ # Try standard approaches for non-view-only files
+ try:
+ # Try direct download link first (fastest)
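+ # confirm=t pre-acknowledges Drive's "can't scan for viruses" warning for larger files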
+ direct_url = f"https://drive.google.com/uc?id={file_id}&export=download&confirm=t"
+
+ # Add anti-bot headers
+ headers = {
+ 'User-Agent': get_random_user_agent(),
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+ 'Accept-Language': 'en-US,en;q=0.9',
+ 'Referer': 'https://drive.google.com/',
+ 'DNT': '1'
+ }
+
+ # Try with streaming to handle larger files
+ with requests.get(direct_url, headers=headers, stream=True, timeout=60) as r:
+ if r.status_code == 200:
+ # Check if we got HTML instead of the file
+ content_type = r.headers.get('Content-Type', '')
+ if 'text/html' in content_type and not save_path.endswith('.html'):
+ logger.warning("Received HTML instead of file, trying with session cookies")
+ else:
+ # Looks like we got the actual file
+ with open(save_path, 'wb') as f:
+ for chunk in r.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+
+ # Verify file exists and has content
+ if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
+ logger.info("Direct download successful")
+ return True
+
+ # Try with requests and session cookies
+ session = requests.Session()
+ session.headers.update({'User-Agent': get_random_user_agent()})
+
+ # Visit the page first to get cookies
+ session.get(f"https://drive.google.com/file/d/{file_id}/view", timeout=30)
+
+ # Try download
+ url = f"https://drive.google.com/uc?id={file_id}&export=download"
+ response = session.get(url, stream=True, timeout=30)
+
+ # Check for confirmation token
+ confirmation_token = None
+ for k, v in response.cookies.items():
+ if k.startswith('download_warning'):
+ confirmation_token = v
+ break
+
+ # Use confirmation token if found
+ if confirmation_token:
+ url = f"{url}&confirm={confirmation_token}"
+ response = session.get(url, stream=True, timeout=60)
+
+ # Check if we're getting HTML instead of the file
+ content_type = response.headers.get('Content-Type', '')
+ if 'text/html' in content_type:
+ logger.warning("Received HTML instead of file - likely download restriction")
+ else:
+ with open(save_path, 'wb') as f:
+ for chunk in response.iter_content(chunk_size=1024*1024):
+ if chunk:
+ f.write(chunk)
+
+ if os.path.exists(save_path) and os.path.getsize(save_path) > 0:
+ with open(save_path, 'rb') as f:
+ content = f.read(100)
+ # Reject the download if what we saved is actually an HTML error/interstitial page
+ if b'<html' not in content.lower():
+ logger.info("Successfully downloaded with requests session")
+ return True
+ except Exception as e:
+ logger.warning(f"Requests session download failed: {e}")
+
+ # Try browser-based approach as last resort
+ try:
+ async with self.context.new_page() as page:
+ # Visit the file view page first to get cookies
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=30000)
+ await page.wait_for_timeout(3000)
+
+ # Set up download event listener
+ download_promise = page.wait_for_event("download")
+
+ # Try to trigger the download button click
+ download_button = await page.query_selector('button[aria-label*="Download"], [data-tooltip*="Download"]')
+ if download_button:
+ await download_button.click()
+
+ # Wait for download to start
+ try:
+ download = await download_promise
+ await download.save_as(save_path)
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 0
+ except Exception as e:
+ logger.error(f"Error during browser download: {e}")
+ return False
+ else:
+ # Try the export download URL
+ await page.goto(f"https://drive.google.com/uc?id={file_id}&export=download", timeout=30000)
+
+ # Look for and click any download buttons or links
+ download_elements = await page.query_selector_all('a[href*="download"], a[href*="export"], form[action*="download"], button:has-text("Download")')
+ for elem in download_elements:
+ try:
+ await elem.click()
+ # Wait a bit to see if download starts
+ try:
+ download = await download_promise
+ await download.save_as(save_path)
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 0
+ except:
+ pass
+ except:
+ continue
+ except Exception as e:
+ logger.error(f"Browser-based download attempt failed: {e}")
+
+ logger.warning("All standard download methods failed")
+ return False
+
+ async def download_viewonly_pdf_with_js(self, file_id, save_path):
+ """Download view-only PDF using the enhanced blob image caching technique"""
+ try:
+ # Create a dedicated browser instance with stealth capabilities
+ browser_args = [
+ '--no-sandbox',
+ '--disable-setuid-sandbox',
+ '--disable-dev-shm-usage',
+ '--disable-web-security',
+ '--disable-blink-features=AutomationControlled' # Anti-detection
+ ]
+
+ browser = await self.playwright.chromium.launch(
+ headless=True,
+ args=browser_args
+ )
+
+ # Setup stealth context
+ context = await browser.new_context(
+ viewport={'width': 1600, 'height': 1200},
+ user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ accept_downloads=True, # Critical for handling the download event
+ ignore_https_errors=True
+ )
+
+ # Add stealth script
+ await context.add_init_script("""
+ () => {
+ Object.defineProperty(navigator, 'webdriver', {
+ get: () => false,
+ });
+
+ // Change plugins and languages to appear more human
+ Object.defineProperty(navigator, 'plugins', {
+ get: () => [1, 2, 3, 4, 5].map(() => ({
+ lengthComputable: true,
+ loaded: 100,
+ total: 100
+ }))
+ });
+
+ Object.defineProperty(navigator, 'languages', {
+ get: () => ['en-US', 'en', 'es']
+ });
+ }
+ """)
+
+ page = await context.new_page()
+
+ try:
+ # Step 1: Navigate to the file with human-like behavior
+ logger.info(f"Opening view-only PDF: https://drive.google.com/file/d/{file_id}/view")
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=60000)
+ await page.wait_for_load_state('networkidle')
+
+ # Perform human-like interactions
+ await page.mouse.move(x=random.randint(100, 500), y=random.randint(100, 300))
+ await page.wait_for_timeout(random.randint(2000, 5000))
+
+ # Step 2: Estimate the number of pages
+ estimated_pages = await page.evaluate("""
+ () => {
+ // Look for page counter in the interface
+ const pageCounters = Array.from(document.querySelectorAll('*')).filter(el => {
+ const text = el.textContent || '';
+ return /\\d+\\s*\\/\\s*\\d+/.test(text);
+ });
+
+ if (pageCounters.length > 0) {
+ const text = pageCounters[0].textContent || '';
+ const match = text.match(/(\\d+)\\s*\\/\\s*(\\d+)/);
+ if (match && match[2]) return parseInt(match[2]);
+ }
+
+ // If we can't find a counter, check actual pages
+ const pages = document.querySelectorAll('.drive-viewer-paginated-page');
+ if (pages.length > 0) return pages.length;
+
+ // Default to a reasonable number if we can't determine
+ return 50;
+ }
+ """)
+
+ logger.info(f"Estimated number of pages: {estimated_pages}")
+
+ # Step 3: Initial scroll to trigger loading
+ logger.info("Initial scroll to bottom to trigger lazy loading...")
+ await page.keyboard.press("End")
+ await page.wait_for_timeout(3000)
+
+ # Step 4: Wait for all pages to load with better feedback and randomization
+ logger.info("Scrolling through document to load all pages...")
+ max_attempts = min(estimated_pages * 3, 300)
+ attempt = 0
+ prev_blob_count = 0
+ consecutive_same_count = 0
+
+ while attempt < max_attempts:
+ # Count blob images (which are the PDF pages)
+ blob_count = await page.evaluate("""
+ Array.from(document.getElementsByTagName('img'))
+ .filter(img => img.src.startsWith('blob:') && img.width > 100)
+ .length
+ """)
+
+ logger.info(f"Attempt {attempt+1}: Found {blob_count} blob images")
+
+ # Check if we've loaded all pages or if we're stuck
+ if blob_count >= estimated_pages:
+ logger.info(f"All {estimated_pages} pages appear to be loaded.")
+ break
+
+ if blob_count == prev_blob_count:
+ consecutive_same_count += 1
+ if consecutive_same_count >= 5 and blob_count > 0:
+ logger.info(f"No new pages loaded after {consecutive_same_count} attempts. Assuming all available pages ({blob_count}) are loaded.")
+ break
+ else:
+ consecutive_same_count = 0
+
+ # Mix up the scrolling approach for more human-like behavior
+ scroll_action = random.choice(["PageDown", "End", "ArrowDown", "mouse"])
+
+ if scroll_action == "PageDown":
+ await page.keyboard.press("PageDown")
+ elif scroll_action == "End":
+ await page.keyboard.press("End")
+ elif scroll_action == "ArrowDown":
+ # Press arrow down multiple times
+ for _ in range(random.randint(5, 15)):
+ await page.keyboard.press("ArrowDown")
+ await page.wait_for_timeout(random.randint(50, 150))
+ else: # mouse
+ # Scroll using mouse wheel
+ current_y = random.randint(300, 700)
+ await page.mouse.move(x=random.randint(300, 800), y=current_y)
+ await page.mouse.wheel(0, random.randint(300, 800))
+
+ # Random wait between scrolls
+ await page.wait_for_timeout(random.randint(1000, 3000))
+
+ prev_blob_count = blob_count
+ attempt += 1
+
+ # Extra wait to ensure everything is fully loaded
+ await page.wait_for_timeout(5000)
+
+ # Step 5: Set up a download event listener
+ download_promise = page.wait_for_event("download")
+
+ # Step 6: Inject the jsPDF script to generate PDF
+ logger.info("Generating PDF from loaded pages...")
+ result = await page.evaluate(r'''
+ (function() {
+ return new Promise((resolve, reject) => {
+ let script = document.createElement("script");
+ script.onload = function () {
+ try {
+ let pdf = new jsPDF();
+ let imgs = document.getElementsByTagName("img");
+ let validImages = [];
+
+ // First collect all valid blob images
+ for (let i = 0; i < imgs.length; i++) {
+ let img = imgs[i];
+ if (!/^blob:/.test(img.src)) continue;
+ if (img.width < 100 || img.height < 100) continue;
+ validImages.push(img);
+ }
+
+ // Sort by position in the document
+ validImages.sort((a, b) => {
+ const rectA = a.getBoundingClientRect();
+ const rectB = b.getBoundingClientRect();
+ return rectA.top - rectB.top;
+ });
+
+ console.log(`Found ${validImages.length} valid page images to add to PDF`);
+
+ let added = 0;
+ // Process each image as a page
+ for (let i = 0; i < validImages.length; i++) {
+ let img = validImages[i];
+ let canvas = document.createElement("canvas");
+ let ctx = canvas.getContext("2d");
+ canvas.width = img.width;
+ canvas.height = img.height;
+ ctx.drawImage(img, 0, 0, img.width, img.height);
+ let imgData = canvas.toDataURL("image/jpeg", 1.0);
+
+ if (added > 0) {
+ pdf.addPage();
+ }
+
+ pdf.addImage(imgData, 'JPEG', 0, 0);
+ added++;
+ }
+
+ pdf.save("download.pdf");
+ resolve({success: true, pageCount: added});
+ } catch (error) {
+ reject({success: false, error: error.toString()});
+ }
+ };
+
+ script.onerror = function() {
+ reject({success: false, error: "Failed to load jsPDF library"});
+ };
+
+ // Use a reliable CDN
+ script.src = 'https://cdnjs.cloudflare.com/ajax/libs/jspdf/1.5.3/jspdf.debug.js';
+ document.body.appendChild(script);
+ });
+ })();
+ ''')
+
+ if not result.get('success'):
+ logger.error(f"Error in PDF generation: {result.get('error')}")
+ return False
+
+ logger.info(f"PDF generation triggered with {result.get('pageCount')} pages")
+
+ # Step 7: Wait for the download to complete and save the file
+ download = await download_promise
+
+ # Step 8: Save the downloaded file to the specified path
+ await download.save_as(save_path)
+ logger.info(f"Successfully saved PDF to {save_path}")
+
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 1000
+
+ finally:
+ await browser.close()
+
+ except Exception as e:
+ logger.error(f"Error in viewonly PDF download process: {e}")
+ return False
+
+ async def download_viewonly_with_screenshots(self, file_id, save_path, file_type):
+ """Download any view-only file by taking screenshots"""
+ try:
+ async with self.context.new_page() as page:
+ # Set high-resolution viewport
+ await page.set_viewport_size({"width": 1600, "height": 1200})
+
+ # Navigate to the file
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", wait_until='networkidle', timeout=60000)
+
+ # Make sure the file is loaded
+ await page.wait_for_load_state('networkidle')
+ await page.wait_for_timeout(3000) # Extra time for rendering
+
+ # Create directory for screenshots if multiple pages
+ base_dir = os.path.dirname(save_path)
+ base_name = os.path.splitext(os.path.basename(save_path))[0]
+ screenshots_dir = os.path.join(base_dir, f"{base_name}_screenshots")
+ os.makedirs(screenshots_dir, exist_ok=True)
+
+ # Check if it's a multi-page document
+ is_multi_page = await page.evaluate("""
+ () => {
+ const pages = document.querySelectorAll('.drive-viewer-paginated-page');
+ return pages.length > 1;
+ }
+ """)
+
+ if is_multi_page and file_type == 'pdf':
+ # For multi-page PDFs, take screenshots of each page
+ page_count = await page.evaluate("""
+ async () => {
+ const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
+ const pages = document.querySelectorAll('.drive-viewer-paginated-page');
+ const container = document.querySelector('.drive-viewer-paginated-scrollable');
+
+ if (!container || pages.length === 0) return 0;
+
+ // Scroll through to make sure all pages are loaded
+ const scrollHeight = container.scrollHeight;
+ const viewportHeight = container.clientHeight;
+ const scrollStep = viewportHeight;
+
+ for (let scrollPos = 0; scrollPos < scrollHeight; scrollPos += scrollStep) {
+ container.scrollTo(0, scrollPos);
+ await delay(300);
+ }
+
+ // Scroll back to top
+ container.scrollTo(0, 0);
+ await delay(300);
+
+ return pages.length;
+ }
+ """)
+
+ logger.info(f"Found {page_count} pages in document")
+
+ # Take screenshots of each page
+ screenshots = []
+ for i in range(page_count):
+ # Scroll to page
+ await page.evaluate(f"""
+ async () => {{
+ const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
+ const pages = document.querySelectorAll('.drive-viewer-paginated-page');
+ if (pages.length <= {i}) return false;
+
+ pages[{i}].scrollIntoView();
+ await delay(500);
+ return true;
+ }}
+ """)
+
+ # Take screenshot
+ screenshot_path = os.path.join(screenshots_dir, f"page_{i+1}.png")
+ await page.screenshot(path=screenshot_path, clip={
+ 'x': 0,
+ 'y': 0,
+ 'width': 1600,
+ 'height': 1200
+ })
+ screenshots.append(screenshot_path)
+
+ # Combine screenshots into PDF
+ c = canvas.Canvas(save_path)
+ for screenshot in screenshots:
+ img = Image.open(screenshot)
+ width, height = img.size
+
+ # Add page to PDF
+ c.setPageSize((width, height))
+ c.drawImage(screenshot, 0, 0, width, height)
+ c.showPage()
+
+ c.save()
+
+ # Clean up screenshots
+ for screenshot in screenshots:
+ os.remove(screenshot)
+ os.rmdir(screenshots_dir)
+
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 0
+ else:
+ # For single-page or non-PDF files, just take one screenshot
+ screenshot_path = os.path.join(screenshots_dir, "screenshot.png")
+ await page.screenshot(path=screenshot_path, full_page=True)
+
+ # Convert to requested format if needed
+ if file_type == 'pdf':
+ # Create PDF from screenshot
+ img = Image.open(screenshot_path)
+ width, height = img.size
+
+ c = canvas.Canvas(save_path, pagesize=(width, height))
+ c.drawImage(screenshot_path, 0, 0, width, height)
+ c.save()
+ else:
+ # Just copy the screenshot to the destination with proper extension
+ shutil.copy(screenshot_path, save_path)
+
+ # Clean up
+ os.remove(screenshot_path)
+ os.rmdir(screenshots_dir)
+
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 0
+
+ except Exception as e:
+ logger.error(f"Error taking screenshots: {e}")
+ return False
+
+ async def export_google_doc(self, file_id, file_type, save_path):
+ """Export Google Docs/Sheets/Slides to downloadable formats"""
+ try:
+ # Map file types to export formats
+ export_formats = {
+ 'doc': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', # docx
+ 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+ 'sheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # xlsx
+ 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+ 'ppt': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', # pptx
+ 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+ 'pdf': 'application/pdf',
+ }
+
+ export_format = export_formats.get(file_type, 'application/pdf')
+ # Google Docs only accepts "docx" (not "doc") as an export format parameter
+ doc_format = 'docx' if file_type in ('doc', 'docx') else file_type
+ export_url = f"https://docs.google.com/document/d/{file_id}/export?format={doc_format}"
+
+ if 'sheet' in file_type or 'xlsx' in file_type:
+ export_url = f"https://docs.google.com/spreadsheets/d/{file_id}/export?format=xlsx"
+ elif 'ppt' in file_type or 'presentation' in file_type:
+ export_url = f"https://docs.google.com/presentation/d/{file_id}/export/pptx"
+ elif file_type == 'pdf':
+ export_url = f"https://docs.google.com/document/d/{file_id}/export?format=pdf"
+
+ async with self.context.new_page() as page:
+ # Get cookies from the main view page first
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", wait_until='networkidle')
+
+ # Fetch the export through the page's request context so the Drive cookies are
+ # reused and the navigation is not aborted by the attachment download
+ response = await page.request.get(export_url)
+
+ if response.status == 200:
+ content = await response.body()
+ with open(save_path, 'wb') as f:
+ f.write(content)
+ return os.path.exists(save_path) and os.path.getsize(save_path) > 0
+ else:
+ logger.warning(f"Export failed with status {response.status}")
+ return False
+
+ except Exception as e:
+ logger.error(f"Error exporting Google Doc: {e}")
+ return False
+
+ async def get_google_drive_file_info(self, file_id):
+ """Get file type and view-only status from Google Drive"""
+ file_type = None
+ is_view_only = False
+
+ try:
+ async with self.context.new_page() as page:
+ await page.goto(f"https://drive.google.com/file/d/{file_id}/view", timeout=30000)
+
+ # Check if view-only
+ view_only_text = await page.query_selector('text="the owner has not granted you permission to download this file"')
+ is_view_only = view_only_text is not None
+
+ # Check for Google Docs viewer
+ gdocs_viewer = await page.query_selector('iframe[src*="docs.google.com/document"]')
+ gsheets_viewer = await page.query_selector('iframe[src*="docs.google.com/spreadsheets"]')
+ gslides_viewer = await page.query_selector('iframe[src*="docs.google.com/presentation"]')
+
+ if gdocs_viewer:
+ file_type = 'docx'
+ elif gsheets_viewer:
+ file_type = 'xlsx'
+ elif gslides_viewer:
+ file_type = 'pptx'
+ else:
+ # Check for PDF viewer
+ pdf_viewer = await page.query_selector('embed[type="application/pdf"]')
+ if pdf_viewer:
+ file_type = 'pdf'
+ else:
+ # Check for image viewer
+ img_viewer = await page.query_selector('img[src*="googleusercontent.com"]')
+ if img_viewer:
+ # Get image type from src
+ img_src = await img_viewer.get_attribute('src')
+ if 'jpg' in img_src or 'jpeg' in img_src:
+ file_type = 'jpg'
+ elif 'png' in img_src:
+ file_type = 'png'
+ else:
+ file_type = 'jpg' # Default to jpg
+ else:
+ # Generic file type fallback
+ file_type = 'pdf' # Default to PDF
+
+ # If still no type, check filename
+ if not file_type:
+ title_element = await page.query_selector('div[role="heading"]')
+ if title_element:
+ title = await title_element.text_content()
+ if title:
+ ext_match = re.search(r'\.([a-zA-Z0-9]+)$', title)
+ if ext_match:
+ file_type = ext_match.group(1).lower()
+
+ except Exception as e:
+ logger.error(f"Error getting Google Drive file info: {e}")
+ file_type = 'pdf' # Default to PDF if we can't determine
+
+ return file_type, is_view_only
+
+ # IMPROVED: Enhanced sublink extraction method
+ async def get_sublinks(self, url, limit=10000):
+ """Enhanced method to extract sublinks from a website, including dynamic content and interactive elements"""
+ links = set()
+ try:
+ logger.info(f"Fetching sublinks from: {url}")
+
+ # Check if this is a direct download link
+ if is_download_link(url):
+ logger.info(f"URL appears to be a direct download link: {url}")
+ links.add(url)
+ return list(links)[:limit]
+
+ # Skip if we've already visited this URL
+ normalized_url = normalize_download_url(url)
+ if normalized_url in self.visited_urls:
+ logger.info(f"Skipping already visited URL for sublink extraction: {normalized_url}")
+ return list(links)[:limit]
+
+ # Add to visited URLs
+ self.visited_urls.add(normalized_url)
+
+ # Special handling for educational sites like phsms.cloud.ncnu.edu.tw
+ if "phsms.cloud.ncnu.edu.tw" in url or any(keyword in url.lower() for keyword in
+ ["exam", "test", "pastpaper", "eduexp"]):
+ logger.info("Using specialized exam site sublink extraction")
+ edu_links = await self.get_edu_exam_links(url)
+ for link in edu_links:
+ links.add(link)
+
+ # If we found a good number of links with the specialized method, return them
+ if len(links) > 5:
+ logger.info(f"Found {len(links)} sublinks with specialized method")
+ return list(links)[:limit]
+
+ # Rotate proxy if needed
+ await self.rotate_proxy_if_needed()
+
+ # Standard sublink extraction for all sites
+ try:
+ await self.page.goto(url, timeout=30000, wait_until='networkidle')
+ except Exception as e:
+ logger.warning(f"Error navigating to URL for sublink extraction: {e}")
+ # Continue with what we have, we'll try to extract links anyway
+
+ # Get base URL for resolving relative links
+ parsed_base = urlparse(url)
+ base_url = f"{parsed_base.scheme}://{parsed_base.netloc}"
+ path_base = os.path.dirname(parsed_base.path)
+
+ # Perform initial scrolling to load lazy content
+ await self.page.evaluate("""
+ async () => {
+ const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
+ const height = document.body.scrollHeight;
+ const step = Math.floor(window.innerHeight / 2);
+
+ for (let i = 0; i < height; i += step) {
+ window.scrollTo(0, i);
+ await delay(150);
+ }
+
+ window.scrollTo(0, 0);
+ }
+ """)
+ await self.page.wait_for_timeout(1000)
+
+ # Check if page has ASP.NET elements which might need special handling
+ is_aspnet = await self.page.evaluate('''
+ () => {
+ return document.querySelector('form#aspnetForm') !== null ||
+ document.querySelector('input[name="__VIEWSTATE"]') !== null;
+ }
+ ''')
+
+ if is_aspnet:
+ logger.info("Detected ASP.NET page, using enhanced extraction method")
+
+ # Try to interact with ASP.NET controls that might reveal more links
+ # Look for dropdowns, buttons, and grid elements
+ dropdowns = await self.page.query_selector_all('select')
+ buttons = await self.page.query_selector_all('input[type="button"], input[type="submit"], button')
+
+ # Try interacting with dropdowns first
+ for dropdown in dropdowns:
+ try:
+ # Get all options
+ options = await self.page.evaluate('''
+ (dropdown) => {
+ return Array.from(dropdown.options).map(o => o.value);
+ }
+ ''', dropdown)
+
+ # Try selecting each option
+ for option in options:
+ if option:
+ await dropdown.select_option(value=option)
+ await self.page.wait_for_timeout(1000)
+ await self.page.wait_for_load_state('networkidle', timeout=5000)
+
+ # Extract any new links that appeared
+ await self.extract_all_link_types(links, base_url, path_base)
+ except Exception as e:
+ logger.warning(f"Error interacting with dropdown: {e}")
+
+ # Try clicking buttons (but avoid dangerous ones like "delete")
+ safe_buttons = []
+ for button in buttons:
+ button_text = await button.text_content() or ""
+ button_value = await button.get_attribute("value") or ""
+ button_id = await button.get_attribute("id") or ""
+ combined_text = (button_text + button_value + button_id).lower()
+
+ # Skip potentially destructive buttons
+ if any(keyword in combined_text for keyword in ["delete", "remove", "cancel", "close", "logout"]):
+ continue
+
+ # Prioritize buttons that might show more content
+ if any(keyword in combined_text for keyword in ["view", "show", "search", "browse", "list", "go", "display"]):
+ safe_buttons.append(button)
+
+ # Click the safe buttons
+ for button in safe_buttons[:5]: # Limit to first 5 to avoid too many clicks
+ try:
+ await button.click()
+ await self.page.wait_for_timeout(1000)
+ await self.page.wait_for_load_state('networkidle', timeout=5000)
+
+ # Extract any new links that appeared
+ await self.extract_all_link_types(links, base_url, path_base)
+ except Exception as e:
+ logger.warning(f"Error clicking button: {e}")
+
+ # Extract links from the initial page state
+ await self.extract_all_link_types(links, base_url, path_base)
+
+ # Look specifically for links inside grid/table views which are common in ASP.NET applications
+ grid_cells = await self.page.query_selector_all('td a, tr.rgRow a, tr.rgAltRow a, .grid a, .table a')
+ for cell in grid_cells:
+ try:
+ href = await cell.get_attribute('href')
+ if href:
+ full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base)
+ links.add(full_url)
+ except Exception as e:
+ logger.warning(f"Error extracting grid link: {e}")
+
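+ # __doPostBack(eventTarget, eventArgument) is the WebForms helper that submits the page's
+ # single form; controls wired through it carry no usable href, so we trigger the postback
+ # from JavaScript and re-scan the DOM for links afterwards.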
+ # Extract links from onclick attributes and javascript:__doPostBack calls
+ postback_links = await self.page.evaluate('''
+ () => {
+ const results = [];
+ // Find elements with onclick containing __doPostBack
+ const elements = document.querySelectorAll('*[onclick*="__doPostBack"]');
+ for (const el of elements) {
+ // Extract the postback target
+ const onclick = el.getAttribute('onclick') || '';
+ const match = onclick.match(/__doPostBack\\('([^']+)'.*?\\)/);
+ if (match && match[1]) {
+ // Get the visible text to use as description
+ const text = el.innerText || el.textContent || 'Link';
+ results.push({
+ id: match[1],
+ text: text.trim()
+ });
+ }
+ }
+ return results;
+ }
+ ''')
+
+ # Try interacting with some of the postback links
+ for postback in postback_links[:10]: # Limit to first 10 to avoid too many interactions
+ try:
+ logger.info(f"Trying postback link: {postback['text']} ({postback['id']})")
+ await self.page.evaluate(f'''
+ () => {{
+ if (typeof __doPostBack === 'function') {{
+ __doPostBack('{postback["id"]}', '');
+ }}
+ }}
+ ''')
+ await self.page.wait_for_timeout(1500)
+ await self.page.wait_for_load_state('networkidle', timeout=5000)
+
+ # Extract any new links that appeared
+ await self.extract_all_link_types(links, base_url, path_base)
+ except Exception as e:
+ logger.warning(f"Error with postback: {e}")
+
+ # Look for pagination controls and try to navigate through them
+ pagination_elements = await self.page.query_selector_all(
+ 'a[href*="page"], .pagination a, .pager a, [onclick*="page"], [aria-label*="Next"]'
+ )
+
+ # Try clicking on pagination links (limit to max 5 pages to avoid infinite loops)
+ for i in range(min(5, len(pagination_elements))):
+ try:
+ # Focus on elements that look like "next page" buttons
+ el = pagination_elements[i]
+ el_text = await el.text_content() or ""
+
+ # Only click if this looks like a pagination control
+ if "next" in el_text.lower() or ">" == el_text.strip() or "โ" == el_text.strip():
+ logger.info(f"Clicking pagination control: {el_text}")
+ await el.click()
+ await self.page.wait_for_timeout(2000)
+ await self.page.wait_for_load_state('networkidle', timeout=5000)
+
+ # Get new links from this page
+ await self.extract_all_link_types(links, base_url, path_base)
+ except Exception as e:
+ logger.warning(f"Error clicking pagination: {e}")
+
+ # Check for hidden links that might be revealed by JavaScript
+ hidden_links = await self.page.evaluate("""
+ () => {
+ // Try to execute common JavaScript patterns that reveal hidden content
+ try {
+ // Common patterns used in websites to initially hide content
+ const hiddenContainers = document.querySelectorAll(
+ '.hidden, .hide, [style*="display: none"], [style*="visibility: hidden"]'
+ );
+
+ // Attempt to make them visible
+ hiddenContainers.forEach(el => {
+ el.style.display = 'block';
+ el.style.visibility = 'visible';
+ el.classList.remove('hidden', 'hide');
+ });
+
+ // Return any newly visible links
+ return Array.from(document.querySelectorAll('a[href]')).map(a => a.href);
+ } catch (e) {
+ return [];
+ }
+ }
+ """)
+
+ # Add any newly discovered links
+ for href in hidden_links:
+ if href and not href.startswith('javascript:'):
+ links.add(href)
+
+ # Find all download links
+ download_links = await self.page.evaluate("""
+ () => {
+ return Array.from(document.querySelectorAll('a[href]'))
+ .filter(a => {
+ const href = a.href.toLowerCase();
+ return href.includes('download') ||
+ href.includes('file') ||
+ href.includes('get') ||
+ href.includes('view.php') ||
+ href.includes('action=') ||
+ href.includes('fname=');
+ })
+ .map(a => a.href);
+ }
+ """)
+
+ for download_link in download_links:
+ links.add(download_link)
+
+ # Also check for hidden links in JavaScript, iframes, or dynamic content
+ js_links = await self.discover_hidden_links(self.page)
+ for link in js_links:
+ links.add(link)
+
+ logger.info(f"Found {len(links)} sublinks")
+
+ # Prioritize download links
+ prioritized_links = []
+ normal_links = []
+
+ for link in links:
+ if is_download_link(link):
+ prioritized_links.append(link)
+ else:
+ normal_links.append(link)
+
+ # Return prioritized links first, then normal links, up to the limit
+ result = prioritized_links + normal_links
+ return result[:limit]
+
+ except Exception as e:
+ logger.error(f"Error getting sublinks from {url}: {e}")
+ return list(links)[:limit] # Return what we have so far
+
+ async def extract_all_link_types(self, links_set, base_url, path_base):
+ """Extract all types of links from the current page"""
+ # Get all <a href> links
+ a_links = await self.page.query_selector_all('a[href]')
+ for a in a_links:
+ try:
+ href = await a.get_attribute('href')
+ if href and not href.startswith('javascript:') and not href.startswith('#'):
+ full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base)
+ links_set.add(full_url)
+ except Exception:
+ pass
+
+ # Get iframe sources
+ iframes = await self.page.query_selector_all('iframe[src]')
+ for iframe in iframes:
+ try:
+ src = await iframe.get_attribute('src')
+ if src and not src.startswith('javascript:') and not src.startswith('about:'):
+ full_url = src if src.startswith('http') else self.resolve_relative_url(src, base_url, path_base)
+ links_set.add(full_url)
+ except Exception:
+ pass
+
+ # Get links from onclick attributes that reference URLs
+ onclick_elements = await self.page.query_selector_all('*[onclick*="window.location"], *[onclick*="document.location"]')
+ for el in onclick_elements:
+ try:
+ onclick = await el.get_attribute('onclick')
+ urls = re.findall(r'(https?://[^\'"]+)', onclick)
+ for url in urls:
+ links_set.add(url)
+ except Exception:
+ pass
+
+ # Look for URLs in data-* attributes
+ data_elements = await self.page.query_selector_all('*[data-url], *[data-href], *[data-src]')
+ for el in data_elements:
+ for attr in ['data-url', 'data-href', 'data-src']:
+ try:
+ value = await el.get_attribute(attr)
+ if value and not value.startswith('javascript:'):
+ full_url = value if value.startswith('http') else self.resolve_relative_url(value, base_url, path_base)
+ links_set.add(full_url)
+ except Exception:
+ pass
+
+ # Look for special anchor links that might not have href attributes
+ special_anchors = await self.page.query_selector_all('.rgMasterTable a, .grid a, #GridView1 a, #gvResults a')
+ for anchor in special_anchors:
+ try:
+ href = await anchor.get_attribute('href')
+ if href and not href.startswith('javascript:') and not href.startswith('#'):
+ full_url = href if href.startswith('http') else self.resolve_relative_url(href, base_url, path_base)
+ links_set.add(full_url)
+ except Exception:
+ pass
+
+ # Extract links from JSON data embedded in the page
+ script_elements = await self.page.query_selector_all('script[type="application/json"], script[type="text/json"]')
+ for script in script_elements:
+ try:
+ script_content = await script.text_content()
+ if script_content:
+ # Look for URLs in the JSON content
+ urls = re.findall(r'(https?://[^\'"]+)', script_content)
+ for url in urls:
+ links_set.add(url)
+ except Exception:
+ pass
+
+ def resolve_relative_url(self, relative_url, base_url, path_base):
+ """Properly resolve relative URLs considering multiple formats"""
+ if relative_url.startswith('/'):
+ # Absolute path relative to domain
+ return f"{base_url}{relative_url}"
+ elif relative_url.startswith('./'):
+ # Explicit relative path
+ return f"{base_url}{path_base}/{relative_url[2:]}"
+ elif relative_url.startswith('../'):
+ # Parent directory
+ parent_path = '/'.join(path_base.split('/')[:-1])
+ return f"{base_url}{parent_path}/{relative_url[3:]}"
+ else:
+ # Regular relative path
+ return f"{base_url}{path_base}/{relative_url}"
+
+ async def deep_search(self, url, custom_ext_list=None, sublink_limit=10000, timeout=60):
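+ """Crawl the URL and its sublinks, returning a deduplicated list of file-info dicts (url, filename, size, ...)."""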
+ if not custom_ext_list:
+ custom_ext_list = []
+ progress_text = st.empty()
+ progress_bar = st.progress(0)
+ file_count_text = st.empty()
+
+ try:
+ # Reset the visited URLs for a fresh deep search
+ self.visited_urls = set()
+
+ progress_text.text("Analyzing main page...")
+ # Special handling for ASP.NET pages
+ is_aspnet = False
+ try:
+ await self.page.goto(url, timeout=30000, wait_until='networkidle')
+ is_aspnet = await self.page.evaluate('''
+ () => {
+ return document.querySelector('form#aspnetForm') !== null ||
+ document.querySelector('input[name="__VIEWSTATE"]') !== null;
+ }
+ ''')
+ except Exception:
+ pass
+
+ # Check if this URL is a direct download
+ if is_download_link(url):
+ progress_text.text("URL appears to be a direct download. Analyzing...")
+
+ # Try to extract file directly
+ normalized_url = normalize_download_url(url)
+ file_info = {
+ 'url': normalized_url,
+ 'download_url': normalized_url,
+ 'filename': os.path.basename(urlparse(normalized_url).path) or 'download',
+ 'size': 'Unknown Size',
+ 'metadata': {}
+ }
+
+ # Add to visited URLs
+ self.visited_urls.add(normalized_url)
+ progress_bar.progress(1.0)
+ return [file_info]
+
+ # Extract files from main page
+ main_files = await self.extract_downloadable_files(url, custom_ext_list)
+ initial_count = len(main_files)
+ file_count_text.text(f"Found {initial_count} files on main page")
+
+ # Get sublinks with enhanced method
+ progress_text.text("Getting sublinks...")
+ sublinks = await self.get_sublinks(url, sublink_limit)
+ total_links = len(sublinks)
+ progress_text.text(f"Found {total_links} sublinks to process")
+
+ # Always include files from the main page, regardless of sublinks
+ all_files = main_files
+
+ if not sublinks:
+ progress_bar.progress(1.0)
+ return all_files
+
+ # Process each sublink
+ for i, sublink in enumerate(sublinks, 1):
+ progress = i / total_links
+ progress_text.text(f"Processing sublink {i}/{total_links}: {sublink}")
+ progress_bar.progress(progress)
+
+ try:
+ # Check if this is a direct download link
+ if is_download_link(sublink):
+ # For download links, just add the link directly
+ normalized_url = normalize_download_url(sublink)
+
+ # Skip if already visited
+ if normalized_url in self.visited_urls:
+ continue
+
+ # Mark as visited
+ self.visited_urls.add(normalized_url)
+
+ # Get file size if possible
+ size_str = await self.get_file_size(normalized_url)
+
+ # Get filename, with fallback to domain-based name
+ filename = os.path.basename(urlparse(normalized_url).path)
+ if not filename or filename == '/' or '?' in filename:
+ domain = get_domain(normalized_url)
+ ext = '.pdf' # Default extension
+ for common_ext in ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.zip']:
+ if common_ext in normalized_url.lower():
+ ext = common_ext
+ break
+ filename = f"file_from_{domain}{ext}"
+
+ # Add file to results
+ all_files.append({
+ 'url': normalized_url,
+ 'download_url': normalized_url,
+ 'filename': filename,
+ 'size': size_str,
+ 'metadata': {}
+ })
+ file_count_text.text(f"Found {len(all_files)} total files")
+ continue
+
+ # For regular links, use a longer timeout for ASP.NET pages which can be slower
+ sub_timeout = timeout * 2 if is_aspnet else timeout
+
+ # Skip already visited URLs
+ if sublink in self.visited_urls:
+ continue
+
+ # Extract files from sublink
+ sub_files = await self.extract_downloadable_files(sublink, custom_ext_list)
+ all_files.extend(sub_files)
+ file_count_text.text(f"Found {len(all_files)} total files")
+ except Exception as e:
+ logger.warning(f"Error processing sublink {sublink}: {e}")
+
+ # Deduplicate files
+ seen_urls = set()
+ unique_files = []
+ for f in all_files:
+ if f['url'] not in seen_urls:
+ seen_urls.add(f['url'])
+ unique_files.append(f)
+
+ final_count = len(unique_files)
+ progress_text.text(f"Deep search complete!")
+ file_count_text.text(f"Found {final_count} unique files")
+ progress_bar.progress(1.0)
+ return unique_files
+
+ except Exception as e:
+ logger.error(f"Deep search error: {e}")
+ progress_text.text(f"Error during deep search: {str(e)}")
+ return []
+
+ finally:
+ await asyncio.sleep(2)
+ if not st.session_state.get('keep_progress', False):
+ progress_text.empty()
+ progress_bar.empty()
+
+# -------------------- Main App --------------------
+def main():
+
+ # Custom CSS for better appearance
+ st.markdown("""
+
+ """, unsafe_allow_html=True)
+
+ # Initialize session state for storing files
if 'files' not in st.session_state:
st.session_state.files = []
if 'downloaded_paths' not in st.session_state:
@@ -66,15 +3887,976 @@ def initialize_session_state():
st.session_state.proxy_string = None
if 'stealth_mode' not in st.session_state:
st.session_state.stealth_mode = True
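+ # st.session_state persists across Streamlit reruns, so search results, downloads and
+ # settings survive the script re-execution triggered by every user interaction.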
+
+ # ============================
+ # SIDEBAR
+ # ============================
+ with st.sidebar:
+ st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=50)
+ st.markdown(" {mode_descriptions[st.session_state.mode]}Advanced File Downloader
", unsafe_allow_html=True)
+ with col2:
+ st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=70)
+
+ mode_descriptions = {
+ "Standard": "A versatile tool for discovering and downloading files from any website.",
+ "Education Mode": "Optimized for educational resources, exams, and academic materials.",
+ "Research Mode": "Focused on research papers, datasets, and academic publications.",
+ "Media Mode": "Enhanced for finding and downloading images, videos, and audio files."
+ }
+
+ st.markdown(f"Find and Download Files
", unsafe_allow_html=True)
+
+ col1, col2 = st.columns([3, 1])
+ with col1:
+ url = st.text_input("Enter a URL to search for downloadable files:",
+ placeholder="e.g., https://example.com/resources",
+ value=st.session_state.get('preset_url', ''))
+ with col2:
+ # Initialize search_method with either session state or default value
+ initial_search_method = st.session_state.get('search_method', "Deep Search")
+ search_method = st.selectbox("Search Method",
+ ["Deep Search", "Quick Search", "Exam Site Mode"],
+ index=["Deep Search", "Quick Search", "Exam Site Mode"].index(initial_search_method))
+ # Update session state when changed
+ if search_method != st.session_state.get('search_method'):
+ st.session_state.search_method = search_method
+
+ # Advanced options in an expander
+ with st.expander("Search Options", expanded=False):
+ col1, col2, col3 = st.columns(3)
+ with col1:
+ depth = st.slider("Search Depth", min_value=1, max_value=5, value=2,
+ help="Higher values will search more links but take longer")
+ prioritize_pdfs = st.checkbox("Prioritize PDFs",
+ value=st.session_state.get('prioritize_pdfs', True),
+ help="Focus on finding PDF files first")
+ with col2:
+ timeout = st.slider("Timeout (seconds)", min_value=10, max_value=120, value=60)
+ follow_subdomains = st.checkbox("Follow Subdomains", value=True,
+ help="Include links from subdomains in the search")
+ with col3:
+ # Default extensions based on mode
+ default_extensions = {
+ "Standard": ".pdf,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.zip",
+ "Education Mode": ".pdf,.doc,.docx,.ppt,.pptx",
+ "Research Mode": ".pdf,.txt,.csv,.json,.xlsx",
+ "Media Mode": ".jpg,.png,.mp3,.mp4,.avi,.mov"
+ }
+
+ custom_extensions = st.text_area(
+ "Custom File Extensions",
+ value=st.session_state.get('custom_extensions', default_extensions[st.session_state.mode]),
+ help="Comma-separated list of file extensions to look for"
+ )
+
+ # Update session state when extensions changed
+ if 'custom_extensions' not in st.session_state or custom_extensions != st.session_state.custom_extensions:
+ st.session_state.custom_extensions = custom_extensions
+
+ search_col1, search_col2 = st.columns([4, 1])
+ with search_col1:
+ search_button = st.button("🔍 Start Search", use_container_width=True)
+ with search_col2:
+ clear_button = st.button("🧹 Clear Results", use_container_width=True)
+
+ # File results section
+ if st.session_state.files:
+ st.markdown("Found Files
", unsafe_allow_html=True)
+
+ # File filtering options
+ filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 1])
+ with filter_col1:
+ file_filter = st.text_input("Filter files by name:", placeholder="e.g., exam, 2023, etc.")
+ with filter_col2:
+ sort_option = st.selectbox("Sort by:", ["Relevance", "Name", "Size (Largest)", "Size (Smallest)"])
+ with filter_col3:
+ show_only_pdfs = st.checkbox("PDFs Only", value=False)
+
+ # Sort files based on selection
+ sorted_files = list(st.session_state.files)
+ if sort_option == "Name":
+ sorted_files.sort(key=lambda x: x['filename'])
+ elif sort_option == "Size (Largest)":
+ # Convert size strings to comparable values
+ def parse_size(size_str):
+ if 'Unknown' in size_str:
+ return 0
+ try:
+ value = float(size_str.split(' ')[0])
+ unit = size_str.split(' ')[1]
+ multipliers = {'bytes': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}
+ return value * multipliers.get(unit, 0)
+ except:
+ return 0
+
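+ # 'Unknown Size' maps to 0 so files without a known size sort last in the largest-first ordering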
+ sorted_files.sort(key=lambda x: parse_size(x['size']), reverse=True)
+ elif sort_option == "Size (Smallest)":
+ def parse_size(size_str):
+ if 'Unknown' in size_str:
+ return float('inf')
+ try:
+ value = float(size_str.split(' ')[0])
+ unit = size_str.split(' ')[1]
+ multipliers = {'bytes': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}
+ return value * multipliers.get(unit, 0)
+ except:
+ return float('inf')
+
+ sorted_files.sort(key=lambda x: parse_size(x['size']))
+
+ # File list with selection
+ file_container = st.container()
+ with file_container:
+ selected_files = []
+ displayed_files = []
+
+ for i, file in enumerate(sorted_files):
+ # Apply filters
+ if file_filter and file_filter.lower() not in file['filename'].lower():
+ continue
+ if show_only_pdfs and not file['filename'].lower().endswith('.pdf'):
+ continue
+
+ displayed_files.append(i)
+ with st.container():
+ col1, col2, col3, col4 = st.columns([0.5, 3, 1, 1])
+ with col1:
+ selected = st.checkbox("", key=f"select_{i}", value=True)
+ if selected:
+ selected_files.append(i)
+ with col2:
+ file_icon = "๐"
+ if file['filename'].lower().endswith('.pdf'):
+ file_icon = "๐"
+ elif file['filename'].lower().endswith(('.doc', '.docx')):
+ file_icon = "๐"
+ elif file['filename'].lower().endswith(('.xls', '.xlsx')):
+ file_icon = "๐"
+ elif file['filename'].lower().endswith(('.ppt', '.pptx')):
+ file_icon = "๐ผ๏ธ"
+ elif file['filename'].lower().endswith(('.jpg', '.png', '.gif')):
+ file_icon = "๐ผ๏ธ"
+ elif file['filename'].lower().endswith(('.mp3', '.wav')):
+ file_icon = "๐"
+ elif file['filename'].lower().endswith(('.mp4', '.avi', '.mov')):
+ file_icon = "๐ฌ"
+
+ st.markdown(f"**{file_icon} {file['filename']}**")
+ st.markdown(f"{file['url'][:60]}...", unsafe_allow_html=True)
+ with col3:
+ st.markdown(f"**Size:** {file['size']}")
+ with col4:
+ st.button("Preview", key=f"preview_{i}")
+
+ st.divider()
+
+ if not displayed_files:
+ st.info("No files match your current filters. Try adjusting your search criteria.")
+
+ # Download options
+ if selected_files:
+ col1, col2 = st.columns(2)
+ with col1:
+ download_dir = st.text_input("Download Directory", value="downloads")
+ with col2:
+ download_option = st.radio("Download as", ["Individual Files", "ZIP Archive"], horizontal=True)
+
+ download_col1, download_col2, download_col3 = st.columns([3, 1, 1])
+ with download_col1:
+ download_button = st.button("⬇️ Download Selected Files", use_container_width=True)
+ with download_col2:
+ google_drive_button = st.button("📤 Upload to Drive",
+ use_container_width=True,
+ disabled=not st.session_state.google_credentials)
+ with download_col3:
+ select_all = st.button("Select All Files", use_container_width=True)
+
+ # Handle select all button
+ if select_all:
+ for i in displayed_files:
+ st.session_state[f"select_{i}"] = True
+ st.rerun()
+
+ # Download progress/results
+ if st.session_state.download_complete:
+ st.success(f"โ
Downloaded {len(st.session_state.downloaded_paths)} files successfully!")
+ download_links = []
+ for path in st.session_state.downloaded_paths:
+ with open(path, "rb") as f:
+ file_content = f.read()
+ file_name = os.path.basename(path)
+ download_links.append((file_name, file_content))
+
+ if len(download_links) > 0:
+ if download_option == "ZIP Archive":
+ # Create ZIP archive for download
+ zip_path = create_zip_file(st.session_state.downloaded_paths, download_dir)
+ with open(zip_path, "rb") as f:
+ zip_content = f.read()
+ st.download_button("๐ฆ Download ZIP Archive",
+ zip_content,
+ file_name=os.path.basename(zip_path),
+ mime="application/zip")
+ else:
+ # Show individual file download links
+ st.markdown("Download Files
", unsafe_allow_html=True)
+
+ # Create a grid of download buttons
+ cols = st.columns(3)
+ for idx, (name, content) in enumerate(download_links):
+ mime_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
+ with cols[idx % 3]:
+ st.download_button(
+ f"๐ {name}",
+ content,
+ file_name=name,
+ mime=mime_type,
+ key=f"dl_{name}",
+ use_container_width=True
+ )
+
+ # Tab 2: Local File Search
+ with tabs[1]:
+ st.markdown("Search Downloaded Files
", unsafe_allow_html=True)
+ st.write("Upload files to search through their content with AI-powered semantic search.")
+
+ # File upload
+ uploaded_files = st.file_uploader("Upload documents for search",
+ accept_multiple_files=True,
+ type=['pdf', 'docx', 'txt', 'csv', 'json'])
+
+ if uploaded_files:
+ # Build search index on upload
+ col1, col2 = st.columns([4, 1])
+ with col1:
+ use_transformer = st.checkbox("Use AI Transformer Model", value=HAVE_TRANSFORMERS,
+ help="Uses advanced AI for more accurate semantic search (if available)")
+ with col2:
+ if st.button("Build Search Index", use_container_width=True):
+ with st.spinner("Processing files and building search index..."):
+ files_added = 0
+ for uploaded_file in uploaded_files:
+ file_info = {
+ 'filename': uploaded_file.name,
+ 'url': f'local://{uploaded_file.name}',
+ 'size': humanize_file_size(uploaded_file.size)
+ }
+ success = st.session_state.rag_search.add_file(uploaded_file.getvalue(), file_info)
+ if success:
+ files_added += 1
+
+ if files_added > 0:
+ index_built = st.session_state.rag_search.build_index()
+ if index_built:
+ st.success(f"โ
Successfully indexed {files_added} files!")
+ else:
+ st.error("Failed to build search index.")
+ else:
+ st.warning("No valid text could be extracted from the files.")
+
+ # Search interface
+ st.markdown("Search Files
", unsafe_allow_html=True)
+
+ col1, col2 = st.columns([4, 1])
+ with col1:
+ query = st.text_input("Enter search query:", placeholder="e.g., neural networks, climate change")
+ with col2:
+ expand_query = st.checkbox("Auto-expand query", value=True,
+ help="Automatically add related terms to your search")
+
+ col1, col2 = st.columns([4, 1])
+ with col1:
+ if st.button("๐ Search Documents", use_container_width=True):
+ if not query:
+ st.warning("Please enter a search query")
+ else:
+ with st.spinner("Searching..."):
+ results = st.session_state.rag_search.search(query, top_k=5, search_chunks=True)
+
+ if results:
+ st.markdown(f"**Found {len(results)} relevant documents:**")
+ for i, result in enumerate(results):
+ with st.container():
+ st.markdown(f"Advanced Settings
", unsafe_allow_html=True)
+
+ config_tabs = st.tabs(["Browser Settings", "Proxy Configuration", "Download Options", "System"])
+
+ # Browser Settings tab
+ with config_tabs[0]:
+ col1, col2 = st.columns(2)
+ with col1:
+ use_stealth = st.checkbox("Use Stealth Mode", value=st.session_state.stealth_mode,
+ help="Makes browser harder to detect as automated, but may be slower")
+
+ handle_captchas = st.checkbox("Handle Captchas Automatically", value=False,
+ help="Attempt to solve simple captchas automatically")
+
+ download_timeout = st.slider("Download Timeout (seconds)",
+ min_value=30, max_value=600, value=300,
+ help="Maximum time to wait for downloads to complete")
+ with col2:
+ user_agent = st.selectbox("User Agent", USER_AGENTS, index=0,
+ help="Browser identity to use when accessing websites")
+
+ save_screenshots = st.checkbox("Save Browser Screenshots", value=False,
+ help="Save screenshots when errors occur for debugging")
+
+ browser_lang = st.selectbox("Browser Language",
+ ["English (US)", "English (UK)", "Spanish", "French", "German", "Chinese"],
+ index=0)
+
+ if st.button("Update Browser Settings"):
+ st.session_state.stealth_mode = use_stealth
+ st.success("Browser settings updated!")
+
+ # Dependency installation section
+ st.markdown("Dependencies
", unsafe_allow_html=True)
+ if st.button("Install Playwright Dependencies"):
+ with st.spinner("Installing dependencies..."):
+ install_playwright_dependencies()
+
+ # Proxy Configuration tab
+ with config_tabs[1]:
+ proxy_enabled = st.checkbox("Enable Proxy", value=st.session_state.use_proxy,
+ help="Route requests through a proxy server for anonymity or bypassing restrictions")
+
+ if proxy_enabled:
+ proxy_col1, proxy_col2 = st.columns(2)
+ with proxy_col1:
+ proxy_type = st.selectbox("Proxy Type", ["HTTP", "SOCKS5", "HTTPS"])
+ proxy_host = st.text_input("Proxy Host", placeholder="e.g., 127.0.0.1")
+ with proxy_col2:
+ proxy_port = st.text_input("Proxy Port", placeholder="e.g., 8080")
+ proxy_auth = st.text_input("Proxy Authentication (optional)",
+ placeholder="username:password", type="password")
+
+ st.markdown("Proxy Rotation
", unsafe_allow_html=True)
+ use_proxy_rotation = st.checkbox("Enable Proxy Rotation", value=False,
+ help="Automatically rotate between multiple proxies for better anonymity")
+
+ if use_proxy_rotation:
+ proxy_list = st.text_area("Proxy List (one per line)",
+ placeholder="http://proxy1.example.com:8080\nhttp://proxy2.example.com:8080")
+ rotation_interval = st.slider("Rotation Interval (requests)",
+ min_value=1, max_value=50, value=10,
+ help="How often to switch proxies")
+
+ if st.button("Save Proxy Configuration"):
+ # Construct the proxy string
+ proxy_string = None
+ if proxy_enabled and proxy_host and proxy_port:
+ proxy_prefix = f"{proxy_type.lower()}://"
+ proxy_auth_str = f"{proxy_auth}@" if proxy_auth else ""
+ proxy_string = f"{proxy_prefix}{proxy_auth_str}{proxy_host}:{proxy_port}"
+
+ # Update session state
+ st.session_state.use_proxy = proxy_enabled
+ st.session_state.proxy_string = proxy_string
+
+ # Configure proxy rotation if enabled
+ if use_proxy_rotation and proxy_list:
+ PROXY_ROTATION_CONFIG["enabled"] = True
+ PROXY_ROTATION_CONFIG["rotation_interval"] = rotation_interval
+ PROXY_ROTATION_CONFIG["proxies"] = [p.strip() for p in proxy_list.splitlines() if p.strip()]
+
+ st.success("Proxy configuration updated!")
+
+ # Download Options tab
+ with config_tabs[2]:
+ col1, col2 = st.columns(2)
+ with col1:
+ st.markdown("Download Behavior
", unsafe_allow_html=True)
+
+ skip_existing = st.checkbox("Skip Existing Files", value=True,
+ help="Don't download files that already exist locally")
+
+ auto_rename = st.checkbox("Auto-Rename Duplicates", value=True,
+ help="Automatically rename files instead of overwriting")
+
+ verify_downloads = st.checkbox("Verify Downloads", value=True,
+ help="Check file integrity after download")
+
+ max_retries = st.slider("Max Retries", min_value=0, max_value=10, value=3,
+ help="Number of times to retry failed downloads")
+
+ with col2:
+ st.markdown("File Organization
", unsafe_allow_html=True)
+
+ auto_organize = st.checkbox("Auto-Organize Files", value=True,
+ help="Automatically organize files by type")
+
+ default_dir = st.text_input("Default Download Directory", value="downloads",
+ help="Default location to save downloaded files")
+
+ org_by_domain = st.checkbox("Organize by Domain", value=False,
+ help="Create subdirectories based on source domains")
+
+ org_by_type = st.checkbox("Organize by File Type", value=False,
+ help="Create subdirectories based on file types")
+
+ if st.button("Save Download Settings"):
+ st.session_state.download_settings = {
+ "skip_existing": skip_existing,
+ "auto_rename": auto_rename,
+ "verify_downloads": verify_downloads,
+ "max_retries": max_retries,
+ "auto_organize": auto_organize,
+ "default_dir": default_dir,
+ "org_by_domain": org_by_domain,
+ "org_by_type": org_by_type
+ }
+ st.success("Download settings saved!")
+
+ # System tab
+ with config_tabs[3]:
+ col1, col2 = st.columns(2)
+ with col1:
+ st.markdown("Memory & Performance
", unsafe_allow_html=True)
+
+ max_concurrent = st.slider("Max Concurrent Downloads", min_value=1, max_value=10, value=3,
+ help="Maximum number of simultaneous downloads")
+
+ memory_limit = st.slider("Memory Limit (MB)", min_value=256, max_value=4096, value=1024,
+ help="Maximum memory to use for file processing")
+
+ processing_threads = st.slider("Processing Threads", min_value=1, max_value=8, value=2,
+ help="Number of threads to use for file processing")
+
+ with col2:
+ st.markdown("Logs & Diagnostics
", unsafe_allow_html=True)
+
+ log_level = st.selectbox("Log Level", ["DEBUG", "INFO", "WARNING", "ERROR"], index=1,
+ help="Detail level for application logs")
+
+ save_debug_info = st.checkbox("Save Debug Information", value=False,
+ help="Save detailed information about program execution")
+
+ log_dir = st.text_input("Log Directory", value="logs",
+ help="Directory to save log files")
+
+ if st.button("Apply System Settings"):
+ st.session_state.system_settings = {
+ "max_concurrent": max_concurrent,
+ "memory_limit": memory_limit,
+ "processing_threads": processing_threads,
+ "log_level": log_level,
+ "save_debug_info": save_debug_info,
+ "log_dir": log_dir
+ }
+ # Update logging configuration
+ log_level_num = getattr(logging, log_level)
+ logging.getLogger().setLevel(log_level_num)
+ st.success("System settings applied!")
+
+ # Reset application button
+ st.markdown("Application Control
", unsafe_allow_html=True)
+ reset_col1, reset_col2 = st.columns([1, 3])
+ with reset_col1:
+ if st.button("Reset Application", use_container_width=True):
+ for key in list(st.session_state.keys()):
+ if key != 'google_credentials': # Preserve Google auth
+ del st.session_state[key]
+ st.success("Application has been reset!")
+ st.rerun()
+ with reset_col2:
+ st.info("This will clear all search results, downloaded files, and reset settings to defaults.")
+
+ # Advanced cleanup options
+ st.markdown("Advanced Options
", unsafe_allow_html=True)
+
+ adv_col1, adv_col2 = st.columns(2)
+ with adv_col1:
+ clear_cache = st.button("Clear Cache", use_container_width=True)
+ if clear_cache:
+ # Clear cached files and temporary data
+ temp_dir = tempfile.gettempdir()
+ try:
+ for f in os.listdir(temp_dir):
+ if f.startswith("playwright") or f.startswith("download"):
+ try:
+ os.remove(os.path.join(temp_dir, f))
+ except:
+ pass
+ st.success("Cache cleared successfully!")
+ except Exception as e:
+ st.error(f"Error clearing cache: {e}")
+
+ with adv_col2:
+ export_settings = st.button("Export Settings", use_container_width=True)
+ if export_settings:
+ # Export current settings to JSON
+ settings = {
+ "mode": st.session_state.mode,
+ "stealth_mode": st.session_state.stealth_mode,
+ "use_proxy": st.session_state.use_proxy,
+ "proxy_string": st.session_state.proxy_string,
+ "custom_extensions": st.session_state.get("custom_extensions", ""),
+ "prioritize_pdfs": st.session_state.get("prioritize_pdfs", True),
+ "system_settings": st.session_state.get("system_settings", {}),
+ "download_settings": st.session_state.get("download_settings", {})
+ }
+
+ settings_json = json.dumps(settings, indent=2)
+ b64 = base64.b64encode(settings_json.encode()).decode()
+ href = f'data:application/json;base64,{b64}'
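+ # Embedding the JSON as a base64 data: URI lets the browser download it from a plain
+ # link without writing a temporary file on the server.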
+ st.markdown(f'<a href="{href}" download="settings.json">Download Settings File</a>', unsafe_allow_html=True)
+
+ # Mode Selection
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # Quick Settings
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # Google Drive Integration
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # Preset buttons for common EDU sites
+ if st.session_state.mode == "Education Mode":
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # Tool status
+ st.markdown(" ", unsafe_allow_html=True)
+
+ # App info
+ st.markdown(" ", unsafe_allow_html=True)
-# Import the UI code while keeping the modular structure
-from ui import setup_ui, create_sidebar, display_file_results, handle_downloads, handle_google_drive_upload
-from main import main as app_main
-
-# Set up and run the application
-def main():
- initialize_session_state()
- app_main()
+ # ============================
+ # MAIN CONTENT AREA
+ # ============================
+
+ # Header section
+ col1, col2 = st.columns([5, 1])
+ with col1:
+ st.markdown("<h1>Advanced File Downloader</h1>", unsafe_allow_html=True)
+ st.markdown(mode_descriptions[st.session_state.mode])
+ with col2:
+ st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=70)
+
+ # Tab 4: Help
+ with tabs[3]:
+ st.markdown("