"""Streamlit UI components: sidebar, file result list, downloads, and Google Drive upload."""

import asyncio
import mimetypes
import os
import subprocess
import sys
from urllib.parse import urlparse

import googleapiclient.discovery
import streamlit as st

from app.download_manager import DownloadManager
from app.google_drive import (
    create_drive_folder,
    exchange_code_for_credentials,
    get_google_auth_url,
    google_drive_upload,
)
from app.rag_search import EnhancedRAGSearch
from app.utils import create_zip_file, humanize_file_size, show_user_friendly_error

# NOTE(review): several st.markdown(..., unsafe_allow_html=True) calls below pass
# blank or newline-only strings. The HTML/CSS markup they presumably carried is
# not present in this copy of the file -- confirm against the original source.

_MODES = ["Standard", "Education Mode", "Research Mode", "Media Mode"]

# mode -> (custom_extensions, prioritize_pdfs); "Standard" changes nothing.
_MODE_PRESETS = {
    "Education Mode": (".pdf,.doc,.docx,.ppt,.pptx", True),
    "Research Mode": (".pdf,.txt,.csv,.json,.xlsx", True),
    "Media Mode": (".jpg,.png,.mp3,.mp4,.avi,.mov", False),
}

# (button label, preset URL, search method) shown in Education Mode.
_EDUCATION_PRESETS = (
    ("Past Exam Papers", "https://pastpapers.example.edu", "Exam Site Mode"),
    ("Open Course Materials", "https://opencourseware.example.edu", "Deep Search"),
    ("Research Papers", "https://papers.example.org", "Deep Search"),
)

_DEFAULT_DRIVE_FOLDER = "File Downloader"

_SIZE_MULTIPLIERS = {
    'bytes': 1,
    'KB': 1024,
    'MB': 1024**2,
    'GB': 1024**3,
    'TB': 1024**4,
}

# Checked in order; the first matching suffix group wins.
_FILE_ICONS = (
    (('.pdf',), "📝"),
    (('.doc', '.docx'), "📋"),
    (('.xls', '.xlsx'), "📊"),
    (('.ppt', '.pptx'), "🖼️"),
    (('.jpg', '.jpeg', '.png', '.gif'), "🖼️"),
    (('.mp3', '.wav'), "🔊"),
    (('.mp4', '.avi', '.mov'), "🎬"),
)
_DEFAULT_FILE_ICON = "📄"

_URL_PREVIEW_LENGTH = 60


def setup_ui():
    """Setup the main UI elements"""
    st.markdown(" ", unsafe_allow_html=True)


def create_sidebar():
    """Create the sidebar elements.

    Renders the mode selector, quick settings (stealth mode / proxy), the
    Google Drive section, the Education Mode preset buttons, and the tool
    status section. Reads and writes ``st.session_state`` keys ``mode``,
    ``custom_extensions``, ``prioritize_pdfs``, ``stealth_mode``,
    ``use_proxy``, ``proxy_string``, ``preset_url`` and ``search_method``.
    """
    with st.sidebar:
        st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=50)
        st.markdown("", unsafe_allow_html=True)

        # Mode Selection
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown("", unsafe_allow_html=True)
        current_mode = st.session_state.mode
        mode = st.radio(
            "Select Mode",
            _MODES,
            label_visibility="collapsed",
            # Fall back to the first mode instead of raising ValueError when
            # the stored mode is not one of the known options.
            index=_MODES.index(current_mode) if current_mode in _MODES else 0,
            horizontal=False,
        )
        if mode != st.session_state.mode:
            st.session_state.mode = mode
            # Update mode-specific settings
            preset = _MODE_PRESETS.get(mode)
            if preset is not None:
                st.session_state.custom_extensions, st.session_state.prioritize_pdfs = preset
        st.markdown(f"\n\nCurrent: {st.session_state.mode}\n\n", unsafe_allow_html=True)
        st.markdown("\n\n", unsafe_allow_html=True)

        # Quick Settings
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown("", unsafe_allow_html=True)
        st.session_state.stealth_mode = st.checkbox(
            "Stealth Mode", value=st.session_state.stealth_mode
        )
        use_proxy = st.checkbox("Use Proxy", value=st.session_state.use_proxy)
        st.session_state.use_proxy = use_proxy
        if use_proxy:
            st.session_state.proxy_string = st.text_input(
                "Proxy Address",
                placeholder="e.g., http://user:pass@host:port",
                value=st.session_state.proxy_string or "",
            )
        st.markdown("\n\n", unsafe_allow_html=True)

        # Google Drive Integration
        show_google_drive_integration()

        # Preset buttons for educational sites
        if st.session_state.mode == "Education Mode":
            st.markdown("\n\n", unsafe_allow_html=True)
            st.markdown("", unsafe_allow_html=True)
            st.markdown("\n\nCommon Educational Sites\n\n", unsafe_allow_html=True)
            for label, url, search_method in _EDUCATION_PRESETS:
                if st.button(label):
                    st.session_state.preset_url = url
                    st.session_state.search_method = search_method
                    st.rerun()
            st.markdown("\n\n", unsafe_allow_html=True)

        # Tool status
        st.markdown("\n\n", unsafe_allow_html=True)
        st.markdown("", unsafe_allow_html=True)
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("\n\n", unsafe_allow_html=True)
            st.markdown("\n\nActive\n\n", unsafe_allow_html=True)
        with col2:
            st.markdown("\n\nBrowser\n\n", unsafe_allow_html=True)
            st.markdown("\n\nReady\n\n", unsafe_allow_html=True)
        if st.button("Install Dependencies"):
            with st.spinner("Installing Playwright dependencies..."):
                install_playwright_dependencies()
        st.markdown("\n\n", unsafe_allow_html=True)

        # App info
        st.markdown("", unsafe_allow_html=True)


def show_google_drive_integration():
    """Display Google Drive integration UI.

    When ``st.session_state.google_credentials`` is set, shows the target
    folder input (stored in ``st.session_state.drive_folder``) and a
    disconnect button. Otherwise shows the connect flow: an authorization
    link and a text input for the authorization code.
    """
    st.markdown("\n\n", unsafe_allow_html=True)
    st.markdown("", unsafe_allow_html=True)

    if st.session_state.google_credentials:
        st.success("✅ Connected")
        st.session_state.drive_folder = st.text_input(
            "Drive Folder",
            value=st.session_state.get("drive_folder", _DEFAULT_DRIVE_FOLDER),
        )
        if st.button("Disconnect Drive"):
            st.session_state.google_credentials = None
            st.rerun()
    else:
        st.warning("⚠️ Not Connected")
        # st.button is True only on the rerun triggered by the click, so the
        # auth URL is kept in session state; otherwise the code input below
        # would vanish as soon as the user typed into it.
        if st.button("Connect Google Drive"):
            st.session_state.drive_auth_url = get_google_auth_url()

        auth_url = st.session_state.get("drive_auth_url")
        if auth_url:
            st.markdown(f"[Click here to authorize]({auth_url})")
            auth_code = st.text_input("Enter authorization code:")
            if auth_code:
                with st.spinner("Connecting to Google Drive..."):
                    credentials, status_msg = exchange_code_for_credentials(auth_code)
                if credentials:
                    st.session_state.google_credentials = credentials
                    st.session_state.drive_auth_url = None
                    st.success(status_msg)
                    st.rerun()
                else:
                    st.error(status_msg)

    st.markdown("\n\n", unsafe_allow_html=True)


def install_playwright_dependencies():
    """Install Playwright dependencies.

    Runs ``apt-get`` for the system libraries, then installs the
    ``playwright`` package and its Chromium browser. Failures are reported
    in the UI rather than raised.
    """
    packages = [
        'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
        'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
        'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0',
    ]
    try:
        # Set environment variable for Playwright browsers path
        os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")

        # Install system dependencies
        subprocess.run(['apt-get', 'update', '-y'], check=True)
        subprocess.run(
            ['apt-get', 'install', '-y', '--no-install-recommends'] + packages,
            check=True,
        )

        # Install Playwright and dependencies. sys.executable makes sure the
        # package lands in the interpreter that is running this app, which a
        # bare "pip" / "python3" on PATH does not guarantee.
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'playwright'], check=True)
        subprocess.run([sys.executable, '-m', 'playwright', 'install', 'chromium'], check=True)

        st.success("Playwright dependencies installed successfully!")
    except (subprocess.SubprocessError, OSError) as e:
        # OSError covers a missing executable (e.g. no apt-get on this host).
        st.error(f"Error installing Playwright dependencies: {e}")
        st.info("You may need to manually install dependencies. Check console for details.")


def _parse_size(size_str, unknown):
    """Convert a size label such as ``"1.5 MB"`` to a number of bytes.

    Returns *unknown* when the label contains "Unknown", is not a string, or
    does not have the ``"<number> <unit>"`` shape with a recognised unit.
    """
    if not isinstance(size_str, str) or 'Unknown' in size_str:
        return unknown
    try:
        value, unit = size_str.split(' ')[:2]
        return float(value) * _SIZE_MULTIPLIERS[unit]
    except (ValueError, KeyError):
        return unknown


def _shorten_url(url):
    """Return *url* cut to the preview length, with "..." only if it was cut."""
    if len(url) <= _URL_PREVIEW_LENGTH:
        return url
    return url[:_URL_PREVIEW_LENGTH] + "..."


def display_file_results(files):
    """Display file results with filtering and sorting options.

    Args:
        files: Sequence of dicts; each is read via the ``'filename'``,
            ``'url'`` and ``'size'`` keys.

    Returns:
        ``None`` when *files* is empty. Otherwise a tuple
        ``(selected_files, displayed_files)`` of index lists. The indices
        refer to positions in *files* (not to the on-screen order), so they
        stay valid whatever sort option is active.
    """
    if not files:
        return None

    st.markdown("\n\nFound Files\n\n", unsafe_allow_html=True)

    # File filtering options
    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 1])
    with filter_col1:
        file_filter = st.text_input("Filter files by name:", placeholder="e.g., exam, 2023, etc.")
    with filter_col2:
        sort_option = st.selectbox(
            "Sort by:", ["Relevance", "Name", "Size (Largest)", "Size (Smallest)"]
        )
    with filter_col3:
        show_only_pdfs = st.checkbox("PDFs Only", value=False)

    # Sort (original index, file) pairs so the original index survives the
    # reordering; "Relevance" keeps the incoming order.
    indexed_files = list(enumerate(files))
    if sort_option == "Name":
        indexed_files.sort(key=lambda pair: pair[1]['filename'])
    elif sort_option == "Size (Largest)":
        # Unknown sizes count as 0 so they sink to the bottom.
        indexed_files.sort(key=lambda pair: _parse_size(pair[1]['size'], 0), reverse=True)
    elif sort_option == "Size (Smallest)":
        # Unknown sizes count as infinity so they sink to the bottom here too.
        indexed_files.sort(key=lambda pair: _parse_size(pair[1]['size'], float('inf')))

    needle = file_filter.lower() if file_filter else ""

    # File list with selection
    selected_files = []
    displayed_files = []
    with st.container():
        for index, file in indexed_files:
            filename = file['filename']
            lowered = filename.lower()

            # Apply filters
            if needle and needle not in lowered:
                continue
            if show_only_pdfs and not lowered.endswith('.pdf'):
                continue

            displayed_files.append(index)
            with st.container():
                col1, col2, col3, col4 = st.columns([0.5, 3, 1, 1])
                with col1:
                    # Widgets need a non-empty label; it is hidden visually.
                    selected = st.checkbox(
                        f"Select {filename}",
                        key=f"select_{index}",
                        value=True,
                        label_visibility="collapsed",
                    )
                    if selected:
                        selected_files.append(index)
                with col2:
                    file_icon = get_file_icon(filename)
                    st.markdown(f"**{file_icon} {filename}**")
                    st.markdown(_shorten_url(file['url']), unsafe_allow_html=True)
                with col3:
                    st.markdown(f"**Size:** {file['size']}")
                with col4:
                    st.button("Preview", key=f"preview_{index}")
                st.divider()

        if not displayed_files:
            st.info("No files match your current filters. Try adjusting your search criteria.")

    return selected_files, displayed_files


def get_file_icon(filename):
    """Return appropriate icon for file type.

    The match is on the lower-cased file name suffix; unrecognised types get
    the generic document icon.
    """
    lowered = filename.lower()
    for suffixes, icon in _FILE_ICONS:
        if lowered.endswith(suffixes):
            return icon
    return _DEFAULT_FILE_ICON


def _render_download_results(download_dir, download_option):
    """Show the outcome of a finished download run and the download buttons."""
    downloaded_paths = st.session_state.downloaded_paths
    if not downloaded_paths:
        st.warning("No files were downloaded.")
        return

    st.success(f"✅ Downloaded {len(downloaded_paths)} files successfully!")

    if download_option == "ZIP Archive":
        # Create ZIP archive for download
        zip_path = create_zip_file(downloaded_paths, download_dir)
        with open(zip_path, "rb") as f:
            zip_content = f.read()
        st.download_button(
            "📦 Download ZIP Archive",
            zip_content,
            file_name=os.path.basename(zip_path),
            mime="application/zip",
        )
        return

    # Show individual file download links
    st.markdown("\n\nDownload Files\n\n", unsafe_allow_html=True)

    # Create a grid of download buttons
    cols = st.columns(3)
    for idx, path in enumerate(downloaded_paths):
        name = os.path.basename(path)
        # File contents are only read here, where they are actually needed.
        with open(path, "rb") as f:
            content = f.read()
        mime_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
        with cols[idx % 3]:
            st.download_button(
                f"📄 {name}",
                content,
                file_name=name,
                mime=mime_type,
                # The index keeps keys unique even if two paths share a basename.
                key=f"dl_{idx}_{name}",
                use_container_width=True,
            )


def handle_downloads(selected_files, download_dir, download_option, download_col1):
    """Handle downloading of selected files.

    Args:
        selected_files: Indices into ``st.session_state.files``.
        download_dir: Directory passed to the download manager and the ZIP helper.
        download_option: ``"ZIP Archive"`` offers a single archive; any other
            value offers one download button per file.
        download_col1: Streamlit container in which status and results are shown.

    Stores the resulting paths in ``st.session_state.downloaded_paths`` and
    sets ``st.session_state.download_complete`` when the run finishes.
    """
    if not selected_files:
        return

    # NOTE(review): the collapsed source does not show how much of this
    # function was nested under download_col1; everything is rendered there.
    with download_col1:
        download_status = st.empty()
        download_progress = st.progress(0)

        async def run_download():
            async with DownloadManager(
                use_proxy=st.session_state.use_proxy,
                proxy=st.session_state.proxy_string,
                use_stealth=st.session_state.stealth_mode,
            ) as manager:
                files_to_download = [st.session_state.files[i] for i in selected_files]
                total = len(files_to_download)

                # Reset download paths
                st.session_state.downloaded_paths = []

                for i, file_info in enumerate(files_to_download):
                    download_status.text(f"Downloading {i+1}/{total}: {file_info['filename']}")
                    download_progress.progress(i / total)
                    downloaded_path = await manager.download_file(
                        file_info,
                        download_dir,
                        get_domain(file_info['url']),
                    )
                    if downloaded_path:
                        st.session_state.downloaded_paths.append(downloaded_path)

                download_progress.progress(1.0)
                download_status.text(
                    f"Downloaded {len(st.session_state.downloaded_paths)}/{total} files successfully!"
                )
                st.session_state.download_complete = True

        # Execute the download asynchronously
        asyncio.run(run_download())

        # Show download results
        if st.session_state.get("download_complete"):
            _render_download_results(download_dir, download_option)


def handle_google_drive_upload(selected_files):
    """Handle uploading files to Google Drive.

    Uploads every path in ``st.session_state.downloaded_paths`` into the
    folder named by ``st.session_state.drive_folder`` (created if the lookup
    finds none). Does nothing without credentials or downloaded files.

    Args:
        selected_files: Not used; kept so existing callers keep working.
    """
    if not st.session_state.google_credentials or not st.session_state.downloaded_paths:
        return

    with st.spinner("Uploading to Google Drive..."):
        drive_service = googleapiclient.discovery.build(
            "drive", "v3", credentials=st.session_state.google_credentials
        )

        folder_name = st.session_state.get("drive_folder") or _DEFAULT_DRIVE_FOLDER

        # Check if folder exists. Backslashes and single quotes are escaped
        # so a folder name containing them cannot break the query string.
        escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
        query = (
            f"name='{escaped_name}' and "
            "mimeType='application/vnd.google-apps.folder' and trashed=false"
        )
        results = drive_service.files().list(q=query, spaces='drive', fields='files(id)').execute()
        items = results.get('files', [])

        # Create folder if it doesn't exist
        if items:
            folder_id = items[0]['id']
        else:
            folder_id = create_drive_folder(drive_service, folder_name)

        # Upload each file
        upload_progress = st.progress(0)
        status_text = st.empty()
        paths = st.session_state.downloaded_paths
        total = len(paths)
        uploaded_count = 0

        for i, path in enumerate(paths):
            status_text.text(f"Uploading {i+1}/{total}: {os.path.basename(path)}")
            upload_progress.progress(i / total)
            result = google_drive_upload(path, st.session_state.google_credentials, folder_id)
            # An upload counts as successful when the helper returns a string
            # that does not start with "Error".
            if isinstance(result, str) and not result.startswith("Error"):
                uploaded_count += 1

        upload_progress.progress(1.0)
        status_text.text(
            f"Uploaded {uploaded_count}/{total} files to Google Drive folder '{folder_name}'"
        )

        # Report what actually happened instead of always claiming success.
        if uploaded_count == total:
            st.success("✅ Files uploaded to Google Drive successfully!")
        elif uploaded_count:
            st.warning(f"Uploaded {uploaded_count} of {total} files; some uploads failed.")
        else:
            st.error("No files could be uploaded to Google Drive.")


def get_domain(url):
    """Extract domain from URL"""
    return urlparse(url).netloc