import streamlit as st
import os
import asyncio
import mimetypes
from app.utils import create_zip_file, humanize_file_size, show_user_friendly_error
from app.download_manager import DownloadManager
from app.rag_search import EnhancedRAGSearch
from app.google_drive import (
get_google_auth_url, exchange_code_for_credentials,
google_drive_upload, create_drive_folder
)
import googleapiclient.discovery
def setup_ui():
    """Emit the page-level raw-HTML markdown block for the main UI."""
    # The payload is rendered with HTML enabled; it currently holds a single
    # newline.
    page_markup = "\n"
    st.markdown(page_markup, unsafe_allow_html=True)
def create_sidebar():
    """Build the sidebar: logo, section markup and the Drive integration block.

    Everything is rendered inside ``st.sidebar``. Reads
    ``st.session_state.mode``; the extra preset block is emitted only when it
    equals ``"Education Mode"``.
    """
    with st.sidebar:
        st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=50)
        # Header markup. The payload is a single newline, written as an escape
        # so the literal stays on one line and is valid Python.
        st.markdown("\n", unsafe_allow_html=True)

        # Mode Selection
        st.markdown("", unsafe_allow_html=True)

        # Quick Settings
        st.markdown("", unsafe_allow_html=True)

        # Google Drive Integration
        show_google_drive_integration()

        # Preset buttons for educational sites
        if st.session_state.mode == "Education Mode":
            st.markdown("", unsafe_allow_html=True)

        # Tool status
        st.markdown("", unsafe_allow_html=True)

        # App info
        st.markdown("", unsafe_allow_html=True)
def show_google_drive_integration():
    """Render the Google Drive integration section of the UI."""
    # Section markup is passed through with HTML rendering enabled.
    section_markup = ""
    st.markdown(section_markup, unsafe_allow_html=True)
def install_playwright_dependencies():
    """Install the system libraries and Python package needed for Playwright.

    Steps, in order:

    1. Point ``PLAYWRIGHT_BROWSERS_PATH`` at ``~/.cache/ms-playwright``.
    2. ``apt-get update`` and install the shared libraries listed below.
    3. ``pip install playwright`` and ``playwright install chromium``.

    Step 3 is run through ``sys.executable`` so the package and browser are
    installed for the interpreter that is running this app, not for whichever
    ``pip``/``python3`` happens to be first on ``PATH``.

    Returns nothing. Any failure (missing ``apt-get``, non-zero exit status,
    ...) is caught and reported via ``st.error`` / ``st.info``.
    """
    import subprocess
    import sys

    system_packages = [
        'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
        'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
        'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0'
    ]

    try:
        # Set environment variable for Playwright browsers path
        os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")

        # Install system dependencies
        subprocess.run(['apt-get', 'update', '-y'], check=True)
        subprocess.run(
            ['apt-get', 'install', '-y', '--no-install-recommends'] + system_packages,
            check=True,
        )

        # Install Playwright and its Chromium build for the current interpreter
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'playwright'], check=True)
        subprocess.run([sys.executable, '-m', 'playwright', 'install', 'chromium'], check=True)

        st.success("Playwright dependencies installed successfully!")
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the page.
        st.error(f"Error installing Playwright dependencies: {e}")
        st.info("You may need to manually install dependencies. Check console for details.")
# Multipliers for the unit suffix of a human-readable size string.
_SIZE_UNIT_MULTIPLIERS = {
    'bytes': 1,
    'KB': 1024,
    'MB': 1024**2,
    'GB': 1024**3,
    'TB': 1024**4,
}


def _size_in_bytes(size_str, unknown):
    """Convert a size string such as ``"1.5 MB"`` to a number of bytes.

    Returns ``unknown`` when ``size_str`` is not a string, contains
    ``"Unknown"``, or is not of the form ``<number> <unit>`` with a unit
    listed in ``_SIZE_UNIT_MULTIPLIERS``.
    """
    if not isinstance(size_str, str) or 'Unknown' in size_str:
        return unknown
    parts = size_str.split()
    if len(parts) < 2:
        return unknown
    multiplier = _SIZE_UNIT_MULTIPLIERS.get(parts[1])
    if multiplier is None:
        return unknown
    try:
        return float(parts[0]) * multiplier
    except ValueError:
        return unknown


def display_file_results(files):
    """Display file results with filtering and sorting options.

    Args:
        files: sequence of dicts; the keys ``'filename'``, ``'url'`` and
            ``'size'`` are read from each entry.

    Returns:
        ``None`` when ``files`` is empty. Otherwise a tuple
        ``(selected_files, displayed_files)`` of index lists. Both hold
        indices into ``files`` *as passed in*, independent of the sort order
        chosen in the UI, so callers can use them to index the same list.
        ``displayed_files`` covers every row that passed the filters;
        ``selected_files`` is the subset whose checkbox is ticked.
    """
    if not files:
        return

    st.markdown("", unsafe_allow_html=True)

    # File filtering options
    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 1])
    with filter_col1:
        file_filter = st.text_input("Filter files by name:", placeholder="e.g., exam, 2023, etc.")
    with filter_col2:
        sort_option = st.selectbox("Sort by:", ["Relevance", "Name", "Size (Largest)", "Size (Smallest)"])
    with filter_col3:
        show_only_pdfs = st.checkbox("PDFs Only", value=False)

    # Pair every file with its original position before sorting, so the
    # indices handed back (and the widget keys) keep pointing at the same
    # entry of ``files`` whatever order is displayed.
    indexed_files = list(enumerate(files))
    if sort_option == "Name":
        indexed_files.sort(key=lambda pair: pair[1]['filename'])
    elif sort_option == "Size (Largest)":
        # Unknown sizes count as 0 so they end up last.
        indexed_files.sort(key=lambda pair: _size_in_bytes(pair[1]['size'], 0), reverse=True)
    elif sort_option == "Size (Smallest)":
        # Unknown sizes count as infinity so they end up last here too.
        indexed_files.sort(key=lambda pair: _size_in_bytes(pair[1]['size'], float('inf')))

    # File list with selection
    file_container = st.container()
    with file_container:
        selected_files = []
        displayed_files = []
        name_filter = file_filter.lower() if file_filter else ""

        for original_index, file in indexed_files:
            lowered_name = file['filename'].lower()

            # Apply filters
            if name_filter and name_filter not in lowered_name:
                continue
            if show_only_pdfs and not lowered_name.endswith('.pdf'):
                continue

            displayed_files.append(original_index)
            with st.container():
                col1, col2, col3, col4 = st.columns([0.5, 3, 1, 1])
                with col1:
                    selected = st.checkbox("", key=f"select_{original_index}", value=True)
                    if selected:
                        selected_files.append(original_index)
                with col2:
                    file_icon = get_file_icon(file['filename'])
                    st.markdown(f"**{file_icon} {file['filename']}**")
                    st.markdown(f"{file['url'][:60]}...", unsafe_allow_html=True)
                with col3:
                    st.markdown(f"**Size:** {file['size']}")
                with col4:
                    st.button("Preview", key=f"preview_{original_index}")
                st.divider()

        if not displayed_files:
            st.info("No files match your current filters. Try adjusting your search criteria.")

    return selected_files, displayed_files
# Ordered (extensions, icon) pairs; the first entry whose extensions match wins.
_FILE_ICON_TABLE = (
    (('.pdf',), "📝"),
    (('.doc', '.docx'), "📋"),
    (('.xls', '.xlsx'), "📊"),
    (('.ppt', '.pptx'), "🖼️"),
    (('.jpg', '.jpeg', '.png', '.gif'), "🖼️"),
    (('.mp3', '.wav'), "🔊"),
    (('.mp4', '.avi', '.mov'), "🎬"),
)


def get_file_icon(filename):
    """Return the emoji icon matching the extension of ``filename``.

    Matching is case-insensitive and based on the end of the name only.
    Names with no recognised extension get the generic document icon.

    >>> get_file_icon("Report.PDF")
    '📝'
    >>> get_file_icon("unknown.bin")
    '📄'
    """
    lowered = filename.lower()
    for extensions, icon in _FILE_ICON_TABLE:
        # str.endswith accepts a tuple, so one call covers the whole group.
        if lowered.endswith(extensions):
            return icon
    return "📄"
def handle_downloads(selected_files, download_dir, download_option, download_col1):
    """Download the selected files and offer them to the user.

    Args:
        selected_files: indices into ``st.session_state.files``; nothing
            happens when this is empty.
        download_dir: directory passed to the download manager and to
            ``create_zip_file``.
        download_option: ``"ZIP Archive"`` serves one archive; any other
            value serves one download button per file.
        download_col1: Streamlit container in which the status text and
            progress bar are placed.

    Side effects: resets and fills ``st.session_state.downloaded_paths`` and
    sets ``st.session_state.download_complete`` once the download loop ends.
    """
    if not selected_files:
        return

    # Status widgets live in the caller-supplied column.
    with download_col1:
        download_status = st.empty()
        download_progress = st.progress(0)

    async def run_download():
        async with DownloadManager(
            use_proxy=st.session_state.use_proxy,
            proxy=st.session_state.proxy_string,
            use_stealth=st.session_state.stealth_mode
        ) as manager:
            files_to_download = [st.session_state.files[i] for i in selected_files]
            total = len(files_to_download)

            # Reset download paths
            st.session_state.downloaded_paths = []

            for i, file_info in enumerate(files_to_download):
                download_status.text(f"Downloading {i+1}/{total}: {file_info['filename']}")
                download_progress.progress(i / total)

                downloaded_path = await manager.download_file(
                    file_info,
                    download_dir,
                    get_domain(file_info['url'])
                )
                # A falsy result is skipped, so the list only holds usable paths.
                if downloaded_path:
                    st.session_state.downloaded_paths.append(downloaded_path)

            download_progress.progress(1.0)
            download_status.text(
                f"Downloaded {len(st.session_state.downloaded_paths)}/{total} files successfully!"
            )
            st.session_state.download_complete = True

    # Run the download
    asyncio.run(run_download())

    # Show download results
    if not st.session_state.download_complete:
        return

    downloaded_paths = st.session_state.downloaded_paths
    st.success(f"✅ Downloaded {len(downloaded_paths)} files successfully!")
    if not downloaded_paths:
        return

    if download_option == "ZIP Archive":
        # Only the archive is served here, so the individual files are not
        # loaded into memory.
        zip_path = create_zip_file(downloaded_paths, download_dir)
        with open(zip_path, "rb") as f:
            zip_content = f.read()
        st.download_button("📦 Download ZIP Archive",
                           zip_content,
                           file_name=os.path.basename(zip_path),
                           mime="application/zip")
        return

    # Show individual file download links
    st.markdown("Download Files\n", unsafe_allow_html=True)

    # Create a grid of download buttons
    cols = st.columns(3)
    for idx, path in enumerate(downloaded_paths):
        name = os.path.basename(path)
        with open(path, "rb") as f:
            content = f.read()
        mime_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
        with cols[idx % 3]:
            st.download_button(
                f"📄 {name}",
                content,
                file_name=name,
                mime=mime_type,
                # The position is part of the key so two paths with the same
                # basename do not produce duplicate widget keys.
                key=f"dl_{idx}_{name}",
                use_container_width=True
            )
def handle_google_drive_upload(selected_files):
    """Upload every path in ``st.session_state.downloaded_paths`` to Google Drive.

    Does nothing unless both ``st.session_state.google_credentials`` and
    ``st.session_state.downloaded_paths`` are set and non-empty. Files go into
    the folder named by ``st.session_state.drive_folder`` (default
    ``"File Downloader"``); the folder is looked up by name and created when
    no match is found.

    Args:
        selected_files: not used by this function; kept so existing callers
            keep working.
    """
    if not st.session_state.google_credentials or not st.session_state.downloaded_paths:
        return

    credentials = st.session_state.google_credentials
    paths = st.session_state.downloaded_paths
    total = len(paths)
    folder_name = st.session_state.drive_folder if 'drive_folder' in st.session_state else "File Downloader"

    with st.spinner("Uploading to Google Drive..."):
        drive_service = googleapiclient.discovery.build("drive", "v3", credentials=credentials)

        # Check if folder exists. Backslashes and single quotes in the name
        # are escaped so they cannot terminate the quoted value in the query.
        escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
        query = f"name='{escaped_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
        results = drive_service.files().list(q=query, spaces='drive', fields='files(id)').execute()
        items = results.get('files', [])

        if items:
            folder_id = items[0]['id']
        else:
            # Create folder
            folder_id = create_drive_folder(drive_service, folder_name)

        # Upload each file
        upload_progress = st.progress(0)
        status_text = st.empty()
        uploaded_count = 0

        for i, path in enumerate(paths):
            status_text.text(f"Uploading {i+1}/{total}: {os.path.basename(path)}")
            upload_progress.progress(i / total)

            result = google_drive_upload(path, credentials, folder_id)
            # Only a string result that does not start with "Error" counts as
            # a successful upload.
            if isinstance(result, str) and not result.startswith("Error"):
                uploaded_count += 1

        upload_progress.progress(1.0)
        status_text.text(f"Uploaded {uploaded_count}/{total} files to Google Drive folder '{folder_name}'")

    # Report the real outcome instead of always claiming success.
    if uploaded_count == total:
        st.success("✅ Files uploaded to Google Drive successfully!")
    elif uploaded_count:
        st.warning(f"⚠️ Only {uploaded_count} of {total} files were uploaded to Google Drive.")
    else:
        st.error("❌ No files could be uploaded to Google Drive.")
def get_domain(url):
    """Return the network-location component (``netloc``) of ``url``."""
    import urllib.parse as url_tools

    components = url_tools.urlparse(url)
    return components.netloc