euler314 committed on
Commit
d4e3bdc
·
verified ·
1 Parent(s): 5d6326a

Create app/ui.py

Browse files
Files changed (1) hide show
  1. app/ui.py +465 -0
app/ui.py ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import html
import mimetypes
import os

import googleapiclient.discovery
import streamlit as st

from app.download_manager import DownloadManager
from app.google_drive import (
    create_drive_folder,
    exchange_code_for_credentials,
    get_google_auth_url,
    google_drive_upload,
)
from app.rag_search import EnhancedRAGSearch
from app.utils import create_zip_file, humanize_file_size, show_user_friendly_error
13
+
14
def setup_ui():
    """Inject the application's global stylesheet into the Streamlit page.

    The CSS styles the tab strip, stretches buttons to full width, and
    defines the helper classes used by the rest of the UI
    (``main-header``, ``section-subheader``, ``info-text``,
    ``result-card``, ``sidebar-header`` and ``sidebar-section``).
    """
    stylesheet = """
    <style>
    .stTabs [data-baseweb="tab-list"] {
    gap: 10px;
    }
    .stTabs [data-baseweb="tab"] {
    height: 50px;
    white-space: pre-wrap;
    border-radius: 4px 4px 0px 0px;
    padding: 10px 16px;
    background-color: #f0f2f6;
    }
    .stTabs [aria-selected="true"] {
    background-color: #ffffff !important;
    border-bottom: 2px solid #4c78a8;
    }
    .stFileUploader > div > div > button {
    width: 100%;
    }
    .main-header {
    font-size: 2.5rem;
    font-weight: 700;
    margin-bottom: 10px;
    }
    .section-subheader {
    font-size: 1.3rem;
    font-weight: 600;
    margin-top: 20px;
    margin-bottom: 10px;
    }
    .info-text {
    color: #6c757d;
    font-size: 0.9rem;
    }
    .stButton>button {
    width: 100%;
    }
    .result-card {
    background-color: #f8f9fa;
    border-radius: 6px;
    padding: 16px;
    margin-bottom: 12px;
    border-left: 4px solid #4c78a8;
    }
    .sidebar-header {
    font-size: 1.2rem;
    font-weight: 600;
    margin-bottom: 10px;
    }
    .sidebar-section {
    margin-bottom: 20px;
    }
    </style>
    """
    # The <style> element is passed as raw HTML, hence unsafe_allow_html.
    st.markdown(stylesheet, unsafe_allow_html=True)
70
+
71
def create_sidebar():
    """Render the sidebar: mode picker, quick settings, Drive, presets, status.

    Reads and updates the ``st.session_state`` entries ``mode``,
    ``stealth_mode``, ``use_proxy`` and ``proxy_string`` (these are read
    before being written, so they must already exist). Depending on user
    actions it also sets ``custom_extensions``, ``prioritize_pdfs``,
    ``preset_url`` and ``search_method``.
    """
    modes = ["Standard", "Education Mode", "Research Mode", "Media Mode"]
    # mode -> (custom_extensions, prioritize_pdfs); "Standard" changes nothing.
    mode_presets = {
        "Education Mode": (".pdf,.doc,.docx,.ppt,.pptx", True),
        "Research Mode": (".pdf,.txt,.csv,.json,.xlsx", True),
        "Media Mode": (".jpg,.png,.mp3,.mp4,.avi,.mov", False),
    }
    # (button label, preset URL, search method) for Education Mode shortcuts.
    education_shortcuts = [
        ("Past Exam Papers", "https://pastpapers.example.edu", "Exam Site Mode"),
        ("Open Course Materials", "https://opencourseware.example.edu", "Deep Search"),
        ("Research Papers", "https://papers.example.org", "Deep Search"),
    ]
    state = st.session_state

    with st.sidebar:
        st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=50)
        st.markdown("<p class='sidebar-header'>Advanced File Downloader</p>", unsafe_allow_html=True)

        # Mode Selection
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>Mode</p>", unsafe_allow_html=True)
        # Fall back to the first mode instead of raising ValueError when the
        # stored mode is not one of the known options.
        current_index = modes.index(state.mode) if state.mode in modes else 0
        mode = st.radio(
            "Select Mode",
            modes,
            label_visibility="collapsed",
            index=current_index,
            horizontal=False,
        )

        if mode != state.mode:
            state.mode = mode
            # Update mode-specific settings
            if mode in mode_presets:
                state.custom_extensions, state.prioritize_pdfs = mode_presets[mode]

        st.markdown(f"<div class='info-text'>Current: <b>{state.mode}</b></div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)

        # Quick Settings
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>Quick Settings</p>", unsafe_allow_html=True)

        stealth_mode = st.checkbox("Stealth Mode", value=state.stealth_mode)
        if stealth_mode != state.stealth_mode:
            state.stealth_mode = stealth_mode

        use_proxy = st.checkbox("Use Proxy", value=state.use_proxy)
        if use_proxy != state.use_proxy:
            state.use_proxy = use_proxy

        if use_proxy:
            proxy_string = st.text_input(
                "Proxy Address",
                placeholder="e.g., http://user:pass@host:port",
                value=state.proxy_string or "",
            )
            if proxy_string != state.proxy_string:
                state.proxy_string = proxy_string

        st.markdown("</div>", unsafe_allow_html=True)

        # Google Drive Integration
        show_google_drive_integration()

        # Preset buttons for educational sites
        if state.mode == "Education Mode":
            st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
            st.markdown("<p class='sidebar-header'>Quick Access</p>", unsafe_allow_html=True)
            st.markdown("<div class='info-text'>Common Educational Sites</div>", unsafe_allow_html=True)

            for label, preset_url, search_method in education_shortcuts:
                if st.button(label):
                    state.preset_url = preset_url
                    state.search_method = search_method
                    st.rerun()

            st.markdown("</div>", unsafe_allow_html=True)

        # Tool status
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>System Status</p>", unsafe_allow_html=True)

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("<div class='info-text'>Search</div>", unsafe_allow_html=True)
            st.markdown("<div style='color: green; font-weight: bold;'>Active</div>", unsafe_allow_html=True)
        with col2:
            st.markdown("<div class='info-text'>Browser</div>", unsafe_allow_html=True)
            st.markdown("<div style='color: green; font-weight: bold;'>Ready</div>", unsafe_allow_html=True)

        if st.button("Install Dependencies"):
            with st.spinner("Installing Playwright dependencies..."):
                install_playwright_dependencies()

        st.markdown("</div>", unsafe_allow_html=True)

        # App info
        st.markdown("<div class='sidebar-section' style='position: absolute; bottom: 20px; width: 90%;'>", unsafe_allow_html=True)
        st.markdown("<div class='info-text' style='text-align: center;'>Version 2.0 β€’ March 2025</div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)
173
+
174
def show_google_drive_integration():
    """Display the Google Drive connection controls.

    When ``st.session_state.google_credentials`` is set, shows the target
    folder input (stored in ``st.session_state.drive_folder``) and a
    disconnect button. Otherwise offers a connect button that reveals the
    authorization link and a field for the authorization code; a code that
    exchanges successfully is stored in ``google_credentials``.

    The authorization URL is kept in ``st.session_state.drive_auth_url`` so
    the link and code field survive reruns: ``st.button`` only returns True
    on the single rerun triggered by the click, so widgets nested directly
    under it would vanish as soon as the user typed the code.
    """
    state = st.session_state
    st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
    st.markdown("<p class='sidebar-header'>Google Drive</p>", unsafe_allow_html=True)

    if state.google_credentials:
        st.success("βœ… Connected")

        drive_folder = st.text_input(
            "Drive Folder",
            value=state.get("drive_folder", "File Downloader"),
        )
        if 'drive_folder' not in state or drive_folder != state.drive_folder:
            state.drive_folder = drive_folder

        if st.button("Disconnect Drive"):
            state.google_credentials = None
            state.pop("drive_auth_url", None)
            st.rerun()
    else:
        st.warning("⚠️ Not Connected")
        if st.button("Connect Google Drive"):
            # Remember the URL so the authorization step outlives this rerun.
            state["drive_auth_url"] = get_google_auth_url()

        auth_url = state.get("drive_auth_url")
        if auth_url:
            st.markdown(f"[Click here to authorize]({auth_url})")
            auth_code = st.text_input("Enter authorization code:")

            if auth_code:
                with st.spinner("Connecting to Google Drive..."):
                    credentials, status_msg = exchange_code_for_credentials(auth_code)
                if credentials:
                    state.google_credentials = credentials
                    state.pop("drive_auth_url", None)
                    st.success(status_msg)
                    st.rerun()
                else:
                    st.error(status_msg)

    st.markdown("</div>", unsafe_allow_html=True)
208
+
209
def install_playwright_dependencies():
    """Install the system libraries, Playwright and its Chromium browser.

    Sets ``PLAYWRIGHT_BROWSERS_PATH`` to ``~/.cache/ms-playwright``, then
    runs ``apt-get update``, ``apt-get install`` for the listed libraries,
    ``pip install playwright`` and ``playwright install chromium`` in that
    order, stopping at the first failure. The outcome is reported through
    ``st.success`` or ``st.error``/``st.info``; nothing is returned.
    """
    import subprocess
    import sys

    system_packages = [
        'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
        'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
        'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0',
    ]
    # Use the interpreter running this app so Playwright is installed into
    # the environment that will import it, not whatever "pip" is on PATH.
    python = sys.executable or 'python3'
    commands = [
        ['apt-get', 'update', '-y'],
        ['apt-get', 'install', '-y', '--no-install-recommends'] + system_packages,
        [python, '-m', 'pip', 'install', 'playwright'],
        [python, '-m', 'playwright', 'install', 'chromium'],
    ]

    # Set environment variable for Playwright browsers path
    os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")

    try:
        for command in commands:
            subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: a command exited non-zero.
        # OSError: the executable is missing or not permitted to run.
        st.error(f"Error installing Playwright dependencies: {e}")
        st.info("You may need to manually install dependencies. Check console for details.")
    else:
        st.success("Playwright dependencies installed successfully!")
235
+
236
def _size_in_bytes(size_text, unknown):
    """Convert a size label such as ``"1.5 MB"`` to bytes, or return *unknown*."""
    multipliers = {'bytes': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}
    if not isinstance(size_text, str) or 'Unknown' in size_text:
        return unknown
    parts = size_text.split(' ')
    try:
        return float(parts[0]) * multipliers[parts[1]]
    except (ValueError, IndexError, KeyError):
        # Malformed number, missing unit, or unrecognized unit.
        return unknown


def display_file_results(files):
    """Display file results with filtering, sorting and per-file selection.

    Args:
        files: Sequence of dicts; each entry is read for the keys
            ``'filename'``, ``'url'`` and ``'size'``.

    Returns:
        ``None`` when *files* is empty. Otherwise a tuple
        ``(selected_files, displayed_files)`` of index lists:
        ``displayed_files`` holds the entries that passed the name/PDF
        filters and ``selected_files`` the displayed entries whose checkbox
        is ticked. Every index refers to a position in *files* as passed
        in, regardless of the sort order chosen for display, so callers can
        index their own list with them.
    """
    if not files:
        return

    st.markdown("<h3 class='section-subheader'>Found Files</h3>", unsafe_allow_html=True)

    # File filtering options
    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 1])
    with filter_col1:
        file_filter = st.text_input("Filter files by name:", placeholder="e.g., exam, 2023, etc.")
    with filter_col2:
        sort_option = st.selectbox("Sort by:", ["Relevance", "Name", "Size (Largest)", "Size (Smallest)"])
    with filter_col3:
        show_only_pdfs = st.checkbox("PDFs Only", value=False)

    # Sort (original index, entry) pairs so the original index travels with
    # each entry; "Relevance" keeps the incoming order.
    ordered = list(enumerate(files))
    if sort_option == "Name":
        ordered.sort(key=lambda pair: pair[1]['filename'])
    elif sort_option == "Size (Largest)":
        # Unknown sizes count as 0 so they end up last.
        ordered.sort(key=lambda pair: _size_in_bytes(pair[1]['size'], 0), reverse=True)
    elif sort_option == "Size (Smallest)":
        # Unknown sizes count as infinity so they end up last here too.
        ordered.sort(key=lambda pair: _size_in_bytes(pair[1]['size'], float('inf')))

    needle = file_filter.lower() if file_filter else ""

    # File list with selection
    selected_files = []
    displayed_files = []
    with st.container():
        for index, entry in ordered:
            filename = entry['filename']
            lowered = filename.lower()

            # Apply filters
            if needle and needle not in lowered:
                continue
            if show_only_pdfs and not lowered.endswith('.pdf'):
                continue

            displayed_files.append(index)
            with st.container():
                col1, col2, col3, col4 = st.columns([0.5, 3, 1, 1])
                with col1:
                    # Keys use the original index, so a file keeps its
                    # checkbox state when the sort order changes.
                    selected = st.checkbox(
                        f"Select {filename}",
                        key=f"select_{index}",
                        value=True,
                        label_visibility="collapsed",
                    )
                    if selected:
                        selected_files.append(index)
                with col2:
                    file_icon = get_file_icon(filename)
                    st.markdown(f"**{file_icon} {filename}**")
                    # This text is rendered as raw HTML, so escape the URL.
                    url_preview = html.escape(entry['url'][:60])
                    st.markdown(f"<span class='info-text'>{url_preview}...</span>", unsafe_allow_html=True)
                with col3:
                    st.markdown(f"**Size:** {entry['size']}")
                with col4:
                    st.button("Preview", key=f"preview_{index}")

                st.divider()

        if not displayed_files:
            st.info("No files match your current filters. Try adjusting your search criteria.")

    return selected_files, displayed_files
319
+
320
def get_file_icon(filename):
    """Pick the icon string for *filename* based on its extension.

    Matching is case-insensitive; names with no recognized extension get
    the generic document icon.
    """
    icons_by_suffix = (
        (('.pdf',), "πŸ“"),
        (('.doc', '.docx'), "πŸ“‹"),
        (('.xls', '.xlsx'), "πŸ“Š"),
        (('.ppt', '.pptx'), "πŸ–ΌοΈ"),
        (('.jpg', '.png', '.gif'), "πŸ–ΌοΈ"),
        (('.mp3', '.wav'), "πŸ”Š"),
        (('.mp4', '.avi', '.mov'), "🎬"),
    )
    lowered = filename.lower()
    for suffixes, icon in icons_by_suffix:
        if lowered.endswith(suffixes):
            return icon
    return "πŸ“„"
338
+
339
def handle_downloads(selected_files, download_dir, download_option, download_col1):
    """Download the selected files and offer them to the user.

    Args:
        selected_files: Indices into ``st.session_state.files`` to download.
            Nothing happens when this is empty.
        download_dir: Directory passed to ``DownloadManager.download_file``
            and to ``create_zip_file``.
        download_option: ``"ZIP Archive"`` offers a single archive; any
            other value offers one download button per file.
        download_col1: Streamlit container that hosts the status text and
            progress bar.

    Side effects: resets and fills ``st.session_state.downloaded_paths`` and
    sets ``st.session_state.download_complete`` to True once every file has
    been attempted.
    """
    if not selected_files:
        return

    # Execute the download asynchronously
    with download_col1:
        download_status = st.empty()
        download_progress = st.progress(0)

        async def run_download():
            async with DownloadManager(
                use_proxy=st.session_state.use_proxy,
                proxy=st.session_state.proxy_string,
                use_stealth=st.session_state.stealth_mode
            ) as manager:
                files_to_download = [st.session_state.files[i] for i in selected_files]
                total = len(files_to_download)

                # Reset download paths
                st.session_state.downloaded_paths = []

                for position, file_info in enumerate(files_to_download):
                    download_status.text(f"Downloading {position+1}/{total}: {file_info['filename']}")
                    download_progress.progress(position / total)

                    downloaded_path = await manager.download_file(
                        file_info,
                        download_dir,
                        get_domain(file_info['url'])
                    )

                    # A falsy result is treated as a failed download.
                    if downloaded_path:
                        st.session_state.downloaded_paths.append(downloaded_path)

                download_progress.progress(1.0)
                download_status.text(f"Downloaded {len(st.session_state.downloaded_paths)}/{total} files successfully!")
                st.session_state.download_complete = True

        # Run the download
        asyncio.run(run_download())

    # Show download results
    # NOTE(review): the original nesting of this section relative to
    # `with download_col1:` could not be recovered from this view; it is
    # rendered outside the column here - confirm the intended placement.
    if not st.session_state.download_complete:
        return

    downloaded_paths = st.session_state.downloaded_paths
    st.success(f"βœ… Downloaded {len(downloaded_paths)} files successfully!")
    if not downloaded_paths:
        return

    if download_option == "ZIP Archive":
        # Create ZIP archive for download; the individual files do not need
        # to be read into memory for this option.
        zip_path = create_zip_file(downloaded_paths, download_dir)
        with open(zip_path, "rb") as f:
            zip_content = f.read()
        st.download_button("πŸ“¦ Download ZIP Archive",
                           zip_content,
                           file_name=os.path.basename(zip_path),
                           mime="application/zip")
        return

    # Show individual file download links
    st.markdown("<h4>Download Files</h4>", unsafe_allow_html=True)

    # Create a grid of download buttons
    cols = st.columns(3)
    for idx, path in enumerate(downloaded_paths):
        name = os.path.basename(path)
        with open(path, "rb") as f:
            content = f.read()
        mime_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
        with cols[idx % 3]:
            st.download_button(
                f"πŸ“„ {name}",
                content,
                file_name=name,
                mime=mime_type,
                # Include the position: two downloads can share a basename,
                # and duplicate widget keys make Streamlit raise.
                key=f"dl_{idx}_{name}",
                use_container_width=True
            )
418
+
419
def handle_google_drive_upload(selected_files):
    """Upload every path in ``st.session_state.downloaded_paths`` to Drive.

    Does nothing unless both ``st.session_state.google_credentials`` and
    ``st.session_state.downloaded_paths`` are set and non-empty. Files go
    into the folder named by ``st.session_state.drive_folder`` (default
    ``"File Downloader"``), which is created when no matching, non-trashed
    folder is found.

    Args:
        selected_files: Not used by this function; kept so existing callers
            continue to work.
    """
    state = st.session_state
    if not state.google_credentials or not state.downloaded_paths:
        return

    paths = state.downloaded_paths
    total = len(paths)

    with st.spinner("Uploading to Google Drive..."):
        drive_service = googleapiclient.discovery.build("drive", "v3", credentials=state.google_credentials)

        folder_name = state.drive_folder if 'drive_folder' in state else "File Downloader"

        # Check if folder exists. The name is user input and the query wraps
        # it in single quotes, so backslashes and quotes must be escaped or a
        # name such as "Bob's files" produces an invalid query.
        escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
        query = f"name='{escaped_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
        results = drive_service.files().list(q=query, spaces='drive', fields='files(id)').execute()
        items = results.get('files', [])

        if items:
            folder_id = items[0]['id']
        else:
            # Create folder if it doesn't exist
            folder_id = create_drive_folder(drive_service, folder_name)

        # Upload each file
        upload_progress = st.progress(0)
        status_text = st.empty()
        uploaded_count = 0

        for position, path in enumerate(paths):
            status_text.text(f"Uploading {position+1}/{total}: {os.path.basename(path)}")
            upload_progress.progress(position / total)

            result = google_drive_upload(path, state.google_credentials, folder_id)
            # Only a string result that does not start with "Error" counts
            # as a successful upload.
            if isinstance(result, str) and not result.startswith("Error"):
                uploaded_count += 1

        upload_progress.progress(1.0)
        status_text.text(f"Uploaded {uploaded_count}/{total} files to Google Drive folder '{folder_name}'")

    # Report the real outcome instead of always claiming success.
    if uploaded_count == total:
        st.success("βœ… Files uploaded to Google Drive successfully!")
    elif uploaded_count:
        st.warning(f"Only {uploaded_count} of {total} files were uploaded to Google Drive.")
    else:
        st.error("No files could be uploaded to Google Drive.")
460
+
461
def get_domain(url):
    """Return the network-location (``netloc``) component of *url*."""
    import urllib.parse

    return urllib.parse.urlparse(url).netloc