euler314 committed on
Commit
deb3f2f
·
verified ·
1 Parent(s): 59cbf71

Delete ui.py

Browse files
Files changed (1) hide show
  1. ui.py +0 -465
ui.py DELETED
@@ -1,465 +0,0 @@
1
- import streamlit as st
2
- import os
3
- import asyncio
4
- import mimetypes
5
- from utils import create_zip_file, humanize_file_size, show_user_friendly_error
6
- from download_manager import DownloadManager
7
- from rag_search import EnhancedRAGSearch
8
- from google_drive import (
9
- get_google_auth_url, exchange_code_for_credentials,
10
- google_drive_upload, create_drive_folder
11
- )
12
- import googleapiclient.discovery
13
-
14
def setup_ui():
    """Inject the application's custom stylesheet into the Streamlit page.

    The CSS styles the tab bar, the file-uploader and regular buttons, the
    header / sub-header / info text helper classes, result cards and the
    sidebar sections. It is sent through ``st.markdown`` with
    ``unsafe_allow_html=True`` so the ``<style>`` element is emitted as HTML.
    """
    # The stylesheet text is kept flush-left, exactly as it is emitted.
    stylesheet = """
<style>
.stTabs [data-baseweb="tab-list"] {
gap: 10px;
}
.stTabs [data-baseweb="tab"] {
height: 50px;
white-space: pre-wrap;
border-radius: 4px 4px 0px 0px;
padding: 10px 16px;
background-color: #f0f2f6;
}
.stTabs [aria-selected="true"] {
background-color: #ffffff !important;
border-bottom: 2px solid #4c78a8;
}
.stFileUploader > div > div > button {
width: 100%;
}
.main-header {
font-size: 2.5rem;
font-weight: 700;
margin-bottom: 10px;
}
.section-subheader {
font-size: 1.3rem;
font-weight: 600;
margin-top: 20px;
margin-bottom: 10px;
}
.info-text {
color: #6c757d;
font-size: 0.9rem;
}
.stButton>button {
width: 100%;
}
.result-card {
background-color: #f8f9fa;
border-radius: 6px;
padding: 16px;
margin-bottom: 12px;
border-left: 4px solid #4c78a8;
}
.sidebar-header {
font-size: 1.2rem;
font-weight: 600;
margin-bottom: 10px;
}
.sidebar-section {
margin-bottom: 20px;
}
</style>
"""
    st.markdown(stylesheet, unsafe_allow_html=True)
70
-
71
def create_sidebar():
    """Render the sidebar: mode picker, quick settings, Drive panel and status.

    Reads and updates these ``st.session_state`` entries: ``mode``,
    ``custom_extensions``, ``prioritize_pdfs``, ``stealth_mode``,
    ``use_proxy``, ``proxy_string``, ``preset_url`` and ``search_method``.
    Also calls ``show_google_drive_integration()`` and, when the user presses
    "Install Dependencies", ``install_playwright_dependencies()``.
    """
    # Single source of truth for the radio options (previously written twice).
    modes = ["Standard", "Education Mode", "Research Mode", "Media Mode"]

    # Settings applied when the user switches *to* a mode:
    # mode -> (custom_extensions, prioritize_pdfs).
    # "Standard" has no entry, so switching to it leaves both values untouched.
    mode_presets = {
        "Education Mode": (".pdf,.doc,.docx,.ppt,.pptx", True),
        "Research Mode": (".pdf,.txt,.csv,.json,.xlsx", True),
        "Media Mode": (".jpg,.png,.mp3,.mp4,.avi,.mov", False),
    }

    # Education-mode shortcuts: (button label, preset_url, search_method).
    quick_links = (
        ("Past Exam Papers", "https://pastpapers.example.edu", "Exam Site Mode"),
        ("Open Course Materials", "https://opencourseware.example.edu", "Deep Search"),
        ("Research Papers", "https://papers.example.org", "Deep Search"),
    )

    with st.sidebar:
        st.image("https://img.icons8.com/color/96/000000/download--v1.png", width=50)
        st.markdown("<p class='sidebar-header'>Advanced File Downloader</p>", unsafe_allow_html=True)

        # Mode Selection
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>Mode</p>", unsafe_allow_html=True)

        # Fall back to the first option instead of raising ValueError when the
        # stored mode is not one of the known options.
        stored_mode = st.session_state.mode
        initial_index = modes.index(stored_mode) if stored_mode in modes else 0
        mode = st.radio(
            "Select Mode",
            modes,
            label_visibility="collapsed",
            index=initial_index,
            horizontal=False
        )

        if mode != st.session_state.mode:
            st.session_state.mode = mode
            # Update mode-specific settings
            if mode in mode_presets:
                extensions, prioritize_pdfs = mode_presets[mode]
                st.session_state.custom_extensions = extensions
                st.session_state.prioritize_pdfs = prioritize_pdfs

        st.markdown(f"<div class='info-text'>Current: <b>{st.session_state.mode}</b></div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)

        # Quick Settings
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>Quick Settings</p>", unsafe_allow_html=True)

        st.session_state.stealth_mode = st.checkbox(
            "Stealth Mode", value=st.session_state.stealth_mode
        )

        use_proxy = st.checkbox("Use Proxy", value=st.session_state.use_proxy)
        st.session_state.use_proxy = use_proxy

        if use_proxy:
            st.session_state.proxy_string = st.text_input(
                "Proxy Address",
                placeholder="e.g., http://user:pass@host:port",
                value=st.session_state.proxy_string or ""
            )

        st.markdown("</div>", unsafe_allow_html=True)

        # Google Drive Integration
        show_google_drive_integration()

        # Preset buttons for educational sites
        if st.session_state.mode == "Education Mode":
            st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
            st.markdown("<p class='sidebar-header'>Quick Access</p>", unsafe_allow_html=True)
            st.markdown("<div class='info-text'>Common Educational Sites</div>", unsafe_allow_html=True)

            for label, preset_url, search_method in quick_links:
                if st.button(label):
                    st.session_state.preset_url = preset_url
                    st.session_state.search_method = search_method
                    st.rerun()

            st.markdown("</div>", unsafe_allow_html=True)

        # Tool status
        st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
        st.markdown("<p class='sidebar-header'>System Status</p>", unsafe_allow_html=True)

        col1, col2 = st.columns(2)
        with col1:
            st.markdown("<div class='info-text'>Search</div>", unsafe_allow_html=True)
            st.markdown("<div style='color: green; font-weight: bold;'>Active</div>", unsafe_allow_html=True)
        with col2:
            st.markdown("<div class='info-text'>Browser</div>", unsafe_allow_html=True)
            st.markdown("<div style='color: green; font-weight: bold;'>Ready</div>", unsafe_allow_html=True)

        if st.button("Install Dependencies"):
            with st.spinner("Installing Playwright dependencies..."):
                install_playwright_dependencies()

        st.markdown("</div>", unsafe_allow_html=True)

        # App info
        st.markdown("<div class='sidebar-section' style='position: absolute; bottom: 20px; width: 90%;'>", unsafe_allow_html=True)
        st.markdown("<div class='info-text' style='text-align: center;'>Version 2.0 • March 2025</div>", unsafe_allow_html=True)
        st.markdown("</div>", unsafe_allow_html=True)
173
-
174
def show_google_drive_integration():
    """Display the Google Drive connection panel.

    When ``st.session_state.google_credentials`` is set, shows the connected
    state, lets the user edit the target folder name (stored in
    ``st.session_state.drive_folder``) and offers a disconnect button.
    Otherwise it walks the user through authorization: a button requests the
    auth URL via ``get_google_auth_url()``, then a text box collects the
    authorization code, which is passed to ``exchange_code_for_credentials()``.
    """
    st.markdown("<div class='sidebar-section'>", unsafe_allow_html=True)
    st.markdown("<p class='sidebar-header'>Google Drive</p>", unsafe_allow_html=True)

    if st.session_state.google_credentials:
        st.success("✅ Connected")

        st.session_state.drive_folder = st.text_input(
            "Drive Folder",
            value=st.session_state.get('drive_folder', "File Downloader")
        )

        if st.button("Disconnect Drive"):
            st.session_state.google_credentials = None
            st.rerun()
    else:
        st.warning("⚠️ Not Connected")

        # st.button() is True only during the rerun triggered by the click.
        # Typing an authorization code triggers another rerun in which the
        # button is False again, so the auth URL is remembered in session
        # state to keep the code input visible until the exchange completes.
        if st.button("Connect Google Drive"):
            st.session_state.drive_auth_url = get_google_auth_url()

        auth_url = st.session_state.get('drive_auth_url')
        if auth_url:
            st.markdown(f"[Click here to authorize]({auth_url})")
            auth_code = st.text_input("Enter authorization code:")

            if auth_code:
                with st.spinner("Connecting to Google Drive..."):
                    credentials, status_msg = exchange_code_for_credentials(auth_code)
                    if credentials:
                        st.session_state.google_credentials = credentials
                        # Authorization finished; drop the pending-auth marker.
                        st.session_state.drive_auth_url = None
                        st.success(status_msg)
                        st.rerun()
                    else:
                        st.error(status_msg)

    st.markdown("</div>", unsafe_allow_html=True)
208
-
209
def install_playwright_dependencies():
    """Install the system packages, Playwright and its Chromium browser.

    Runs ``apt-get update`` / ``apt-get install`` for the shared libraries
    listed below, then installs the ``playwright`` package and downloads
    Chromium. Outcome is reported through ``st.success`` / ``st.error``;
    no exception propagates to the caller.
    """
    try:
        import os
        import subprocess
        import sys

        # Set environment variable for Playwright browsers path
        os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")

        # Install system dependencies
        subprocess.run(['apt-get', 'update', '-y'], check=True)
        packages = [
            'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
            'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
            'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0'
        ]
        subprocess.run(['apt-get', 'install', '-y', '--no-install-recommends'] + packages, check=True)

        # Install Playwright and its browser with the interpreter that is
        # running this app. Bare `pip` / `python3` from PATH may belong to a
        # different environment, leaving playwright unimportable here.
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'playwright'], check=True)
        subprocess.run([sys.executable, '-m', 'playwright', 'install', 'chromium'], check=True)

        st.success("Playwright dependencies installed successfully!")
    except Exception as e:
        # UI boundary: surface any failure (non-zero exit, missing binary,
        # insufficient permissions) to the user instead of crashing the page.
        st.error(f"Error installing Playwright dependencies: {e}")
        st.info("You may need to manually install dependencies. Check console for details.")
235
-
236
def display_file_results(files):
    """Display file results with filtering, sorting and per-file selection.

    Args:
        files: Sequence of dicts; each is read for the keys ``'filename'``,
            ``'url'`` and ``'size'`` (a string such as ``"1.5 MB"`` or one
            containing ``"Unknown"``).

    Returns:
        ``None`` when *files* is empty. Otherwise a tuple
        ``(selected_files, displayed_files)`` of index lists. The indices are
        positions in *files* as passed in, regardless of the sort order chosen
        in the UI.
    """
    from html import escape

    # NOTE(review): the empty case returns None, not a pair of empty lists;
    # callers that unpack the result must guard for it.
    if not files:
        return None

    st.markdown("<h3 class='section-subheader'>Found Files</h3>", unsafe_allow_html=True)

    # File filtering options
    filter_col1, filter_col2, filter_col3 = st.columns([2, 2, 1])
    with filter_col1:
        file_filter = st.text_input("Filter files by name:", placeholder="e.g., exam, 2023, etc.")
    with filter_col2:
        sort_option = st.selectbox("Sort by:", ["Relevance", "Name", "Size (Largest)", "Size (Smallest)"])
    with filter_col3:
        show_only_pdfs = st.checkbox("PDFs Only", value=False)

    multipliers = {'bytes': 1, 'KB': 1024, 'MB': 1024**2, 'GB': 1024**3, 'TB': 1024**4}

    def parse_size(size_str, fallback):
        """Convert a '<number> <unit>' string to bytes; *fallback* if unknown or malformed."""
        if 'Unknown' in size_str:
            return fallback
        try:
            parts = size_str.split(' ')
            return float(parts[0]) * multipliers.get(parts[1], 0)
        except (ValueError, IndexError):
            return fallback

    # Keep every file paired with its index in *files*. handle_downloads()
    # resolves the returned indices against st.session_state.files, so they
    # must stay valid after sorting (assumes *files* is that same list —
    # confirm against the caller). It also keeps widget keys stable per file.
    indexed_files = list(enumerate(files))
    if sort_option == "Name":
        indexed_files.sort(key=lambda pair: pair[1]['filename'])
    elif sort_option == "Size (Largest)":
        # Unknown sizes count as 0 so they end up last.
        indexed_files.sort(key=lambda pair: parse_size(pair[1]['size'], 0), reverse=True)
    elif sort_option == "Size (Smallest)":
        # Unknown sizes count as infinity so they end up last.
        indexed_files.sort(key=lambda pair: parse_size(pair[1]['size'], float('inf')))

    # File list with selection
    selected_files = []
    displayed_files = []
    name_filter = file_filter.lower() if file_filter else ""

    file_container = st.container()
    with file_container:
        for original_index, file in indexed_files:
            # Apply filters
            lowered_name = file['filename'].lower()
            if name_filter and name_filter not in lowered_name:
                continue
            if show_only_pdfs and not lowered_name.endswith('.pdf'):
                continue

            displayed_files.append(original_index)
            with st.container():
                col1, col2, col3, col4 = st.columns([0.5, 3, 1, 1])
                with col1:
                    # A non-empty label, hidden visually, instead of "".
                    selected = st.checkbox(
                        f"Select {file['filename']}",
                        key=f"select_{original_index}",
                        value=True,
                        label_visibility="collapsed"
                    )
                    if selected:
                        selected_files.append(original_index)
                with col2:
                    file_icon = get_file_icon(file['filename'])
                    st.markdown(f"**{file_icon} {file['filename']}**")
                    # The URL is rendered as raw HTML, so escape it; only add
                    # the ellipsis when the URL was actually truncated.
                    url = file['url']
                    shown_url = escape(url[:60]) + ("..." if len(url) > 60 else "")
                    st.markdown(f"<span class='info-text'>{shown_url}</span>", unsafe_allow_html=True)
                with col3:
                    st.markdown(f"**Size:** {file['size']}")
                with col4:
                    st.button("Preview", key=f"preview_{original_index}")

            st.divider()

        if not displayed_files:
            st.info("No files match your current filters. Try adjusting your search criteria.")

    return selected_files, displayed_files
319
-
320
def get_file_icon(filename):
    """Pick the emoji shown next to a file, based on its extension.

    The match is case-insensitive on the end of *filename*. Names whose
    extension is not listed below get the generic document icon.
    """
    icons_by_suffix = (
        (('.pdf',), "📝"),
        (('.doc', '.docx'), "📋"),
        (('.xls', '.xlsx'), "📊"),
        (('.ppt', '.pptx'), "🖼️"),
        (('.jpg', '.png', '.gif'), "🖼️"),
        (('.mp3', '.wav'), "🔊"),
        (('.mp4', '.avi', '.mov'), "🎬"),
    )
    lowered = filename.lower()
    for suffixes, icon in icons_by_suffix:
        if lowered.endswith(suffixes):
            return icon
    return "📄"
338
-
339
def handle_downloads(selected_files, download_dir, download_option, download_col1):
    """Download the selected files and offer them to the user.

    Args:
        selected_files: Indices into ``st.session_state.files``.
        download_dir: Directory passed to ``DownloadManager.download_file``
            and to ``create_zip_file``.
        download_option: ``"ZIP Archive"`` offers one archive; any other value
            offers one download button per file.
        download_col1: Streamlit container the progress and results are
            rendered in.

    Side effects: resets and fills ``st.session_state.downloaded_paths`` and
    sets ``st.session_state.download_complete``.
    """
    if not selected_files:
        return

    # NOTE(review): the original indentation was not recoverable; the result
    # section is assumed to live inside ``download_col1`` with the progress UI.
    with download_col1:
        download_status = st.empty()
        download_progress = st.progress(0)

        async def run_download():
            """Download every selected file in order, updating the progress UI."""
            async with DownloadManager(
                use_proxy=st.session_state.use_proxy,
                proxy=st.session_state.proxy_string,
                use_stealth=st.session_state.stealth_mode
            ) as manager:
                files_to_download = [st.session_state.files[i] for i in selected_files]
                total = len(files_to_download)

                # Reset download paths
                st.session_state.downloaded_paths = []

                for position, file_info in enumerate(files_to_download):
                    download_status.text(f"Downloading {position+1}/{total}: {file_info['filename']}")
                    download_progress.progress(position / total)

                    downloaded_path = await manager.download_file(
                        file_info,
                        download_dir,
                        get_domain(file_info['url'])
                    )

                    # A falsy result means this file was not downloaded.
                    if downloaded_path:
                        st.session_state.downloaded_paths.append(downloaded_path)

                download_progress.progress(1.0)
                download_status.text(f"Downloaded {len(st.session_state.downloaded_paths)}/{total} files successfully!")
                st.session_state.download_complete = True

        # Run the download
        asyncio.run(run_download())

        # Show download results
        if st.session_state.download_complete:
            downloaded_paths = st.session_state.downloaded_paths
            st.success(f"✅ Downloaded {len(downloaded_paths)} files successfully!")

            if not downloaded_paths:
                return

            if download_option == "ZIP Archive":
                # Create ZIP archive for download. The individual files are
                # not read into memory here; only the archive is served.
                zip_path = create_zip_file(downloaded_paths, download_dir)
                with open(zip_path, "rb") as f:
                    zip_content = f.read()
                st.download_button("📦 Download ZIP Archive",
                                   zip_content,
                                   file_name=os.path.basename(zip_path),
                                   mime="application/zip")
            else:
                # Show individual file download links
                st.markdown("<h4>Download Files</h4>", unsafe_allow_html=True)

                # Create a grid of download buttons
                cols = st.columns(3)
                for idx, path in enumerate(downloaded_paths):
                    name = os.path.basename(path)
                    with open(path, "rb") as f:
                        content = f.read()
                    mime_type = mimetypes.guess_type(name)[0] or 'application/octet-stream'
                    with cols[idx % 3]:
                        st.download_button(
                            f"📄 {name}",
                            content,
                            file_name=name,
                            mime=mime_type,
                            # The index keeps keys unique when two downloads
                            # share the same base name.
                            key=f"dl_{idx}_{name}",
                            use_container_width=True
                        )
418
-
419
def handle_google_drive_upload(selected_files):
    """Upload every path in ``st.session_state.downloaded_paths`` to Drive.

    Does nothing unless both ``st.session_state.google_credentials`` and
    ``st.session_state.downloaded_paths`` are set. Files go into the folder
    named by ``st.session_state.drive_folder`` (default "File Downloader"),
    which is created via ``create_drive_folder`` when no match is found.

    Args:
        selected_files: Not used by this function; kept for the existing
            call signature.
    """
    if not st.session_state.google_credentials or not st.session_state.downloaded_paths:
        return

    with st.spinner("Uploading to Google Drive..."):
        drive_service = googleapiclient.discovery.build("drive", "v3", credentials=st.session_state.google_credentials)

        folder_name = st.session_state.get('drive_folder', "File Downloader")

        # Check if folder exists. Backslashes and single quotes must be
        # escaped inside a Drive query string literal, otherwise a name such
        # as "Bob's files" produces an invalid query.
        escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
        query = f"name='{escaped_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
        results = drive_service.files().list(q=query, spaces='drive', fields='files(id)').execute()
        items = results.get('files', [])

        if items:
            folder_id = items[0]['id']
        else:
            # Create folder if it doesn't exist
            folder_id = create_drive_folder(drive_service, folder_name)

        # Upload each file
        paths = st.session_state.downloaded_paths
        total = len(paths)
        upload_progress = st.progress(0)
        status_text = st.empty()
        uploaded_count = 0

        for i, path in enumerate(paths):
            status_text.text(f"Uploading {i+1}/{total}: {os.path.basename(path)}")
            upload_progress.progress(i / total)

            result = google_drive_upload(path, st.session_state.google_credentials, folder_id)
            # A string result that does not start with "Error" is counted as
            # a successful upload.
            if isinstance(result, str) and not result.startswith("Error"):
                uploaded_count += 1

        upload_progress.progress(1.0)
        status_text.text(f"Uploaded {uploaded_count}/{total} files to Google Drive folder '{folder_name}'")

        # Only report success when every file actually made it.
        if uploaded_count == total:
            st.success("✅ Files uploaded to Google Drive successfully!")
        elif uploaded_count:
            st.warning(f"Uploaded {uploaded_count}/{total} files to Google Drive; some uploads failed.")
        else:
            st.error("No files could be uploaded to Google Drive.")
460
-
461
def get_domain(url):
    """Return the network-location part (``netloc``) of *url*.

    This is whatever ``urllib.parse.urlparse`` reports as ``netloc``; it is
    an empty string when the URL has no ``//`` authority section.
    """
    from urllib.parse import urlparse

    return urlparse(url).netloc