Update app.py
Browse files
app.py
CHANGED
@@ -243,7 +243,24 @@ class DownloadManager:
|
|
243 |
'Referer': 'https://www.bing.com/'
|
244 |
})
|
245 |
return self
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
248 |
if self.browser:
|
249 |
await self.browser.close()
|
@@ -406,47 +423,78 @@ class DownloadManager:
|
|
406 |
base, ext = os.path.splitext(fname)
|
407 |
counter = 1
|
408 |
while os.path.exists(path):
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
os.makedirs(save_dir, exist_ok=True)
|
413 |
-
|
414 |
try:
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
|
451 |
async def deep_search(self, url, custom_ext_list=None, sublink_limit=10000, timeout=60):
|
452 |
if not custom_ext_list:
|
|
|
243 |
'Referer': 'https://www.bing.com/'
|
244 |
})
|
245 |
return self
|
246 |
+
async def search_bing(self):
    """Run a Bing web search for ``self.query`` and collect result URLs.

    Navigates the managed Playwright page to the Bing results page, waits
    for the network to go idle, then scrapes the organic-result title links.

    Returns:
        list[str]: Up to ``self.num_results`` result hrefs, or an empty
        list if the search fails for any reason (best-effort semantics).
    """
    # Stdlib; imported locally so this method stands alone as a patch.
    from urllib.parse import quote_plus

    urls = []
    try:
        # Bug fix: the raw query must be percent-encoded — spaces and
        # reserved characters (&, #, ?, +) would otherwise corrupt the
        # request URL or be misparsed as extra query parameters.
        search_url = f"https://www.bing.com/search?q={quote_plus(self.query)}"
        await self.page.goto(search_url, timeout=30000)
        await self.page.wait_for_load_state('networkidle')

        # Organic Bing results are <li class="b_algo"> entries; the title
        # anchor inside the <h2> carries the destination href.
        links = await self.page.query_selector_all("li.b_algo h2 a")
        for link in links[:self.num_results]:
            href = await link.get_attribute('href')
            if href:
                urls.append(href)

        return urls
    except Exception as e:
        # Degrade to "no results" rather than crashing the caller; the
        # failure is still recorded for diagnosis.
        logger.error(f"Error searching Bing: {e}")
        return []
|
264 |
async def __aexit__(self, exc_type, exc_val, exc_tb):
    """Async context-manager exit: close the browser if one was launched.

    Exceptions from the managed block are not suppressed (implicit
    ``None`` return).
    """
    browser = self.browser
    if browser:
        await browser.close()
|
|
|
423 |
base, ext = os.path.splitext(fname)
|
424 |
counter = 1
|
425 |
while os.path.exists(path):
|
426 |
+
path = os.path.join(save_dir, f"{base}_{counter}{ext}")
|
427 |
+
counter += 1
|
428 |
+
|
429 |
os.makedirs(save_dir, exist_ok=True)
|
430 |
+
|
431 |
try:
|
432 |
+
if "drive.google.com" in file_url:
|
433 |
+
import gdown
|
434 |
+
try:
|
435 |
+
st.write(f"Downloading from Google Drive: {fname}")
|
436 |
+
|
437 |
+
# Determine file extension or use a default if none available
|
438 |
+
if not ext or ext == "":
|
439 |
+
# Try to determine file type from content-type header
|
440 |
+
async with self.context.new_page() as page:
|
441 |
+
response = await page.request.head(file_url, timeout=15000)
|
442 |
+
content_type = response.headers.get('Content-Type', '')
|
443 |
+
|
444 |
+
# Map content types to extensions
|
445 |
+
extension_map = {
|
446 |
+
'application/pdf': '.pdf',
|
447 |
+
'image/jpeg': '.jpg',
|
448 |
+
'image/png': '.png',
|
449 |
+
'application/msword': '.doc',
|
450 |
+
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
451 |
+
'application/zip': '.zip',
|
452 |
+
'text/plain': '.txt',
|
453 |
+
'application/vnd.ms-excel': '.xls',
|
454 |
+
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx'
|
455 |
+
}
|
456 |
+
|
457 |
+
# Get extension from content type or use .bin as fallback
|
458 |
+
ext = extension_map.get(content_type.split(';')[0], '.bin')
|
459 |
+
path = os.path.join(save_dir, f"{base}{ext}")
|
460 |
+
|
461 |
+
# Handle name collisions
|
462 |
+
counter = 1
|
463 |
+
while os.path.exists(path):
|
464 |
+
path = os.path.join(save_dir, f"{base}_{counter}{ext}")
|
465 |
+
counter += 1
|
466 |
+
|
467 |
+
output = gdown.download(file_url, path, quiet=False)
|
468 |
+
if output:
|
469 |
+
return path
|
470 |
+
return None
|
471 |
+
except Exception as e:
|
472 |
+
logger.error(f"Google Drive download error: {e}")
|
473 |
+
return None
|
474 |
+
|
475 |
+
async with self.context.new_page() as page:
|
476 |
+
st.write(f"Downloading: {fname}")
|
477 |
+
|
478 |
+
headers = {
|
479 |
+
'Accept': '*/*',
|
480 |
+
'Accept-Encoding': 'gzip, deflate, br',
|
481 |
+
'Referer': referer
|
482 |
+
}
|
483 |
+
|
484 |
+
response = await page.request.get(file_url, headers=headers, timeout=30000)
|
485 |
+
|
486 |
+
if response.status == 200:
|
487 |
+
content = await response.body()
|
488 |
+
with open(path, 'wb') as f:
|
489 |
+
f.write(content)
|
490 |
+
return path
|
491 |
+
else:
|
492 |
+
logger.error(f"Download failed with status {response.status}: {file_url}")
|
493 |
+
return None
|
494 |
+
|
495 |
+
except Exception as e:
|
496 |
+
logger.error(f"Error downloading {file_url}: {e}")
|
497 |
+
return None
|
498 |
|
499 |
async def deep_search(self, url, custom_ext_list=None, sublink_limit=10000, timeout=60):
|
500 |
if not custom_ext_list:
|