euler314 committed
Commit f81cb89 · verified · 1 Parent(s): 592abec

Update app.py

Files changed (1):
  1. app.py +88 -40
app.py CHANGED
@@ -243,7 +243,24 @@ class DownloadManager:
             'Referer': 'https://www.bing.com/'
         })
         return self
-
+    async def search_bing(self):
+        urls = []
+        try:
+            search_url = f"https://www.bing.com/search?q={self.query}"
+            await self.page.goto(search_url, timeout=30000)
+            await self.page.wait_for_load_state('networkidle')
+
+            # Extract search result links
+            links = await self.page.query_selector_all("li.b_algo h2 a")
+            for link in links[:self.num_results]:
+                href = await link.get_attribute('href')
+                if href:
+                    urls.append(href)
+
+            return urls
+        except Exception as e:
+            logger.error(f"Error searching Bing: {e}")
+            return []
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         if self.browser:
             await self.browser.close()
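
Note: the new search_bing coroutine relies on self.query, self.num_results, self.page and a module-level logger, none of which appear in this diff. The hunk does show that the class is an async context manager (__aenter__ returns self, __aexit__ closes the browser), so a minimal usage sketch could look like the following; the constructor arguments are hypothetical and not taken from the commit.

    import asyncio
    from app import DownloadManager  # the file being changed in this commit

    async def main():
        # Hypothetical constructor arguments; DownloadManager.__init__ is not part of this diff.
        async with DownloadManager(query="climate report filetype:pdf", num_results=5) as dm:
            urls = await dm.search_bing()
            for url in urls:
                print(url)

    asyncio.run(main())
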
@@ -406,47 +423,78 @@ class DownloadManager:
         base, ext = os.path.splitext(fname)
         counter = 1
         while os.path.exists(path):
-            path = os.path.join(save_dir, f"{base}_{counter}{ext}")
-            counter += 1
-
+            path = os.path.join(save_dir, f"{base}_{counter}{ext}")
+            counter += 1
+
         os.makedirs(save_dir, exist_ok=True)
-
+
         try:
-            if "drive.google.com" in file_url:
-                import gdown
-                try:
-                    st.write(f"Downloading from Google Drive: {fname}")
-                    output = gdown.download(file_url, path, quiet=False)
-                    if output:
-                        return path
-                    return None
-                except Exception as e:
-                    logger.error(f"Google Drive download error: {e}")
-                    return None
-
-            async with self.context.new_page() as page:
-                st.write(f"Downloading: {fname}")
-
-                headers = {
-                    'Accept': '*/*',
-                    'Accept-Encoding': 'gzip, deflate, br',
-                    'Referer': referer
-                }
-
-                response = await page.request.get(file_url, headers=headers, timeout=30000)
-
-                if response.status == 200:
-                    content = await response.body()
-                    with open(path, 'wb') as f:
-                        f.write(content)
-                    return path
-                else:
-                    logger.error(f"Download failed with status {response.status}: {file_url}")
-                    return None
-
-        except Exception as e:
-            logger.error(f"Error downloading {file_url}: {e}")
-            return None
+            if "drive.google.com" in file_url:
+                import gdown
+                try:
+                    st.write(f"Downloading from Google Drive: {fname}")
+
+                    # Determine file extension or use a default if none available
+                    if not ext or ext == "":
+                        # Try to determine file type from content-type header
+                        async with self.context.new_page() as page:
+                            response = await page.request.head(file_url, timeout=15000)
+                            content_type = response.headers.get('Content-Type', '')
+
+                            # Map content types to extensions
+                            extension_map = {
+                                'application/pdf': '.pdf',
+                                'image/jpeg': '.jpg',
+                                'image/png': '.png',
+                                'application/msword': '.doc',
+                                'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
+                                'application/zip': '.zip',
+                                'text/plain': '.txt',
+                                'application/vnd.ms-excel': '.xls',
+                                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx'
+                            }
+
+                            # Get extension from content type or use .bin as fallback
+                            ext = extension_map.get(content_type.split(';')[0], '.bin')
+                            path = os.path.join(save_dir, f"{base}{ext}")
+
+                            # Handle name collisions
+                            counter = 1
+                            while os.path.exists(path):
+                                path = os.path.join(save_dir, f"{base}_{counter}{ext}")
+                                counter += 1
+
+                    output = gdown.download(file_url, path, quiet=False)
+                    if output:
+                        return path
+                    return None
+                except Exception as e:
+                    logger.error(f"Google Drive download error: {e}")
+                    return None
+
+            async with self.context.new_page() as page:
+                st.write(f"Downloading: {fname}")
+
+                headers = {
+                    'Accept': '*/*',
+                    'Accept-Encoding': 'gzip, deflate, br',
+                    'Referer': referer
+                }
+
+                response = await page.request.get(file_url, headers=headers, timeout=30000)
+
+                if response.status == 200:
+                    content = await response.body()
+                    with open(path, 'wb') as f:
+                        f.write(content)
+                    return path
+                else:
+                    logger.error(f"Download failed with status {response.status}: {file_url}")
+                    return None
+
+        except Exception as e:
+            logger.error(f"Error downloading {file_url}: {e}")
+            return None
 
     async def deep_search(self, url, custom_ext_list=None, sublink_limit=10000, timeout=60):
         if not custom_ext_list:
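
Note: for Google Drive URLs whose filenames lack an extension, the commit hard-codes a small content-type-to-extension table. The standard-library mimetypes module can resolve most of the same types; the sketch below (not part of the commit) shows an equivalent lookup. For types with several registered extensions (e.g. text/plain or image/jpeg) the value guess_extension picks can differ between Python versions and platforms, which may be why an explicit table was used here.

    import mimetypes

    def ext_from_content_type(content_type, default='.bin'):
        # Strip parameters such as "; charset=utf-8" before the lookup.
        mime = content_type.split(';')[0].strip().lower()
        # guess_extension returns None for unrecognised types; fall back to a default.
        return mimetypes.guess_extension(mime) or default

    print(ext_from_content_type('application/pdf'))        # .pdf
    print(ext_from_content_type('image/png'))              # .png
    print(ext_from_content_type('application/x-unknown'))  # .bin (fallback)
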