Arafath10 committed on
Commit
c0bee13
1 Parent(s): 8740a29

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -3
main.py CHANGED
@@ -42,7 +42,7 @@ async def get_data(url: str):
42
  async def scrape_website(url):
43
  async with async_playwright() as p:
44
  # Try using WebKit or Firefox if Chromium fails
45
- browser = await p.webkit.launch(headless=False) # Switch to WebKit
46
 
47
  # Create a new browser context with a realistic user-agent
48
  context = await browser.new_context(
@@ -62,9 +62,17 @@ async def scrape_website(url):
62
  # Route to block images, videos, and CSS to speed up page load
63
  await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
64
 
65
- # Navigate to the page with an extended timeout and alternate loading strategy
66
- await page.goto(url, wait_until='domcontentloaded', timeout=60000)
67
  try:
 
 
 
 
 
 
 
 
 
 
68
  # Get the title of the page
69
  title = await page.title()
70
 
 
42
  async def scrape_website(url):
43
  async with async_playwright() as p:
44
  # Try using WebKit or Firefox if Chromium fails
45
+ browser = await p.webkit.launch(headless=True) # Switch to WebKit
46
 
47
  # Create a new browser context with a realistic user-agent
48
  context = await browser.new_context(
 
62
  # Route to block images, videos, and CSS to speed up page load
63
  await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
64
 
 
 
65
  try:
66
+ # Introduce a slight delay to mimic human behavior
67
+ await asyncio.sleep(random.uniform(1, 3))
68
+
69
+ # Navigate to the page with an extended timeout
70
+ await page.goto(url, wait_until='domcontentloaded', timeout=60000)
71
+
72
+ # Simulate human behavior by scrolling and moving the mouse
73
+ await page.mouse.move(random.uniform(0, 100), random.uniform(0, 100))
74
+ await page.mouse.wheel(0, random.uniform(200, 400))
75
+ await asyncio.sleep(random.uniform(1, 3)) # Random delay
76
  # Get the title of the page
77
  title = await page.title()
78