Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -42,7 +42,7 @@ async def get_data(url: str):
|
|
42 |
async def scrape_website(url):
|
43 |
async with async_playwright() as p:
|
44 |
# Try using WebKit or Firefox if Chromium fails
|
45 |
-
browser = await p.webkit.launch(headless=
|
46 |
|
47 |
# Create a new browser context with a realistic user-agent
|
48 |
context = await browser.new_context(
|
@@ -62,9 +62,17 @@ async def scrape_website(url):
|
|
62 |
# Abort image, media, stylesheet, font, and XHR requests to speed up page load
|
63 |
await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
|
64 |
|
65 |
-
# Navigate to the page with an extended timeout and alternate loading strategy
|
66 |
-
await page.goto(url, wait_until='domcontentloaded', timeout=60000)
|
67 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# Get the title of the page
|
69 |
title = await page.title()
|
70 |
|
|
|
42 |
async def scrape_website(url):
|
43 |
async with async_playwright() as p:
|
44 |
# Try using WebKit or Firefox if Chromium fails
|
45 |
+
browser = await p.webkit.launch(headless=True) # Switch to WebKit
|
46 |
|
47 |
# Create a new browser context with a realistic user-agent
|
48 |
context = await browser.new_context(
|
|
|
62 |
# Abort image, media, stylesheet, font, and XHR requests to speed up page load
|
63 |
await page.route("**/*", lambda route: route.abort() if route.request.resource_type in ["image", "media", "stylesheet", "font", "xhr"] else route.continue_())
|
64 |
|
|
|
|
|
65 |
try:
|
66 |
+
# Pause for a random 1-3 seconds before navigating to mimic human behavior
|
67 |
+
await asyncio.sleep(random.uniform(1, 3))
|
68 |
+
|
69 |
+
# Navigate with an extended 60-second timeout, waiting only for DOMContentLoaded
|
70 |
+
await page.goto(url, wait_until='domcontentloaded', timeout=60000)
|
71 |
+
|
72 |
+
# Simulate human behavior by scrolling and moving the mouse
|
73 |
+
await page.mouse.move(random.uniform(0, 100), random.uniform(0, 100))
|
74 |
+
await page.mouse.wheel(0, random.uniform(200, 400))
|
75 |
+
await asyncio.sleep(random.uniform(1, 3)) # Random delay
|
76 |
# Get the title of the page
|
77 |
title = await page.title()
|
78 |
|