Spaces:
Sleeping
Sleeping
Update scraper.py
Browse files- scraper.py +1 -8
scraper.py
CHANGED
@@ -17,13 +17,6 @@ class Scraper:
|
|
17 |
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
18 |
)
|
19 |
|
20 |
-
# Set additional headers to force HTTP/1.1 and avoid detection
|
21 |
-
await context.set_extra_http_headers({
|
22 |
-
"Accept-Language": "en-US,en;q=0.9",
|
23 |
-
"Upgrade-Insecure-Requests": "1",
|
24 |
-
"Connection": "keep-alive" # Force HTTP/1.1 instead of HTTP/2
|
25 |
-
})
|
26 |
-
|
27 |
# Open a new page
|
28 |
page = await context.new_page()
|
29 |
|
@@ -114,7 +107,7 @@ class Scraper:
|
|
114 |
@staticmethod
|
115 |
async def scrape(url):
|
116 |
try:
|
117 |
-
headers = {'User-Agent': 'Mozilla/5.0'}
|
118 |
response = requests.get(url,timeout=3)
|
119 |
soup = BeautifulSoup(response.content, 'html.parser')
|
120 |
|
|
|
17 |
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
18 |
)
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
# Open a new page
|
21 |
page = await context.new_page()
|
22 |
|
|
|
107 |
@staticmethod
|
108 |
async def scrape(url):
|
109 |
try:
|
110 |
+
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
|
111 |
response = requests.get(url,timeout=3)
|
112 |
soup = BeautifulSoup(response.content, 'html.parser')
|
113 |
|