Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,10 @@ def extract_links_and_text(base_url, max_depth=1, visited=None):
|
|
39 |
visited.add(base_url)
|
40 |
print(f"🔗 Crawling: {base_url}")
|
41 |
try:
|
42 |
-
|
|
|
|
|
|
|
43 |
response.raise_for_status()
|
44 |
soup = BeautifulSoup(response.text, 'html.parser')
|
45 |
page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])
|
|
|
39 |
visited.add(base_url)
|
40 |
print(f"🔗 Crawling: {base_url}")
|
41 |
try:
|
42 |
+
headers = {
|
43 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
|
44 |
+
}
|
45 |
+
response = requests.get(base_url, headers=headers, timeout=10)
|
46 |
response.raise_for_status()
|
47 |
soup = BeautifulSoup(response.text, 'html.parser')
|
48 |
page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])
|