Bofandra committed on
Commit
2981248
·
verified ·
1 Parent(s): 882e86e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -39,7 +39,10 @@ def extract_links_and_text(base_url, max_depth=1, visited=None):
39
  visited.add(base_url)
40
  print(f"🔗 Crawling: {base_url}")
41
  try:
42
- response = requests.get(base_url, timeout=10)
 
 
 
43
  response.raise_for_status()
44
  soup = BeautifulSoup(response.text, 'html.parser')
45
  page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])
 
39
  visited.add(base_url)
40
  print(f"🔗 Crawling: {base_url}")
41
  try:
42
+ headers = {
43
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
44
+ }
45
+ response = requests.get(base_url, headers=headers, timeout=10)
46
  response.raise_for_status()
47
  soup = BeautifulSoup(response.text, 'html.parser')
48
  page_text = ' '.join([p.get_text() for p in soup.find_all(['p', 'h1', 'h2', 'h3'])])