Arafath10 committed on
Commit
faea8d5
1 Parent(s): 9f12486

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +3 -3
scraper.py CHANGED
@@ -99,18 +99,18 @@ class Scraper:
99
  async def scrape(url):
100
  try:
101
  headers = {'User-Agent': 'Mozilla/5.0'}
102
- response = requests.get(url)
103
  soup = BeautifulSoup(response.content, 'html.parser')
104
 
105
  title = Scraper.get_title(soup)
106
  links = Scraper.get_links(soup)
107
  text_content = Scraper.get_text_content(soup)
108
 
109
- if not text_content:
110
  print("Running alternative scrapper")
111
  links, text_content = await Scraper.power_scrapper_2(url)
112
-
113
  return {"title": title, "URL": links, "Content": text_content}
114
  except:
 
115
  title,links, text_content = await Scraper.power_scrapper_2(url)
116
  return {"title": title, "URL": links, "Content": text_content}
 
99
  async def scrape(url):
100
  try:
101
  headers = {'User-Agent': 'Mozilla/5.0'}
102
+ response = requests.get(url,timeout=5)
103
  soup = BeautifulSoup(response.content, 'html.parser')
104
 
105
  title = Scraper.get_title(soup)
106
  links = Scraper.get_links(soup)
107
  text_content = Scraper.get_text_content(soup)
108
 
109
+ if not links:
110
  print("Running alternative scrapper")
111
  links, text_content = await Scraper.power_scrapper_2(url)
 
112
  return {"title": title, "URL": links, "Content": text_content}
113
  except:
114
+ print("Running alternative scrapper second time")
115
  title,links, text_content = await Scraper.power_scrapper_2(url)
116
  return {"title": title, "URL": links, "Content": text_content}