Arafath10 committed on
Commit
beb1e33
1 Parent(s): 1949a87

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +7 -6
main.py CHANGED
@@ -36,10 +36,11 @@ async def power_scrapper(url):
36
 
37
  # Extract all links
38
  links = await page.query_selector_all('a')
39
- result = []
 
40
  for link in links:
41
  href = await link.get_attribute('href')
42
- result.append({'href': href})
43
 
44
  # Extract all text content
45
  elements = await page.query_selector_all('body *')
@@ -47,10 +48,10 @@ async def power_scrapper(url):
47
  for element in elements:
48
  text_content = await element.text_content()
49
  if text_content and text_content.strip():
50
- result.append({'text': text_content.strip()})
51
 
52
  await browser.close()
53
- return result
54
 
55
 
56
  def get_links(soup):
@@ -89,6 +90,6 @@ async def get_data(url: str):
89
 
90
  if links==[]:
91
  print("running alternative scrapper")
92
- links = await power_scrapper(url)
93
 
94
- return ({"title": title , "contend":links+text_content})
 
36
 
37
  # Extract all links
38
  links = await page.query_selector_all('a')
39
+ page_url = []
40
+ page_content = []
41
  for link in links:
42
  href = await link.get_attribute('href')
43
+ page_url.append(href)
44
 
45
  # Extract all text content
46
  elements = await page.query_selector_all('body *')
 
48
  for element in elements:
49
  text_content = await element.text_content()
50
  if text_content and text_content.strip():
51
+ page_content.append(text_content.strip())
52
 
53
  await browser.close()
54
+ return page_url,page_content
55
 
56
 
57
  def get_links(soup):
 
90
 
91
  if links==[]:
92
  print("running alternative scrapper")
93
+ links,text_content = await power_scrapper(url)
94
 
95
+ return ({"title": title ,"URL":links,"Content":text_content})