Update scraper.py
scraper.py  CHANGED  +1 -33
@@ -6,38 +6,6 @@ from bs4 import BeautifulSoup
 import requests
 
 
-import requests
-from bs4 import BeautifulSoup
-
-# URL of the page to scrape
-url = "https://www.imf.org/en/News/Articles/2024/03/21/pr2494-sri-lanka-imf-staff-level-agreement-for-second-review-sla"
-
-# Send a GET request to the URL
-response = requests.get(url)
-
-# Check if the request was successful
-if response.status_code == 200:
-    # Parse the page content
-    soup = BeautifulSoup(response.content, 'html.parser')
-
-    # Extract all text content (paragraphs, headers, etc.)
-    elements = soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
-    body_text = "\n".join([element.get_text().strip() for element in elements])
-
-    # Extract all links
-    links = []
-    for a_tag in soup.find_all('a', href=True):
-        links.append(a_tag['href'])
-
-    # Print the extracted information
-    print("Body Text:")
-    print(body_text)
-    print("\nLinks:")
-    for link in links:
-        print(link)
-else:
-    print("Failed to retrieve the webpage")
-
 
 class Scraper:
     @staticmethod
@@ -99,7 +67,7 @@ class Scraper:
     @staticmethod
     async def scrape(url):
         headers = {'User-Agent': 'Mozilla/5.0'}
-        response = requests.get(url
+        response = requests.get(url)
         soup = BeautifulSoup(response.content, 'html.parser')
 
         title = Scraper.get_title(soup)
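
For reference, a minimal sketch of how the patched Scraper.scrape coroutine could be invoked. This is an illustration under assumptions, not part of the commit: the module name scraper and the shape of scrape()'s return value are inferred from the visible hunks.

# demo_scrape.py: hypothetical driver for the patched Scraper.scrape coroutine.
# Assumption: the Scraper class shown in the diff is importable from scraper.py;
# scrape()'s return value is not visible in the hunks, so printing it is
# illustrative only.
import asyncio

from scraper import Scraper

async def main():
    # scrape() is declared async but calls requests.get() synchronously
    # (as in the diff), so the fetch still blocks the event loop.
    result = await Scraper.scrape("https://example.com")
    print(result)

if __name__ == "__main__":
    asyncio.run(main())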