import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool

# Upper bound (seconds) on how long the outbound fetch may take. Without a
# timeout, requests waits indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 10

app = FastAPI()


def _extract_entries(html):
    """Return one dict per (title, summary) pair found in *html*.

    Titles are ``<h1 class="entry-title">`` elements and summaries are
    ``<div class="entry-summary">`` elements. They are paired positionally
    with ``zip``, so surplus elements on either side are dropped.

    Each dict has the keys:
        title: text of the heading's first ``<a>`` (or of the heading itself
            when it contains no anchor).
        link: ``href`` of that anchor, or ``None`` when absent.
        links: whitespace-separated tokens from the summary's first ``<p>``
            that start with ``"http"`` (empty when there is no ``<p>``).
    """
    soup = BeautifulSoup(html, "html.parser")
    titles = soup.find_all("h1", class_="entry-title")
    summaries = soup.find_all("div", class_="entry-summary")

    results = []
    for title, summary in zip(titles, summaries):
        # find() returns None when nothing matches; degrade per entry rather
        # than failing the whole request on one malformed block.
        anchor = title.find("a")
        if anchor is not None:
            title_text = anchor.text
            title_link = anchor.get("href")
        else:
            title_text = title.text
            title_link = None

        paragraph = summary.find("p")
        tokens = paragraph.text.split() if paragraph is not None else []

        results.append({
            "title": title_text,
            "link": title_link,
            "links": [token for token in tokens if token.startswith("http")],
        })
    return results


@app.get("/scrape")
async def scrape_titles_and_links(url: str):
    """Fetch *url* and return the entry titles, links and summary URLs.

    Args:
        url: Address of the page to scrape, taken from the query string.

    Returns:
        ``{"results": [...]}`` where each item has the keys ``title``,
        ``link`` and ``links`` (see ``_extract_entries``).

    Raises:
        HTTPException: status 500 when the page does not answer with HTTP
            200, or when fetching/parsing fails for any other reason.
    """
    # NOTE(security): `url` is caller-supplied and fetched from the server
    # side (SSRF exposure). If this endpoint is reachable by untrusted
    # clients, restrict the allowed schemes/hosts before fetching.
    try:
        # requests is a blocking client; running it directly in this
        # coroutine would stall the event loop, so hand it to the threadpool.
        response = await run_in_threadpool(
            requests.get, url, timeout=REQUEST_TIMEOUT_SECONDS
        )

        if response.status_code != 200:
            raise HTTPException(
                status_code=500,
                detail="Failed to retrieve the webpage. Status code: "
                + str(response.status_code),
            )

        return {"results": _extract_entries(response.content)}
    except HTTPException:
        # Already a well-formed API error; do not re-wrap it below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500, detail="An error occurred: " + str(e)
        ) from e