import requests
from bs4 import BeautifulSoup

# NewsAPI key (consider loading this from an environment variable instead of hardcoding it)
NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"


def extract_news(company, num_articles=2):
    """Fetch news articles about a company from NewsAPI and return their titles and content."""
    url = (
        f"https://newsapi.org/v2/everything?q={company}"
        f"&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
    )
    response = requests.get(url)
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return []

    data = response.json()
    articles = data.get("articles", [])
    if not articles:
        print("No articles found.")
        return []

    extracted_articles = []
    for article in articles[:num_articles]:  # Keep only the required number of articles
        article_url = article.get("url")
        if not article_url:
            continue

        # Scrape the article page for its title and body text,
        # skipping articles that time out or fail to load
        try:
            article_response = requests.get(article_url, timeout=5)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching article: {e}")
            continue

        if article_response.status_code != 200:
            continue

        soup = BeautifulSoup(article_response.content, "html.parser")
        title = soup.title.string if soup.title else "No Title Found"

        # Extract paragraph text and drop empty fragments
        paragraphs = soup.find_all("p")
        content = " ".join(p.get_text().strip() for p in paragraphs if p.get_text().strip())

        # Filter out boilerplate text patterns that commonly leak into scraped pages
        unwanted_patterns = ["Want to read", "Nickname:", "Password:", "The Fine Print:"]
        for pattern in unwanted_patterns:
            content = content.replace(pattern, "")

        # Collapse the extra whitespace left behind by the removals
        content = " ".join(content.split())

        extracted_articles.append({"title": title, "content": content})

    return extracted_articles
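

# Minimal driver, adapted from the commented-out example in the original script:
# prompts for a company name, fetches the articles, and prints a short preview
# of each. The 200-character preview length is an arbitrary choice for readability.
if __name__ == "__main__":
    company = input("Enter the company name for analysis: ").strip()
    articles = extract_news(company, num_articles=2)
    for article in articles:
        print(article["title"])
        print(article["content"][:200], "...")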