Update approach_api/utils/news_extraction_api.py
approach_api/utils/news_extraction_api.py
CHANGED
@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
 # NewsAPI Key
 NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"
 
-def extract_news(company, num_articles=2):
+def extract_news(company, num_articles=15):
     """Fetch multiple news articles from NewsAPI and return titles and contents."""
     url = f"https://newsapi.org/v2/everything?q={company}&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
     response = requests.get(url)
@@ -48,62 +48,3 @@ def extract_news(company, num_articles=2):
     return extracted_articles
 
 
-# import requests
-# from bs4 import BeautifulSoup
-
-# # NewsAPI Key
-# NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"
-
-# def fetch_articles(company, num_articles=11):
-#     """Fetch multiple news articles from NewsAPI and return their titles and content."""
-#     url = f"https://newsapi.org/v2/everything?q={company}&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
-#     response = requests.get(url)
-
-#     if response.status_code != 200:
-#         print("Error:", response.status_code, response.text)
-#         return []
-
-#     data = response.json()
-#     articles = data.get("articles", [])
-
-#     if not articles:
-#         print("No articles found.")
-#         return []
-
-#     fetched_articles = []
-
-#     for article in articles[:num_articles]:  # Fetch only the required number of articles
-#         article_url = article.get("url")
-#         if not article_url:
-#             continue
-
-#         # Scrape the article for title and content
-#         try:
-#             article_response = requests.get(article_url, timeout=5)  # Removed headers
-#             if article_response.status_code == 200:
-#                 soup = BeautifulSoup(article_response.content, 'html.parser')
-#                 title = soup.title.string if soup.title else "No Title Found"
-
-#                 # Extract paragraphs and clean the content
-#                 paragraphs = soup.find_all('p')
-#                 content = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
-
-#                 # Remove unwanted text patterns
-#                 unwanted_patterns = ["Want to read", "Nickname:", "Password:", "The Fine Print:"]
-#                 for pattern in unwanted_patterns:
-#                     content = content.replace(pattern, "")
-
-#                 # Clean up extra spaces
-#                 content = ' '.join(content.split())
-
-#                 # Store the article's title and content
-#                 fetched_articles.append({"title": title, "content": content})
-#         except requests.exceptions.RequestException as e:
-#             print(f"Error fetching article: {e}")
-
-#     return fetched_articles
-
-# if __name__ == "__main__":
-#     company = input("Enter the company name for analysis: ").strip()
-#     articles = fetch_articles(company, num_articles=11)
-#     print(articles)
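For reference, a minimal usage sketch for the updated function. This is hypothetical, not part of the commit: it assumes the module is importable as approach_api.utils.news_extraction_api and that extract_news returns a list of {"title": ..., "content": ...} dicts, as the removed fetch_articles variant did; "Tesla" is only an illustrative query.

    # Hypothetical usage sketch for the updated extract_news.
    from approach_api.utils.news_extraction_api import extract_news

    # The default changed from 2 to 15; num_articles can still be overridden.
    articles = extract_news("Tesla", num_articles=15)
    for article in articles:
        # Assumed return shape: {"title": ..., "content": ...}
        print(article["title"])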