v1shal committed on
Commit
de78d88
·
verified ·
1 Parent(s): 1a2e35e

Update approach_api/utils/news_extraction_api.py

Browse files
approach_api/utils/news_extraction_api.py CHANGED
@@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
4
  # NewsAPI Key
5
  NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"
6
 
7
- def extract_news(company, num_articles=2):
8
  """Fetch multiple news articles from NewsAPI and return titles and contents."""
9
  url = f"https://newsapi.org/v2/everything?q={company}&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
10
  response = requests.get(url)
@@ -48,62 +48,3 @@ def extract_news(company, num_articles=2):
48
  return extracted_articles
49
 
50
 
51
- # import requests
52
- # from bs4 import BeautifulSoup
53
-
54
- # # NewsAPI Key
55
- # NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"
56
-
57
- # def fetch_articles(company, num_articles=11):
58
- # """Fetch multiple news articles from NewsAPI and return their titles and content."""
59
- # url = f"https://newsapi.org/v2/everything?q={company}&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
60
- # response = requests.get(url)
61
-
62
- # if response.status_code != 200:
63
- # print("Error:", response.status_code, response.text)
64
- # return []
65
-
66
- # data = response.json()
67
- # articles = data.get("articles", [])
68
-
69
- # if not articles:
70
- # print("No articles found.")
71
- # return []
72
-
73
- # fetched_articles = []
74
-
75
- # for article in articles[:num_articles]: # Fetch only the required number of articles
76
- # article_url = article.get("url")
77
- # if not article_url:
78
- # continue
79
-
80
- # # Scrape the article for title and content
81
- # try:
82
- # article_response = requests.get(article_url, timeout=5) # Removed headers
83
- # if article_response.status_code == 200:
84
- # soup = BeautifulSoup(article_response.content, 'html.parser')
85
- # title = soup.title.string if soup.title else "No Title Found"
86
-
87
- # # Extract paragraphs and clean the content
88
- # paragraphs = soup.find_all('p')
89
- # content = ' '.join(p.get_text().strip() for p in paragraphs if p.get_text().strip())
90
-
91
- # # Remove unwanted text patterns
92
- # unwanted_patterns = ["Want to read", "Nickname:", "Password:", "The Fine Print:"]
93
- # for pattern in unwanted_patterns:
94
- # content = content.replace(pattern, "")
95
-
96
- # # Clean up extra spaces
97
- # content = ' '.join(content.split())
98
-
99
- # # Store the article's title and content
100
- # fetched_articles.append({"title": title, "content": content})
101
- # except requests.exceptions.RequestException as e:
102
- # print(f"Error fetching article: {e}")
103
-
104
- # return fetched_articles
105
-
106
- # if __name__ == "__main__":
107
- # company = input("Enter the company name for analysis: ").strip()
108
- # articles = fetch_articles(company, num_articles=11)
109
- # print(articles)
 
4
  # NewsAPI Key
5
  NEWS_API_KEY = "04a9ea0fe9874092a57d547f4d0e43c6"
6
 
7
+ def extract_news(company, num_articles=15):
8
  """Fetch multiple news articles from NewsAPI and return titles and contents."""
9
  url = f"https://newsapi.org/v2/everything?q={company}&apiKey={NEWS_API_KEY}&language=en&pageSize={num_articles}"
10
  response = requests.get(url)
 
48
  return extracted_articles
49
 
50