# utils.py — news fetching, summarization, sentiment, Hindi translation, and TTS helpers.
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from deep_translator import GoogleTranslator
from scipy.special import softmax
import gtts
import os
# Initialize HuggingFace sentiment model
# 3-class Twitter sentiment model (label ids: 0=Negative, 1=Neutral, 2=Positive,
# per the mapping used in analyze_sentiment below).
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Initialize summarizer
# NOTE: loading both models happens at import time — first import is slow and
# requires network access to the HuggingFace hub (or a warm local cache).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# News API details
# SECURITY(review): API key is hardcoded in source — move to an environment
# variable (e.g. os.environ["NEWS_API_KEY"]) and rotate this exposed key.
NEWS_API_KEY = "7e72763bebb54fd79cb632390738cbb1"
NEWS_API_URL = "https://newsapi.org/v2/everything"
def fetch_news(company):
    """Fetch up to 10 recent English NewsAPI articles mentioning *company*.

    Each returned dict has keys: ``title``, ``summary``, ``link``,
    ``published_at``, ``source``. Returns an empty list when the API call
    fails, returns malformed JSON, or yields no articles, so callers can
    render "no results" instead of crashing.
    """
    params = {
        "q": company,
        "apiKey": NEWS_API_KEY,
        "language": "en",
        "sortBy": "relevancy",
        "pageSize": 10,
    }
    try:
        # Timeout prevents a hung request from blocking the app indefinitely;
        # raise_for_status surfaces 4xx/5xx instead of parsing an error body.
        response = requests.get(NEWS_API_URL, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, non-2xx status, or non-JSON body: degrade gracefully.
        return []

    articles = []
    for item in data.get("articles", []):
        # NewsAPI routinely returns null fields; use .get() defaults so one
        # sparse article does not abort the whole batch.
        url = item.get("url") or ""
        full_text = scrape_article_text(url) if url else ""
        summary = summarize_text(full_text) if full_text else "No summary available."
        articles.append({
            "title": item.get("title") or "",
            "summary": summary,
            "link": url,
            "published_at": item.get("publishedAt") or "",
            "source": (item.get("source") or {}).get("name") or "",
        })
    return articles
def scrape_article_text(url):
    """Best-effort scrape of an article's body text from *url*.

    Concatenates the text of every ``<p>`` element on the page. Returns an
    empty string on ANY failure (network error, bad status, parse error) so
    callers can fall back to "No summary available.".
    """
    try:
        # Some sites block default python-requests UA; present a browser UA.
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        # Without this, 404/500 error pages were scraped as "article text".
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        return " ".join(p.text for p in paragraphs)
    except Exception:
        # Deliberate best-effort: swallow everything, signal failure via "".
        return ""
def summarize_text(text, sentences_count=3):
    """Summarize *text* with the BART summarization pipeline.

    ``sentences_count`` is retained for backward compatibility with existing
    callers but is unused: BART bounds output by token counts (min/max_length),
    not sentences. Returns "No summary available." for blank input or when
    the model fails.
    """
    if not text.strip():
        return "No summary available."
    # Crude char-based truncation to avoid input-limit issues.
    # NOTE(review): BART's real limit is 1024 *tokens*, not chars — this is
    # conservative but keeps behavior identical; token-aware truncation is a
    # possible follow-up.
    text = text[:1024]
    try:
        summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
        return summary[0]['summary_text']
    except Exception:
        # A single bad article (e.g. scraped text shorter than min_length)
        # must not abort the whole fetch loop; callers already treat this
        # string as the no-summary case.
        return "No summary available."
def analyze_sentiment(text):
    """Classify *text* sentiment as 'Negative', 'Neutral', or 'Positive'.

    Runs the cardiffnlp twitter-roberta model and applies softmax to the
    logits; the argmax index maps onto the model's 3-class label order.
    """
    # truncation/max_length: without them, inputs beyond the model's 512-token
    # window raise a runtime tensor-size error on full scraped articles.
    encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    return sentiment_labels[scores.argmax()]
def translate_to_hindi(text):
    """Return the Hindi translation of English *text* (Google Translate)."""
    translator = GoogleTranslator(source='en', target='hi')
    return translator.translate(text)
def text_to_speech(text, filename="news_headline.mp3"):
    """Translate *text* to Hindi and save spoken audio to *filename*.

    Returns the output filename, or None when *text* is blank/whitespace.
    """
    if not text.strip():
        return None
    hindi_text = translate_to_hindi(text)
    speech = gtts.gTTS(text=hindi_text, lang='hi')
    speech.save(filename)
    return filename