# NOTE: the lines "Spaces: / Sleeping / Sleeping" were HuggingFace Spaces
# status-banner residue captured when this file was exported; not program content.
# Third-party dependencies: requests/bs4 for article scraping, transformers
# for summarization + sentiment, deep-translator and gTTS for Hindi output.
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from deep_translator import GoogleTranslator
from scipy.special import softmax
import gtts
import os

# HuggingFace sentiment model: 3-class RoBERTa fine-tuned on Twitter data
# (label order 0=negative, 1=neutral, 2=positive).
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Abstractive summarizer (BART fine-tuned on CNN/DailyMail).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# News API details.
# SECURITY(review): the API key is committed in source. Move it to an
# environment variable (e.g. os.environ["NEWS_API_KEY"]) and rotate this key.
NEWS_API_KEY = "7e72763bebb54fd79cb632390738cbb1"
NEWS_API_URL = "https://newsapi.org/v2/everything"
# Function to fetch news articles
def fetch_news(company):
    """Fetch up to 10 recent English news articles mentioning *company*.

    Queries the NewsAPI "everything" endpoint, scrapes each result's page
    for its full text, and summarizes it with the BART summarizer.

    Returns a list of dicts with keys: title, summary, link, published_at,
    source. Returns an empty list when the request fails or yields nothing.
    """
    params = {
        "q": company,
        "apiKey": NEWS_API_KEY,
        "language": "en",
        "sortBy": "relevancy",
        "pageSize": 10,
    }
    try:
        # timeout matches the scraper's; previously this call could hang forever.
        response = requests.get(NEWS_API_URL, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or non-JSON body: degrade to "no articles".
        return []

    articles = []
    for item in data.get("articles", []):
        full_text = scrape_article_text(item.get("url") or "")
        summary = summarize_text(full_text) if full_text else "No summary available."
        articles.append({
            "title": item.get("title"),
            "summary": summary,
            "link": item.get("url"),
            "published_at": item.get("publishedAt"),
            # NewsAPI may return null fields; avoid KeyError/TypeError here.
            "source": (item.get("source") or {}).get("name"),
        })
    return articles
# Function to scrape full article text from a URL
def scrape_article_text(url):
    """Return the concatenated <p> text of the page at *url*.

    Best-effort: returns "" on any network/parse failure or non-2xx status
    so callers can fall back to "No summary available.".
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        # Previously 4xx/5xx error pages were scraped as if they were articles.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        return " ".join(p.text for p in soup.find_all("p"))
    except Exception:
        # Deliberate best-effort swallow: scraping failures are expected.
        return ""
# Function to summarize text using BART model
def summarize_text(text, sentences_count=3):
    """Summarize *text* with the BART summarizer.

    *sentences_count* is kept for interface compatibility; the underlying
    model is bounded by token counts, not sentence counts.
    Returns "No summary available." for blank input or on model failure.
    """
    if not text.strip():
        return "No summary available."
    # Truncate long text to avoid input limit issues.
    # NOTE(review): this truncates by *characters*; BART's real limit is
    # ~1024 tokens, so very long words/inputs may still warn — confirm.
    text = text[:1024]
    try:
        summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
        return summary[0]['summary_text']
    except Exception:
        # One bad article must not break the whole fetch loop.
        return "No summary available."
# Function to analyze sentiment
def analyze_sentiment(text):
    """Classify *text* as 'Negative', 'Neutral', or 'Positive'.

    Uses the cardiffnlp twitter-roberta-base-sentiment model; its label
    order is 0=negative, 1=neutral, 2=positive.
    """
    # truncation caps input at the model's 512-token limit; without it,
    # long article texts crash the RoBERTa forward pass.
    encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    return sentiment_labels[scores.argmax()]
# Translate text to Hindi
def translate_to_hindi(text):
    """Translate English *text* to Hindi via Google Translate.

    Returns the input unchanged when it is empty or whitespace-only,
    since the translator backend rejects empty payloads.
    """
    if not text or not text.strip():
        return text
    return GoogleTranslator(source='en', target='hi').translate(text)
# Convert headline to speech in Hindi
def text_to_speech(text, filename="news_headline.mp3"):
    """Translate *text* to Hindi and save it as spoken audio.

    Returns the saved mp3 filename, or None for blank input.
    """
    if not text.strip():
        return None
    hindi_text = translate_to_hindi(text)
    # gTTS performs a network call to Google's TTS endpoint.
    tts = gtts.gTTS(text=hindi_text, lang='hi')
    tts.save(filename)
    return filename