"""Fetch news articles for a company, summarize them, analyze sentiment,
and optionally convert headlines to Hindi speech.

Pipeline: NewsAPI search -> scrape full article text -> BART summarization
-> RoBERTa sentiment -> Google-translate to Hindi -> gTTS audio file.
"""

import os

import requests
from bs4 import BeautifulSoup
from deep_translator import GoogleTranslator
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import gtts

# HuggingFace sentiment model (3-class: negative / neutral / positive).
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Abstractive summarizer.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# News API details. Prefer the environment variable; the literal fallback
# preserves existing behavior. NOTE(review): a key committed to source
# should be rotated and removed.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "7e72763bebb54fd79cb632390738cbb1")
NEWS_API_URL = "https://newsapi.org/v2/everything"


def fetch_news(company):
    """Fetch up to 10 English news articles mentioning *company*.

    Returns a list of dicts with keys: title, summary, link, published_at,
    source. Returns an empty list when the API reports an error or no
    results (NewsAPI error responses carry no "articles" key).
    """
    params = {
        "q": company,
        "apiKey": NEWS_API_KEY,
        "language": "en",
        "sortBy": "relevancy",
        "pageSize": 10,
    }
    response = requests.get(NEWS_API_URL, params=params)
    data = response.json()

    articles = []
    # Tolerate error payloads: a failed request simply yields no articles.
    for item in data.get("articles", []):
        url = item.get("url", "")
        full_text = scrape_article_text(url) if url else ""
        summary = summarize_text(full_text) if full_text else "No summary available."
        articles.append({
            # NewsAPI fields are frequently null/missing; default rather than crash.
            "title": item.get("title") or "Untitled",
            "summary": summary,
            "link": url,
            "published_at": item.get("publishedAt", ""),
            "source": (item.get("source") or {}).get("name", "Unknown"),
        })
    return articles


def scrape_article_text(url):
    """Scrape and join all <p> text from *url*; return "" on any failure.

    Best-effort by design: a single unreachable/blocked article must not
    abort the whole fetch, so every error maps to an empty string.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        return " ".join(p.text for p in paragraphs)
    except Exception:
        return ""


def summarize_text(text, sentences_count=3):
    """Summarize *text* with BART; return a placeholder for empty input.

    ``sentences_count`` is kept for interface compatibility; the BART
    pipeline is bounded by token lengths, not sentence counts.
    """
    if not text.strip():
        return "No summary available."
    # Crude character-level truncation keeps the input within BART's limit;
    # the tokenizer truncates precisely, this just avoids huge inputs.
    text = text[:1024]
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return summary[0]["summary_text"]


def analyze_sentiment(text):
    """Classify *text* as 'Negative', 'Neutral', or 'Positive'.

    Softmax over the model logits; the argmax index maps to the
    cardiffnlp label order (0=Negative, 1=Neutral, 2=Positive).
    """
    # truncation is required: RoBERTa has a 512-token positional limit and
    # full article text would otherwise crash the forward pass.
    encoded_text = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)
    scores = softmax(output.logits[0].detach().numpy())
    sentiment_labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
    return sentiment_labels[scores.argmax()]


def translate_to_hindi(text):
    """Translate English *text* to Hindi via Google Translate."""
    return GoogleTranslator(source="en", target="hi").translate(text)


def text_to_speech(text, filename="news_headline.mp3"):
    """Translate *text* to Hindi and save it as speech to *filename*.

    Returns the filename on success, or None for blank input.
    """
    if not text.strip():
        return None
    hindi_text = translate_to_hindi(text)
    tts = gtts.gTTS(text=hindi_text, lang="hi")
    tts.save(filename)
    return filename