import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from deep_translator import GoogleTranslator
from scipy.special import softmax
import gtts
import os
# Initialize HuggingFace sentiment model
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Initialize summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# News API details
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")  # read the key from the environment rather than hardcoding a secret
NEWS_API_URL = "https://newsapi.org/v2/everything"
# Function to fetch news articles
def fetch_news(company):
    params = {
        "q": company,
        "apiKey": NEWS_API_KEY,
        "language": "en",
        "sortBy": "relevancy",
        "pageSize": 10
    }
    response = requests.get(NEWS_API_URL, params=params, timeout=10)
    data = response.json()
    articles = []
    for item in data.get("articles", []):
        url = item.get("url")
        full_text = scrape_article_text(url) if url else ""
        summary = summarize_text(full_text) if full_text else "No summary available."
        articles.append({
            "title": item.get("title"),
            "summary": summary,
            "link": url,
            "published_at": item.get("publishedAt"),
            "source": item.get("source", {}).get("name")
        })
    return articles
# Function to scrape full article text from a URL
def scrape_article_text(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        full_text = " ".join(p.text for p in paragraphs)
        return full_text
    except Exception:
        return ""
# Function to summarize text using BART model
def summarize_text(text):
    if not text.strip():
        return "No summary available."
    # BART accepts at most 1024 tokens; truncating to 1024 characters is a
    # cheap approximation that keeps the input safely under that limit
    text = text[:1024]
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return summary[0]['summary_text']
# Function to analyze sentiment
def analyze_sentiment(text):
    # Truncate to the model's 512-token limit so long inputs do not crash the forward pass
    encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)
    scores = softmax(output.logits[0].detach().numpy())
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    return sentiment_labels[scores.argmax()]
# Translate text to Hindi
def translate_to_hindi(text):
    return GoogleTranslator(source='en', target='hi').translate(text)
# Convert headline to speech in Hindi
def text_to_speech(text, filename="news_headline.mp3"):
    if not text.strip():
        return None
    hindi_text = translate_to_hindi(text)
    tts = gtts.gTTS(text=hindi_text, lang='hi')
    tts.save(filename)
    return filename
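
# Example usage: a minimal sketch, assuming NEWS_API_KEY is set in the
# environment; the query "Tesla" is purely illustrative. It fetches articles,
# prints each headline with its summary's sentiment, and renders the first
# headline as Hindi speech.
if __name__ == "__main__":
    articles = fetch_news("Tesla")
    for article in articles:
        sentiment = analyze_sentiment(article["summary"])
        print(f"{article['title']} [{sentiment}] ({article['source']})")
    if articles and articles[0]["title"]:
        audio_file = text_to_speech(articles[0]["title"])
        print(f"Saved Hindi audio to {audio_file}")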