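"""Company news sentiment pipeline.

Fetches recent articles about a company from NewsAPI, scrapes each article's
text, summarizes it with BART, scores sentiment with a Twitter-RoBERTa model,
and can translate a headline to Hindi and synthesize speech with gTTS.
"""
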
import requests
from bs4 import BeautifulSoup
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from deep_translator import GoogleTranslator
from scipy.special import softmax
import gtts
import os

# Initialize HuggingFace sentiment model
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Initialize summarizer
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# NewsAPI details (prefer the NEWS_API_KEY environment variable; the hardcoded
# fallback is kept from the original, but keys should not live in source control)
NEWS_API_KEY = os.getenv("NEWS_API_KEY", "7e72763bebb54fd79cb632390738cbb1")
NEWS_API_URL = "https://newsapi.org/v2/everything"

# Function to fetch news articles
def fetch_news(company):
    params = {
        "q": company,
        "apiKey": NEWS_API_KEY,
        "language": "en",
        "sortBy": "relevancy",
        "pageSize": 10
    }
    response = requests.get(NEWS_API_URL, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    articles = []
    if "articles" in data:
        for item in data["articles"]:
            full_text = scrape_article_text(item["url"])
            summary = summarize_text(full_text) if full_text else "No summary available."
            articles.append({
                "title": item["title"],
                "summary": summary,
                "link": item["url"],
                "published_at": item["publishedAt"],
                "source": item["source"]["name"]
            })
    return articles

# Function to scrape full article text from a URL
def scrape_article_text(url):
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        full_text = " ".join(p.text for p in paragraphs)
        return full_text
    except Exception:
        return ""

# Function to summarize text using the BART model
def summarize_text(text):
    if not text.strip():
        return "No summary available."
    # Rough character-level pre-truncation; truncation=True also clips the
    # tokenized input at the model's 1024-token limit
    text = text[:1024]
    summary = summarizer(text, max_length=130, min_length=30, do_sample=False, truncation=True)
    return summary[0]['summary_text']

# Function to analyze sentiment
def analyze_sentiment(text):
    # Truncate to the model's 512-token context window
    encoded_text = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)
    
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    sentiment = sentiment_labels[scores.argmax()]
    return sentiment
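# Illustrative call (not in the original file): analyze_sentiment("Shares
# rallied after strong quarterly earnings") would typically return 'Positive',
# though the exact label depends on the model's prediction.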

# Translate text to Hindi
def translate_to_hindi(text):
    return GoogleTranslator(source='en', target='hi').translate(text)

# Convert headline to speech in Hindi
def text_to_speech(text, filename="news_headline.mp3"):
    if not text.strip():
        return None
    hindi_text = translate_to_hindi(text)
    tts = gtts.gTTS(text=hindi_text, lang='hi')
    tts.save(filename)
    return filename
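
# Minimal usage sketch (an assumption, not part of the original file): wires
# the functions above together for one company. "Tesla" is an illustrative
# query only; running this requires network access and a valid NewsAPI key.
if __name__ == "__main__":
    company = "Tesla"  # hypothetical example query
    articles = fetch_news(company)
    for article in articles:
        sentiment = analyze_sentiment(article["summary"])
        print(f"{article['title']} [{sentiment}]")
    # Render the first headline as Hindi speech, if anything was returned
    if articles:
        audio_file = text_to_speech(articles[0]["title"] or "")
        print(f"Saved Hindi audio to {audio_file}")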