Prince-29 committed on
Commit
a4c5061
·
verified ·
1 Parent(s): 989e78b

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +26 -39
utils.py CHANGED
@@ -1,21 +1,20 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
- from sumy.parsers.plaintext import PlaintextParser
4
- from sumy.nlp.tokenizers import Tokenizer
5
- from sumy.summarizers.lsa import LsaSummarizer
6
  from deep_translator import GoogleTranslator
 
7
  import gtts
8
  import os
9
 
10
- from transformers import AutoTokenizer
11
- from transformers import AutoModelForSequenceClassification
12
- from scipy.special import softmax
13
-
14
- MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
15
- tokenizer = AutoTokenizer.from_pretrained(MODEL)
16
- model = AutoModelForSequenceClassification.from_pretrained(MODEL)
17
 
 
 
18
 
 
19
  NEWS_API_KEY = "7e72763bebb54fd79cb632390738cbb1"
20
  NEWS_API_URL = "https://newsapi.org/v2/everything"
21
 
@@ -45,55 +44,43 @@ def fetch_news(company):
45
  })
46
  return articles
47
 
48
- # Function to scrape full article text
49
  def scrape_article_text(url):
50
  try:
51
  headers = {"User-Agent": "Mozilla/5.0"}
52
- response = requests.get(url, headers=headers)
53
  soup = BeautifulSoup(response.text, "html.parser")
54
  paragraphs = soup.find_all("p")
55
- full_text = " ".join([p.text for p in paragraphs])
56
  return full_text
57
  except Exception:
58
  return ""
59
 
60
- # Function to summarize text
61
  def summarize_text(text, sentences_count=3):
62
- parser = PlaintextParser.from_string(text, Tokenizer("english"))
63
- summarizer = LsaSummarizer()
64
- summary = summarizer(parser.document, sentences_count)
65
- return " ".join([str(sentence) for sentence in summary])
66
-
67
- from scipy.special import softmax
68
 
 
69
  def analyze_sentiment(text):
70
- # Tokenize input text
71
  encoded_text = tokenizer(text, return_tensors='pt')
72
- # Run through the model
73
- output = model(**encoded_text)
74
- # Extract raw logits and apply softmax
75
  scores = output.logits[0].detach().numpy()
76
  scores = softmax(scores)
77
 
78
- # Create a dictionary (optional but useful for debugging)
79
- scores_dict = {
80
- 'negative': scores[0],
81
- 'neutral': scores[1],
82
- 'positive': scores[2]
83
- }
84
-
85
- # Determine sentiment based on the highest score
86
- sentiment = max(scores_dict, key=scores_dict.get)
87
-
88
- # Capitalize the first letter
89
- return sentiment.capitalize()
90
-
91
 
92
- # Function to translate text to Hindi
93
  def translate_to_hindi(text):
94
  return GoogleTranslator(source='en', target='hi').translate(text)
95
 
96
- # Function to convert **headlines only** to speech
97
  def text_to_speech(text, filename="news_headline.mp3"):
98
  if not text.strip():
99
  return None
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
 
4
  from deep_translator import GoogleTranslator
5
+ from scipy.special import softmax
6
  import gtts
7
  import os
8
 
9
# --- Models and API configuration (runs once at import time) ---

# HuggingFace sentiment model: 3-class (negative/neutral/positive) RoBERTa.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Initialize summarizer (BART fine-tuned on CNN/DailyMail).
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# News API details.
# SECURITY: the key used to be hard-coded in source. Prefer the environment;
# fall back to the original literal for backward compatibility, but the
# committed key should be rotated.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "7e72763bebb54fd79cb632390738cbb1")
NEWS_API_URL = "https://newsapi.org/v2/everything"
20
 
 
44
  })
45
  return articles
46
 
47
# Function to scrape full article text from a URL
def scrape_article_text(url):
    """Fetch *url* and return the concatenated text of all its <p> tags.

    Best-effort: returns an empty string on any network, HTTP, or parsing
    failure, so callers can treat "" as "no article text available".
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(url, headers=headers, timeout=10)
        # Don't scrape the body of 4xx/5xx error pages as if it were the
        # article — raise here so the except path returns "" instead.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        paragraphs = soup.find_all("p")
        full_text = " ".join(p.text for p in paragraphs)
        return full_text
    except Exception:
        # Deliberate swallow: scraping is best-effort.
        return ""
58
 
59
# Function to summarize text using the BART summarization pipeline
def summarize_text(text, sentences_count=3):
    """Summarize *text* with the module-level BART pipeline.

    ``sentences_count`` is kept for backward compatibility with the old
    sumy-based API but is not used by the BART summarizer.
    Returns "No summary available." for blank input.
    """
    if not text.strip():
        return "No summary available."
    # Crude character-level truncation to stay within the model's input limit.
    snippet = text[:1024]
    result = summarizer(snippet, max_length=130, min_length=30, do_sample=False)
    return result[0]['summary_text']
67
 
68
# Function to analyze sentiment
def analyze_sentiment(text):
    """Classify *text* and return 'Negative', 'Neutral', or 'Positive'.

    Uses the module-level RoBERTa sentiment model; logits are softmaxed and
    the highest-probability class label is returned.
    """
    # truncation/max_length: RoBERTa errors on inputs longer than its
    # 512-token context window, so long articles must be clipped here.
    encoded_text = tokenizer(text, return_tensors='pt',
                             truncation=True, max_length=512)
    output = sentiment_model(**encoded_text)

    # Raw logits -> probabilities.
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)

    # Index order follows the cardiffnlp label convention (0/1/2).
    sentiment_labels = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    sentiment = sentiment_labels[scores.argmax()]
    return sentiment
 
 
 
 
 
 
 
 
 
 
78
 
79
# Translate text to Hindi
def translate_to_hindi(text):
    """Translate English *text* to Hindi via Google Translate."""
    translator = GoogleTranslator(source='en', target='hi')
    return translator.translate(text)
82
 
83
+ # Convert headline to speech in Hindi
84
  def text_to_speech(text, filename="news_headline.mp3"):
85
  if not text.strip():
86
  return None