loayshabet committed on
Commit
db7a151
·
verified ·
1 Parent(s): fed46e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +203 -50
app.py CHANGED
@@ -1,68 +1,181 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  import feedparser
4
- from datetime import datetime
5
  import json
6
  import os
 
 
 
 
 
7
 
8
- # Initialize the pipeline using an open source model instead of Llama
9
- # Using facebook/bart-large-cnn which is specialized for summarization
10
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def fetch_news_from_rss(interests):
13
  """Fetch news from RSS feeds based on interests"""
14
- rss_feeds = {
15
- "Technology": "https://feeds.feedburner.com/TechCrunch",
16
- "Business": "https://feeds.feedburner.com/BusinessInsider",
17
- "Sports": "https://www.espn.com/espn/rss/news",
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- articles = []
21
- for interest in interests:
22
- if interest in rss_feeds:
23
- try:
24
- feed = feedparser.parse(rss_feeds[interest])
25
- articles.extend(feed.entries[:3])
26
- except Exception as e:
27
- print(f"Error fetching {interest} news: {e}")
28
- return articles
29
-
30
- def generate_summary(text, language="English"):
31
  """Generate summary using BART"""
32
  try:
33
- # BART requires text to be within certain length limits
34
- max_input_length = 1024 # BART's limit
35
  if len(text) > max_input_length:
36
- text = text[:max_input_length]
37
-
38
- summary = summarizer(
39
- text,
40
- max_length=130,
41
- min_length=30,
42
- do_sample=False
43
- )[0]['summary_text']
44
-
45
- # For languages other than English, we can add translation here
46
- # You might want to add a translation pipeline for other languages
47
- return summary
48
-
 
 
 
 
 
49
  except Exception as e:
50
  print(f"Error generating summary: {e}")
51
  return "Error generating summary. Please try again."
52
 
53
- def save_user_preferences(name, language, interests):
54
  if not name or not language or not interests:
55
- return "Please fill in all fields!"
56
 
57
  preferences = {
58
  "name": name,
59
  "language": language,
60
  "interests": interests,
 
 
61
  "last_updated": datetime.now().isoformat()
62
  }
63
 
64
  try:
65
- with open(f"preferences_{name}.json", "w") as f:
 
66
  json.dump(preferences, f)
67
  return f"Preferences saved for {name}!"
68
  except Exception as e:
@@ -73,7 +186,7 @@ def get_personalized_summary(name):
73
  return "Please enter your name!"
74
 
75
  try:
76
- with open(f"preferences_{name}.json", "r") as f:
77
  preferences = json.load(f)
78
  except FileNotFoundError:
79
  return "Please set your preferences first!"
@@ -87,44 +200,84 @@ def get_personalized_summary(name):
87
  summaries = []
88
  for article in articles:
89
  title = article.get("title", "Untitled")
90
- content = article.get("description", article.get("summary", ""))
 
 
91
  if content:
92
- summary = generate_summary(content, preferences["language"])
93
- summaries.append(f"📰 {title}\n\n{summary}\n\n---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  return "\n".join(summaries) if summaries else "No summaries generated. Please try again."
96
 
97
  # Create Gradio interface
98
- with gr.Blocks(title="News Summarizer") as demo:
99
- gr.Markdown("# 📰 AI News Summarizer")
100
 
101
  with gr.Tab("Set Preferences"):
102
  name_input = gr.Textbox(label="Your Name")
103
  language_dropdown = gr.Dropdown(
104
- # For now, we'll keep English as the main language since we're not using translation
105
- choices=["English"],
 
 
 
106
  label="Preferred Language",
107
  value="English"
108
  )
109
  interests_checkboxes = gr.CheckboxGroup(
110
- choices=["Technology", "Business", "Sports"],
111
- label="News Interests"
 
 
 
 
 
 
 
 
 
 
 
 
112
  )
113
  save_button = gr.Button("Save Preferences")
114
  preferences_output = gr.Textbox(label="Status")
115
 
116
  save_button.click(
117
  save_user_preferences,
118
- inputs=[name_input, language_dropdown, interests_checkboxes],
 
119
  outputs=[preferences_output]
120
  )
121
 
122
  with gr.Tab("Get News Summary"):
123
  name_check = gr.Textbox(label="Enter your name to get summary")
 
 
 
 
 
124
  get_summary_button = gr.Button("Get Summary")
125
  summary_output = gr.Textbox(
126
  label="Your Personalized News Summary",
127
- lines=10
128
  )
129
 
130
  get_summary_button.click(
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
3
  import feedparser
4
+ from datetime import datetime, timedelta
5
  import json
6
  import os
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ import threading
10
+ import time
11
+ import schedule
12
 
13
# Summarization pipeline, created once at import time and shared by
# generate_summary().
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
)

# Translation pipeline shared by translate_text() for all non-English targets.
# NOTE(review): confirm that this checkpoint id exists on the Hugging Face Hub;
# if it does not, this line fails at import time and the app never starts.
translator = pipeline(
    "translation",
    model="Helsinki-NLP/opus-mt-en-MANY",
)
18
+
19
# RSS feed URLs grouped by interest category.
# The keys double as the choices offered in the "News Interests" checkbox
# group, so renaming a key changes what users can select.
NEWS_SOURCES = {
    "Technology": [
        "https://feeds.feedburner.com/TechCrunch",
        "https://www.wired.com/feed/rss",
        "https://www.theverge.com/rss/index.xml",
        "https://feeds.feedburner.com/thenextweb",
    ],
    "Business": [
        "https://feeds.feedburner.com/BusinessInsider",
        "https://www.forbes.com/innovation/feed2",
        "https://www.entrepreneur.com/feed/latest",
        "https://www.ft.com/rss/home",
    ],
    "Sports": [
        "https://www.espn.com/espn/rss/news",
        "https://www.sports.yahoo.com/rss/",
        "https://www.skysports.com/rss/news-feed",
        "https://www.cbssports.com/rss/headlines/",
    ],
    "Science": [
        "https://www.sciencedaily.com/rss/all.xml",
        "https://www.nature.com/nature.rss",
        "https://www.newscientist.com/feed/home/",
    ],
    "Health": [
        "https://www.who.int/rss-feeds/news-english.xml",
        "https://www.health.harvard.edu/feed",
        "https://rss.medicalnewstoday.com/featurednews.xml",
    ],
    "Entertainment": [
        "https://variety.com/feed/",
        "https://www.hollywoodreporter.com/feed",
        "https://deadline.com/feed",
    ],
}
55
+
56
class NewsCache:
    """In-memory holder for fetched articles plus the time they were fetched.

    Args:
        update_interval: Minutes after which the cached data is considered
            stale. Defaults to 30, the previously hard-coded value.
    """

    def __init__(self, update_interval=30):
        # Fetched articles; starts empty and is filled by the fetch code.
        self.cache = {}
        # datetime of the last refresh, or None when nothing was fetched yet.
        self.last_updated = None
        # Staleness threshold in minutes.
        self.update_interval = update_interval

    def needs_update(self):
        """Return True when nothing is cached yet or the cache is stale."""
        if self.last_updated is None:
            return True
        age = datetime.now() - self.last_updated
        return age > timedelta(minutes=self.update_interval)


# Single process-wide cache instance shared by all requests.
news_cache = NewsCache()
68
+
69
def translate_text(text, target_language):
    """Return *text* translated into *target_language*.

    English is treated as the source language, so English requests are
    returned untouched. Any failure in the translation pipeline is printed
    and the original text is returned instead of raising.

    NOTE(review): the pipeline is called with ``target_lang`` set to the
    upper-cased language name (e.g. "SPANISH"). Confirm the configured model
    accepts that keyword and value; if it raises, this function silently
    falls back to the untranslated text.
    """
    if target_language != "English":
        try:
            results = translator(text, target_lang=target_language.upper())
            return results[0]['translation_text']
        except Exception as e:
            print(f"Translation error: {e}")
    return text
80
+
81
def fetch_full_article(url):
    """Download *url* and return the main article text, or None.

    Args:
        url: Address of the article page. Empty or None yields None without
            making a request.

    Returns:
        The article text with whitespace collapsed to single spaces, or None
        when the URL is empty, the request fails or returns an HTTP error
        status, no article container is found, or the container has no text.
    """
    if not url:
        return None

    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        # Without this, 403/404/5xx error pages would be parsed and later
        # summarized as if they were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop page chrome and non-content elements before extracting text.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Generic container lookup; site-specific selectors may be needed.
        article = (
            soup.find('article')
            or soup.find('main')
            or soup.find('div', class_=['content', 'article'])
        )
        if article is None:
            return None

        # Collapse layout whitespace so downstream length-based chunking is
        # not spent on blank runs.
        text = " ".join(article.get_text(separator=" ").split())
        return text or None
    except Exception as e:
        print(f"Error fetching article: {e}")
        return None
101
+
102
def _fetch_category_articles(interest):
    """Fetch up to five entries from every feed configured for *interest*."""
    collected = []
    for feed_url in NEWS_SOURCES[interest]:
        try:
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:5]:  # top 5 articles from each feed
                article = {
                    'title': entry.get('title', 'Untitled'),
                    'description': entry.get('description', ''),
                    'link': entry.get('link', ''),
                    'category': interest,
                    'published': entry.get('published', datetime.now().isoformat()),
                    'source': feed_url,
                }

                # Prefer the full page text; fall back to the feed description.
                full_content = fetch_full_article(article['link'])
                if full_content:
                    article['content'] = full_content

                collected.append(article)
        except Exception as e:
            print(f"Error fetching from {feed_url}: {e}")
    return collected


def fetch_news_from_rss(interests):
    """Fetch news from RSS feeds based on interests.

    Articles are cached per category in ``news_cache``. Previously the cache
    was one flat list filled for whichever interests were requested first, so
    later callers with different interests received the wrong categories
    until the cache expired.

    Args:
        interests: Iterable of category names; names that are not keys of
            ``NEWS_SOURCES`` are ignored. None is treated as empty.

    Returns:
        A new list of article dicts for the requested categories.
    """
    # Start over when stale, or when the cache still holds the old flat list.
    if news_cache.needs_update() or not isinstance(news_cache.cache, dict):
        news_cache.cache = {}
        news_cache.last_updated = datetime.now()

    articles = []
    for interest in interests or []:
        if interest not in NEWS_SOURCES:
            continue
        if interest not in news_cache.cache:
            fetched = _fetch_category_articles(interest)
            if not fetched:
                # Do not cache a total failure; retry on the next request.
                continue
            news_cache.cache[interest] = fetched
        articles.extend(news_cache.cache[interest])

    return articles
134
+
135
def generate_summary(text, max_length=150, min_length=50):
    """Generate summary using BART.

    Input longer than 1024 characters is split into pieces of at most that
    size, broken on whitespace so no word is cut in half. Each piece is
    summarized on its own and the partial summaries are joined with spaces.

    Args:
        text: Text to summarize.
        max_length: Passed to the summarizer as ``max_length``.
        min_length: Passed to the summarizer as ``min_length``; clamped so it
            never exceeds ``max_length``.

    Returns:
        The summary, or the fixed error message when *text* is blank or the
        summarizer raises.
    """
    import textwrap

    error_message = "Error generating summary. Please try again."

    # Nothing to summarize; do not send blank input to the model.
    if not text or not text.strip():
        return error_message

    max_input_length = 1024
    min_length = min(min_length, max_length)

    try:
        if len(text) > max_input_length:
            # wrap() breaks on whitespace and only splits a word that is
            # itself longer than the limit.
            chunks = textwrap.wrap(text, width=max_input_length)
        else:
            chunks = [text]

        partial_summaries = [
            summarizer(
                chunk,
                max_length=max_length,
                min_length=min_length,
                do_sample=False,
            )[0]['summary_text']
            for chunk in chunks
        ]
        return " ".join(partial_summaries)
    except Exception as e:
        print(f"Error generating summary: {e}")
        return error_message
162
 
163
+ def save_user_preferences(name, language, interests, summary_length, update_frequency):
164
  if not name or not language or not interests:
165
+ return "Please fill in all required fields!"
166
 
167
  preferences = {
168
  "name": name,
169
  "language": language,
170
  "interests": interests,
171
+ "summary_length": summary_length,
172
+ "update_frequency": update_frequency,
173
  "last_updated": datetime.now().isoformat()
174
  }
175
 
176
  try:
177
+ os.makedirs('user_preferences', exist_ok=True)
178
+ with open(f"user_preferences/preferences_{name}.json", "w") as f:
179
  json.dump(preferences, f)
180
  return f"Preferences saved for {name}!"
181
  except Exception as e:
 
186
  return "Please enter your name!"
187
 
188
  try:
189
+ with open(f"user_preferences/preferences_{name}.json", "r") as f:
190
  preferences = json.load(f)
191
  except FileNotFoundError:
192
  return "Please set your preferences first!"
 
200
  summaries = []
201
  for article in articles:
202
  title = article.get("title", "Untitled")
203
+ content = article.get("content", article.get("description", ""))
204
+ category = article.get("category", "Uncategorized")
205
+
206
  if content:
207
+ summary = generate_summary(
208
+ content,
209
+ max_length=preferences["summary_length"],
210
+ min_length=max(30, preferences["summary_length"] // 3)
211
+ )
212
+
213
+ if preferences["language"] != "English":
214
+ summary = translate_text(summary, preferences["language"])
215
+ title = translate_text(title, preferences["language"])
216
+
217
+ formatted_summary = f"""
218
+ 📰 {title}
219
+ 📁 Category: {category}
220
+ ⏰ {datetime.now().strftime('%Y-%m-%d %H:%M')}
221
+
222
+ {summary}
223
+
224
+ ---"""
225
+ summaries.append(formatted_summary)
226
 
227
  return "\n".join(summaries) if summaries else "No summaries generated. Please try again."
228
 
229
  # Create Gradio interface
230
+ with gr.Blocks(title="Enhanced News Summarizer") as demo:
231
+ gr.Markdown("# 📰 Enhanced AI News Summarizer")
232
 
233
  with gr.Tab("Set Preferences"):
234
  name_input = gr.Textbox(label="Your Name")
235
  language_dropdown = gr.Dropdown(
236
+ choices=[
237
+ "English", "Spanish", "French", "German",
238
+ "Italian", "Portuguese", "Russian", "Chinese",
239
+ "Japanese", "Korean", "Arabic"
240
+ ],
241
  label="Preferred Language",
242
  value="English"
243
  )
244
  interests_checkboxes = gr.CheckboxGroup(
245
+ choices=list(NEWS_SOURCES.keys()),
246
+ label="News Interests (Select multiple)"
247
+ )
248
+ summary_length = gr.Slider(
249
+ minimum=50,
250
+ maximum=300,
251
+ value=150,
252
+ step=10,
253
+ label="Summary Length (words)"
254
+ )
255
+ update_frequency = gr.Dropdown(
256
+ choices=["30 minutes", "1 hour", "2 hours", "4 hours", "12 hours", "24 hours"],
257
+ label="News Update Frequency",
258
+ value="1 hour"
259
  )
260
  save_button = gr.Button("Save Preferences")
261
  preferences_output = gr.Textbox(label="Status")
262
 
263
  save_button.click(
264
  save_user_preferences,
265
+ inputs=[name_input, language_dropdown, interests_checkboxes,
266
+ summary_length, update_frequency],
267
  outputs=[preferences_output]
268
  )
269
 
270
  with gr.Tab("Get News Summary"):
271
  name_check = gr.Textbox(label="Enter your name to get summary")
272
+ refresh_interval = gr.Dropdown(
273
+ choices=["Manual", "30 seconds", "1 minute", "5 minutes"],
274
+ label="Auto-refresh interval",
275
+ value="Manual"
276
+ )
277
  get_summary_button = gr.Button("Get Summary")
278
  summary_output = gr.Textbox(
279
  label="Your Personalized News Summary",
280
+ lines=20
281
  )
282
 
283
  get_summary_button.click(