Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,68 +1,181 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
import feedparser
|
4 |
-
from datetime import datetime
|
5 |
import json
|
6 |
import os
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
-
# Initialize the
|
9 |
-
# Using facebook/bart-large-cnn which is specialized for summarization
|
10 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def fetch_news_from_rss(interests):
|
13 |
"""Fetch news from RSS feeds based on interests"""
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
try:
|
24 |
-
feed = feedparser.parse(rss_feeds[interest])
|
25 |
-
articles.extend(feed.entries[:3])
|
26 |
-
except Exception as e:
|
27 |
-
print(f"Error fetching {interest} news: {e}")
|
28 |
-
return articles
|
29 |
-
|
30 |
-
def generate_summary(text, language="English"):
|
31 |
"""Generate summary using BART"""
|
32 |
try:
|
33 |
-
#
|
34 |
-
max_input_length = 1024
|
35 |
if len(text) > max_input_length:
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
49 |
except Exception as e:
|
50 |
print(f"Error generating summary: {e}")
|
51 |
return "Error generating summary. Please try again."
|
52 |
|
53 |
-
def save_user_preferences(name, language, interests):
|
54 |
if not name or not language or not interests:
|
55 |
-
return "Please fill in all fields!"
|
56 |
|
57 |
preferences = {
|
58 |
"name": name,
|
59 |
"language": language,
|
60 |
"interests": interests,
|
|
|
|
|
61 |
"last_updated": datetime.now().isoformat()
|
62 |
}
|
63 |
|
64 |
try:
|
65 |
-
|
|
|
66 |
json.dump(preferences, f)
|
67 |
return f"Preferences saved for {name}!"
|
68 |
except Exception as e:
|
@@ -73,7 +186,7 @@ def get_personalized_summary(name):
|
|
73 |
return "Please enter your name!"
|
74 |
|
75 |
try:
|
76 |
-
with open(f"preferences_{name}.json", "r") as f:
|
77 |
preferences = json.load(f)
|
78 |
except FileNotFoundError:
|
79 |
return "Please set your preferences first!"
|
@@ -87,44 +200,84 @@ def get_personalized_summary(name):
|
|
87 |
summaries = []
|
88 |
for article in articles:
|
89 |
title = article.get("title", "Untitled")
|
90 |
-
content = article.get("
|
|
|
|
|
91 |
if content:
|
92 |
-
summary = generate_summary(
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
return "\n".join(summaries) if summaries else "No summaries generated. Please try again."
|
96 |
|
97 |
# Create Gradio interface
|
98 |
-
with gr.Blocks(title="News Summarizer") as demo:
|
99 |
-
gr.Markdown("# 📰 AI News Summarizer")
|
100 |
|
101 |
with gr.Tab("Set Preferences"):
|
102 |
name_input = gr.Textbox(label="Your Name")
|
103 |
language_dropdown = gr.Dropdown(
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
106 |
label="Preferred Language",
|
107 |
value="English"
|
108 |
)
|
109 |
interests_checkboxes = gr.CheckboxGroup(
|
110 |
-
choices=
|
111 |
-
label="News Interests"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
)
|
113 |
save_button = gr.Button("Save Preferences")
|
114 |
preferences_output = gr.Textbox(label="Status")
|
115 |
|
116 |
save_button.click(
|
117 |
save_user_preferences,
|
118 |
-
inputs=[name_input, language_dropdown, interests_checkboxes
|
|
|
119 |
outputs=[preferences_output]
|
120 |
)
|
121 |
|
122 |
with gr.Tab("Get News Summary"):
|
123 |
name_check = gr.Textbox(label="Enter your name to get summary")
|
|
|
|
|
|
|
|
|
|
|
124 |
get_summary_button = gr.Button("Get Summary")
|
125 |
summary_output = gr.Textbox(
|
126 |
label="Your Personalized News Summary",
|
127 |
-
lines=
|
128 |
)
|
129 |
|
130 |
get_summary_button.click(
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline, AutoModelForSeq2SeqGeneration, AutoTokenizer
|
3 |
import feedparser
|
4 |
+
from datetime import datetime, timedelta
|
5 |
import json
|
6 |
import os
|
7 |
+
import requests
|
8 |
+
from bs4 import BeautifulSoup
|
9 |
+
import threading
|
10 |
+
import time
|
11 |
+
import schedule
|
12 |
|
13 |
+
# Initialize the summarization model
|
|
|
14 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
15 |
|
16 |
+
# Initialize translation model for multiple languages
|
17 |
+
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-MANY")
|
18 |
+
|
19 |
+
# News sources configuration
|
20 |
+
NEWS_SOURCES = {
|
21 |
+
"Technology": [
|
22 |
+
"https://feeds.feedburner.com/TechCrunch",
|
23 |
+
"https://www.wired.com/feed/rss",
|
24 |
+
"https://www.theverge.com/rss/index.xml",
|
25 |
+
"https://feeds.feedburner.com/thenextweb"
|
26 |
+
],
|
27 |
+
"Business": [
|
28 |
+
"https://feeds.feedburner.com/BusinessInsider",
|
29 |
+
"https://www.forbes.com/innovation/feed2",
|
30 |
+
"https://www.entrepreneur.com/feed/latest",
|
31 |
+
"https://www.ft.com/rss/home"
|
32 |
+
],
|
33 |
+
"Sports": [
|
34 |
+
"https://www.espn.com/espn/rss/news",
|
35 |
+
"https://www.sports.yahoo.com/rss/",
|
36 |
+
"https://www.skysports.com/rss/news-feed",
|
37 |
+
"https://www.cbssports.com/rss/headlines/"
|
38 |
+
],
|
39 |
+
"Science": [
|
40 |
+
"https://www.sciencedaily.com/rss/all.xml",
|
41 |
+
"https://www.nature.com/nature.rss",
|
42 |
+
"https://www.newscientist.com/feed/home/"
|
43 |
+
],
|
44 |
+
"Health": [
|
45 |
+
"https://www.who.int/rss-feeds/news-english.xml",
|
46 |
+
"https://www.health.harvard.edu/feed",
|
47 |
+
"https://rss.medicalnewstoday.com/featurednews.xml"
|
48 |
+
],
|
49 |
+
"Entertainment": [
|
50 |
+
"https://variety.com/feed/",
|
51 |
+
"https://www.hollywoodreporter.com/feed",
|
52 |
+
"https://deadline.com/feed"
|
53 |
+
]
|
54 |
+
}
|
55 |
+
|
56 |
+
class NewsCache:
    """In-memory holder for fetched news plus the time it was last refreshed.

    Attributes:
        cache: The cached news data (starts as an empty dict).
        last_updated: ``datetime`` of the last refresh, or ``None`` if the
            cache has never been filled.
        update_interval: Age in minutes after which the cache is stale.
    """

    def __init__(self, update_interval=30):
        """Create an empty cache.

        Args:
            update_interval: Minutes after which cached data is considered
                stale. Defaults to 30, the previously hard-coded value.
        """
        self.cache = {}
        self.last_updated = None
        self.update_interval = update_interval  # minutes

    def needs_update(self):
        """Return True when the cache was never filled or has gone stale."""
        if not self.last_updated:
            # Nothing has been cached yet.
            return True
        age = datetime.now() - self.last_updated
        return age > timedelta(minutes=self.update_interval)
|
66 |
+
|
67 |
+
news_cache = NewsCache()
|
68 |
+
|
69 |
+
def translate_text(text, target_language):
    """Translate *text* into *target_language* with the module-level translator.

    Args:
        text: The text to translate.
        target_language: Name of the target language, e.g. "Spanish".
            "English" means no translation is performed.

    Returns:
        The translated text, or *text* unchanged when the target is English
        or when the translation attempt fails for any reason.
    """
    # No work to do when English is requested.
    if target_language == "English":
        return text

    try:
        outputs = translator(text, target_lang=target_language.upper())
        first_result = outputs[0]
        return first_result['translation_text']
    except Exception as err:
        # Best effort: report the problem and fall back to the input text.
        print(f"Translation error: {err}")
        return text
|
80 |
+
|
81 |
+
def fetch_full_article(url):
    """Fetch the main text content of the article at *url*.

    The page is downloaded with a browser-like User-Agent, boilerplate
    elements are stripped, and the text of the first ``<article>``,
    ``<main>``, or ``<div class="content|article">`` element is returned.

    Args:
        url: Address of the article page. Empty or ``None`` yields ``None``.

    Returns:
        The stripped text of the article container, or ``None`` when the URL
        is missing, the request fails or returns an HTTP error status, or no
        article container is found.
    """
    # An entry without a link cannot be fetched; skip the doomed request.
    if not url:
        return None

    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        # Treat HTTP error responses (403, 404, 5xx, ...) as failures so an
        # error page is never returned as if it were the article text.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            tag.decompose()

        # Get article content (this might need adjustment based on specific sites)
        article = soup.find('article') or soup.find('main') or soup.find('div', class_=['content', 'article'])

        if article:
            return article.get_text().strip()
        return None
    except Exception as e:
        # Best effort: callers fall back to the feed description on None.
        print(f"Error fetching article: {e}")
        return None
|
101 |
+
|
102 |
def _fetch_category_articles(interest):
    """Download the top entries of every feed configured for *interest*."""
    articles = []
    for feed_url in NEWS_SOURCES[interest]:
        try:
            feed = feedparser.parse(feed_url)
            for entry in feed.entries[:5]:  # Get top 5 articles from each feed
                article = {
                    'title': entry.get('title', 'Untitled'),
                    'description': entry.get('description', ''),
                    'link': entry.get('link', ''),
                    'category': interest,
                    'published': entry.get('published', datetime.now().isoformat()),
                    'source': feed_url,
                }

                # Try to get full article content
                full_content = fetch_full_article(article['link'])
                if full_content:
                    article['content'] = full_content

                articles.append(article)
        except Exception as e:
            # One broken feed must not prevent the remaining feeds from loading.
            print(f"Error fetching from {feed_url}: {e}")
    return articles


def fetch_news_from_rss(interests):
    """Fetch news from RSS feeds based on interests.

    Results are cached per category in ``news_cache.cache`` (a dict mapping
    category name to its list of articles), so a request is only ever served
    articles for the categories it asked for. Categories not yet cached are
    fetched on demand; the whole cache is dropped once
    ``news_cache.needs_update()`` reports it stale.

    Args:
        interests: Iterable of category names. Names that are not keys of
            ``NEWS_SOURCES`` are ignored; ``None`` is treated as empty.

    Returns:
        A list of article dicts with the keys 'title', 'description', 'link',
        'category', 'published', 'source', and 'content' when the full
        article text could be fetched.
    """
    # Start over when the cache expired or does not hold the per-category
    # mapping maintained here.
    if news_cache.needs_update() or not isinstance(news_cache.cache, dict):
        news_cache.cache = {}
        news_cache.last_updated = datetime.now()

    articles = []
    for interest in interests or []:
        if interest not in NEWS_SOURCES:
            continue
        cached = news_cache.cache.get(interest)
        if cached is None:
            cached = _fetch_category_articles(interest)
            # Only remember successful fetches so a transient outage is
            # retried on the next request instead of being served as empty.
            if cached:
                news_cache.cache[interest] = cached
        articles.extend(cached)
    return articles
|
134 |
+
|
135 |
+
def _split_for_summarizer(text, limit):
    """Split *text* into pieces of at most *limit* characters at whitespace."""
    import textwrap  # local import keeps this block self-contained

    # wrap() breaks at whitespace, so words are not cut in half at chunk
    # boundaries; only a single word longer than *limit* is split.
    return textwrap.wrap(text, width=limit)


def generate_summary(text, max_length=150, min_length=50):
    """Generate summary using BART.

    Text longer than 1024 characters is split into chunks of at most that
    many characters, broken at whitespace (runs of whitespace inside the
    chunks are collapsed to single spaces). Each chunk is summarized
    separately and the partial summaries are joined with a space.

    Args:
        text: The text to summarize.
        max_length: ``max_length`` passed to the summarizer for each chunk.
        min_length: ``min_length`` passed to the summarizer for each chunk.

    Returns:
        The summary string; an empty string when *text* is empty or only
        whitespace; or a fixed error message when summarization fails.
    """
    try:
        # Nothing to summarize: avoid sending blank input to the model.
        if not text or not text.strip():
            return ""

        # Handle long texts by chunking
        max_input_length = 1024  # measured in characters
        if len(text) > max_input_length:
            chunks = _split_for_summarizer(text, max_input_length)
        else:
            chunks = [text]

        summaries = [
            summarizer(
                chunk,
                max_length=max_length,
                min_length=min_length,
                do_sample=False
            )[0]['summary_text']
            for chunk in chunks
        ]
        return " ".join(summaries)
    except Exception as e:
        print(f"Error generating summary: {e}")
        return "Error generating summary. Please try again."
|
162 |
|
163 |
+
def save_user_preferences(name, language, interests, summary_length, update_frequency):
|
164 |
if not name or not language or not interests:
|
165 |
+
return "Please fill in all required fields!"
|
166 |
|
167 |
preferences = {
|
168 |
"name": name,
|
169 |
"language": language,
|
170 |
"interests": interests,
|
171 |
+
"summary_length": summary_length,
|
172 |
+
"update_frequency": update_frequency,
|
173 |
"last_updated": datetime.now().isoformat()
|
174 |
}
|
175 |
|
176 |
try:
|
177 |
+
os.makedirs('user_preferences', exist_ok=True)
|
178 |
+
with open(f"user_preferences/preferences_{name}.json", "w") as f:
|
179 |
json.dump(preferences, f)
|
180 |
return f"Preferences saved for {name}!"
|
181 |
except Exception as e:
|
|
|
186 |
return "Please enter your name!"
|
187 |
|
188 |
try:
|
189 |
+
with open(f"user_preferences/preferences_{name}.json", "r") as f:
|
190 |
preferences = json.load(f)
|
191 |
except FileNotFoundError:
|
192 |
return "Please set your preferences first!"
|
|
|
200 |
summaries = []
|
201 |
for article in articles:
|
202 |
title = article.get("title", "Untitled")
|
203 |
+
content = article.get("content", article.get("description", ""))
|
204 |
+
category = article.get("category", "Uncategorized")
|
205 |
+
|
206 |
if content:
|
207 |
+
summary = generate_summary(
|
208 |
+
content,
|
209 |
+
max_length=preferences["summary_length"],
|
210 |
+
min_length=max(30, preferences["summary_length"] // 3)
|
211 |
+
)
|
212 |
+
|
213 |
+
if preferences["language"] != "English":
|
214 |
+
summary = translate_text(summary, preferences["language"])
|
215 |
+
title = translate_text(title, preferences["language"])
|
216 |
+
|
217 |
+
formatted_summary = f"""
|
218 |
+
📰 {title}
|
219 |
+
📁 Category: {category}
|
220 |
+
⏰ {datetime.now().strftime('%Y-%m-%d %H:%M')}
|
221 |
+
|
222 |
+
{summary}
|
223 |
+
|
224 |
+
---"""
|
225 |
+
summaries.append(formatted_summary)
|
226 |
|
227 |
return "\n".join(summaries) if summaries else "No summaries generated. Please try again."
|
228 |
|
229 |
# Create Gradio interface
|
230 |
+
with gr.Blocks(title="Enhanced News Summarizer") as demo:
|
231 |
+
gr.Markdown("# 📰 Enhanced AI News Summarizer")
|
232 |
|
233 |
with gr.Tab("Set Preferences"):
|
234 |
name_input = gr.Textbox(label="Your Name")
|
235 |
language_dropdown = gr.Dropdown(
|
236 |
+
choices=[
|
237 |
+
"English", "Spanish", "French", "German",
|
238 |
+
"Italian", "Portuguese", "Russian", "Chinese",
|
239 |
+
"Japanese", "Korean", "Arabic"
|
240 |
+
],
|
241 |
label="Preferred Language",
|
242 |
value="English"
|
243 |
)
|
244 |
interests_checkboxes = gr.CheckboxGroup(
|
245 |
+
choices=list(NEWS_SOURCES.keys()),
|
246 |
+
label="News Interests (Select multiple)"
|
247 |
+
)
|
248 |
+
summary_length = gr.Slider(
|
249 |
+
minimum=50,
|
250 |
+
maximum=300,
|
251 |
+
value=150,
|
252 |
+
step=10,
|
253 |
+
label="Summary Length (words)"
|
254 |
+
)
|
255 |
+
update_frequency = gr.Dropdown(
|
256 |
+
choices=["30 minutes", "1 hour", "2 hours", "4 hours", "12 hours", "24 hours"],
|
257 |
+
label="News Update Frequency",
|
258 |
+
value="1 hour"
|
259 |
)
|
260 |
save_button = gr.Button("Save Preferences")
|
261 |
preferences_output = gr.Textbox(label="Status")
|
262 |
|
263 |
save_button.click(
|
264 |
save_user_preferences,
|
265 |
+
inputs=[name_input, language_dropdown, interests_checkboxes,
|
266 |
+
summary_length, update_frequency],
|
267 |
outputs=[preferences_output]
|
268 |
)
|
269 |
|
270 |
with gr.Tab("Get News Summary"):
|
271 |
name_check = gr.Textbox(label="Enter your name to get summary")
|
272 |
+
refresh_interval = gr.Dropdown(
|
273 |
+
choices=["Manual", "30 seconds", "1 minute", "5 minutes"],
|
274 |
+
label="Auto-refresh interval",
|
275 |
+
value="Manual"
|
276 |
+
)
|
277 |
get_summary_button = gr.Button("Get Summary")
|
278 |
summary_output = gr.Textbox(
|
279 |
label="Your Personalized News Summary",
|
280 |
+
lines=20
|
281 |
)
|
282 |
|
283 |
get_summary_button.click(
|