Spaces:
Sleeping
Sleeping
siddhartharya
committed on
Commit
•
47ee377
1
Parent(s):
fe49b51
Update app.py
Browse files
app.py
CHANGED
@@ -142,11 +142,11 @@ def get_page_metadata(soup):
|
|
142 |
|
143 |
return metadata
|
144 |
|
145 |
-
def
|
146 |
"""
|
147 |
-
Generate a concise summary
|
148 |
"""
|
149 |
-
logger.info(f"Generating summary for bookmark: {bookmark.get('url')}")
|
150 |
|
151 |
try:
|
152 |
html_content = bookmark.get('html_content', '')
|
@@ -182,6 +182,7 @@ def generate_summary(bookmark):
|
|
182 |
else:
|
183 |
use_prior_knowledge = False
|
184 |
|
|
|
185 |
if use_prior_knowledge:
|
186 |
# Construct prompt to use prior knowledge
|
187 |
prompt = f"""
|
@@ -189,28 +190,36 @@ You are a knowledgeable assistant with up-to-date information as of 2023.
|
|
189 |
|
190 |
The user provided a URL: {bookmark.get('url')}
|
191 |
|
192 |
-
Please provide
|
|
|
|
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
- Key information or features.
|
197 |
|
198 |
-
|
|
|
|
|
199 |
"""
|
200 |
else:
|
201 |
# Construct the prompt with the extracted content
|
202 |
prompt = f"""
|
203 |
-
You are a helpful assistant that creates concise webpage summaries.
|
204 |
|
205 |
Analyze the following webpage content:
|
206 |
|
207 |
{content_text}
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
212 |
|
213 |
-
|
|
|
|
|
214 |
"""
|
215 |
|
216 |
# Call the LLM via Groq Cloud API
|
@@ -219,71 +228,37 @@ Be concise and objective.
|
|
219 |
messages=[
|
220 |
{"role": "user", "content": prompt}
|
221 |
],
|
222 |
-
max_tokens=
|
223 |
temperature=0.5,
|
224 |
)
|
225 |
-
|
226 |
-
if not
|
227 |
-
raise ValueError("Empty
|
228 |
-
logger.info("Successfully generated LLM summary")
|
229 |
-
bookmark['summary'] = summary
|
230 |
-
time.sleep(3) # Wait to respect rate limits
|
231 |
-
|
232 |
-
except Exception as e:
|
233 |
-
logger.error(f"Error generating summary: {e}", exc_info=True)
|
234 |
-
bookmark['summary'] = 'No summary available.'
|
235 |
-
|
236 |
-
def assign_category(bookmark):
|
237 |
-
"""
|
238 |
-
Assign a category to a bookmark using the LLM based on its summary via the Groq Cloud API.
|
239 |
-
"""
|
240 |
-
if bookmark.get('dead_link'):
|
241 |
-
bookmark['category'] = 'Dead Link'
|
242 |
-
logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
|
243 |
-
return
|
244 |
-
|
245 |
-
summary = bookmark.get('summary', '')
|
246 |
-
if not summary:
|
247 |
-
bookmark['category'] = 'Uncategorized'
|
248 |
-
return
|
249 |
-
|
250 |
-
# Prepare the prompt
|
251 |
-
categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES if cat != 'Dead Link'])
|
252 |
-
prompt = f"""
|
253 |
-
You are a helpful assistant that categorizes webpages.
|
254 |
|
255 |
-
|
|
|
|
|
256 |
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
model='llama-3.1-70b-versatile',
|
269 |
-
messages=[
|
270 |
-
{"role": "user", "content": prompt}
|
271 |
-
],
|
272 |
-
max_tokens=10,
|
273 |
-
temperature=0,
|
274 |
-
)
|
275 |
-
category = response['choices'][0]['message']['content'].strip().strip('"')
|
276 |
-
# Validate the category
|
277 |
-
if category in CATEGORIES:
|
278 |
-
bookmark['category'] = category
|
279 |
-
logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
|
280 |
else:
|
281 |
bookmark['category'] = 'Uncategorized'
|
282 |
-
|
283 |
-
|
|
|
284 |
|
285 |
except Exception as e:
|
286 |
-
logger.error(f"Error assigning category: {e}", exc_info=True)
|
|
|
287 |
bookmark['category'] = 'Uncategorized'
|
288 |
|
289 |
def parse_bookmarks(file_content):
|
@@ -395,15 +370,15 @@ def display_bookmarks():
|
|
395 |
if bookmark.get('dead_link'):
|
396 |
status = "❌ Dead Link"
|
397 |
card_style = "border: 2px solid red;"
|
398 |
-
text_style = "color:
|
399 |
elif bookmark.get('slow_link'):
|
400 |
status = "⏳ Slow Response"
|
401 |
card_style = "border: 2px solid orange;"
|
402 |
-
text_style = "color:
|
403 |
else:
|
404 |
status = "✅ Active"
|
405 |
card_style = "border: 2px solid green;"
|
406 |
-
text_style = "color:
|
407 |
|
408 |
title = bookmark['title']
|
409 |
url = bookmark['url']
|
@@ -419,7 +394,7 @@ def display_bookmarks():
|
|
419 |
category = escape(category)
|
420 |
|
421 |
card_html = f'''
|
422 |
-
<div class="card" style="{card_style}
|
423 |
<div class="card-content">
|
424 |
<h3 style="{text_style}">{index}. {title} {status}</h3>
|
425 |
<p style="{text_style}"><strong>Category:</strong> {category}</p>
|
@@ -468,10 +443,14 @@ def process_uploaded_file(file):
|
|
468 |
for bookmark in bookmarks:
|
469 |
fetch_url_info(bookmark)
|
470 |
|
471 |
-
# Process bookmarks sequentially with LLM
|
472 |
for bookmark in bookmarks:
|
473 |
-
|
474 |
-
|
|
|
|
|
|
|
|
|
475 |
|
476 |
try:
|
477 |
faiss_index = vectorize_and_index(bookmarks)
|
@@ -628,7 +607,7 @@ Provide a concise and helpful response.
|
|
628 |
)
|
629 |
answer = response['choices'][0]['message']['content'].strip()
|
630 |
logger.info("Chatbot response generated")
|
631 |
-
time.sleep(
|
632 |
return answer
|
633 |
|
634 |
except Exception as e:
|
|
|
142 |
|
143 |
return metadata
|
144 |
|
145 |
+
def generate_summary_and_assign_category(bookmark):
|
146 |
"""
|
147 |
+
Generate a concise summary and assign a category using a single LLM call.
|
148 |
"""
|
149 |
+
logger.info(f"Generating summary and assigning category for bookmark: {bookmark.get('url')}")
|
150 |
|
151 |
try:
|
152 |
html_content = bookmark.get('html_content', '')
|
|
|
182 |
else:
|
183 |
use_prior_knowledge = False
|
184 |
|
185 |
+
# Prepare the prompt
|
186 |
if use_prior_knowledge:
|
187 |
# Construct prompt to use prior knowledge
|
188 |
prompt = f"""
|
|
|
190 |
|
191 |
The user provided a URL: {bookmark.get('url')}
|
192 |
|
193 |
+
Please provide:
|
194 |
+
1. A concise summary in **no more than two sentences** about this website.
|
195 |
+
2. Assign the most appropriate category from the list below for this website.
|
196 |
|
197 |
+
Categories:
|
198 |
+
{', '.join([f'"{cat}"' for cat in CATEGORIES])}
|
|
|
199 |
|
200 |
+
Provide your response in the following format:
|
201 |
+
Summary: [Your summary here]
|
202 |
+
Category: [One of the categories]
|
203 |
"""
|
204 |
else:
|
205 |
# Construct the prompt with the extracted content
|
206 |
prompt = f"""
|
207 |
+
You are a helpful assistant that creates concise webpage summaries and assigns categories.
|
208 |
|
209 |
Analyze the following webpage content:
|
210 |
|
211 |
{content_text}
|
212 |
|
213 |
+
Please provide:
|
214 |
+
1. A concise summary in **no more than two sentences** focusing on the main purpose or topic of the page and key information or features.
|
215 |
+
2. Assign the most appropriate category from the list below for this webpage.
|
216 |
+
|
217 |
+
Categories:
|
218 |
+
{', '.join([f'"{cat}"' for cat in CATEGORIES])}
|
219 |
|
220 |
+
Provide your response in the following format:
|
221 |
+
Summary: [Your summary here]
|
222 |
+
Category: [One of the categories]
|
223 |
"""
|
224 |
|
225 |
# Call the LLM via Groq Cloud API
|
|
|
228 |
messages=[
|
229 |
{"role": "user", "content": prompt}
|
230 |
],
|
231 |
+
max_tokens=200,
|
232 |
temperature=0.5,
|
233 |
)
|
234 |
+
content = response['choices'][0]['message']['content'].strip()
|
235 |
+
if not content:
|
236 |
+
raise ValueError("Empty response received from the model.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
|
238 |
+
# Parse the response
|
239 |
+
summary_match = re.search(r"Summary:\s*(.*)", content)
|
240 |
+
category_match = re.search(r"Category:\s*(.*)", content)
|
241 |
|
242 |
+
if summary_match:
|
243 |
+
bookmark['summary'] = summary_match.group(1).strip()
|
244 |
+
else:
|
245 |
+
bookmark['summary'] = 'No summary available.'
|
246 |
+
|
247 |
+
if category_match:
|
248 |
+
category = category_match.group(1).strip().strip('"')
|
249 |
+
if category in CATEGORIES:
|
250 |
+
bookmark['category'] = category
|
251 |
+
else:
|
252 |
+
bookmark['category'] = 'Uncategorized'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
253 |
else:
|
254 |
bookmark['category'] = 'Uncategorized'
|
255 |
+
|
256 |
+
logger.info("Successfully generated summary and assigned category")
|
257 |
+
time.sleep(1) # Reduced sleep time
|
258 |
|
259 |
except Exception as e:
|
260 |
+
logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
|
261 |
+
bookmark['summary'] = 'No summary available.'
|
262 |
bookmark['category'] = 'Uncategorized'
|
263 |
|
264 |
def parse_bookmarks(file_content):
|
|
|
370 |
if bookmark.get('dead_link'):
|
371 |
status = "❌ Dead Link"
|
372 |
card_style = "border: 2px solid red;"
|
373 |
+
text_style = "color: white;" # Set font color to white
|
374 |
elif bookmark.get('slow_link'):
|
375 |
status = "⏳ Slow Response"
|
376 |
card_style = "border: 2px solid orange;"
|
377 |
+
text_style = "color: white;" # Set font color to white
|
378 |
else:
|
379 |
status = "✅ Active"
|
380 |
card_style = "border: 2px solid green;"
|
381 |
+
text_style = "color: white;" # Set font color to white
|
382 |
|
383 |
title = bookmark['title']
|
384 |
url = bookmark['url']
|
|
|
394 |
category = escape(category)
|
395 |
|
396 |
card_html = f'''
|
397 |
+
<div class="card" style="{card_style} padding: 10px; margin: 10px; border-radius: 5px; background-color: #1e1e1e;">
|
398 |
<div class="card-content">
|
399 |
<h3 style="{text_style}">{index}. {title} {status}</h3>
|
400 |
<p style="{text_style}"><strong>Category:</strong> {category}</p>
|
|
|
443 |
for bookmark in bookmarks:
|
444 |
fetch_url_info(bookmark)
|
445 |
|
446 |
+
# Process bookmarks sequentially with combined LLM call
|
447 |
for bookmark in bookmarks:
|
448 |
+
if bookmark.get('dead_link'):
|
449 |
+
bookmark['summary'] = 'No summary available.'
|
450 |
+
bookmark['category'] = 'Dead Link'
|
451 |
+
logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
|
452 |
+
else:
|
453 |
+
generate_summary_and_assign_category(bookmark)
|
454 |
|
455 |
try:
|
456 |
faiss_index = vectorize_and_index(bookmarks)
|
|
|
607 |
)
|
608 |
answer = response['choices'][0]['message']['content'].strip()
|
609 |
logger.info("Chatbot response generated")
|
610 |
+
time.sleep(1) # Reduced sleep time
|
611 |
return answer
|
612 |
|
613 |
except Exception as e:
|