Spaces:
Running
Running
siddhartharya
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -201,14 +201,13 @@ You are a knowledgeable assistant.
|
|
201 |
|
202 |
The user provided a URL: {bookmark.get('url')}
|
203 |
|
204 |
-
Please provide a concise summary
|
205 |
|
206 |
Focus on:
|
207 |
- The main purpose or topic of the website.
|
208 |
- Key information or features.
|
209 |
-
- Target audience or use case (if apparent).
|
210 |
|
211 |
-
Be
|
212 |
"""
|
213 |
else:
|
214 |
# Construct the prompt with the extracted content
|
@@ -219,12 +218,11 @@ Analyze the following webpage content:
|
|
219 |
|
220 |
{content_text}
|
221 |
|
222 |
-
Provide a concise summary
|
223 |
- The main purpose or topic of the page.
|
224 |
- Key information or features.
|
225 |
-
- Target audience or use case (if apparent).
|
226 |
|
227 |
-
Be
|
228 |
"""
|
229 |
|
230 |
# Call the LLM via Groq Cloud API
|
@@ -235,7 +233,7 @@ Be factual and objective.
|
|
235 |
messages=[
|
236 |
{"role": "user", "content": prompt}
|
237 |
],
|
238 |
-
max_tokens=
|
239 |
temperature=0.5,
|
240 |
)
|
241 |
break # Exit loop if successful
|
@@ -288,9 +286,11 @@ async def fetch_url_info(session, bookmark):
|
|
288 |
bookmark.update(fetch_cache[url])
|
289 |
return bookmark
|
290 |
|
291 |
-
max_retries =
|
292 |
retries = 0
|
293 |
-
|
|
|
|
|
294 |
try:
|
295 |
logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
|
296 |
headers = {
|
@@ -299,7 +299,7 @@ async def fetch_url_info(session, bookmark):
|
|
299 |
'Chrome/91.0.4472.124 Safari/537.36',
|
300 |
'Accept-Language': 'en-US,en;q=0.9',
|
301 |
}
|
302 |
-
async with session.get(url, timeout=
|
303 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
304 |
bookmark['status_code'] = response.status
|
305 |
|
@@ -322,14 +322,14 @@ async def fetch_url_info(session, bookmark):
|
|
322 |
|
323 |
except asyncio.exceptions.TimeoutError:
|
324 |
retries += 1
|
325 |
-
|
326 |
-
|
327 |
-
bookmark['dead_link'] = True
|
328 |
bookmark['etag'] = 'N/A'
|
329 |
bookmark['status_code'] = 'Timeout'
|
330 |
bookmark['description'] = ''
|
331 |
bookmark['html_content'] = ''
|
332 |
-
|
|
|
333 |
except Exception as e:
|
334 |
bookmark['dead_link'] = True
|
335 |
bookmark['etag'] = 'N/A'
|
@@ -345,12 +345,13 @@ async def fetch_url_info(session, bookmark):
|
|
345 |
'dead_link': bookmark.get('dead_link'),
|
346 |
'description': bookmark.get('description'),
|
347 |
'html_content': bookmark.get('html_content', ''),
|
|
|
348 |
}
|
349 |
return bookmark
|
350 |
|
351 |
async def process_bookmarks_async(bookmarks_list):
|
352 |
"""
|
353 |
-
|
354 |
"""
|
355 |
logger.info("Processing bookmarks asynchronously")
|
356 |
try:
|
@@ -456,20 +457,25 @@ def display_bookmarks():
|
|
456 |
cards = ''
|
457 |
for i, bookmark in enumerate(bookmarks):
|
458 |
index = i + 1
|
459 |
-
status = "❌ Dead Link" if bookmark.get('dead_link') else "✅ Active"
|
460 |
-
title = bookmark['title']
|
461 |
-
url = bookmark['url']
|
462 |
-
etag = bookmark.get('etag', 'N/A')
|
463 |
-
summary = bookmark.get('summary', '')
|
464 |
-
category = bookmark.get('category', 'Uncategorized')
|
465 |
-
|
466 |
if bookmark.get('dead_link'):
|
|
|
467 |
card_style = "border: 2px solid var(--error-color);"
|
468 |
text_style = "color: var(--error-color);"
|
|
|
|
|
|
|
|
|
469 |
else:
|
|
|
470 |
card_style = "border: 2px solid var(--success-color);"
|
471 |
text_style = "color: var(--text-color);"
|
472 |
|
|
|
|
|
|
|
|
|
|
|
|
|
473 |
# Escape HTML content to prevent XSS attacks
|
474 |
from html import escape
|
475 |
title = escape(title)
|
@@ -530,10 +536,12 @@ def process_uploaded_file(file):
|
|
530 |
logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
|
531 |
return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
|
532 |
|
533 |
-
#
|
534 |
for bookmark in bookmarks:
|
535 |
generate_summary(bookmark)
|
|
|
536 |
assign_category(bookmark)
|
|
|
537 |
|
538 |
try:
|
539 |
faiss_index = vectorize_and_index(bookmarks)
|
@@ -547,7 +555,7 @@ def process_uploaded_file(file):
|
|
547 |
# Generate displays and updates
|
548 |
bookmark_html = display_bookmarks()
|
549 |
choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
|
550 |
-
|
551 |
|
552 |
return message, bookmark_html, gr.update(choices=choices), bookmark_html
|
553 |
|
|
|
201 |
|
202 |
The user provided a URL: {bookmark.get('url')}
|
203 |
|
204 |
+
Please provide a concise summary in **no more than two sentences** about this website based on your knowledge.
|
205 |
|
206 |
Focus on:
|
207 |
- The main purpose or topic of the website.
|
208 |
- Key information or features.
|
|
|
209 |
|
210 |
+
Be concise and objective.
|
211 |
"""
|
212 |
else:
|
213 |
# Construct the prompt with the extracted content
|
|
|
218 |
|
219 |
{content_text}
|
220 |
|
221 |
+
Provide a concise summary in **no more than two sentences** focusing on:
|
222 |
- The main purpose or topic of the page.
|
223 |
- Key information or features.
|
|
|
224 |
|
225 |
+
Be concise and objective.
|
226 |
"""
|
227 |
|
228 |
# Call the LLM via Groq Cloud API
|
|
|
233 |
messages=[
|
234 |
{"role": "user", "content": prompt}
|
235 |
],
|
236 |
+
max_tokens=100, # Reduced max tokens
|
237 |
temperature=0.5,
|
238 |
)
|
239 |
break # Exit loop if successful
|
|
|
286 |
bookmark.update(fetch_cache[url])
|
287 |
return bookmark
|
288 |
|
289 |
+
max_retries = 1
|
290 |
retries = 0
|
291 |
+
timeout_duration = 15 # Reduced timeout
|
292 |
+
|
293 |
+
while retries <= max_retries:
|
294 |
try:
|
295 |
logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
|
296 |
headers = {
|
|
|
299 |
'Chrome/91.0.4472.124 Safari/537.36',
|
300 |
'Accept-Language': 'en-US,en;q=0.9',
|
301 |
}
|
302 |
+
async with session.get(url, timeout=timeout_duration, headers=headers, ssl=False, allow_redirects=True) as response:
|
303 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
304 |
bookmark['status_code'] = response.status
|
305 |
|
|
|
322 |
|
323 |
except asyncio.exceptions.TimeoutError:
|
324 |
retries += 1
|
325 |
+
if retries > max_retries:
|
326 |
+
bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
|
|
|
327 |
bookmark['etag'] = 'N/A'
|
328 |
bookmark['status_code'] = 'Timeout'
|
329 |
bookmark['description'] = ''
|
330 |
bookmark['html_content'] = ''
|
331 |
+
bookmark['slow_link'] = True # Custom flag to indicate slow response
|
332 |
+
logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
|
333 |
except Exception as e:
|
334 |
bookmark['dead_link'] = True
|
335 |
bookmark['etag'] = 'N/A'
|
|
|
345 |
'dead_link': bookmark.get('dead_link'),
|
346 |
'description': bookmark.get('description'),
|
347 |
'html_content': bookmark.get('html_content', ''),
|
348 |
+
'slow_link': bookmark.get('slow_link', False),
|
349 |
}
|
350 |
return bookmark
|
351 |
|
352 |
async def process_bookmarks_async(bookmarks_list):
|
353 |
"""
|
354 |
+
Fetch all bookmarks asynchronously.
|
355 |
"""
|
356 |
logger.info("Processing bookmarks asynchronously")
|
357 |
try:
|
|
|
457 |
cards = ''
|
458 |
for i, bookmark in enumerate(bookmarks):
|
459 |
index = i + 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
if bookmark.get('dead_link'):
|
461 |
+
status = "❌ Dead Link"
|
462 |
card_style = "border: 2px solid var(--error-color);"
|
463 |
text_style = "color: var(--error-color);"
|
464 |
+
elif bookmark.get('slow_link'):
|
465 |
+
status = "⏳ Slow Response"
|
466 |
+
card_style = "border: 2px solid orange;"
|
467 |
+
text_style = "color: orange;"
|
468 |
else:
|
469 |
+
status = "✅ Active"
|
470 |
card_style = "border: 2px solid var(--success-color);"
|
471 |
text_style = "color: var(--text-color);"
|
472 |
|
473 |
+
title = bookmark['title']
|
474 |
+
url = bookmark['url']
|
475 |
+
etag = bookmark.get('etag', 'N/A')
|
476 |
+
summary = bookmark.get('summary', '')
|
477 |
+
category = bookmark.get('category', 'Uncategorized')
|
478 |
+
|
479 |
# Escape HTML content to prevent XSS attacks
|
480 |
from html import escape
|
481 |
title = escape(title)
|
|
|
536 |
logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
|
537 |
return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
|
538 |
|
539 |
+
# Process bookmarks sequentially
|
540 |
for bookmark in bookmarks:
|
541 |
generate_summary(bookmark)
|
542 |
+
time.sleep(0.5)
|
543 |
assign_category(bookmark)
|
544 |
+
time.sleep(0.5)
|
545 |
|
546 |
try:
|
547 |
faiss_index = vectorize_and_index(bookmarks)
|
|
|
555 |
# Generate displays and updates
|
556 |
bookmark_html = display_bookmarks()
|
557 |
choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
|
558 |
+
for i, bookmark in enumerate(bookmarks)]
|
559 |
|
560 |
return message, bookmark_html, gr.update(choices=choices), bookmark_html
|
561 |
|