siddhartharya committed on
Commit
1dbb950
·
verified ·
1 Parent(s): e44b0c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -24
app.py CHANGED
@@ -201,14 +201,13 @@ You are a knowledgeable assistant.
201
 
202
  The user provided a URL: {bookmark.get('url')}
203
 
204
- Please provide a concise summary (2-3 sentences) about this website based on your knowledge.
205
 
206
  Focus on:
207
  - The main purpose or topic of the website.
208
  - Key information or features.
209
- - Target audience or use case (if apparent).
210
 
211
- Be factual and objective.
212
  """
213
  else:
214
  # Construct the prompt with the extracted content
@@ -219,12 +218,11 @@ Analyze the following webpage content:
219
 
220
  {content_text}
221
 
222
- Provide a concise summary (2-3 sentences) focusing on:
223
  - The main purpose or topic of the page.
224
  - Key information or features.
225
- - Target audience or use case (if apparent).
226
 
227
- Be factual and objective.
228
  """
229
 
230
  # Call the LLM via Groq Cloud API
@@ -235,7 +233,7 @@ Be factual and objective.
235
  messages=[
236
  {"role": "user", "content": prompt}
237
  ],
238
- max_tokens=200,
239
  temperature=0.5,
240
  )
241
  break # Exit loop if successful
@@ -288,9 +286,11 @@ async def fetch_url_info(session, bookmark):
288
  bookmark.update(fetch_cache[url])
289
  return bookmark
290
 
291
- max_retries = 3
292
  retries = 0
293
- while retries < max_retries:
 
 
294
  try:
295
  logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
296
  headers = {
@@ -299,7 +299,7 @@ async def fetch_url_info(session, bookmark):
299
  'Chrome/91.0.4472.124 Safari/537.36',
300
  'Accept-Language': 'en-US,en;q=0.9',
301
  }
302
- async with session.get(url, timeout=60, headers=headers, ssl=False, allow_redirects=True) as response:
303
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
304
  bookmark['status_code'] = response.status
305
 
@@ -322,14 +322,14 @@ async def fetch_url_info(session, bookmark):
322
 
323
  except asyncio.exceptions.TimeoutError:
324
  retries += 1
325
- logger.warning(f"Timeout while fetching {url}. Retrying ({retries}/{max_retries})...")
326
- if retries == max_retries:
327
- bookmark['dead_link'] = True
328
  bookmark['etag'] = 'N/A'
329
  bookmark['status_code'] = 'Timeout'
330
  bookmark['description'] = ''
331
  bookmark['html_content'] = ''
332
- logger.error(f"Max retries reached for {url}. Marking as dead link.")
 
333
  except Exception as e:
334
  bookmark['dead_link'] = True
335
  bookmark['etag'] = 'N/A'
@@ -345,12 +345,13 @@ async def fetch_url_info(session, bookmark):
345
  'dead_link': bookmark.get('dead_link'),
346
  'description': bookmark.get('description'),
347
  'html_content': bookmark.get('html_content', ''),
 
348
  }
349
  return bookmark
350
 
351
  async def process_bookmarks_async(bookmarks_list):
352
  """
353
- Process all bookmarks asynchronously.
354
  """
355
  logger.info("Processing bookmarks asynchronously")
356
  try:
@@ -456,20 +457,25 @@ def display_bookmarks():
456
  cards = ''
457
  for i, bookmark in enumerate(bookmarks):
458
  index = i + 1
459
- status = "❌ Dead Link" if bookmark.get('dead_link') else "✅ Active"
460
- title = bookmark['title']
461
- url = bookmark['url']
462
- etag = bookmark.get('etag', 'N/A')
463
- summary = bookmark.get('summary', '')
464
- category = bookmark.get('category', 'Uncategorized')
465
-
466
  if bookmark.get('dead_link'):
 
467
  card_style = "border: 2px solid var(--error-color);"
468
  text_style = "color: var(--error-color);"
 
 
 
 
469
  else:
 
470
  card_style = "border: 2px solid var(--success-color);"
471
  text_style = "color: var(--text-color);"
472
 
 
 
 
 
 
 
473
  # Escape HTML content to prevent XSS attacks
474
  from html import escape
475
  title = escape(title)
@@ -530,10 +536,12 @@ def process_uploaded_file(file):
530
  logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
531
  return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
532
 
533
- # Generate summaries and assign categories
534
  for bookmark in bookmarks:
535
  generate_summary(bookmark)
 
536
  assign_category(bookmark)
 
537
 
538
  try:
539
  faiss_index = vectorize_and_index(bookmarks)
@@ -547,7 +555,7 @@ def process_uploaded_file(file):
547
  # Generate displays and updates
548
  bookmark_html = display_bookmarks()
549
  choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
550
- for i, bookmark in enumerate(bookmarks)]
551
 
552
  return message, bookmark_html, gr.update(choices=choices), bookmark_html
553
 
 
201
 
202
  The user provided a URL: {bookmark.get('url')}
203
 
204
+ Please provide a concise summary in **no more than two sentences** about this website based on your knowledge.
205
 
206
  Focus on:
207
  - The main purpose or topic of the website.
208
  - Key information or features.
 
209
 
210
+ Be concise and objective.
211
  """
212
  else:
213
  # Construct the prompt with the extracted content
 
218
 
219
  {content_text}
220
 
221
+ Provide a concise summary in **no more than two sentences** focusing on:
222
  - The main purpose or topic of the page.
223
  - Key information or features.
 
224
 
225
+ Be concise and objective.
226
  """
227
 
228
  # Call the LLM via Groq Cloud API
 
233
  messages=[
234
  {"role": "user", "content": prompt}
235
  ],
236
+ max_tokens=100, # Reduced max tokens
237
  temperature=0.5,
238
  )
239
  break # Exit loop if successful
 
286
  bookmark.update(fetch_cache[url])
287
  return bookmark
288
 
289
+ max_retries = 1
290
  retries = 0
291
+ timeout_duration = 15 # Reduced timeout
292
+
293
+ while retries <= max_retries:
294
  try:
295
  logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
296
  headers = {
 
299
  'Chrome/91.0.4472.124 Safari/537.36',
300
  'Accept-Language': 'en-US,en;q=0.9',
301
  }
302
+ async with session.get(url, timeout=timeout_duration, headers=headers, ssl=False, allow_redirects=True) as response:
303
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
304
  bookmark['status_code'] = response.status
305
 
 
322
 
323
  except asyncio.exceptions.TimeoutError:
324
  retries += 1
325
+ if retries > max_retries:
326
+ bookmark['dead_link'] = False # Not dead: a timeout is flagged via 'slow_link' and shown as 'Slow'
 
327
  bookmark['etag'] = 'N/A'
328
  bookmark['status_code'] = 'Timeout'
329
  bookmark['description'] = ''
330
  bookmark['html_content'] = ''
331
+ bookmark['slow_link'] = True # Custom flag to indicate slow response
332
+ logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
333
  except Exception as e:
334
  bookmark['dead_link'] = True
335
  bookmark['etag'] = 'N/A'
 
345
  'dead_link': bookmark.get('dead_link'),
346
  'description': bookmark.get('description'),
347
  'html_content': bookmark.get('html_content', ''),
348
+ 'slow_link': bookmark.get('slow_link', False),
349
  }
350
  return bookmark
351
 
352
  async def process_bookmarks_async(bookmarks_list):
353
  """
354
+ Fetch all bookmarks asynchronously.
355
  """
356
  logger.info("Processing bookmarks asynchronously")
357
  try:
 
457
  cards = ''
458
  for i, bookmark in enumerate(bookmarks):
459
  index = i + 1
 
 
 
 
 
 
 
460
  if bookmark.get('dead_link'):
461
+ status = "❌ Dead Link"
462
  card_style = "border: 2px solid var(--error-color);"
463
  text_style = "color: var(--error-color);"
464
+ elif bookmark.get('slow_link'):
465
+ status = "⏳ Slow Response"
466
+ card_style = "border: 2px solid orange;"
467
+ text_style = "color: orange;"
468
  else:
469
+ status = "✅ Active"
470
  card_style = "border: 2px solid var(--success-color);"
471
  text_style = "color: var(--text-color);"
472
 
473
+ title = bookmark['title']
474
+ url = bookmark['url']
475
+ etag = bookmark.get('etag', 'N/A')
476
+ summary = bookmark.get('summary', '')
477
+ category = bookmark.get('category', 'Uncategorized')
478
+
479
  # Escape HTML content to prevent XSS attacks
480
  from html import escape
481
  title = escape(title)
 
536
  logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
537
  return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
538
 
539
+ # Process bookmarks sequentially
540
  for bookmark in bookmarks:
541
  generate_summary(bookmark)
542
+ time.sleep(0.5)
543
  assign_category(bookmark)
544
+ time.sleep(0.5)
545
 
546
  try:
547
  faiss_index = vectorize_and_index(bookmarks)
 
555
  # Generate displays and updates
556
  bookmark_html = display_bookmarks()
557
  choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
558
+ for i, bookmark in enumerate(bookmarks)]
559
 
560
  return message, bookmark_html, gr.update(choices=choices), bookmark_html
561