siddhartharya committed on
Commit
ac8ac70
·
verified ·
1 Parent(s): 03eb0e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -2
app.py CHANGED
@@ -19,6 +19,10 @@ import threading
19
  # Import OpenAI library
20
  import openai
21
 
 
 
 
 
22
  # Set up logging to output to the console
23
  logger = logging.getLogger(__name__)
24
  logger.setLevel(logging.INFO)
@@ -295,6 +299,7 @@ Category: [One category]
295
  time.sleep(wait_time)
296
  except Exception as e:
297
  logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
 
298
  bookmark['summary'] = 'No summary available.'
299
  bookmark['category'] = 'Uncategorized'
300
  break # Exit the retry loop on other exceptions
@@ -337,7 +342,7 @@ def fetch_url_info(bookmark):
337
  'User-Agent': 'Mozilla/5.0',
338
  'Accept-Language': 'en-US,en;q=0.9',
339
  }
340
- response = requests.get(url, headers=headers, timeout=5, verify=False, allow_redirects=True)
341
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
342
  bookmark['status_code'] = response.status_code
343
 
@@ -357,6 +362,13 @@ def fetch_url_info(bookmark):
357
  bookmark['description'] = ''
358
  logger.info(f"Fetched information for {url}")
359
 
 
 
 
 
 
 
 
360
  except requests.exceptions.Timeout:
361
  bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
362
  bookmark['etag'] = 'N/A'
@@ -390,7 +402,15 @@ def vectorize_and_index(bookmarks_list):
390
  global faiss_index
391
  logger.info("Vectorizing summaries and building FAISS index")
392
  try:
393
- summaries = [bookmark['summary'] for bookmark in bookmarks_list]
 
 
 
 
 
 
 
 
394
  embeddings = get_embedding_model().encode(summaries).astype('float32')
395
  dimension = embeddings.shape[1]
396
  if faiss_index is None:
@@ -509,6 +529,13 @@ def process_uploaded_file(file, state_bookmarks):
509
  with ThreadPoolExecutor(max_workers=3) as executor: # Adjusted max_workers
510
  executor.map(generate_summary_and_assign_category, bookmarks)
511
 
 
 
 
 
 
 
 
512
  try:
513
  vectorize_and_index(bookmarks)
514
  except Exception as e:
 
19
  # Import OpenAI library
20
  import openai
21
 
22
+ # Suppress only the one urllib3 warning we need to silence (InsecureRequestWarning).
23
+ import urllib3
24
+ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
25
+
26
  # Set up logging to output to the console
27
  logger = logging.getLogger(__name__)
28
  logger.setLevel(logging.INFO)
 
299
  time.sleep(wait_time)
300
  except Exception as e:
301
  logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
302
+ # Ensure 'summary' is always set, even on failure
303
  bookmark['summary'] = 'No summary available.'
304
  bookmark['category'] = 'Uncategorized'
305
  break # Exit the retry loop on other exceptions
 
342
  'User-Agent': 'Mozilla/5.0',
343
  'Accept-Language': 'en-US,en;q=0.9',
344
  }
345
+ response = requests.get(url, headers=headers, timeout=5, verify=True, allow_redirects=True) # Set verify=True
346
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
347
  bookmark['status_code'] = response.status_code
348
 
 
362
  bookmark['description'] = ''
363
  logger.info(f"Fetched information for {url}")
364
 
365
+ except requests.exceptions.SSLError as e:
366
+ bookmark['dead_link'] = True
367
+ bookmark['etag'] = 'N/A'
368
+ bookmark['status_code'] = 'SSL Error'
369
+ bookmark['description'] = ''
370
+ bookmark['html_content'] = ''
371
+ logger.error(f"SSL error fetching URL info for {url}: {e}", exc_info=True)
372
  except requests.exceptions.Timeout:
373
  bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
374
  bookmark['etag'] = 'N/A'
 
402
  global faiss_index
403
  logger.info("Vectorizing summaries and building FAISS index")
404
  try:
405
+ # Use .get('summary', '') to avoid KeyError
406
+ summaries = [bookmark.get('summary', '') for bookmark in bookmarks_list]
407
+
408
+ # Check for any empty summaries and log them
409
+ for i, summary in enumerate(summaries):
410
+ if not summary:
411
+ logger.warning(f"Bookmark at index {i} is missing a summary.")
412
+ summaries[i] = 'No summary available.'
413
+
414
  embeddings = get_embedding_model().encode(summaries).astype('float32')
415
  dimension = embeddings.shape[1]
416
  if faiss_index is None:
 
529
  with ThreadPoolExecutor(max_workers=3) as executor: # Adjusted max_workers
530
  executor.map(generate_summary_and_assign_category, bookmarks)
531
 
532
+ # Log bookmarks to verify 'summary' and 'category' presence
533
+ for idx, bookmark in enumerate(bookmarks):
534
+ if 'summary' not in bookmark or 'category' not in bookmark:
535
+ logger.error(f"Bookmark at index {idx} is missing 'summary' or 'category': {bookmark}")
536
+ else:
537
+ logger.debug(f"Bookmark {idx} processed with summary and category.")
538
+
539
  try:
540
  vectorize_and_index(bookmarks)
541
  except Exception as e: