Spaces:
Running
Running
siddhartharya
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -19,6 +19,10 @@ import threading
|
|
19 |
# Import OpenAI library
|
20 |
import openai
|
21 |
|
|
|
|
|
|
|
|
|
22 |
# Set up logging to output to the console
|
23 |
logger = logging.getLogger(__name__)
|
24 |
logger.setLevel(logging.INFO)
|
@@ -295,6 +299,7 @@ Category: [One category]
|
|
295 |
time.sleep(wait_time)
|
296 |
except Exception as e:
|
297 |
logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
|
|
|
298 |
bookmark['summary'] = 'No summary available.'
|
299 |
bookmark['category'] = 'Uncategorized'
|
300 |
break # Exit the retry loop on other exceptions
|
@@ -337,7 +342,7 @@ def fetch_url_info(bookmark):
|
|
337 |
'User-Agent': 'Mozilla/5.0',
|
338 |
'Accept-Language': 'en-US,en;q=0.9',
|
339 |
}
|
340 |
-
response = requests.get(url, headers=headers, timeout=5, verify=
|
341 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
342 |
bookmark['status_code'] = response.status_code
|
343 |
|
@@ -357,6 +362,13 @@ def fetch_url_info(bookmark):
|
|
357 |
bookmark['description'] = ''
|
358 |
logger.info(f"Fetched information for {url}")
|
359 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
except requests.exceptions.Timeout:
|
361 |
bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
|
362 |
bookmark['etag'] = 'N/A'
|
@@ -390,7 +402,15 @@ def vectorize_and_index(bookmarks_list):
|
|
390 |
global faiss_index
|
391 |
logger.info("Vectorizing summaries and building FAISS index")
|
392 |
try:
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
embeddings = get_embedding_model().encode(summaries).astype('float32')
|
395 |
dimension = embeddings.shape[1]
|
396 |
if faiss_index is None:
|
@@ -509,6 +529,13 @@ def process_uploaded_file(file, state_bookmarks):
|
|
509 |
with ThreadPoolExecutor(max_workers=3) as executor: # Adjusted max_workers
|
510 |
executor.map(generate_summary_and_assign_category, bookmarks)
|
511 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
512 |
try:
|
513 |
vectorize_and_index(bookmarks)
|
514 |
except Exception as e:
|
|
|
19 |
# Import OpenAI library
|
20 |
import openai
|
21 |
|
22 |
+
# Suppress only the one urllib3 warning that needs silencing (InsecureRequestWarning).
|
23 |
+
import urllib3
|
24 |
+
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
25 |
+
|
26 |
# Set up logging to output to the console
|
27 |
logger = logging.getLogger(__name__)
|
28 |
logger.setLevel(logging.INFO)
|
|
|
299 |
time.sleep(wait_time)
|
300 |
except Exception as e:
|
301 |
logger.error(f"Error generating summary and assigning category: {e}", exc_info=True)
|
302 |
+
# Ensure 'summary' is always set, even on failure
|
303 |
bookmark['summary'] = 'No summary available.'
|
304 |
bookmark['category'] = 'Uncategorized'
|
305 |
break # Exit the retry loop on other exceptions
|
|
|
342 |
'User-Agent': 'Mozilla/5.0',
|
343 |
'Accept-Language': 'en-US,en;q=0.9',
|
344 |
}
|
345 |
+
response = requests.get(url, headers=headers, timeout=5, verify=True, allow_redirects=True) # Set verify=True
|
346 |
bookmark['etag'] = response.headers.get('ETag', 'N/A')
|
347 |
bookmark['status_code'] = response.status_code
|
348 |
|
|
|
362 |
bookmark['description'] = ''
|
363 |
logger.info(f"Fetched information for {url}")
|
364 |
|
365 |
+
except requests.exceptions.SSLError as e:
|
366 |
+
bookmark['dead_link'] = True
|
367 |
+
bookmark['etag'] = 'N/A'
|
368 |
+
bookmark['status_code'] = 'SSL Error'
|
369 |
+
bookmark['description'] = ''
|
370 |
+
bookmark['html_content'] = ''
|
371 |
+
logger.error(f"SSL error fetching URL info for {url}: {e}", exc_info=True)
|
372 |
except requests.exceptions.Timeout:
|
373 |
bookmark['dead_link'] = False # Mark as 'Unknown' instead of 'Dead'
|
374 |
bookmark['etag'] = 'N/A'
|
|
|
402 |
global faiss_index
|
403 |
logger.info("Vectorizing summaries and building FAISS index")
|
404 |
try:
|
405 |
+
# Use .get('summary', '') to avoid KeyError
|
406 |
+
summaries = [bookmark.get('summary', '') for bookmark in bookmarks_list]
|
407 |
+
|
408 |
+
# Check for any empty summaries and log them
|
409 |
+
for i, summary in enumerate(summaries):
|
410 |
+
if not summary:
|
411 |
+
logger.warning(f"Bookmark at index {i} is missing a summary.")
|
412 |
+
summaries[i] = 'No summary available.'
|
413 |
+
|
414 |
embeddings = get_embedding_model().encode(summaries).astype('float32')
|
415 |
dimension = embeddings.shape[1]
|
416 |
if faiss_index is None:
|
|
|
529 |
with ThreadPoolExecutor(max_workers=3) as executor: # Adjusted max_workers
|
530 |
executor.map(generate_summary_and_assign_category, bookmarks)
|
531 |
|
532 |
+
# Log bookmarks to verify 'summary' and 'category' presence
|
533 |
+
for idx, bookmark in enumerate(bookmarks):
|
534 |
+
if 'summary' not in bookmark or 'category' not in bookmark:
|
535 |
+
logger.error(f"Bookmark at index {idx} is missing 'summary' or 'category': {bookmark}")
|
536 |
+
else:
|
537 |
+
logger.debug(f"Bookmark {idx} processed with summary and category.")
|
538 |
+
|
539 |
try:
|
540 |
vectorize_and_index(bookmarks)
|
541 |
except Exception as e:
|