Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Files Community

siddhartharya committed on Nov 25, 2024

Commit

e44b0c3

verified ·

1 Parent(s): fb6f5e6

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -84

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import base64
 import logging
 import os
 import sys
 # Import OpenAI library
 import openai
@@ -74,6 +75,16 @@ if not GROQ_API_KEY:
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
@@ -173,10 +184,13 @@ def generate_summary(bookmark):
         content_text = '\n'.join(content_parts)
         # Detect insufficient or erroneous content
-        error_keywords = ['Access Denied', 'Error', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic', 'Page Not Found', '404 Not Found', 'Forbidden']
-        if not content_text or len(content_text.split()) < 50 or any(keyword.lower() in content_text.lower() for keyword in error_keywords):
             use_prior_knowledge = True
-            logger.info(f"Content for {bookmark.get('url')} is insufficient or contains error messages. Instructing LLM to use prior knowledge.")
         else:
             use_prior_knowledge = False
@@ -205,8 +219,6 @@ Analyze the following webpage content:
 {content_text}
-If the content is insufficient or seems to be an error page, please use your own knowledge to provide an accurate summary.
 Provide a concise summary (2-3 sentences) focusing on:
 - The main purpose or topic of the page.
 - Key information or features.
@@ -216,14 +228,25 @@ Be factual and objective.
 """
         # Call the LLM via Groq Cloud API
-        response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=200,
-            temperature=0.5,
-        )
         summary = response['choices'][0]['message']['content'].strip()
         if not summary:
@@ -265,49 +288,64 @@ async def fetch_url_info(session, bookmark):
         bookmark.update(fetch_cache[url])
         return bookmark
-    try:
-        logger.info(f"Fetching URL info for: {url}")
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
-                          'AppleWebKit/537.36 (KHTML, like Gecko) '
-                          'Chrome/91.0.4472.124 Safari/537.36',
-            'Accept-Language': 'en-US,en;q=0.9',
-        }
-        async with session.get(url, timeout=20, headers=headers, ssl=False, allow_redirects=True) as response:
-            bookmark['etag'] = response.headers.get('ETag', 'N/A')
-            bookmark['status_code'] = response.status
-            content = await response.text()
-            logger.info(f"Fetched content length for {url}: {len(content)} characters")
-            # Handle status codes
-            if response.status >= 500:
-                # Server error, consider as dead link
                 bookmark['dead_link'] = True
                 bookmark['description'] = ''
                 bookmark['html_content'] = ''
-                logger.warning(f"Dead link detected: {url} with status {response.status}")
-            else:
-                bookmark['dead_link'] = False
-                bookmark['html_content'] = content
-                bookmark['description'] = ''
-                logger.info(f"Fetched information for {url}")
-    except Exception as e:
-        bookmark['dead_link'] = True
-        bookmark['etag'] = 'N/A'
-        bookmark['status_code'] = 'N/A'
-        bookmark['description'] = ''
-        bookmark['html_content'] = ''
-        logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
-    finally:
-        fetch_cache[url] = {
-            'etag': bookmark.get('etag'),
-            'status_code': bookmark.get('status_code'),
-            'dead_link': bookmark.get('dead_link'),
-            'description': bookmark.get('description'),
-            'html_content': bookmark.get('html_content', ''),
-        }
     return bookmark
 async def process_bookmarks_async(bookmarks_list):
@@ -317,7 +355,7 @@ async def process_bookmarks_async(bookmarks_list):
     logger.info("Processing bookmarks asynchronously")
     try:
         connector = aiohttp.TCPConnector(limit=5)  # Limit concurrent connections
-        timeout = aiohttp.ClientTimeout(total=30)  # Set timeout
         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
             tasks = []
             for bookmark in bookmarks_list:
@@ -359,32 +397,37 @@ Categories:
 Respond with only the category name.
 """
-    try:
-        response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=10,
-            temperature=0,
-        )
-        category = response['choices'][0]['message']['content'].strip().strip('"')
-        # Validate the category
-        if category in CATEGORIES:
-            bookmark['category'] = category
-            logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
-        else:
             bookmark['category'] = 'Uncategorized'
-            logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
-        return bookmark
-    except Exception as e:
-        logger.error(f"Error assigning category: {e}", exc_info=True)
         bookmark['category'] = 'Uncategorized'
-        return bookmark
 def vectorize_and_index(bookmarks_list):
     """
@@ -637,14 +680,25 @@ Bookmarks:
 Provide a concise and helpful response.
 """
-        response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=500,
-            temperature=0.7,
-        )
         answer = response['choices'][0]['message']['content'].strip()
         logger.info("Chatbot response generated using Groq Cloud API")

 import logging
 import os
 import sys
+import time
 # Import OpenAI library
 import openai
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"
def extract_retry_after(error_message):
    """
    Extract the retry-after time from the rate limit error message.

    Looks for a "Please try again in <duration>" hint and converts the
    duration to seconds. The duration may be a plain seconds value
    ("7.5s") or a compound value built from hour/minute/second/millisecond
    components ("1m30.5s", "500ms"). Compound and millisecond forms are
    presumably what the API emits for longer or very short waits; confirm
    against live error messages.

    Args:
        error_message: Text of the rate limit error (typically ``str(exc)``).

    Returns:
        Number of seconds to wait before retrying: the parsed duration plus
        a 1 second buffer, or 5 when no hint can be found.
    """
    default_delay = 5  # Default retry after 5 seconds

    # An empty or missing message cannot carry a hint.
    if not error_message:
        return default_delay

    # "ms" is listed before "m" so "500ms" is not read as 500 minutes.
    component = r'\d+\.?\d*(?:ms|h|m|s)'
    match = re.search(r'Please try again in ((?:' + component + r')+)', error_message)
    if not match:
        return default_delay

    seconds_per_unit = {'h': 3600.0, 'm': 60.0, 's': 1.0, 'ms': 0.001}
    total_seconds = 0.0
    for value, unit in re.findall(r'(\d+\.?\d*)(ms|h|m|s)', match.group(1)):
        total_seconds += float(value) * seconds_per_unit[unit]

    return total_seconds + 1  # Add a buffer of 1 second
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
         content_text = '\n'.join(content_parts)
         # Detect insufficient or erroneous content
+        error_keywords = ['Access Denied', 'Security Check', 'Cloudflare', 'captcha', 'unusual traffic']
+        if not content_text or len(content_text.split()) < 50:
+            use_prior_knowledge = True
+            logger.info(f"Content for {bookmark.get('url')} is insufficient. Instructing LLM to use prior knowledge.")
+        elif any(keyword.lower() in content_text.lower() for keyword in error_keywords):
             use_prior_knowledge = True
+            logger.info(f"Content for {bookmark.get('url')} contains error messages. Instructing LLM to use prior knowledge.")
         else:
             use_prior_knowledge = False
 {content_text}
 Provide a concise summary (2-3 sentences) focusing on:
 - The main purpose or topic of the page.
 - Key information or features.
 """
         # Call the LLM via Groq Cloud API
+        while True:
+            try:
+                response = openai.ChatCompletion.create(
+                    model='llama-3.1-70b-versatile',
+                    messages=[
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=200,
+                    temperature=0.5,
+                )
+                break  # Exit loop if successful
+            except openai.error.RateLimitError as e:
+                retry_after = extract_retry_after(str(e))
+                logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
+                time.sleep(retry_after)
+            except Exception as e:
+                logger.error(f"Error generating summary: {e}", exc_info=True)
+                bookmark['summary'] = 'No summary available.'
+                return bookmark
         summary = response['choices'][0]['message']['content'].strip()
         if not summary:
         bookmark.update(fetch_cache[url])
         return bookmark
+    max_retries = 3
+    retries = 0
+    while retries < max_retries:
+        try:
+            logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+                              'AppleWebKit/537.36 (KHTML, like Gecko) '
+                              'Chrome/91.0.4472.124 Safari/537.36',
+                'Accept-Language': 'en-US,en;q=0.9',
+            }
+            async with session.get(url, timeout=60, headers=headers, ssl=False, allow_redirects=True) as response:
+                bookmark['etag'] = response.headers.get('ETag', 'N/A')
+                bookmark['status_code'] = response.status
+                content = await response.text()
+                logger.info(f"Fetched content length for {url}: {len(content)} characters")
+                # Handle status codes
+                if response.status >= 500:
+                    # Server error, consider as dead link
+                    bookmark['dead_link'] = True
+                    bookmark['description'] = ''
+                    bookmark['html_content'] = ''
+                    logger.warning(f"Dead link detected: {url} with status {response.status}")
+                else:
+                    bookmark['dead_link'] = False
+                    bookmark['html_content'] = content
+                    bookmark['description'] = ''
+                    logger.info(f"Fetched information for {url}")
+                break  # Exit loop if successful
+        except asyncio.exceptions.TimeoutError:
+            retries += 1
+            logger.warning(f"Timeout while fetching {url}. Retrying ({retries}/{max_retries})...")
+            if retries == max_retries:
                 bookmark['dead_link'] = True
+                bookmark['etag'] = 'N/A'
+                bookmark['status_code'] = 'Timeout'
                 bookmark['description'] = ''
                 bookmark['html_content'] = ''
+                logger.error(f"Max retries reached for {url}. Marking as dead link.")
+        except Exception as e:
+            bookmark['dead_link'] = True
+            bookmark['etag'] = 'N/A'
+            bookmark['status_code'] = 'Error'
+            bookmark['description'] = ''
+            bookmark['html_content'] = ''
+            logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
+            break
+        finally:
+            fetch_cache[url] = {
+                'etag': bookmark.get('etag'),
+                'status_code': bookmark.get('status_code'),
+                'dead_link': bookmark.get('dead_link'),
+                'description': bookmark.get('description'),
+                'html_content': bookmark.get('html_content', ''),
+            }
     return bookmark
 async def process_bookmarks_async(bookmarks_list):
     logger.info("Processing bookmarks asynchronously")
     try:
         connector = aiohttp.TCPConnector(limit=5)  # Limit concurrent connections
+        timeout = aiohttp.ClientTimeout(total=60)  # Set timeout
         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
             tasks = []
             for bookmark in bookmarks_list:
 Respond with only the category name.
 """
+    while True:
+        try:
+            response = openai.ChatCompletion.create(
+                model='llama-3.1-70b-versatile',
+                messages=[
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=10,
+                temperature=0,
+            )
+            break  # Exit loop if successful
+        except openai.error.RateLimitError as e:
+            retry_after = extract_retry_after(str(e))
+            logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
+            time.sleep(retry_after)
+        except Exception as e:
+            logger.error(f"Error assigning category: {e}", exc_info=True)
             bookmark['category'] = 'Uncategorized'
+            return bookmark
+    category = response['choices'][0]['message']['content'].strip().strip('"')
+    # Validate the category
+    if category in CATEGORIES:
+        bookmark['category'] = category
+        logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
+    else:
         bookmark['category'] = 'Uncategorized'
+        logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
+    return bookmark
 def vectorize_and_index(bookmarks_list):
     """
 Provide a concise and helpful response.
 """
+        while True:
+            try:
+                response = openai.ChatCompletion.create(
+                    model='llama-3.1-70b-versatile',
+                    messages=[
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=500,
+                    temperature=0.7,
+                )
+                break  # Exit loop if successful
+            except openai.error.RateLimitError as e:
+                retry_after = extract_retry_after(str(e))
+                logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
+                time.sleep(retry_after)
+            except Exception as e:
+                error_message = f"⚠️ Error processing your query: {str(e)}"
+                logger.error(error_message, exc_info=True)
+                return error_message
         answer = response['choices'][0]['message']['content'].strip()
         logger.info("Chatbot response generated using Groq Cloud API")