siddhartharya committed on
Commit fe49b51
1 Parent(s): dcf746e

Update app.py

Files changed (1)
  1. app.py +106 -222
app.py CHANGED
@@ -5,14 +5,13 @@ from bs4 import BeautifulSoup
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
-import asyncio
-import aiohttp
+import requests
+import time
 import re
 import base64
 import logging
 import os
 import sys
-import time

 # Import OpenAI library
 import openai
@@ -71,23 +70,9 @@ GROQ_API_KEY = os.getenv('GROQ_API_KEY')
 if not GROQ_API_KEY:
     logger.error("GROQ_API_KEY environment variable not set.")

-# Set OpenAI API key and base URL to use Groq Cloud API
 openai.api_key = GROQ_API_KEY
 openai.api_base = "https://api.groq.com/openai/v1"

-def extract_retry_after(error_message):
-    """
-    Extract the retry-after time from the rate limit error message.
-    """
-    match = re.search(r'Please try again in (\d+\.?\d*)s', error_message)
-    if match:
-        return float(match.group(1)) + 1  # Add a buffer of 1 second
-    else:
-        return 5  # Default retry after 5 seconds
-
-def exponential_backoff(retries):
-    return min(60, (2 ** retries))  # Cap the wait time at 60 seconds
-
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
@@ -157,10 +142,6 @@ def get_page_metadata(soup):

     return metadata

-async def generate_summary_async(bookmark):
-    async with llm_semaphore:
-        await asyncio.get_event_loop().run_in_executor(None, generate_summary, bookmark)
-
 def generate_summary(bookmark):
     """
     Generate a concise summary for a bookmark using available content and LLM via the Groq Cloud API.
@@ -204,11 +185,11 @@ def generate_summary(bookmark):
         if use_prior_knowledge:
             # Construct prompt to use prior knowledge
             prompt = f"""
-You are a knowledgeable assistant.
+You are a knowledgeable assistant with up-to-date information as of 2023.

 The user provided a URL: {bookmark.get('url')}

-Please provide a concise summary in **no more than two sentences** about this website based on your knowledge.
+Please provide a concise summary in **no more than two sentences** about this website.

 Focus on:
 - The main purpose or topic of the website.
@@ -233,44 +214,24 @@ Be concise and objective.
 """

         # Call the LLM via Groq Cloud API
-        retries = 0
-        max_retries = 5
-        while retries <= max_retries:
-            try:
-                response = openai.ChatCompletion.create(
-                    model='llama-3.1-70b-versatile',
-                    messages=[
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=100,  # Reduced max tokens
-                    temperature=0.5,
-                )
-                break  # Exit loop if successful
-            except openai.error.RateLimitError as e:
-                retry_after = extract_retry_after(str(e)) or exponential_backoff(retries)
-                logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
-                time.sleep(retry_after)
-                retries += 1
-            except Exception as e:
-                logger.error(f"Error generating summary: {e}", exc_info=True)
-                bookmark['summary'] = 'No summary available.'
-                return bookmark
-
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=100,
+            temperature=0.5,
+        )
         summary = response['choices'][0]['message']['content'].strip()
         if not summary:
             raise ValueError("Empty summary received from the model.")
         logger.info("Successfully generated LLM summary")
         bookmark['summary'] = summary
-        return bookmark
+        time.sleep(3)  # Wait to respect rate limits

     except Exception as e:
         logger.error(f"Error generating summary: {e}", exc_info=True)
         bookmark['summary'] = 'No summary available.'
-        return bookmark
-
-async def assign_category_async(bookmark):
-    async with llm_semaphore:
-        await asyncio.get_event_loop().run_in_executor(None, assign_category, bookmark)

 def assign_category(bookmark):
     """
@@ -279,12 +240,12 @@ def assign_category(bookmark):
     if bookmark.get('dead_link'):
         bookmark['category'] = 'Dead Link'
         logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
-        return bookmark
+        return

     summary = bookmark.get('summary', '')
     if not summary:
         bookmark['category'] = 'Uncategorized'
-        return bookmark
+        return

     # Prepare the prompt
     categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES if cat != 'Dead Link'])
@@ -302,40 +263,28 @@ Categories:
 Respond with only the category name.
 """

-    retries = 0
-    max_retries = 5
-    while retries <= max_retries:
-        try:
-            response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',
-                messages=[
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=10,
-                temperature=0,
-            )
-            break  # Exit loop if successful
-        except openai.error.RateLimitError as e:
-            retry_after = extract_retry_after(str(e)) or exponential_backoff(retries)
-            logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
-            time.sleep(retry_after)
-            retries += 1
-        except Exception as e:
-            logger.error(f"Error assigning category: {e}", exc_info=True)
+    try:
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=10,
+            temperature=0,
+        )
+        category = response['choices'][0]['message']['content'].strip().strip('"')
+        # Validate the category
+        if category in CATEGORIES:
+            bookmark['category'] = category
+            logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
+        else:
             bookmark['category'] = 'Uncategorized'
-            return bookmark
-
-    category = response['choices'][0]['message']['content'].strip().strip('"')
+            logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
+        time.sleep(3)  # Wait to respect rate limits

-    # Validate the category
-    if category in CATEGORIES:
-        bookmark['category'] = category
-        logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
-    else:
+    except Exception as e:
+        logger.error(f"Error assigning category: {e}", exc_info=True)
         bookmark['category'] = 'Uncategorized'
-        logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
-
-    return bookmark

 def parse_bookmarks(file_content):
     """
@@ -356,109 +305,65 @@ def parse_bookmarks(file_content):
         logger.error("Error parsing bookmarks: %s", e, exc_info=True)
         raise

-async def fetch_url_info(session, bookmark):
+def fetch_url_info(bookmark):
     """
-    Fetch information about a URL asynchronously.
+    Fetch information about a URL.
     """
     url = bookmark['url']
     if url in fetch_cache:
         bookmark.update(fetch_cache[url])
-        return bookmark
-
-    max_retries = 0  # No retries
-    retries = 0
-    timeout_duration = 5  # Reduced timeout
-
-    while retries <= max_retries:
-        try:
-            logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
-            headers = {
-                'User-Agent': 'Mozilla/5.0',
-                'Accept-Language': 'en-US,en;q=0.9',
-            }
-            async with session.get(url, timeout=timeout_duration, headers=headers, ssl=False, allow_redirects=True) as response:
-                bookmark['etag'] = response.headers.get('ETag', 'N/A')
-                bookmark['status_code'] = response.status
-
-                content = await response.text()
-                logger.info(f"Fetched content length for {url}: {len(content)} characters")
-
-                # Handle status codes
-                if response.status >= 500:
-                    # Server error, consider as dead link
-                    bookmark['dead_link'] = True
-                    bookmark['description'] = ''
-                    bookmark['html_content'] = ''
-                    logger.warning(f"Dead link detected: {url} with status {response.status}")
-                else:
-                    bookmark['dead_link'] = False
-                    bookmark['html_content'] = content
-                    bookmark['description'] = ''
-                    logger.info(f"Fetched information for {url}")
-                break  # Exit loop if successful
-
-        except asyncio.exceptions.TimeoutError:
-            bookmark['dead_link'] = False  # Mark as 'Unknown' instead of 'Dead'
-            bookmark['etag'] = 'N/A'
-            bookmark['status_code'] = 'Timeout'
-            bookmark['description'] = ''
-            bookmark['html_content'] = ''
-            bookmark['slow_link'] = True  # Custom flag to indicate slow response
-            logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
-            break  # Exit loop after timeout
-        except Exception as e:
+        return
+
+    try:
+        logger.info(f"Fetching URL info for: {url}")
+        headers = {
+            'User-Agent': 'Mozilla/5.0',
+            'Accept-Language': 'en-US,en;q=0.9',
+        }
+        response = requests.get(url, headers=headers, timeout=5, verify=False, allow_redirects=True)
+        bookmark['etag'] = response.headers.get('ETag', 'N/A')
+        bookmark['status_code'] = response.status_code
+
+        content = response.text
+        logger.info(f"Fetched content length for {url}: {len(content)} characters")
+
+        # Handle status codes
+        if response.status_code >= 500:
+            # Server error, consider as dead link
             bookmark['dead_link'] = True
-            bookmark['etag'] = 'N/A'
-            bookmark['status_code'] = 'Error'
             bookmark['description'] = ''
             bookmark['html_content'] = ''
-            logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
-            break
-        finally:
-            fetch_cache[url] = {
-                'etag': bookmark.get('etag'),
-                'status_code': bookmark.get('status_code'),
-                'dead_link': bookmark.get('dead_link'),
-                'description': bookmark.get('description'),
-                'html_content': bookmark.get('html_content', ''),
-                'slow_link': bookmark.get('slow_link', False),
-            }
-    return bookmark
-
-async def process_bookmarks_async(bookmarks_list):
-    """
-    Fetch all bookmarks asynchronously.
-    """
-    logger.info("Processing bookmarks asynchronously")
-    try:
-        connector = aiohttp.TCPConnector(limit=10)  # Increase limit if necessary
-        timeout = aiohttp.ClientTimeout(total=60)  # Set timeout
-        async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
-            tasks = []
-            for bookmark in bookmarks_list:
-                task = asyncio.ensure_future(fetch_url_info(session, bookmark))
-                tasks.append(task)
-            await asyncio.gather(*tasks)
-        logger.info("Completed processing bookmarks asynchronously")
+            logger.warning(f"Dead link detected: {url} with status {response.status_code}")
+        else:
+            bookmark['dead_link'] = False
+            bookmark['html_content'] = content
+            bookmark['description'] = ''
+            logger.info(f"Fetched information for {url}")
+
+    except requests.exceptions.Timeout:
+        bookmark['dead_link'] = False  # Mark as 'Unknown' instead of 'Dead'
+        bookmark['etag'] = 'N/A'
+        bookmark['status_code'] = 'Timeout'
+        bookmark['description'] = ''
+        bookmark['html_content'] = ''
+        bookmark['slow_link'] = True  # Custom flag to indicate slow response
+        logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
     except Exception as e:
-        logger.error(f"Error in asynchronous processing of bookmarks: {e}", exc_info=True)
-        raise
-
-async def process_bookmarks_llm(bookmarks_list):
-    """
-    Process bookmarks asynchronously for LLM API calls.
-    """
-    logger.info("Processing bookmarks with LLM asynchronously")
-    tasks = []
-    for bookmark in bookmarks_list:
-        tasks.append(generate_summary_async(bookmark))
-    await asyncio.gather(*tasks)
-
-    tasks = []
-    for bookmark in bookmarks_list:
-        tasks.append(assign_category_async(bookmark))
-    await asyncio.gather(*tasks)
-    logger.info("Completed LLM processing of bookmarks")
+        bookmark['dead_link'] = True
+        bookmark['etag'] = 'N/A'
+        bookmark['status_code'] = 'Error'
+        bookmark['description'] = ''
+        bookmark['html_content'] = ''
+        logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
+    finally:
+        fetch_cache[url] = {
+            'etag': bookmark.get('etag'),
+            'status_code': bookmark.get('status_code'),
+            'dead_link': bookmark.get('dead_link'),
+            'description': bookmark.get('description'),
+            'html_content': bookmark.get('html_content', ''),
+            'slow_link': bookmark.get('slow_link', False),
+        }

 def vectorize_and_index(bookmarks_list):
     """
@@ -489,16 +394,16 @@ def display_bookmarks():
         index = i + 1
         if bookmark.get('dead_link'):
            status = "❌ Dead Link"
-            card_style = "border: 2px solid var(--error-color);"
-            text_style = "color: var(--error-color);"
+            card_style = "border: 2px solid red;"
+            text_style = "color: red;"
         elif bookmark.get('slow_link'):
            status = "⏳ Slow Response"
            card_style = "border: 2px solid orange;"
            text_style = "color: orange;"
         else:
            status = "✅ Active"
-            card_style = "border: 2px solid var(--success-color);"
-            text_style = "color: var(--text-color);"
+            card_style = "border: 2px solid green;"
+            text_style = "color: black;"

         title = bookmark['title']
         url = bookmark['url']
@@ -559,19 +464,14 @@ def process_uploaded_file(file):
     for idx, bookmark in enumerate(bookmarks):
         bookmark['id'] = idx

-    # Asynchronously fetch bookmark info
-    try:
-        asyncio.run(process_bookmarks_async(bookmarks))
-    except Exception as e:
-        logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
-        return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
+    # Fetch bookmark info sequentially
+    for bookmark in bookmarks:
+        fetch_url_info(bookmark)

-    # Asynchronously process bookmarks with LLM
-    try:
-        asyncio.run(process_bookmarks_llm(bookmarks))
-    except Exception as e:
-        logger.error(f"Error processing bookmarks with LLM: {e}", exc_info=True)
-        return "Error processing bookmarks with LLM.", '', gr.update(choices=[]), display_bookmarks()
+    # Process bookmarks sequentially with LLM
+    for bookmark in bookmarks:
+        generate_summary(bookmark)
+        assign_category(bookmark)

     try:
         faiss_index = vectorize_and_index(bookmarks)
@@ -642,7 +542,7 @@ def edit_selected_bookmarks_category(selected_indices, new_category):

     # Update choices and display
     choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-               for i, bookmark in enumerate(bookmarks)]
+               for i, bookmark in enumerate(bookmarks)]

     return message, gr.update(choices=choices), display_bookmarks()

@@ -718,31 +618,17 @@ Bookmarks:
 Provide a concise and helpful response.
 """

-        retries = 0
-        max_retries = 5
-        while retries <= max_retries:
-            try:
-                response = openai.ChatCompletion.create(
-                    model='llama-3.1-70b-versatile',
-                    messages=[
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=500,
-                    temperature=0.7,
-                )
-                break  # Exit loop if successful
-            except openai.error.RateLimitError as e:
-                retry_after = extract_retry_after(str(e)) or exponential_backoff(retries)
-                logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
-                time.sleep(retry_after)
-                retries += 1
-            except Exception as e:
-                error_message = f"⚠️ Error processing your query: {str(e)}"
-                logger.error(error_message, exc_info=True)
-                return error_message
-
+        response = openai.ChatCompletion.create(
+            model='llama-3.1-70b-versatile',
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=500,
+            temperature=0.7,
+        )
         answer = response['choices'][0]['message']['content'].strip()
-        logger.info("Chatbot response generated using Groq Cloud API")
+        logger.info("Chatbot response generated")
+        time.sleep(3)  # Wait to respect rate limits
         return answer

     except Exception as e:
@@ -868,6 +754,4 @@ def build_app():
        print(f"Error building the app: {e}")

 if __name__ == "__main__":
-    # Define a semaphore to limit concurrent LLM API calls
-    llm_semaphore = asyncio.Semaphore(3)  # Adjust based on allowed concurrency
     build_app()
 
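For reference, here is a minimal sketch of how the now-synchronous helpers are meant to be driven after this change, mirroring the sequential loops added to process_uploaded_file in the diff above. The bookmarks list below is a hypothetical sample (in app.py it comes from parse_bookmarks()), and the import assumes app.py can be imported as a module.

    # Illustrative sketch only, not part of the commit; assumes fetch_url_info,
    # generate_summary and assign_category are defined as in the diff above.
    from app import fetch_url_info, generate_summary, assign_category

    # Hypothetical sample input; in app.py this list comes from parse_bookmarks().
    bookmarks = [{'id': 0, 'url': 'https://example.com', 'title': 'Example'}]

    # First pass: fetch each page synchronously with requests.
    for bookmark in bookmarks:
        fetch_url_info(bookmark)

    # Second pass: one LLM call per step; each helper sleeps 3 seconds after a
    # successful call to stay under Groq rate limits, replacing the removed
    # retry/backoff logic.
    for bookmark in bookmarks:
        generate_summary(bookmark)
        assign_category(bookmark)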