Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Files Community

siddhartharya commited on Nov 25, 2024

Commit

8f32801

verified ·

1 Parent(s): 1dbb950

Update app.py

Browse files

Files changed (1) hide show

app.py +112 -239

app.py CHANGED Viewed

@@ -85,6 +85,9 @@ def extract_retry_after(error_message):
     else:
         return 5  # Default retry after 5 seconds
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
@@ -154,6 +157,10 @@ def get_page_metadata(soup):
     return metadata
 def generate_summary(bookmark):
     """
     Generate a concise summary for a bookmark using available content and LLM via the Groq Cloud API.
@@ -226,7 +233,9 @@ Be concise and objective.
 """
         # Call the LLM via Groq Cloud API
-        while True:
             try:
                 response = openai.ChatCompletion.create(
                     model='llama-3.1-70b-versatile',
@@ -238,9 +247,10 @@ Be concise and objective.
                 )
                 break  # Exit loop if successful
             except openai.error.RateLimitError as e:
-                retry_after = extract_retry_after(str(e))
                 logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
                 time.sleep(retry_after)
             except Exception as e:
                 logger.error(f"Error generating summary: {e}", exc_info=True)
                 bookmark['summary'] = 'No summary available.'
@@ -258,6 +268,75 @@ Be concise and objective.
         bookmark['summary'] = 'No summary available.'
         return bookmark
 def parse_bookmarks(file_content):
     """
     Parse bookmarks from HTML file.
@@ -286,17 +365,15 @@ async def fetch_url_info(session, bookmark):
         bookmark.update(fetch_cache[url])
         return bookmark
-    max_retries = 1
     retries = 0
-    timeout_duration = 15  # Reduced timeout
     while retries <= max_retries:
         try:
             logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
             headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
-                              'AppleWebKit/537.36 (KHTML, like Gecko) '
-                              'Chrome/91.0.4472.124 Safari/537.36',
                 'Accept-Language': 'en-US,en;q=0.9',
             }
             async with session.get(url, timeout=timeout_duration, headers=headers, ssl=False, allow_redirects=True) as response:
@@ -321,15 +398,14 @@ async def fetch_url_info(session, bookmark):
                 break  # Exit loop if successful
         except asyncio.exceptions.TimeoutError:
-            retries += 1
-            if retries > max_retries:
-                bookmark['dead_link'] = False  # Mark as 'Unknown' instead of 'Dead'
-                bookmark['etag'] = 'N/A'
-                bookmark['status_code'] = 'Timeout'
-                bookmark['description'] = ''
-                bookmark['html_content'] = ''
-                bookmark['slow_link'] = True  # Custom flag to indicate slow response
-                logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
         except Exception as e:
             bookmark['dead_link'] = True
             bookmark['etag'] = 'N/A'
@@ -355,7 +431,7 @@ async def process_bookmarks_async(bookmarks_list):
     """
     logger.info("Processing bookmarks asynchronously")
     try:
-        connector = aiohttp.TCPConnector(limit=5)  # Limit concurrent connections
         timeout = aiohttp.ClientTimeout(total=60)  # Set timeout
         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
             tasks = []
@@ -368,67 +444,17 @@ async def process_bookmarks_async(bookmarks_list):
         logger.error(f"Error in asynchronous processing of bookmarks: {e}", exc_info=True)
         raise
-def assign_category(bookmark):
     """
-    Assign a category to a bookmark using the LLM based on its summary via the Groq Cloud API.
     """
-    if bookmark.get('dead_link'):
-        bookmark['category'] = 'Dead Link'
-        logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
-        return bookmark
-    summary = bookmark.get('summary', '')
-    if not summary:
-        bookmark['category'] = 'Uncategorized'
-        return bookmark
-    # Prepare the prompt
-    categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES if cat != 'Dead Link'])
-    prompt = f"""
-You are a helpful assistant that categorizes webpages.
-Based on the following summary, assign the most appropriate category from the list below.
-Summary:
-{summary}
-Categories:
-{categories_str}
-Respond with only the category name.
-"""
-    while True:
-        try:
-            response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',
-                messages=[
-                    {"role": "user", "content": prompt}
-                ],
-                max_tokens=10,
-                temperature=0,
-            )
-            break  # Exit loop if successful
-        except openai.error.RateLimitError as e:
-            retry_after = extract_retry_after(str(e))
-            logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
-            time.sleep(retry_after)
-        except Exception as e:
-            logger.error(f"Error assigning category: {e}", exc_info=True)
-            bookmark['category'] = 'Uncategorized'
-            return bookmark
-    category = response['choices'][0]['message']['content'].strip().strip('"')
-    # Validate the category
-    if category in CATEGORIES:
-        bookmark['category'] = category
-        logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
-    else:
-        bookmark['category'] = 'Uncategorized'
-        logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
-    return bookmark
 def vectorize_and_index(bookmarks_list):
     """
@@ -536,12 +562,12 @@ def process_uploaded_file(file):
         logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
         return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
-    # Process bookmarks sequentially
-    for bookmark in bookmarks:
-        generate_summary(bookmark)
-        time.sleep(0.5)
-        assign_category(bookmark)
-        time.sleep(0.5)
     try:
         faiss_index = vectorize_and_index(bookmarks)
@@ -559,164 +585,9 @@ def process_uploaded_file(file):
     return message, bookmark_html, gr.update(choices=choices), bookmark_html
-def delete_selected_bookmarks(selected_indices):
-    """
-    Delete selected bookmarks and remove their vectors from the FAISS index.
-    """
-    global bookmarks, faiss_index
-    if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
-    ids_to_delete = []
-    indices_to_delete = []
-    for s in selected_indices:
-        idx = int(s.split('.')[0]) - 1
-        if 0 <= idx < len(bookmarks):
-            bookmark_id = bookmarks[idx]['id']
-            ids_to_delete.append(bookmark_id)
-            indices_to_delete.append(idx)
-            logger.info(f"Deleting bookmark at index {idx + 1}")
-    # Remove vectors from FAISS index
-    if faiss_index is not None and ids_to_delete:
-        faiss_index.remove_ids(np.array(ids_to_delete, dtype=np.int64))
-    # Remove bookmarks from the list (reverse order to avoid index shifting)
-    for idx in sorted(indices_to_delete, reverse=True):
-        bookmarks.pop(idx)
-    message = "🗑️ Selected bookmarks deleted successfully."
-    logger.info(message)
-    choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-               for i, bookmark in enumerate(bookmarks)]
-    return message, gr.update(choices=choices), display_bookmarks()
-def edit_selected_bookmarks_category(selected_indices, new_category):
-    """
-    Edit category of selected bookmarks.
-    """
-    if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
-    if not new_category:
-        return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks()
-    indices = [int(s.split('.')[0])-1 for s in selected_indices]
-    for idx in indices:
-        if 0 <= idx < len(bookmarks):
-            bookmarks[idx]['category'] = new_category
-            logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
-    message = "✏️ Category updated for selected bookmarks."
-    logger.info(message)
-    # Update choices and display
-    choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-              for i, bookmark in enumerate(bookmarks)]
-    return message, gr.update(choices=choices), display_bookmarks()
-def export_bookmarks():
-    """
-    Export bookmarks to HTML file.
-    """
-    if not bookmarks:
-        logger.warning("No bookmarks to export")
-        return "⚠️ No bookmarks to export."
-    try:
-        logger.info("Exporting bookmarks to HTML")
-        soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
-        dl = soup.new_tag('DL')
-        for bookmark in bookmarks:
-            dt = soup.new_tag('DT')
-            a = soup.new_tag('A', href=bookmark['url'])
-            a.string = bookmark['title']
-            dt.append(a)
-            dl.append(dt)
-        soup.append(dl)
-        html_content = str(soup)
-        b64 = base64.b64encode(html_content.encode()).decode()
-        href = f'data:text/html;base64,{b64}'
-        logger.info("Bookmarks exported successfully")
-        return f'<a href="{href}" download="bookmarks.html">💾 Download Exported Bookmarks</a>'
-    except Exception as e:
-        logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
-        return "⚠️ Error exporting bookmarks."
-def chatbot_response(user_query):
-    """
-    Generate chatbot response using the FAISS index and embeddings.
-    """
-    if not bookmarks or faiss_index is None:
-        logger.warning("No bookmarks available for chatbot")
-        return "⚠️ No bookmarks available. Please upload and process your bookmarks first."
-    logger.info(f"Chatbot received query: {user_query}")
-    try:
-        # Encode the user query
-        query_vector = embedding_model.encode([user_query]).astype('float32')
-        # Search the FAISS index
-        k = 5  # Number of results to return
-        distances, ids = faiss_index.search(query_vector, k)
-        ids = ids.flatten()
-        # Retrieve the bookmarks
-        id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
-        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark]
-        if not matching_bookmarks:
-            return "No relevant bookmarks found for your query."
-        # Format the response
-        bookmarks_info = "\n".join([
-            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
-            for bookmark in matching_bookmarks
-        ])
-        # Use the LLM via Groq Cloud API to generate a response
-        prompt = f"""
-A user asked: "{user_query}"
-Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
-Bookmarks:
-{bookmarks_info}
-Provide a concise and helpful response.
-"""
-        while True:
-            try:
-                response = openai.ChatCompletion.create(
-                    model='llama-3.1-70b-versatile',
-                    messages=[
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=500,
-                    temperature=0.7,
-                )
-                break  # Exit loop if successful
-            except openai.error.RateLimitError as e:
-                retry_after = extract_retry_after(str(e))
-                logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
-                time.sleep(retry_after)
-            except Exception as e:
-                error_message = f"⚠️ Error processing your query: {str(e)}"
-                logger.error(error_message, exc_info=True)
-                return error_message
-        answer = response['choices'][0]['message']['content'].strip()
-        logger.info("Chatbot response generated using Groq Cloud API")
-        return answer
-    except Exception as e:
-        error_message = f"⚠️ Error processing your query: {str(e)}"
-        logger.error(error_message, exc_info=True)
-        return error_message
 def build_app():
     """
     Build and launch the Gradio app.
@@ -835,4 +706,6 @@ def build_app():
         print(f"Error building the app: {e}")
 if __name__ == "__main__":
     build_app()

     else:
         return 5  # Default retry after 5 seconds
+def exponential_backoff(retries):
+    return min(60, (2 ** retries))  # Cap the wait time at 60 seconds
 def extract_main_content(soup):
     """
     Extract the main content from a webpage while filtering out boilerplate content.
     return metadata
+async def generate_summary_async(bookmark):
+    async with llm_semaphore:
+        generate_summary(bookmark)
 def generate_summary(bookmark):
     """
     Generate a concise summary for a bookmark using available content and LLM via the Groq Cloud API.
 """
         # Call the LLM via Groq Cloud API
+        retries = 0
+        max_retries = 5
+        while retries <= max_retries:
             try:
                 response = openai.ChatCompletion.create(
                     model='llama-3.1-70b-versatile',
                 )
                 break  # Exit loop if successful
             except openai.error.RateLimitError as e:
+                retry_after = extract_retry_after(str(e)) or exponential_backoff(retries)
                 logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
                 time.sleep(retry_after)
+                retries += 1
             except Exception as e:
                 logger.error(f"Error generating summary: {e}", exc_info=True)
                 bookmark['summary'] = 'No summary available.'
         bookmark['summary'] = 'No summary available.'
         return bookmark
+async def assign_category_async(bookmark):
+    async with llm_semaphore:
+        assign_category(bookmark)
+def assign_category(bookmark):
+    """
+    Assign a category to a bookmark using the LLM based on its summary via the Groq Cloud API.
+    """
+    if bookmark.get('dead_link'):
+        bookmark['category'] = 'Dead Link'
+        logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
+        return bookmark
+    summary = bookmark.get('summary', '')
+    if not summary:
+        bookmark['category'] = 'Uncategorized'
+        return bookmark
+    # Prepare the prompt
+    categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES if cat != 'Dead Link'])
+    prompt = f"""
+You are a helpful assistant that categorizes webpages.
+Based on the following summary, assign the most appropriate category from the list below.
+Summary:
+{summary}
+Categories:
+{categories_str}
+Respond with only the category name.
+"""
+    retries = 0
+    max_retries = 5
+    while retries <= max_retries:
+        try:
+            response = openai.ChatCompletion.create(
+                model='llama-3.1-70b-versatile',
+                messages=[
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=10,
+                temperature=0,
+            )
+            break  # Exit loop if successful
+        except openai.error.RateLimitError as e:
+            retry_after = extract_retry_after(str(e)) or exponential_backoff(retries)
+            logger.warning(f"Rate limit exceeded. Retrying after {retry_after} seconds.")
+            time.sleep(retry_after)
+            retries += 1
+        except Exception as e:
+            logger.error(f"Error assigning category: {e}", exc_info=True)
+            bookmark['category'] = 'Uncategorized'
+            return bookmark
+    category = response['choices'][0]['message']['content'].strip().strip('"')
+    # Validate the category
+    if category in CATEGORIES:
+        bookmark['category'] = category
+        logger.info(f"Assigned category '{category}' to bookmark: {bookmark.get('url')}")
+    else:
+        bookmark['category'] = 'Uncategorized'
+        logger.warning(f"Invalid category '{category}' returned by LLM for bookmark: {bookmark.get('url')}")
+    return bookmark
 def parse_bookmarks(file_content):
     """
     Parse bookmarks from HTML file.
         bookmark.update(fetch_cache[url])
         return bookmark
+    max_retries = 0  # No retries
     retries = 0
+    timeout_duration = 5  # Reduced timeout
     while retries <= max_retries:
         try:
             logger.info(f"Fetching URL info for: {url} (Attempt {retries + 1})")
             headers = {
+                'User-Agent': 'Mozilla/5.0',
                 'Accept-Language': 'en-US,en;q=0.9',
             }
             async with session.get(url, timeout=timeout_duration, headers=headers, ssl=False, allow_redirects=True) as response:
                 break  # Exit loop if successful
         except asyncio.exceptions.TimeoutError:
+            bookmark['dead_link'] = False  # Mark as 'Unknown' instead of 'Dead'
+            bookmark['etag'] = 'N/A'
+            bookmark['status_code'] = 'Timeout'
+            bookmark['description'] = ''
+            bookmark['html_content'] = ''
+            bookmark['slow_link'] = True  # Custom flag to indicate slow response
+            logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
+            break  # Exit loop after timeout
         except Exception as e:
             bookmark['dead_link'] = True
             bookmark['etag'] = 'N/A'
     """
     logger.info("Processing bookmarks asynchronously")
     try:
+        connector = aiohttp.TCPConnector(limit=10)  # Increase limit if necessary
         timeout = aiohttp.ClientTimeout(total=60)  # Set timeout
         async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
             tasks = []
         logger.error(f"Error in asynchronous processing of bookmarks: {e}", exc_info=True)
         raise
+async def process_bookmarks_llm(bookmarks_list):
     """
+    Process bookmarks asynchronously for LLM API calls.
     """
+    logger.info("Processing bookmarks with LLM asynchronously")
+    tasks = []
+    for bookmark in bookmarks_list:
+        tasks.append(generate_summary_async(bookmark))
+        tasks.append(assign_category_async(bookmark))
+    await asyncio.gather(*tasks)
+    logger.info("Completed LLM processing of bookmarks")
 def vectorize_and_index(bookmarks_list):
     """
         logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
         return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
+    # Asynchronously process bookmarks with LLM
+    try:
+        asyncio.run(process_bookmarks_llm(bookmarks))
+    except Exception as e:
+        logger.error(f"Error processing bookmarks with LLM: {e}", exc_info=True)
+        return "Error processing bookmarks with LLM.", '', gr.update(choices=[]), display_bookmarks()
     try:
         faiss_index = vectorize_and_index(bookmarks)
     return message, bookmark_html, gr.update(choices=choices), bookmark_html
+# The rest of the code remains unchanged (e.g., delete_selected_bookmarks, edit_selected_bookmarks_category, etc.)
+# Build and launch the Gradio app
 def build_app():
     """
     Build and launch the Gradio app.
         print(f"Error building the app: {e}")
 if __name__ == "__main__":
+    # Define a semaphore to limit concurrent LLM API calls
+    llm_semaphore = asyncio.Semaphore(3)  # Adjust based on allowed concurrency
     build_app()