Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Files Community

siddhartharya committed on Nov 26, 2024

Commit

3c7c197

verified ·

1 Parent(s): 4d7269c

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -64

app.py CHANGED Viewed

@@ -8,18 +8,17 @@ import numpy as np
 import requests
 import time
 import re
-import base64
 import logging
 import os
 import sys
-import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
 # Import OpenAI library
 import openai
-# Suppress only the single warning from urllib3 needed.
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -74,14 +73,15 @@ CATEGORIES = [
     "Uncategorized",
 ]
-# Set up Groq Cloud API key and base URL
-GROQ_API_KEY = os.getenv('GROQ_API_KEY')
-if not GROQ_API_KEY:
-    logger.error("GROQ_API_KEY environment variable not set.")
-openai.api_key = GROQ_API_KEY
-openai.api_base = "https://api.groq.com/openai/v1"  # Ensure this is the correct base URL
 # Initialize global variables for rate limiting
 api_lock = threading.Lock()
@@ -178,7 +178,7 @@ def generate_summary_and_assign_category(bookmark):
                     time.sleep(sleep_duration)
                 last_api_call_time = time.time()
-            # Existing logic to prepare the prompt
             html_content = bookmark.get('html_content', '')
             soup = BeautifulSoup(html_content, 'html.parser')
             metadata = get_page_metadata(soup)
@@ -208,7 +208,7 @@ def generate_summary_and_assign_category(bookmark):
             else:
                 use_prior_knowledge = False
-            # Shortened prompts
             if use_prior_knowledge:
                 prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
@@ -237,27 +237,13 @@ Summary: [Your summary]
 Category: [One category]
 """
-            # Estimate tokens
-            def estimate_tokens(text):
-                return len(text) / 4  # Approximate token estimation
-            prompt_tokens = estimate_tokens(prompt)
-            max_tokens = 150  # Adjusted from 200
-            total_tokens = prompt_tokens + max_tokens
-            # Calculate required delay
-            tokens_per_minute = 40000
-            tokens_per_second = tokens_per_minute / 60
-            required_delay = total_tokens / tokens_per_second
-            sleep_time = max(required_delay, 2)  # Ensure at least 2 seconds
-            # Call the LLM via Groq Cloud API
             response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',  # Using the specified model
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
-                max_tokens=int(max_tokens),
                 temperature=0.5,
             )
@@ -283,7 +269,7 @@ Category: [One category]
             else:
                 bookmark['category'] = 'Uncategorized'
-            # Simple keyword-based validation (Optional)
             summary_lower = bookmark['summary'].lower()
             url_lower = bookmark['url'].lower()
             if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
@@ -292,7 +278,6 @@ Category: [One category]
                 bookmark['category'] = 'Reference and Knowledge Bases'
             logger.info("Successfully generated summary and assigned category")
-            time.sleep(sleep_time)
             break  # Exit the retry loop upon success
         except openai.error.RateLimitError as e:
@@ -439,7 +424,6 @@ def display_bookmarks():
         category = bookmark.get('category', 'Uncategorized')
         # Escape HTML content to prevent XSS attacks
-        from html import escape
         title = escape(title)
         url = escape(url)
         summary = escape(summary)
@@ -493,12 +477,12 @@ def process_uploaded_file(file, state_bookmarks):
     # Fetch bookmark info concurrently
     logger.info("Fetching URL info concurrently")
-    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjusted max_workers as needed
         executor.map(fetch_url_info, bookmarks)
     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
-    with ThreadPoolExecutor(max_workers=1) as executor:  # Reduced max_workers to 1 to serialize API calls
         executor.map(generate_summary_and_assign_category, bookmarks)
     try:
@@ -531,12 +515,15 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
     ids_to_delete = []
     indices_to_delete = []
     for s in selected_indices:
-        idx = int(s.split('.')[0]) - 1
-        if 0 <= idx < len(bookmarks):
-            bookmark_id = bookmarks[idx]['id']
-            ids_to_delete.append(bookmark_id)
-            indices_to_delete.append(idx)
-            logger.info(f"Deleting bookmark at index {idx + 1}")
     # Remove vectors from FAISS index
     if faiss_index is not None and ids_to_delete:
@@ -565,11 +552,20 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookm
     if not new_category:
         return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
-    indices = [int(s.split('.')[0])-1 for s in selected_indices]
     for idx in indices:
-        if 0 <= idx < len(bookmarks):
-            bookmarks[idx]['category'] = new_category
-            logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
     message = "✏️ Category updated for selected bookmarks."
     logger.info(message)
@@ -589,7 +585,7 @@ def export_bookmarks():
     """
     if not bookmarks:
         logger.warning("No bookmarks to export")
-        return None  # Return None instead of a message
     try:
         logger.info("Exporting bookmarks to HTML")
@@ -639,7 +635,7 @@ def chatbot_response(user_query, chat_history):
                 time.sleep(sleep_duration)
             last_api_call_time = time.time()
-        # Existing logic to encode the query and search the FAISS index
         query_vector = embedding_model.encode([user_query]).astype('float32')
         k = 5  # Number of results to return
         distances, ids = faiss_index.search(query_vector, k)
@@ -660,7 +656,7 @@ def chatbot_response(user_query, chat_history):
             for bookmark in matching_bookmarks
         ])
-        # Use the LLM via Groq Cloud API to generate a response
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
@@ -669,33 +665,18 @@ Bookmarks:
 Provide a concise and helpful response.
 """
-        # Estimate tokens
-        def estimate_tokens(text):
-            return len(text) / 4  # Approximate token estimation
-        prompt_tokens = estimate_tokens(prompt)
-        max_tokens = 300  # Adjust as needed
-        total_tokens = prompt_tokens + max_tokens
-        # Calculate required delay
-        tokens_per_minute = 40000
-        tokens_per_second = tokens_per_minute / 60
-        required_delay = total_tokens / tokens_per_second
-        sleep_time = max(required_delay, 2)  # Ensure at least 2 seconds
-        # Call the LLM via Groq Cloud API
         response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',  # Using the specified model
             messages=[
                 {"role": "user", "content": prompt}
             ],
-            max_tokens=int(max_tokens),
             temperature=0.7,
         )
         answer = response['choices'][0]['message']['content'].strip()
         logger.info("Chatbot response generated")
-        time.sleep(sleep_time)
         # Append the assistant's response to chat history
         chat_history.append({"role": "assistant", "content": answer})

 import requests
 import time
 import re
 import logging
 import os
 import sys
 from concurrent.futures import ThreadPoolExecutor
 import threading
+from html import escape
 # Import OpenAI library
 import openai
+# Suppress specific warnings
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
     "Uncategorized",
 ]
+# Set up OpenAI API key and base URL
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+if not OPENAI_API_KEY:
+    logger.error("OPENAI_API_KEY environment variable not set.")
+openai.api_key = OPENAI_API_KEY
+# If you're using a custom API base, uncomment and set it
+# openai.api_base = "https://api.your-provider.com/v1"
 # Initialize global variables for rate limiting
 api_lock = threading.Lock()
                     time.sleep(sleep_duration)
                 last_api_call_time = time.time()
+            # Prepare the prompt
             html_content = bookmark.get('html_content', '')
             soup = BeautifulSoup(html_content, 'html.parser')
             metadata = get_page_metadata(soup)
             else:
                 use_prior_knowledge = False
+            # Craft the prompt based on content availability
             if use_prior_knowledge:
                 prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
 Category: [One category]
 """
+            # Call the LLM via OpenAI API
             response = openai.ChatCompletion.create(
+                model='gpt-4',  # Ensure you're using a valid and accessible model
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
+                max_tokens=150,
                 temperature=0.5,
             )
             else:
                 bookmark['category'] = 'Uncategorized'
+            # Optional: Simple keyword-based validation
             summary_lower = bookmark['summary'].lower()
             url_lower = bookmark['url'].lower()
             if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
                 bookmark['category'] = 'Reference and Knowledge Bases'
             logger.info("Successfully generated summary and assigned category")
             break  # Exit the retry loop upon success
         except openai.error.RateLimitError as e:
         category = bookmark.get('category', 'Uncategorized')
         # Escape HTML content to prevent XSS attacks
         title = escape(title)
         url = escape(url)
         summary = escape(summary)
     # Fetch bookmark info concurrently
     logger.info("Fetching URL info concurrently")
+    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
         executor.map(fetch_url_info, bookmarks)
     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
+    with ThreadPoolExecutor(max_workers=1) as executor:  # Serialize API calls to respect rate limits
         executor.map(generate_summary_and_assign_category, bookmarks)
     try:
     ids_to_delete = []
     indices_to_delete = []
     for s in selected_indices:
+        try:
+            idx = int(s.split('.')[0]) - 1
+            if 0 <= idx < len(bookmarks):
+                bookmark_id = bookmarks[idx]['id']
+                ids_to_delete.append(bookmark_id)
+                indices_to_delete.append(idx)
+                logger.info(f"Deleting bookmark at index {idx + 1}")
+        except (ValueError, IndexError):
+            logger.warning(f"Invalid selection format: {s}")
     # Remove vectors from FAISS index
     if faiss_index is not None and ids_to_delete:
     if not new_category:
         return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks
+    indices = []
+    for s in selected_indices:
+        try:
+            idx = int(s.split('.')[0])-1
+            if 0 <= idx < len(bookmarks):
+                indices.append(idx)
+            else:
+                logger.warning(f"Index out of range: {idx + 1}")
+        except ValueError:
+            logger.warning(f"Invalid selection format: {s}")
     for idx in indices:
+        bookmarks[idx]['category'] = new_category
+        logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
     message = "✏️ Category updated for selected bookmarks."
     logger.info(message)
     """
     if not bookmarks:
         logger.warning("No bookmarks to export")
+        return None  # Return None to indicate no file
     try:
         logger.info("Exporting bookmarks to HTML")
                 time.sleep(sleep_duration)
             last_api_call_time = time.time()
+        # Encode the query and search the FAISS index
         query_vector = embedding_model.encode([user_query]).astype('float32')
         k = 5  # Number of results to return
         distances, ids = faiss_index.search(query_vector, k)
             for bookmark in matching_bookmarks
         ])
+        # Craft the prompt for the LLM
         prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
 Provide a concise and helpful response.
 """
+        # Call the LLM via OpenAI API
         response = openai.ChatCompletion.create(
+            model='gpt-4',  # Ensure you're using a valid and accessible model
             messages=[
                 {"role": "user", "content": prompt}
             ],
+            max_tokens=300,
             temperature=0.7,
         )
         answer = response['choices'][0]['message']['content'].strip()
         logger.info("Chatbot response generated")
         # Append the assistant's response to chat history
         chat_history.append({"role": "assistant", "content": answer})