siddhartharya committed • Commit 6e6eade
Parent(s): 163ff62
Update app.py

app.py CHANGED
@@ -74,19 +74,28 @@ CATEGORIES = [
     "Uncategorized",
 ]
 
-# Set up Groq Cloud API
-
+# Set up Groq Cloud API keys and base URLs
+GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
+GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')
 
-if not
-logger.error("
+if not GROQ_API_KEY_BASIC:
+    logger.error("GROQ_API_KEY_BASIC environment variable not set.")
 
-
-
+if not GROQ_API_KEY_ADVANCED:
+    logger.error("GROQ_API_KEY_ADVANCED environment variable not set.")
+
+# Define models
+MODEL_BASIC = 'llama-3.1-8b-instant'
+MODEL_ADVANCED = 'llama-3.1-70b-versatile'
 
 # Rate Limiter Configuration
-
-
-
+RPM_LIMIT_BASIC = 60        # Requests per minute for basic model
+TPM_LIMIT_BASIC = 60000     # Tokens per minute for basic model
+RPM_LIMIT_ADVANCED = 30     # Requests per minute for advanced model
+TPM_LIMIT_ADVANCED = 30000  # Tokens per minute for advanced model
+
+BATCH_SIZE_BASIC = 5        # Number of bookmarks per batch for basic model
+BATCH_SIZE_ADVANCED = 3     # Number of bookmarks per batch for advanced model
 
 # Implementing a Token Bucket Rate Limiter
 class TokenBucket:
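Note: the comment above mentions "keys and base URLs", but no base-URL assignment appears in the changed hunks, so it presumably lives in an unchanged part of app.py. With the pre-1.0 openai client this file uses (openai.ChatCompletion.create), pointing it at Groq's OpenAI-compatible endpoint would look roughly like the sketch below; the endpoint URL is an assumption, not taken from this diff.

import openai

# Sketch only, not part of this commit: Groq exposes an OpenAI-compatible API,
# so the legacy openai module can be redirected to it.
openai.api_key = GROQ_API_KEY_BASIC                   # defined in the hunk above
openai.api_base = "https://api.groq.com/openai/v1"    # assumed Groq endpoint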
@@ -115,14 +124,21 @@ class TokenBucket:
             time.sleep(0.05)
 
 # Initialize rate limiters
-
-
+rpm_rate_basic = RPM_LIMIT_BASIC / 60  # tokens per second
+tpm_rate_basic = TPM_LIMIT_BASIC / 60  # tokens per second
+
+rpm_rate_advanced = RPM_LIMIT_ADVANCED / 60  # tokens per second
+tpm_rate_advanced = TPM_LIMIT_ADVANCED / 60  # tokens per second
 
-
-
+rpm_bucket_basic = TokenBucket(rate=rpm_rate_basic, capacity=RPM_LIMIT_BASIC)
+tpm_bucket_basic = TokenBucket(rate=tpm_rate_basic, capacity=TPM_LIMIT_BASIC)
 
-
-
+rpm_bucket_advanced = TokenBucket(rate=rpm_rate_advanced, capacity=RPM_LIMIT_ADVANCED)
+tpm_bucket_advanced = TokenBucket(rate=tpm_rate_advanced, capacity=TPM_LIMIT_ADVANCED)
+
+# Queues for LLM tasks
+llm_queue_basic = Queue()
+llm_queue_advanced = Queue()
 
 def categorize_based_on_summary(summary, url):
     """
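Note: the TokenBucket class itself sits in an unchanged part of app.py, so only its constructor arguments and the 0.05 s polling sleep are visible in this diff. Below is a minimal sketch consistent with how it is called in this commit (TokenBucket(rate=..., capacity=...) and wait_for_token(tokens=...)); the refill and locking details are assumptions.

import threading
import time

class TokenBucket:
    # Refills at `rate` tokens per second, up to `capacity` tokens.
    def __init__(self, rate, capacity):
        self.rate = rate
        self.capacity = capacity
        self.tokens = capacity
        self.last_refill = time.monotonic()
        self.lock = threading.Lock()

    def _refill(self):
        now = time.monotonic()
        self.tokens = min(self.capacity, self.tokens + (now - self.last_refill) * self.rate)
        self.last_refill = now

    def wait_for_token(self, tokens=1):
        # Block until enough tokens have accumulated, polling as in the diff above.
        while True:
            with self.lock:
                self._refill()
                if self.tokens >= tokens:
                    self.tokens -= tokens
                    return
            time.sleep(0.05)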
@@ -229,20 +245,20 @@ def get_page_metadata(soup):
 
     return metadata
 
-def llm_worker():
+def llm_worker(queue, model_name, api_key, rpm_bucket, tpm_bucket, batch_size):
     """
     Worker thread to process LLM tasks from the queue while respecting rate limits.
     """
-    logger.info("LLM worker started.")
+    logger.info(f"LLM worker for {model_name} started.")
     while True:
         batch = []
         try:
-            # Collect bookmarks up to
-            while len(batch) <
-                bookmark =
+            # Collect bookmarks up to batch_size
+            while len(batch) < batch_size:
+                bookmark = queue.get(timeout=1)
                 if bookmark is None:
                     # Shutdown signal
-                    logger.info("LLM worker shutting down.")
+                    logger.info(f"LLM worker for {model_name} shutting down.")
                     return
                 if not bookmark.get('dead_link') and not bookmark.get('slow_link'):
                     batch.append(bookmark)
@@ -250,7 +266,7 @@ def llm_worker():
                     # Skip processing for dead or slow links
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
-
+                    queue.task_done()
 
         except Empty:
             pass  # No more bookmarks at the moment
@@ -288,8 +304,11 @@ Provide summaries and categories for the following bookmarks:
             prompt += "}\n\n"
             prompt += "Now, provide the summaries and categories for the bookmarks listed above."
 
+            # Set API key and model
+            openai.api_key = api_key
+
             response = openai.ChatCompletion.create(
-                model=
+                model=model_name,
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
@@ -330,28 +349,36 @@ Provide summaries and categories for the following bookmarks:
                 logger.info(f"Processed bookmark: {url}")
 
             except json.JSONDecodeError:
-                logger.error("Failed to parse JSON response from
+                logger.error(f"Failed to parse JSON response from {model_name}. Using fallback methods.")
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = categorize_based_on_summary(bookmark.get('summary', ''), bookmark['url'])
                     bookmark['category'] = validate_category(bookmark)
 
             except Exception as e:
-                logger.error(f"Error processing LLM response: {e}", exc_info=True)
+                logger.error(f"Error processing LLM response from {model_name}: {e}", exc_info=True)
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
 
         except openai.error.RateLimitError:
-            logger.warning(f"
-            # Re-enqueue the entire batch
+            logger.warning(f"Rate limit reached for {model_name}. Fallback to other model if possible.")
+            # Re-enqueue the entire batch to the other queue
+            if model_name == MODEL_BASIC:
+                target_queue = llm_queue_advanced
+                target_model = MODEL_ADVANCED
+                target_api_key = GROQ_API_KEY_ADVANCED
+            else:
+                target_queue = llm_queue_basic
+                target_model = MODEL_BASIC
+                target_api_key = GROQ_API_KEY_BASIC
+
             for bookmark in batch:
-
-
-                continue  # Skip the rest and retry
+                logger.info(f"Reassigning bookmark {bookmark['url']} to {target_model} due to rate limit.")
+                target_queue.put(bookmark)
 
         except Exception as e:
-            logger.error(f"Error during LLM processing: {e}", exc_info=True)
+            logger.error(f"Error during LLM processing for {model_name}: {e}", exc_info=True)
             for bookmark in batch:
                 bookmark['summary'] = 'No summary available.'
                 bookmark['category'] = 'Uncategorized'
@@ -359,7 +386,7 @@ Provide summaries and categories for the following bookmarks:
         finally:
             # Mark all bookmarks in the batch as done
             for _ in batch:
-
+                queue.task_done()
 
 def parse_bookmarks(file_content):
     """
@@ -550,13 +577,19 @@ def process_uploaded_file(file, state_bookmarks):
     with ThreadPoolExecutor(max_workers=10) as executor:
         executor.map(fetch_url_info, bookmarks)
 
-    # Enqueue bookmarks for LLM processing
+    # Enqueue bookmarks for LLM processing based on task complexity
     logger.info("Enqueuing bookmarks for LLM processing")
     for bookmark in bookmarks:
-
+        # Determine task complexity
+        # Example logic: Assign to basic model if title is short, else to advanced
+        if len(bookmark['title']) < 50:
+            llm_queue_basic.put(bookmark)
+        else:
+            llm_queue_advanced.put(bookmark)
 
     # Wait until all LLM tasks are completed
-
+    llm_queue_basic.join()
+    llm_queue_advanced.join()
     logger.info("All LLM tasks have been processed")
 
     try:
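Note: Queue.join() only returns once every put() has been matched by a task_done(), which is why llm_worker calls task_done() both for skipped dead/slow bookmarks and, in its finally block, for each bookmark of a processed batch. A standalone illustration of that contract (hypothetical names, not from app.py):

import threading
from queue import Queue

q = Queue()

def worker():
    while True:
        item = q.get()        # one get() per put()
        try:
            pass              # process item here
        finally:
            q.task_done()     # exactly one task_done() per get()

threading.Thread(target=worker, daemon=True).start()
for item in range(3):
    q.put(item)
q.join()                      # returns only after three task_done() calls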
@@ -685,11 +718,12 @@ def chatbot_response(user_query, chat_history):
     chat_history.append({"role": "user", "content": user_query})
 
     # Rate Limiting
-
+    # Assuming the chatbot uses the advanced model
+    rpm_bucket_advanced.wait_for_token()
     # Estimate tokens: prompt + max_tokens
     # Here, we assume max_tokens=300 per chatbot response
     total_tokens = 300  # Adjust based on actual usage
-
+    tpm_bucket_advanced.wait_for_token(tokens=total_tokens)
 
     query_vector = embedding_model.encode([user_query]).astype('float32')
     k = 5
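Note: with the limits defined earlier, tpm_rate_advanced = 30000 / 60 = 500 tokens per second, so once the token bucket is empty each 300-token chatbot call waits roughly 300 / 500 = 0.6 seconds, and RPM_LIMIT_ADVANCED = 30 caps the chatbot at an average of one request every two seconds.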
@@ -717,8 +751,10 @@ Bookmarks:
 Provide a concise and helpful response.
 '''
 
+        # Use the advanced model for chatbot responses
+        openai.api_key = GROQ_API_KEY_ADVANCED
         response = openai.ChatCompletion.create(
-            model=
+            model=MODEL_ADVANCED,  # Retaining the original model
             messages=[
                 {"role": "user", "content": prompt}
             ],
@@ -734,7 +770,7 @@ Provide a concise and helpful response.
 
     except openai.error.RateLimitError:
         wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
-        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        logger.warning(f"Rate limit reached for chatbot. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)
     except Exception as e:
@@ -923,8 +959,19 @@ Navigate through the tabs to explore each feature in detail.
         print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
-    # Start the LLM worker
-
-
+    # Start the LLM worker threads before launching the app
+    llm_thread_basic = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_basic, MODEL_BASIC, GROQ_API_KEY_BASIC, rpm_bucket_basic, tpm_bucket_basic, BATCH_SIZE_BASIC),
+        daemon=True
+    )
+    llm_thread_advanced = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_advanced, MODEL_ADVANCED, GROQ_API_KEY_ADVANCED, rpm_bucket_advanced, tpm_bucket_advanced, BATCH_SIZE_ADVANCED),
+        daemon=True
+    )
+
+    llm_thread_basic.start()
+    llm_thread_advanced.start()
 
     build_app()
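Note: llm_worker treats a None item as a shutdown signal, but nothing in this commit enqueues one; because both threads are started with daemon=True they are simply terminated when the main process exits. If a graceful shutdown were wanted, a sketch (not part of this commit) would be:

# Hypothetical shutdown sequence; this commit relies on daemon threads instead.
llm_queue_basic.put(None)        # sentinel makes the basic worker return
llm_queue_advanced.put(None)     # sentinel makes the advanced worker return
llm_thread_basic.join()
llm_thread_advanced.join()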