siddhartharya committed
Commit 6e6eade
Parent(s): 163ff62

Update app.py

Files changed (1)
  1. app.py +89 -42
app.py CHANGED
@@ -74,19 +74,28 @@ CATEGORIES = [
     "Uncategorized",
 ]
 
-# Set up Groq Cloud API key and base URL
-GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+# Set up Groq Cloud API keys and base URLs
+GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
+GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')
 
-if not GROQ_API_KEY:
-    logger.error("GROQ_API_KEY environment variable not set.")
+if not GROQ_API_KEY_BASIC:
+    logger.error("GROQ_API_KEY_BASIC environment variable not set.")
 
-openai.api_key = GROQ_API_KEY
-openai.api_base = "https://api.groq.com/openai/v1"
+if not GROQ_API_KEY_ADVANCED:
+    logger.error("GROQ_API_KEY_ADVANCED environment variable not set.")
+
+# Define models
+MODEL_BASIC = 'llama-3.1-8b-instant'
+MODEL_ADVANCED = 'llama-3.1-70b-versatile'
 
 # Rate Limiter Configuration
-RPM_LIMIT = 60  # Requests per minute (adjust based on your API's limit)
-TPM_LIMIT = 60000  # Tokens per minute (adjust based on your API's limit)
-BATCH_SIZE = 5  # Number of bookmarks per batch
+RPM_LIMIT_BASIC = 60  # Requests per minute for basic model
+TPM_LIMIT_BASIC = 60000  # Tokens per minute for basic model
+RPM_LIMIT_ADVANCED = 30  # Requests per minute for advanced model
+TPM_LIMIT_ADVANCED = 30000  # Tokens per minute for advanced model
+
+BATCH_SIZE_BASIC = 5  # Number of bookmarks per batch for basic model
+BATCH_SIZE_ADVANCED = 3  # Number of bookmarks per batch for advanced model
 
 # Implementing a Token Bucket Rate Limiter
 class TokenBucket:
@@ -115,14 +124,21 @@ class TokenBucket:
             time.sleep(0.05)
 
 # Initialize rate limiters
-rpm_rate = RPM_LIMIT / 60  # tokens per second
-tpm_rate = TPM_LIMIT / 60  # tokens per second
+rpm_rate_basic = RPM_LIMIT_BASIC / 60  # tokens per second
+tpm_rate_basic = TPM_LIMIT_BASIC / 60  # tokens per second
+
+rpm_rate_advanced = RPM_LIMIT_ADVANCED / 60  # tokens per second
+tpm_rate_advanced = TPM_LIMIT_ADVANCED / 60  # tokens per second
 
-rpm_bucket = TokenBucket(rate=rpm_rate, capacity=RPM_LIMIT)
-tpm_bucket = TokenBucket(rate=tpm_rate, capacity=TPM_LIMIT)
+rpm_bucket_basic = TokenBucket(rate=rpm_rate_basic, capacity=RPM_LIMIT_BASIC)
+tpm_bucket_basic = TokenBucket(rate=tpm_rate_basic, capacity=TPM_LIMIT_BASIC)
 
-# Queue for LLM tasks
-llm_queue = Queue()
+rpm_bucket_advanced = TokenBucket(rate=rpm_rate_advanced, capacity=RPM_LIMIT_ADVANCED)
+tpm_bucket_advanced = TokenBucket(rate=tpm_rate_advanced, capacity=TPM_LIMIT_ADVANCED)
+
+# Queues for LLM tasks
+llm_queue_basic = Queue()
+llm_queue_advanced = Queue()
 
 def categorize_based_on_summary(summary, url):
     """
@@ -229,20 +245,20 @@ def get_page_metadata(soup):
 
     return metadata
 
-def llm_worker():
+def llm_worker(queue, model_name, api_key, rpm_bucket, tpm_bucket, batch_size):
     """
     Worker thread to process LLM tasks from the queue while respecting rate limits.
     """
-    logger.info("LLM worker started.")
+    logger.info(f"LLM worker for {model_name} started.")
     while True:
         batch = []
         try:
-            # Collect bookmarks up to BATCH_SIZE
-            while len(batch) < BATCH_SIZE:
-                bookmark = llm_queue.get(timeout=1)
+            # Collect bookmarks up to batch_size
+            while len(batch) < batch_size:
+                bookmark = queue.get(timeout=1)
                 if bookmark is None:
                     # Shutdown signal
-                    logger.info("LLM worker shutting down.")
+                    logger.info(f"LLM worker for {model_name} shutting down.")
                     return
                 if not bookmark.get('dead_link') and not bookmark.get('slow_link'):
                     batch.append(bookmark)
@@ -250,7 +266,7 @@ def llm_worker():
                     # Skip processing for dead or slow links
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
-                    llm_queue.task_done()
+                    queue.task_done()
 
         except Empty:
             pass  # No more bookmarks at the moment
@@ -288,8 +304,11 @@ Provide summaries and categories for the following bookmarks:
             prompt += "}\n\n"
             prompt += "Now, provide the summaries and categories for the bookmarks listed above."
 
+            # Set API key and model
+            openai.api_key = api_key
+
             response = openai.ChatCompletion.create(
-                model='llama-3.1-70b-versatile',  # Retaining the original model
+                model=model_name,
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
@@ -330,28 +349,36 @@ Provide summaries and categories for the following bookmarks:
                     logger.info(f"Processed bookmark: {url}")
 
             except json.JSONDecodeError:
-                logger.error("Failed to parse JSON response from LLM. Using fallback methods.")
+                logger.error(f"Failed to parse JSON response from {model_name}. Using fallback methods.")
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = categorize_based_on_summary(bookmark.get('summary', ''), bookmark['url'])
                     bookmark['category'] = validate_category(bookmark)
 
             except Exception as e:
-                logger.error(f"Error processing LLM response: {e}", exc_info=True)
+                logger.error(f"Error processing LLM response from {model_name}: {e}", exc_info=True)
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
 
         except openai.error.RateLimitError:
-            logger.warning(f"LLM Rate limit reached. Retrying after 60 seconds.")
-            # Re-enqueue the entire batch for retry
+            logger.warning(f"Rate limit reached for {model_name}. Fallback to other model if possible.")
+            # Re-enqueue the entire batch to the other queue
+            if model_name == MODEL_BASIC:
+                target_queue = llm_queue_advanced
+                target_model = MODEL_ADVANCED
+                target_api_key = GROQ_API_KEY_ADVANCED
+            else:
+                target_queue = llm_queue_basic
+                target_model = MODEL_BASIC
+                target_api_key = GROQ_API_KEY_BASIC
+
             for bookmark in batch:
-                llm_queue.put(bookmark)
-            time.sleep(60)  # Wait before retrying
-            continue  # Skip the rest and retry
+                logger.info(f"Reassigning bookmark {bookmark['url']} to {target_model} due to rate limit.")
+                target_queue.put(bookmark)
 
         except Exception as e:
-            logger.error(f"Error during LLM processing: {e}", exc_info=True)
+            logger.error(f"Error during LLM processing for {model_name}: {e}", exc_info=True)
             for bookmark in batch:
                 bookmark['summary'] = 'No summary available.'
                 bookmark['category'] = 'Uncategorized'
@@ -359,7 +386,7 @@ Provide summaries and categories for the following bookmarks:
         finally:
             # Mark all bookmarks in the batch as done
            for _ in batch:
-                llm_queue.task_done()
+                queue.task_done()
 
 def parse_bookmarks(file_content):
     """
@@ -550,13 +577,19 @@ def process_uploaded_file(file, state_bookmarks):
     with ThreadPoolExecutor(max_workers=10) as executor:
         executor.map(fetch_url_info, bookmarks)
 
-    # Enqueue bookmarks for LLM processing
+    # Enqueue bookmarks for LLM processing based on task complexity
     logger.info("Enqueuing bookmarks for LLM processing")
     for bookmark in bookmarks:
-        llm_queue.put(bookmark)
+        # Determine task complexity
+        # Example logic: Assign to basic model if title is short, else to advanced
+        if len(bookmark['title']) < 50:
+            llm_queue_basic.put(bookmark)
+        else:
+            llm_queue_advanced.put(bookmark)
 
     # Wait until all LLM tasks are completed
-    llm_queue.join()
+    llm_queue_basic.join()
+    llm_queue_advanced.join()
     logger.info("All LLM tasks have been processed")
 
     try:
@@ -685,11 +718,12 @@ def chatbot_response(user_query, chat_history):
         chat_history.append({"role": "user", "content": user_query})
 
         # Rate Limiting
-        rpm_bucket.wait_for_token()
+        # Assuming the chatbot uses the advanced model
+        rpm_bucket_advanced.wait_for_token()
         # Estimate tokens: prompt + max_tokens
        # Here, we assume max_tokens=300 per chatbot response
         total_tokens = 300  # Adjust based on actual usage
-        tpm_bucket.wait_for_token(tokens=total_tokens)
+        tpm_bucket_advanced.wait_for_token(tokens=total_tokens)
 
         query_vector = embedding_model.encode([user_query]).astype('float32')
         k = 5
@@ -717,8 +751,10 @@ Bookmarks:
Provide a concise and helpful response.
'''
 
+        # Use the advanced model for chatbot responses
+        openai.api_key = GROQ_API_KEY_ADVANCED
         response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',  # Retaining the original model
+            model=MODEL_ADVANCED,  # Retaining the original model
             messages=[
                 {"role": "user", "content": prompt}
             ],
@@ -734,7 +770,7 @@ Provide a concise and helpful response.
 
     except openai.error.RateLimitError:
         wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
-        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        logger.warning(f"Rate limit reached for chatbot. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)
     except Exception as e:
@@ -923,8 +959,19 @@ Navigate through the tabs to explore each feature in detail.
         print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
-    # Start the LLM worker thread before launching the app
-    llm_thread = threading.Thread(target=llm_worker, daemon=True)
-    llm_thread.start()
+    # Start the LLM worker threads before launching the app
+    llm_thread_basic = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_basic, MODEL_BASIC, GROQ_API_KEY_BASIC, rpm_bucket_basic, tpm_bucket_basic, BATCH_SIZE_BASIC),
+        daemon=True
+    )
+    llm_thread_advanced = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_advanced, MODEL_ADVANCED, GROQ_API_KEY_ADVANCED, rpm_bucket_advanced, tpm_bucket_advanced, BATCH_SIZE_ADVANCED),
+        daemon=True
+    )
+
+    llm_thread_basic.start()
+    llm_thread_advanced.start()
 
     build_app()
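
Note: the TokenBucket class itself is not touched by this commit, so only its configuration, its wait_for_token() calls, and a single time.sleep(0.05) line appear in the hunks above. For readers following the rate-limiting changes, a minimal sketch consistent with that usage (a constructor taking rate and capacity, and a blocking wait_for_token(tokens=...)) might look like the following; the attribute names and refill logic here are assumptions, not the committed implementation.

import threading
import time

class TokenBucket:
    # Hypothetical sketch; the real implementation lives in app.py outside this diff.
    def __init__(self, rate, capacity):
        self.rate = rate              # tokens added per second
        self.capacity = capacity      # maximum burst size
        self.tokens = capacity
        self.last_refill = time.monotonic()
        self.lock = threading.Lock()

    def wait_for_token(self, tokens=1):
        # Block until `tokens` tokens are available, refilling based on elapsed time.
        while True:
            with self.lock:
                now = time.monotonic()
                self.tokens = min(self.capacity, self.tokens + (now - self.last_refill) * self.rate)
                self.last_refill = now
                if self.tokens >= tokens:
                    self.tokens -= tokens
                    return
            time.sleep(0.05)  # matches the polling interval visible in the diff

With separate buckets per model (rpm_bucket_basic/tpm_bucket_basic and rpm_bucket_advanced/tpm_bucket_advanced), each worker thread can block on its own request and token budgets without stalling the other model's queue.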