siddhartharya committed • Commit 6e6eade
Parent(s): 163ff62
Update app.py

app.py CHANGED
@@ -74,19 +74,28 @@ CATEGORIES = [
     "Uncategorized",
 ]
 
-# Set up Groq Cloud API
-
+# Set up Groq Cloud API keys and base URLs
+GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
+GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')
 
-if not
-logger.error("
+if not GROQ_API_KEY_BASIC:
+    logger.error("GROQ_API_KEY_BASIC environment variable not set.")
 
-
-
+if not GROQ_API_KEY_ADVANCED:
+    logger.error("GROQ_API_KEY_ADVANCED environment variable not set.")
+
+# Define models
+MODEL_BASIC = 'llama-3.1-8b-instant'
+MODEL_ADVANCED = 'llama-3.1-70b-versatile'
 
 # Rate Limiter Configuration
-
-
-
+RPM_LIMIT_BASIC = 60        # Requests per minute for basic model
+TPM_LIMIT_BASIC = 60000     # Tokens per minute for basic model
+RPM_LIMIT_ADVANCED = 30     # Requests per minute for advanced model
+TPM_LIMIT_ADVANCED = 30000  # Tokens per minute for advanced model
+
+BATCH_SIZE_BASIC = 5        # Number of bookmarks per batch for basic model
+BATCH_SIZE_ADVANCED = 3     # Number of bookmarks per batch for advanced model
 
 # Implementing a Token Bucket Rate Limiter
 class TokenBucket:
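Note: the comment above mentions "keys and base URLs", but no base-URL assignment appears in the changed hunks, so it presumably lives in an unchanged part of app.py. With the pre-1.0 openai client this file uses (openai.ChatCompletion.create), pointing it at Groq's OpenAI-compatible endpoint would look roughly like the sketch below; the endpoint URL is an assumption, not taken from this diff.

import openai

# Sketch only, not part of this commit: Groq exposes an OpenAI-compatible API,
# so the legacy openai module can be redirected to it.
openai.api_key = GROQ_API_KEY_BASIC                   # defined in the hunk above
openai.api_base = "https://api.groq.com/openai/v1"    # assumed Groq endpoint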
@@ -115,14 +124,21 @@ class TokenBucket:
             time.sleep(0.05)
 
 # Initialize rate limiters
-
-
+rpm_rate_basic = RPM_LIMIT_BASIC / 60  # tokens per second
+tpm_rate_basic = TPM_LIMIT_BASIC / 60  # tokens per second
+
+rpm_rate_advanced = RPM_LIMIT_ADVANCED / 60  # tokens per second
+tpm_rate_advanced = TPM_LIMIT_ADVANCED / 60  # tokens per second
 
-
-
+rpm_bucket_basic = TokenBucket(rate=rpm_rate_basic, capacity=RPM_LIMIT_BASIC)
+tpm_bucket_basic = TokenBucket(rate=tpm_rate_basic, capacity=TPM_LIMIT_BASIC)
 
-
-
+rpm_bucket_advanced = TokenBucket(rate=rpm_rate_advanced, capacity=RPM_LIMIT_ADVANCED)
+tpm_bucket_advanced = TokenBucket(rate=tpm_rate_advanced, capacity=TPM_LIMIT_ADVANCED)
+
+# Queues for LLM tasks
+llm_queue_basic = Queue()
+llm_queue_advanced = Queue()
 
 def categorize_based_on_summary(summary, url):
     """
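Note: the TokenBucket class itself sits in an unchanged part of app.py, so only its constructor arguments and the 0.05 s polling sleep are visible in this diff. Below is a minimal sketch consistent with how it is called in this commit (TokenBucket(rate=..., capacity=...) and wait_for_token(tokens=...)); the refill and locking details are assumptions.

import threading
import time

class TokenBucket:
    # Refills at `rate` tokens per second, up to `capacity` tokens.
    def __init__(self, rate, capacity):
        self.rate = rate
        self.capacity = capacity
        self.tokens = capacity
        self.last_refill = time.monotonic()
        self.lock = threading.Lock()

    def _refill(self):
        now = time.monotonic()
        self.tokens = min(self.capacity, self.tokens + (now - self.last_refill) * self.rate)
        self.last_refill = now

    def wait_for_token(self, tokens=1):
        # Block until enough tokens have accumulated, polling as in the diff above.
        while True:
            with self.lock:
                self._refill()
                if self.tokens >= tokens:
                    self.tokens -= tokens
                    return
            time.sleep(0.05)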
@@ -229,20 +245,20 @@ def get_page_metadata(soup):
 
     return metadata
 
-def llm_worker():
+def llm_worker(queue, model_name, api_key, rpm_bucket, tpm_bucket, batch_size):
     """
     Worker thread to process LLM tasks from the queue while respecting rate limits.
     """
-    logger.info("LLM worker started.")
+    logger.info(f"LLM worker for {model_name} started.")
     while True:
         batch = []
         try:
-            # Collect bookmarks up to
-            while len(batch) <
-                bookmark =
+            # Collect bookmarks up to batch_size
+            while len(batch) < batch_size:
+                bookmark = queue.get(timeout=1)
                 if bookmark is None:
                     # Shutdown signal
-                    logger.info("LLM worker shutting down.")
+                    logger.info(f"LLM worker for {model_name} shutting down.")
                     return
                 if not bookmark.get('dead_link') and not bookmark.get('slow_link'):
                     batch.append(bookmark)
@@ -250,7 +266,7 @@ def llm_worker():
                     # Skip processing for dead or slow links
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
-
+                    queue.task_done()
 
         except Empty:
             pass  # No more bookmarks at the moment
@@ -288,8 +304,11 @@ Provide summaries and categories for the following bookmarks:
             prompt += "}\n\n"
             prompt += "Now, provide the summaries and categories for the bookmarks listed above."
 
+            # Set API key and model
+            openai.api_key = api_key
+
             response = openai.ChatCompletion.create(
-                model=
+                model=model_name,
                 messages=[
                     {"role": "user", "content": prompt}
                 ],
@@ -330,28 +349,36 @@ Provide summaries and categories for the following bookmarks:
                 logger.info(f"Processed bookmark: {url}")
 
             except json.JSONDecodeError:
-                logger.error("Failed to parse JSON response from
+                logger.error(f"Failed to parse JSON response from {model_name}. Using fallback methods.")
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = categorize_based_on_summary(bookmark.get('summary', ''), bookmark['url'])
                     bookmark['category'] = validate_category(bookmark)
 
             except Exception as e:
-                logger.error(f"Error processing LLM response: {e}", exc_info=True)
+                logger.error(f"Error processing LLM response from {model_name}: {e}", exc_info=True)
                 for bookmark in batch:
                     bookmark['summary'] = 'No summary available.'
                     bookmark['category'] = 'Uncategorized'
 
         except openai.error.RateLimitError:
-            logger.warning(f"
-            # Re-enqueue the entire batch
+            logger.warning(f"Rate limit reached for {model_name}. Fallback to other model if possible.")
+            # Re-enqueue the entire batch to the other queue
+            if model_name == MODEL_BASIC:
+                target_queue = llm_queue_advanced
+                target_model = MODEL_ADVANCED
+                target_api_key = GROQ_API_KEY_ADVANCED
+            else:
+                target_queue = llm_queue_basic
+                target_model = MODEL_BASIC
+                target_api_key = GROQ_API_KEY_BASIC
+
             for bookmark in batch:
-
-
-                continue  # Skip the rest and retry
+                logger.info(f"Reassigning bookmark {bookmark['url']} to {target_model} due to rate limit.")
+                target_queue.put(bookmark)
 
         except Exception as e:
-            logger.error(f"Error during LLM processing: {e}", exc_info=True)
+            logger.error(f"Error during LLM processing for {model_name}: {e}", exc_info=True)
             for bookmark in batch:
                 bookmark['summary'] = 'No summary available.'
                 bookmark['category'] = 'Uncategorized'
@@ -359,7 +386,7 @@ Provide summaries and categories for the following bookmarks:
         finally:
             # Mark all bookmarks in the batch as done
             for _ in batch:
-
+                queue.task_done()
 
 def parse_bookmarks(file_content):
     """
@@ -550,13 +577,19 @@ def process_uploaded_file(file, state_bookmarks):
     with ThreadPoolExecutor(max_workers=10) as executor:
         executor.map(fetch_url_info, bookmarks)
 
-    # Enqueue bookmarks for LLM processing
+    # Enqueue bookmarks for LLM processing based on task complexity
     logger.info("Enqueuing bookmarks for LLM processing")
     for bookmark in bookmarks:
-
+        # Determine task complexity
+        # Example logic: Assign to basic model if title is short, else to advanced
+        if len(bookmark['title']) < 50:
+            llm_queue_basic.put(bookmark)
+        else:
+            llm_queue_advanced.put(bookmark)
 
     # Wait until all LLM tasks are completed
-
+    llm_queue_basic.join()
+    llm_queue_advanced.join()
     logger.info("All LLM tasks have been processed")
 
     try:
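Note: Queue.join() only returns once every put() has been matched by a task_done(), which is why llm_worker calls task_done() both for skipped dead/slow bookmarks and, in its finally block, for each bookmark of a processed batch. A standalone illustration of that contract (hypothetical names, not from app.py):

import threading
from queue import Queue

q = Queue()

def worker():
    while True:
        item = q.get()        # one get() per put()
        try:
            pass              # process item here
        finally:
            q.task_done()     # exactly one task_done() per get()

threading.Thread(target=worker, daemon=True).start()
for item in range(3):
    q.put(item)
q.join()                      # returns only after three task_done() calls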
@@ -685,11 +718,12 @@ def chatbot_response(user_query, chat_history):
     chat_history.append({"role": "user", "content": user_query})
 
     # Rate Limiting
-
+    # Assuming the chatbot uses the advanced model
+    rpm_bucket_advanced.wait_for_token()
     # Estimate tokens: prompt + max_tokens
     # Here, we assume max_tokens=300 per chatbot response
     total_tokens = 300  # Adjust based on actual usage
-
+    tpm_bucket_advanced.wait_for_token(tokens=total_tokens)
 
     query_vector = embedding_model.encode([user_query]).astype('float32')
     k = 5
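Note: with the limits defined earlier, tpm_rate_advanced = 30000 / 60 = 500 tokens per second, so once the token bucket is empty each 300-token chatbot call waits roughly 300 / 500 = 0.6 seconds, and RPM_LIMIT_ADVANCED = 30 caps the chatbot at an average of one request every two seconds.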
@@ -717,8 +751,10 @@ Bookmarks:
 Provide a concise and helpful response.
 '''
 
+        # Use the advanced model for chatbot responses
+        openai.api_key = GROQ_API_KEY_ADVANCED
         response = openai.ChatCompletion.create(
-            model=
+            model=MODEL_ADVANCED,  # Retaining the original model
             messages=[
                 {"role": "user", "content": prompt}
             ],
@@ -734,7 +770,7 @@ Provide a concise and helpful response.
 
     except openai.error.RateLimitError:
         wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
-        logger.warning(f"Rate limit reached. Waiting for {wait_time} seconds before retrying...")
+        logger.warning(f"Rate limit reached for chatbot. Waiting for {wait_time} seconds before retrying...")
         time.sleep(wait_time)
         return chatbot_response(user_query, chat_history)
     except Exception as e:
@@ -923,8 +959,19 @@ Navigate through the tabs to explore each feature in detail.
         print(f"Error building Gradio app: {e}")
 
 if __name__ == "__main__":
-    # Start the LLM worker
-
-
+    # Start the LLM worker threads before launching the app
+    llm_thread_basic = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_basic, MODEL_BASIC, GROQ_API_KEY_BASIC, rpm_bucket_basic, tpm_bucket_basic, BATCH_SIZE_BASIC),
+        daemon=True
+    )
+    llm_thread_advanced = threading.Thread(
+        target=llm_worker,
+        args=(llm_queue_advanced, MODEL_ADVANCED, GROQ_API_KEY_ADVANCED, rpm_bucket_advanced, tpm_bucket_advanced, BATCH_SIZE_ADVANCED),
+        daemon=True
+    )
+
+    llm_thread_basic.start()
+    llm_thread_advanced.start()
 
     build_app()
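Note: llm_worker treats a None item as a shutdown signal, but nothing in this commit enqueues one; because both threads are started with daemon=True they are simply terminated when the main process exits. If a graceful shutdown were wanted, a sketch (not part of this commit) would be:

# Hypothetical shutdown sequence; this commit relies on daemon threads instead.
llm_queue_basic.put(None)        # sentinel makes the basic worker return
llm_queue_advanced.put(None)     # sentinel makes the advanced worker return
llm_thread_basic.join()
llm_thread_advanced.join()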