siddhartharya committed
Commit 3c7c197 · verified · 1 Parent(s): 4d7269c

Update app.py

Files changed (1):
  app.py +45 -64
app.py CHANGED
@@ -8,18 +8,17 @@ import numpy as np
 import requests
 import time
 import re
-import base64
 import logging
 import os
 import sys
-import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
 import threading
+from html import escape

 # Import OpenAI library
 import openai

-# Suppress only the single warning from urllib3 needed.
+# Suppress specific warnings
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
@@ -74,14 +73,15 @@ CATEGORIES = [
     "Uncategorized",
 ]

-# Set up Groq Cloud API key and base URL
-GROQ_API_KEY = os.getenv('GROQ_API_KEY')
+# Set up OpenAI API key and base URL
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

-if not GROQ_API_KEY:
-    logger.error("GROQ_API_KEY environment variable not set.")
+if not OPENAI_API_KEY:
+    logger.error("OPENAI_API_KEY environment variable not set.")

-openai.api_key = GROQ_API_KEY
-openai.api_base = "https://api.groq.com/openai/v1"  # Ensure this is the correct base URL
+openai.api_key = OPENAI_API_KEY
+# If you're using a custom API base, uncomment and set it
+# openai.api_base = "https://api.your-provider.com/v1"

 # Initialize global variables for rate limiting
 api_lock = threading.Lock()
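This hunk swaps the module-level configuration from Groq's OpenAI-compatible endpoint to the default OpenAI one. The `openai.api_base` / `openai.ChatCompletion` / `openai.error` names used throughout this file belong to the pre-1.0 `openai` SDK (all three were removed in 1.x). A self-contained sketch of that interface, for orientation:

    import os
    import openai  # requires openai<1.0

    openai.api_key = os.getenv('OPENAI_API_KEY')
    # Any OpenAI-compatible backend can be targeted via api_base, e.g. the
    # Groq endpoint the removed lines pointed at:
    # openai.api_base = "https://api.groq.com/openai/v1"

    response = openai.ChatCompletion.create(
        model='gpt-4',  # model name taken from the new code below
        messages=[{"role": "user", "content": "ping"}],
        max_tokens=10,
    )
    print(response['choices'][0]['message']['content'])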
@@ -178,7 +178,7 @@ def generate_summary_and_assign_category(bookmark):
             time.sleep(sleep_duration)
         last_api_call_time = time.time()

-        # Existing logic to prepare the prompt
+        # Prepare the prompt
         html_content = bookmark.get('html_content', '')
         soup = BeautifulSoup(html_content, 'html.parser')
         metadata = get_page_metadata(soup)
@@ -208,7 +208,7 @@ def generate_summary_and_assign_category(bookmark):
         else:
             use_prior_knowledge = False

-        # Shortened prompts
+        # Craft the prompt based on content availability
         if use_prior_knowledge:
             prompt = f"""
 You are a knowledgeable assistant with up-to-date information as of 2023.
@@ -237,27 +237,13 @@ Summary: [Your summary]
 Category: [One category]
 """

-        # Estimate tokens
-        def estimate_tokens(text):
-            return len(text) / 4  # Approximate token estimation
-
-        prompt_tokens = estimate_tokens(prompt)
-        max_tokens = 150  # Adjusted from 200
-        total_tokens = prompt_tokens + max_tokens
-
-        # Calculate required delay
-        tokens_per_minute = 40000
-        tokens_per_second = tokens_per_minute / 60
-        required_delay = total_tokens / tokens_per_second
-        sleep_time = max(required_delay, 2)  # Ensure at least 2 seconds
-
-        # Call the LLM via Groq Cloud API
+        # Call the LLM via OpenAI API
         response = openai.ChatCompletion.create(
-            model='llama-3.1-70b-versatile',  # Using the specified model
+            model='gpt-4',  # Ensure you're using a valid and accessible model
             messages=[
                 {"role": "user", "content": prompt}
             ],
-            max_tokens=int(max_tokens),
+            max_tokens=150,
             temperature=0.5,
         )
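The removed block throttled client-side: it approximated token usage at ~4 characters per token and converted a 40,000 tokens-per-minute budget into a per-request sleep. Pulled out of the diff as a standalone helper (the function and parameter names are mine), that logic was:

    def required_delay(prompt: str, max_tokens: int = 150,
                       tokens_per_minute: int = 40000) -> float:
        """Estimate a request's token cost and turn it into a sleep
        (seconds) that keeps usage under the per-minute budget."""
        prompt_tokens = len(prompt) / 4              # rough token estimate
        total_tokens = prompt_tokens + max_tokens
        tokens_per_second = tokens_per_minute / 60
        return max(total_tokens / tokens_per_second, 2)  # at least 2 s

    # usage: time.sleep(required_delay(prompt)) around each API call

The new code drops this in favor of the `api_lock`-based pacing already done at the top of the function.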
@@ -283,7 +269,7 @@ Category: [One category]
         else:
             bookmark['category'] = 'Uncategorized'

-        # Simple keyword-based validation (Optional)
+        # Optional: Simple keyword-based validation
         summary_lower = bookmark['summary'].lower()
         url_lower = bookmark['url'].lower()
         if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
@@ -292,7 +278,6 @@ Category: [One category]
             bookmark['category'] = 'Reference and Knowledge Bases'

         logger.info("Successfully generated summary and assigned category")
-        time.sleep(sleep_time)
         break  # Exit the retry loop upon success

     except openai.error.RateLimitError as e:
@@ -439,7 +424,6 @@ def display_bookmarks():
         category = bookmark.get('category', 'Uncategorized')

         # Escape HTML content to prevent XSS attacks
-        from html import escape
         title = escape(title)
         url = escape(url)
         summary = escape(summary)
@@ -493,12 +477,12 @@ def process_uploaded_file(file, state_bookmarks):

     # Fetch bookmark info concurrently
     logger.info("Fetching URL info concurrently")
-    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjusted max_workers as needed
+    with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
         executor.map(fetch_url_info, bookmarks)

     # Process bookmarks concurrently with LLM calls
     logger.info("Processing bookmarks with LLM concurrently")
-    with ThreadPoolExecutor(max_workers=1) as executor:  # Reduced max_workers to 1 to serialize API calls
+    with ThreadPoolExecutor(max_workers=1) as executor:  # Serialize API calls to respect rate limits
         executor.map(generate_summary_and_assign_category, bookmarks)

     try:
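The two executors form a small pipeline: network fetches fan out across ten threads, while LLM calls run on a single worker so they reach the API strictly one at a time. A minimal runnable sketch of the same pattern, with stub workers standing in for fetch_url_info and generate_summary_and_assign_category:

    import time
    from concurrent.futures import ThreadPoolExecutor

    def fetch(item):        # stand-in for fetch_url_info (I/O-bound)
        time.sleep(0.1)
        item['fetched'] = True

    def summarize(item):    # stand-in for the rate-limited LLM call
        time.sleep(0.1)
        item['summary'] = 'stub'

    items = [{'url': f'https://example.com/{i}'} for i in range(5)]

    with ThreadPoolExecutor(max_workers=10) as ex:  # fan out I/O
        list(ex.map(fetch, items))                  # consuming the map surfaces worker errors

    with ThreadPoolExecutor(max_workers=1) as ex:   # one worker => calls run in order
        list(ex.map(summarize, items))

One caveat with the app's bare `executor.map(...)` calls: the `with` block waits for all tasks to finish, but exceptions raised inside workers are only re-raised when the map's results are iterated, so unconsumed failures never propagate to the caller.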
@@ -531,12 +515,15 @@ def delete_selected_bookmarks(selected_indices, state_bookmarks):
     ids_to_delete = []
     indices_to_delete = []
     for s in selected_indices:
-        idx = int(s.split('.')[0]) - 1
-        if 0 <= idx < len(bookmarks):
-            bookmark_id = bookmarks[idx]['id']
-            ids_to_delete.append(bookmark_id)
-            indices_to_delete.append(idx)
-            logger.info(f"Deleting bookmark at index {idx + 1}")
+        try:
+            idx = int(s.split('.')[0]) - 1
+            if 0 <= idx < len(bookmarks):
+                bookmark_id = bookmarks[idx]['id']
+                ids_to_delete.append(bookmark_id)
+                indices_to_delete.append(idx)
+                logger.info(f"Deleting bookmark at index {idx + 1}")
+        except (ValueError, IndexError):
+            logger.warning(f"Invalid selection format: {s}")

     # Remove vectors from FAISS index
     if faiss_index is not None and ids_to_delete:
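Deleting by id from FAISS only works when vectors were added under explicit ids. A minimal sketch, assuming the app wraps its index in faiss.IndexIDMap (the index construction itself isn't part of this diff):

    import numpy as np
    import faiss

    dim = 384  # embedding width; depends on the model actually used
    faiss_index = faiss.IndexIDMap(faiss.IndexFlatL2(dim))

    # Vectors are stored under explicit int64 ids...
    vecs = np.random.rand(3, dim).astype('float32')
    faiss_index.add_with_ids(vecs, np.array([10, 11, 12], dtype='int64'))

    # ...so they can later be dropped by id, as delete_selected_bookmarks does
    faiss_index.remove_ids(np.array([11], dtype='int64'))
    print(faiss_index.ntotal)  # 2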
@@ -565,11 +552,20 @@ def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks):
     if not new_category:
         return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), display_bookmarks(), state_bookmarks

-    indices = [int(s.split('.')[0]) - 1 for s in selected_indices]
+    indices = []
+    for s in selected_indices:
+        try:
+            idx = int(s.split('.')[0]) - 1
+            if 0 <= idx < len(bookmarks):
+                indices.append(idx)
+            else:
+                logger.warning(f"Index out of range: {idx + 1}")
+        except ValueError:
+            logger.warning(f"Invalid selection format: {s}")
+
     for idx in indices:
-        if 0 <= idx < len(bookmarks):
-            bookmarks[idx]['category'] = new_category
-            logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
+        bookmarks[idx]['category'] = new_category
+        logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")

     message = "✏️ Category updated for selected bookmarks."
     logger.info(message)
@@ -589,7 +585,7 @@ def export_bookmarks():
     """
     if not bookmarks:
         logger.warning("No bookmarks to export")
-        return None  # Return None instead of a message
+        return None  # Return None to indicate no file

     try:
         logger.info("Exporting bookmarks to HTML")
@@ -639,7 +635,7 @@ def chatbot_response(user_query, chat_history):
             time.sleep(sleep_duration)
         last_api_call_time = time.time()

-    # Existing logic to encode the query and search the FAISS index
+    # Encode the query and search the FAISS index
     query_vector = embedding_model.encode([user_query]).astype('float32')
     k = 5  # Number of results to return
     distances, ids = faiss_index.search(query_vector, k)
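For reference, the retrieval step pairs a sentence-transformers encoder with FAISS. A self-contained sketch (the embedding model name is an assumption; the diff doesn't show how embedding_model is constructed):

    import numpy as np
    import faiss
    from sentence_transformers import SentenceTransformer

    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # 384-dim; assumed model
    faiss_index = faiss.IndexIDMap(faiss.IndexFlatL2(384))

    docs = ["python tutorials", "gardening tips"]
    vecs = embedding_model.encode(docs).astype('float32')
    faiss_index.add_with_ids(vecs, np.array([1, 2], dtype='int64'))

    query_vector = embedding_model.encode(["learn python"]).astype('float32')
    distances, ids = faiss_index.search(query_vector, 2)
    # ids[0] holds the stored ids, padded with -1 when there are fewer than k hits;
    # chatbot_response maps these back to bookmark records
    print([int(i) for i in ids[0] if i != -1])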
@@ -660,7 +656,7 @@ def chatbot_response(user_query, chat_history):
         for bookmark in matching_bookmarks
     ])

-    # Use the LLM via Groq Cloud API to generate a response
+    # Craft the prompt for the LLM
     prompt = f"""
 A user asked: "{user_query}"
 Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
@@ -669,33 +665,18 @@ Bookmarks:
 Provide a concise and helpful response.
 """

-    # Estimate tokens
-    def estimate_tokens(text):
-        return len(text) / 4  # Approximate token estimation
-
-    prompt_tokens = estimate_tokens(prompt)
-    max_tokens = 300  # Adjust as needed
-    total_tokens = prompt_tokens + max_tokens
-
-    # Calculate required delay
-    tokens_per_minute = 40000
-    tokens_per_second = tokens_per_minute / 60
-    required_delay = total_tokens / tokens_per_second
-    sleep_time = max(required_delay, 2)  # Ensure at least 2 seconds
-
-    # Call the LLM via Groq Cloud API
+    # Call the LLM via OpenAI API
     response = openai.ChatCompletion.create(
-        model='llama-3.1-70b-versatile',  # Using the specified model
+        model='gpt-4',  # Ensure you're using a valid and accessible model
         messages=[
             {"role": "user", "content": prompt}
         ],
-        max_tokens=int(max_tokens),
+        max_tokens=300,
         temperature=0.7,
     )

     answer = response['choices'][0]['message']['content'].strip()
     logger.info("Chatbot response generated")
-    time.sleep(sleep_time)

     # Append the assistant's response to chat history
     chat_history.append({"role": "assistant", "content": answer})
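The `except openai.error.RateLimitError` visible in the earlier hunk implies a retry loop around these calls, though the loop itself isn't shown in this diff. A hedged sketch of what such a wrapper can look like with the pre-1.0 SDK (the function name and backoff schedule are mine):

    import time
    import openai  # pre-1.0 SDK: exposes openai.error.RateLimitError

    def call_with_retries(prompt, retries=3):
        for attempt in range(retries):
            try:
                return openai.ChatCompletion.create(
                    model='gpt-4',
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=300,
                    temperature=0.7,
                )
            except openai.error.RateLimitError:
                time.sleep(2 ** attempt)  # back off: 1 s, 2 s, 4 s
        raise RuntimeError("rate limit persisted after retries")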
 