Spaces:

siddhartharya
/

Bookmark-Manager

Sleeping

App Files Community

siddhartharya committed on Nov 26, 2024

Commit

05de921

•

1 Parent(s): 6e6eade

Update app.py

Browse files

Files changed (1) hide show

app.py +114 -933

app.py CHANGED Viewed

@@ -1,977 +1,158 @@
-# app.py
-import gradio as gr
 from bs4 import BeautifulSoup
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
-import requests
-import time
-import re
-import logging
-import os
-import sys
-import threading
-from queue import Queue, Empty
-import json
 from concurrent.futures import ThreadPoolExecutor
-# Import OpenAI library
-import openai
-# Suppress only the single warning from urllib3 needed.
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-# Set up logging to output to the console
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
-# Create a console handler
-console_handler = logging.StreamHandler(sys.stdout)
-console_handler.setLevel(logging.INFO)
-# Create a formatter and set it for the handler
-formatter = logging.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s')
-console_handler.setFormatter(formatter)
-# Add the handler to the logger
-logger.addHandler(console_handler)
-# Initialize variables and models
-logger.info("Initializing variables and models")
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 faiss_index = None
 bookmarks = []
-fetch_cache = {}
-# Lock for thread-safe operations
-lock = threading.Lock()
-# Define the categories
 CATEGORIES = [
-    "Social Media",
-    "News and Media",
-    "Education and Learning",
-    "Entertainment",
-    "Shopping and E-commerce",
-    "Finance and Banking",
-    "Technology",
-    "Health and Fitness",
-    "Travel and Tourism",
-    "Food and Recipes",
-    "Sports",
-    "Arts and Culture",
-    "Government and Politics",
-    "Business and Economy",
-    "Science and Research",
-    "Personal Blogs and Journals",
-    "Job Search and Careers",
-    "Music and Audio",
-    "Videos and Movies",
-    "Reference and Knowledge Bases",
-    "Dead Link",
-    "Uncategorized",
 ]
-# Set up Groq Cloud API keys and base URLs
-GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
-GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')
-if not GROQ_API_KEY_BASIC:
-    logger.error("GROQ_API_KEY_BASIC environment variable not set.")
-if not GROQ_API_KEY_ADVANCED:
-    logger.error("GROQ_API_KEY_ADVANCED environment variable not set.")
-# Define models
-MODEL_BASIC = 'llama-3.1-8b-instant'
-MODEL_ADVANCED = 'llama-3.1-70b-versatile'
-# Rate Limiter Configuration
-RPM_LIMIT_BASIC = 60       # Requests per minute for basic model
-TPM_LIMIT_BASIC = 60000    # Tokens per minute for basic model
-RPM_LIMIT_ADVANCED = 30    # Requests per minute for advanced model
-TPM_LIMIT_ADVANCED = 30000 # Tokens per minute for advanced model
-BATCH_SIZE_BASIC = 5       # Number of bookmarks per batch for basic model
-BATCH_SIZE_ADVANCED = 3    # Number of bookmarks per batch for advanced model
-# Implementing a Token Bucket Rate Limiter
-class TokenBucket:
-    def __init__(self, rate, capacity):
-        self.rate = rate  # tokens per second
-        self.capacity = capacity
-        self.tokens = capacity
-        self.timestamp = time.time()
-        self.lock = threading.Lock()
-    def consume(self, tokens=1):
-        with self.lock:
-            now = time.time()
-            elapsed = now - self.timestamp
-            refill = elapsed * self.rate
-            self.tokens = min(self.capacity, self.tokens + refill)
-            self.timestamp = now
-            if self.tokens >= tokens:
-                self.tokens -= tokens
-                return True
-            else:
-                return False
-    def wait_for_token(self, tokens=1):
-        while not self.consume(tokens):
-            time.sleep(0.05)
-# Initialize rate limiters
-rpm_rate_basic = RPM_LIMIT_BASIC / 60  # tokens per second
-tpm_rate_basic = TPM_LIMIT_BASIC / 60  # tokens per second
-rpm_rate_advanced = RPM_LIMIT_ADVANCED / 60  # tokens per second
-tpm_rate_advanced = TPM_LIMIT_ADVANCED / 60  # tokens per second
-rpm_bucket_basic = TokenBucket(rate=rpm_rate_basic, capacity=RPM_LIMIT_BASIC)
-tpm_bucket_basic = TokenBucket(rate=tpm_rate_basic, capacity=TPM_LIMIT_BASIC)
-rpm_bucket_advanced = TokenBucket(rate=rpm_rate_advanced, capacity=RPM_LIMIT_ADVANCED)
-tpm_bucket_advanced = TokenBucket(rate=tpm_rate_advanced, capacity=TPM_LIMIT_ADVANCED)
-# Queues for LLM tasks
-llm_queue_basic = Queue()
-llm_queue_advanced = Queue()
-def categorize_based_on_summary(summary, url):
-    """
-    Assign category based on keywords in the summary or URL.
-    """
-    summary_lower = summary.lower()
-    url_lower = url.lower()
-    if 'social media' in summary_lower or 'twitter' in summary_lower or 'x.com' in url_lower:
-        return 'Social Media'
-    elif 'wikipedia' in url_lower:
-        return 'Reference and Knowledge Bases'
-    elif 'cloud computing' in summary_lower or 'aws' in summary_lower:
-        return 'Technology'
-    elif 'news' in summary_lower or 'media' in summary_lower:
-        return 'News and Media'
-    elif 'education' in summary_lower or 'learning' in summary_lower:
-        return 'Education and Learning'
-    # Add more conditions as needed
-    else:
-        return 'Uncategorized'
-def validate_category(bookmark):
-    """
-    Further validate and adjust the category if needed.
-    """
-    # Example: Specific cases based on URL
-    url_lower = bookmark['url'].lower()
-    if 'facebook' in url_lower or 'x.com' in url_lower:
-        return 'Social Media'
-    elif 'wikipedia' in url_lower:
-        return 'Reference and Knowledge Bases'
-    elif 'aws.amazon.com' in url_lower:
-        return 'Technology'
-    # Add more specific cases as needed
-    else:
-        return bookmark['category']
-def extract_main_content(soup):
-    """
-    Extract the main content from a webpage while filtering out boilerplate content.
-    """
-    if not soup:
-        return ""
-    # Remove unwanted elements
-    for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside', 'form', 'noscript']):
-        element.decompose()
-    # Extract text from <p> tags
-    p_tags = soup.find_all('p')
-    if p_tags:
-        content = ' '.join([p.get_text(strip=True, separator=' ') for p in p_tags])
-    else:
-        # Fallback to body content
-        content = soup.get_text(separator=' ', strip=True)
-    # Clean up the text
-    content = re.sub(r'\s+', ' ', content)
-    # Truncate content to a reasonable length (e.g., 1500 words)
-    words = content.split()
-    if len(words) > 1500:
-        content = ' '.join(words[:1500])
-    return content
-def get_page_metadata(soup):
-    """
-    Extract metadata from the webpage including title, description, and keywords.
-    """
-    metadata = {
-        'title': '',
-        'description': '',
-        'keywords': ''
-    }
-    if not soup:
-        return metadata
-    # Get title
-    title_tag = soup.find('title')
-    if title_tag and title_tag.string:
-        metadata['title'] = title_tag.string.strip()
-    # Get meta description
-    meta_desc = (
-        soup.find('meta', attrs={'name': 'description'}) or
-        soup.find('meta', attrs={'property': 'og:description'}) or
-        soup.find('meta', attrs={'name': 'twitter:description'})
-    )
-    if meta_desc:
-        metadata['description'] = meta_desc.get('content', '').strip()
-    # Get meta keywords
-    meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
-    if meta_keywords:
-        metadata['keywords'] = meta_keywords.get('content', '').strip()
-    # Get OG title if main title is empty
-    if not metadata['title']:
-        og_title = soup.find('meta', attrs={'property': 'og:title'})
-        if og_title:
-            metadata['title'] = og_title.get('content', '').strip()
-    return metadata
-def llm_worker(queue, model_name, api_key, rpm_bucket, tpm_bucket, batch_size):
-    """
-    Worker thread to process LLM tasks from the queue while respecting rate limits.
-    """
-    logger.info(f"LLM worker for {model_name} started.")
-    while True:
-        batch = []
-        try:
-            # Collect bookmarks up to batch_size
-            while len(batch) < batch_size:
-                bookmark = queue.get(timeout=1)
-                if bookmark is None:
-                    # Shutdown signal
-                    logger.info(f"LLM worker for {model_name} shutting down.")
-                    return
-                if not bookmark.get('dead_link') and not bookmark.get('slow_link'):
-                    batch.append(bookmark)
-                else:
-                    # Skip processing for dead or slow links
-                    bookmark['summary'] = 'No summary available.'
-                    bookmark['category'] = 'Uncategorized'
-                    queue.task_done()
-        except Empty:
-            pass  # No more bookmarks at the moment
-        if batch:
-            try:
-                # Rate Limiting
-                rpm_bucket.wait_for_token()
-                # Estimate tokens: prompt + max_tokens
-                # Here, we assume max_tokens=150 per bookmark
-                total_tokens = 150 * len(batch)
-                tpm_bucket.wait_for_token(tokens=total_tokens)
-                # Prepare prompt
-                prompt = '''
-You are an assistant that creates concise webpage summaries and assigns categories.
-Provide summaries and categories for the following bookmarks:
-'''
-                for idx, bookmark in enumerate(batch, 1):
-                    prompt += f'Bookmark {idx}:\nURL: {bookmark["url"]}\nTitle: {bookmark["title"]}\n\n'
-                # Corrected f-string without backslashes
-                categories_str = ', '.join([f'"{cat}"' for cat in CATEGORIES])
-                prompt += f"Categories:\n{categories_str}\n\n"
-                prompt += "Format your response as a JSON object where each key is the bookmark URL and the value is another JSON object containing 'summary' and 'category'.\n\n"
-                prompt += "Example:\n"
-                prompt += "{\n"
-                prompt += '  "https://example.com": {\n'
-                prompt += '    "summary": "This is an example summary.",\n'
-                prompt += '    "category": "Technology"\n'
-                prompt += "  }\n"
-                prompt += "}\n\n"
-                prompt += "Now, provide the summaries and categories for the bookmarks listed above."
-                # Set API key and model
-                openai.api_key = api_key
-                response = openai.ChatCompletion.create(
-                    model=model_name,
-                    messages=[
-                        {"role": "user", "content": prompt}
-                    ],
-                    max_tokens=150 * len(batch),
-                    temperature=0.5,
-                )
-                content = response['choices'][0]['message']['content'].strip()
-                if not content:
-                    raise ValueError("Empty response received from the model.")
-                # Parse JSON response
-                try:
-                    json_response = json.loads(content)
-                    for bookmark in batch:
-                        url = bookmark['url']
-                        if url in json_response:
-                            summary = json_response[url].get('summary', '').strip()
-                            category = json_response[url].get('category', '').strip()
-                            if not summary:
-                                summary = 'No summary available.'
-                            bookmark['summary'] = summary
-                            if category in CATEGORIES:
-                                bookmark['category'] = category
-                            else:
-                                # Fallback to keyword-based categorization
-                                bookmark['category'] = categorize_based_on_summary(summary, url)
-                        else:
-                            logger.warning(f"No data returned for {url}. Using fallback methods.")
-                            bookmark['summary'] = 'No summary available.'
-                            bookmark['category'] = 'Uncategorized'
-                        # Additional keyword-based validation
-                        bookmark['category'] = validate_category(bookmark)
-                        logger.info(f"Processed bookmark: {url}")
-                except json.JSONDecodeError:
-                    logger.error(f"Failed to parse JSON response from {model_name}. Using fallback methods.")
-                    for bookmark in batch:
-                        bookmark['summary'] = 'No summary available.'
-                        bookmark['category'] = categorize_based_on_summary(bookmark.get('summary', ''), bookmark['url'])
-                        bookmark['category'] = validate_category(bookmark)
-                except Exception as e:
-                    logger.error(f"Error processing LLM response from {model_name}: {e}", exc_info=True)
-                    for bookmark in batch:
-                        bookmark['summary'] = 'No summary available.'
-                        bookmark['category'] = 'Uncategorized'
-            except openai.error.RateLimitError:
-                logger.warning(f"Rate limit reached for {model_name}. Fallback to other model if possible.")
-                # Re-enqueue the entire batch to the other queue
-                if model_name == MODEL_BASIC:
-                    target_queue = llm_queue_advanced
-                    target_model = MODEL_ADVANCED
-                    target_api_key = GROQ_API_KEY_ADVANCED
-                else:
-                    target_queue = llm_queue_basic
-                    target_model = MODEL_BASIC
-                    target_api_key = GROQ_API_KEY_BASIC
-                for bookmark in batch:
-                    logger.info(f"Reassigning bookmark {bookmark['url']} to {target_model} due to rate limit.")
-                    target_queue.put(bookmark)
-            except Exception as e:
-                logger.error(f"Error during LLM processing for {model_name}: {e}", exc_info=True)
-                for bookmark in batch:
-                    bookmark['summary'] = 'No summary available.'
-                    bookmark['category'] = 'Uncategorized'
-            finally:
-                # Mark all bookmarks in the batch as done
-                for _ in batch:
-                    queue.task_done()
-def parse_bookmarks(file_content):
-    """
-    Parse bookmarks from HTML file.
-    """
-    logger.info("Parsing bookmarks")
-    try:
-        soup = BeautifulSoup(file_content, 'html.parser')
-        extracted_bookmarks = []
-        for link in soup.find_all('a'):
-            url = link.get('href')
-            title = link.text.strip()
-            if url and title:
-                if url.startswith('http://') or url.startswith('https://'):
-                    extracted_bookmarks.append({'url': url, 'title': title})
-                else:
-                    logger.info(f"Skipping non-http/https URL: {url}")
-        logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
-        return extracted_bookmarks
-    except Exception as e:
-        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
-        raise
 def fetch_url_info(bookmark):
-    """
-    Fetch information about a URL.
-    """
-    url = bookmark['url']
-    if url in fetch_cache:
-        with lock:
-            bookmark.update(fetch_cache[url])
-        return
     try:
-        logger.info(f"Fetching URL info for: {url}")
-        headers = {
-            'User-Agent': 'Mozilla/5.0',
-            'Accept-Language': 'en-US,en;q=0.9',
-        }
-        response = requests.get(url, headers=headers, timeout=5, verify=False, allow_redirects=True)
-        bookmark['etag'] = response.headers.get('ETag', 'N/A')
         bookmark['status_code'] = response.status_code
-        content = response.text
-        logger.info(f"Fetched content length for {url}: {len(content)} characters")
-        if response.status_code >= 500:
-            bookmark['dead_link'] = True
-            bookmark['description'] = ''
-            bookmark['html_content'] = ''
-            logger.warning(f"Dead link detected: {url} with status {response.status_code}")
-        else:
-            bookmark['dead_link'] = False
-            bookmark['html_content'] = content
-            bookmark['description'] = ''
-            logger.info(f"Fetched information for {url}")
-    except requests.exceptions.Timeout:
-        bookmark['dead_link'] = False
-        bookmark['etag'] = 'N/A'
-        bookmark['status_code'] = 'Timeout'
-        bookmark['description'] = ''
-        bookmark['html_content'] = ''
-        bookmark['slow_link'] = True
-        logger.warning(f"Timeout while fetching {url}. Marking as 'Slow'.")
     except Exception as e:
-        bookmark['dead_link'] = True
-        bookmark['etag'] = 'N/A'
-        bookmark['status_code'] = 'Error'
-        bookmark['description'] = ''
         bookmark['html_content'] = ''
-        logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
-    finally:
-        with lock:
-            fetch_cache[url] = {
-                'etag': bookmark.get('etag'),
-                'status_code': bookmark.get('status_code'),
-                'dead_link': bookmark.get('dead_link'),
-                'description': bookmark.get('description'),
-                'html_content': bookmark.get('html_content', ''),
-                'slow_link': bookmark.get('slow_link', False),
-            }
-def vectorize_and_index(bookmarks_list):
-    """
-    Create vector embeddings for bookmarks and build FAISS index with ID mapping.
-    """
-    global faiss_index
-    logger.info("Vectorizing summaries and building FAISS index")
-    try:
-        summaries = [bookmark['summary'] for bookmark in bookmarks_list]
-        embeddings = embedding_model.encode(summaries)
-        dimension = embeddings.shape[1]
-        index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
-        ids = np.array([bookmark['id'] for bookmark in bookmarks_list], dtype=np.int64)
-        index.add_with_ids(np.array(embeddings).astype('float32'), ids)
-        faiss_index = index
-        logger.info("FAISS index built successfully with IDs")
-        return index
-    except Exception as e:
-        logger.error(f"Error in vectorizing and indexing: {e}", exc_info=True)
-        raise
-def display_bookmarks():
-    """
-    Generate HTML display for bookmarks.
-    """
-    logger.info("Generating HTML display for bookmarks")
-    cards = ''
-    for i, bookmark in enumerate(bookmarks):
-        index = i + 1
-        if bookmark.get('dead_link'):
-            status = "❌ Dead Link"
-            card_style = "border: 2px solid red;"
-            text_style = "color: white;"
-            summary = 'No summary available.'
-        elif bookmark.get('slow_link'):
-            status = "⏳ Slow Response"
-            card_style = "border: 2px solid orange;"
-            text_style = "color: white;"
-            summary = bookmark.get('summary', 'No summary available.')
-        else:
-            status = "✅ Active"
-            card_style = "border: 2px solid green;"
-            text_style = "color: white;"
-            summary = bookmark.get('summary', 'No summary available.')
-        title = bookmark['title']
-        url = bookmark['url']
-        etag = bookmark.get('etag', 'N/A')
-        category = bookmark.get('category', 'Uncategorized')
-        # Escape HTML content to prevent XSS attacks
-        from html import escape
-        title = escape(title)
-        url = escape(url)
-        summary = escape(summary)
-        category = escape(category)
-        card_html = f'''
-        <div class="card" style="{card_style} padding: 10px; margin: 10px; border-radius: 5px; background-color: #1e1e1e;">
-            <div class="card-content">
-                <h3 style="{text_style}">{index}. {title} {status}</h3>
-                <p style="{text_style}"><strong>Category:</strong> {category}</p>
-                <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
-                <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
-                <p style="{text_style}"><strong>Summary:</strong> {summary}</p>
-            </div>
-        </div>
-        '''
-        cards += card_html
-    logger.info("HTML display generated")
-    return cards
-def process_uploaded_file(file, state_bookmarks):
-    """
-    Process the uploaded bookmarks file.
-    """
-    global bookmarks, faiss_index
-    logger.info("Processing uploaded file")
-    if file is None:
-        logger.warning("No file uploaded")
-        return "Please upload a bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
-    try:
-        file_content = file.decode('utf-8')
-    except UnicodeDecodeError as e:
-        logger.error(f"Error decoding the file: {e}", exc_info=True)
-        return "Error decoding the file. Please ensure it's a valid HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
-    try:
-        bookmarks = parse_bookmarks(file_content)
-    except Exception as e:
-        logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
-        return "Error parsing the bookmarks HTML file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
-    if not bookmarks:
-        logger.warning("No bookmarks found in the uploaded file")
-        return "No bookmarks found in the uploaded file.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
-    # Assign unique IDs to bookmarks
-    for idx, bookmark in enumerate(bookmarks):
-        bookmark['id'] = idx
-    # Fetch bookmark info concurrently
-    logger.info("Fetching URL info concurrently")
-    with ThreadPoolExecutor(max_workers=10) as executor:
-        executor.map(fetch_url_info, bookmarks)
-    # Enqueue bookmarks for LLM processing based on task complexity
-    logger.info("Enqueuing bookmarks for LLM processing")
-    for bookmark in bookmarks:
-        # Determine task complexity
-        # Example logic: Assign to basic model if title is short, else to advanced
-        if len(bookmark['title']) < 50:
-            llm_queue_basic.put(bookmark)
-        else:
-            llm_queue_advanced.put(bookmark)
-    # Wait until all LLM tasks are completed
-    llm_queue_basic.join()
-    llm_queue_advanced.join()
-    logger.info("All LLM tasks have been processed")
-    try:
-        faiss_index = vectorize_and_index(bookmarks)
-    except Exception as e:
-        logger.error(f"Error building FAISS index: {e}", exc_info=True)
-        return "Error building search index.", '', state_bookmarks, display_bookmarks(), gr.update(choices=[])
-    message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
-    logger.info(message)
-    # Generate displays and updates
-    bookmark_html = display_bookmarks()
-    choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-               for i, bookmark in enumerate(bookmarks)]
-    # Update state
-    state_bookmarks = bookmarks.copy()
-    return message, bookmark_html, state_bookmarks, bookmark_html, gr.update(choices=choices)
-def delete_selected_bookmarks(selected_indices, state_bookmarks):
-    """
-    Delete selected bookmarks and remove their vectors from the FAISS index.
-    """
-    global bookmarks, faiss_index
-    if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
-    ids_to_delete = []
-    indices_to_delete = []
-    for s in selected_indices:
-        idx = int(s.split('.')[0]) - 1
-        if 0 <= idx < len(bookmarks):
-            bookmark_id = bookmarks[idx]['id']
-            ids_to_delete.append(bookmark_id)
-            indices_to_delete.append(idx)
-            logger.info(f"Deleting bookmark at index {idx + 1}")
-    # Remove vectors from FAISS index
-    if faiss_index is not None and ids_to_delete:
-        faiss_index.remove_ids(np.array(ids_to_delete, dtype=np.int64))
-    # Remove bookmarks from the list (reverse order to avoid index shifting)
-    for idx in sorted(indices_to_delete, reverse=True):
-        bookmarks.pop(idx)
-    message = "🗑️ Selected bookmarks deleted successfully."
-    logger.info(message)
-    choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-               for i, bookmark in enumerate(bookmarks)]
-    # Update state
-    state_bookmarks = bookmarks.copy()
-    return message, gr.update(choices=choices), display_bookmarks()
-def edit_selected_bookmarks_category(selected_indices, new_category, state_bookmarks):
-    """
-    Edit category of selected bookmarks.
-    """
-    if not selected_indices:
-        return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
-    if not new_category:
-        return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks(), state_bookmarks
-    indices = [int(s.split('.')[0])-1 for s in selected_indices]
-    for idx in indices:
-        if 0 <= idx < len(bookmarks):
-            bookmarks[idx]['category'] = new_category
-            logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
-    message = "✏️ Category updated for selected bookmarks."
-    logger.info(message)
-    # Update choices and display
-    choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
-               for i, bookmark in enumerate(bookmarks)]
-    # Update state
-    state_bookmarks = bookmarks.copy()
-    return message, gr.update(choices=choices), display_bookmarks(), state_bookmarks
-def export_bookmarks():
-    """
-    Export bookmarks to an HTML file.
-    """
-    if not bookmarks:
-        logger.warning("No bookmarks to export")
-        return None
-    try:
-        logger.info("Exporting bookmarks to HTML")
-        soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
-        dl = soup.new_tag('DL')
-        for bookmark in bookmarks:
-            dt = soup.new_tag('DT')
-            a = soup.new_tag('A', href=bookmark['url'])
-            a.string = bookmark['title']
-            dt.append(a)
-            dl.append(dt)
-        soup.append(dl)
-        html_content = str(soup)
-        output_file = "exported_bookmarks.html"
-        with open(output_file, 'w', encoding='utf-8') as f:
-            f.write(html_content)
-        logger.info("Bookmarks exported successfully")
-        return output_file
-    except Exception as e:
-        logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
-        return None
-def chatbot_response(user_query, chat_history):
     """
-    Generate chatbot response using the FAISS index and embeddings.
-    """
-    if not bookmarks or faiss_index is None:
-        logger.warning("No bookmarks available for chatbot")
-        chat_history.append({"role": "assistant", "content": "⚠️ No bookmarks available. Please upload and process your bookmarks first."})
-        return chat_history
-    logger.info(f"Chatbot received query: {user_query}")
     try:
-        chat_history.append({"role": "user", "content": user_query})
-        # Rate Limiting
-        # Assuming the chatbot uses the advanced model
-        rpm_bucket_advanced.wait_for_token()
-        # Estimate tokens: prompt + max_tokens
-        # Here, we assume max_tokens=300 per chatbot response
-        total_tokens = 300  # Adjust based on actual usage
-        tpm_bucket_advanced.wait_for_token(tokens=total_tokens)
-        query_vector = embedding_model.encode([user_query]).astype('float32')
-        k = 5
-        distances, ids = faiss_index.search(query_vector, k)
-        ids = ids.flatten()
-        id_to_bookmark = {bookmark['id']: bookmark for bookmark in bookmarks}
-        matching_bookmarks = [id_to_bookmark.get(id) for id in ids if id in id_to_bookmark and id_to_bookmark.get(id).get('summary')]
-        if not matching_bookmarks:
-            answer = "No relevant bookmarks found for your query."
-            chat_history.append({"role": "assistant", "content": answer})
-            return chat_history
-        bookmarks_info = "\n".join([
-            f"Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}"
-            for bookmark in matching_bookmarks
-        ])
-        prompt = f'''
-A user asked: "{user_query}"
-Based on the bookmarks below, provide a helpful answer to the user's query, referencing the relevant bookmarks.
-Bookmarks:
-{bookmarks_info}
-Provide a concise and helpful response.
-'''
-        # Use the advanced model for chatbot responses
-        openai.api_key = GROQ_API_KEY_ADVANCED
-        response = openai.ChatCompletion.create(
-            model=MODEL_ADVANCED,  # Retaining the original model
-            messages=[
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=300,
-            temperature=0.7,
         )
-        answer = response['choices'][0]['message']['content'].strip()
-        logger.info("Chatbot response generated")
-        chat_history.append({"role": "assistant", "content": answer})
-        return chat_history
-    except openai.error.RateLimitError:
-        wait_time = int(60)  # Wait time can be adjusted or extracted from headers if available
-        logger.warning(f"Rate limit reached for chatbot. Waiting for {wait_time} seconds before retrying...")
-        time.sleep(wait_time)
-        return chatbot_response(user_query, chat_history)
     except Exception as e:
-        error_message = f"⚠️ Error processing your query: {str(e)}"
-        logger.error(error_message, exc_info=True)
-        chat_history.append({"role": "assistant", "content": error_message})
-        return chat_history
def build_app():
    """
    Build and launch the Gradio app.

    Lays out three tabs (upload/process, chat, manage), wires every button to
    its handler and then calls ``demo.launch(debug=True)``. Any exception
    raised while building or launching is logged with its traceback and
    printed; it is not re-raised.
    """
    try:
        logger.info("Building Gradio app")
        with gr.Blocks(css="app.css") as demo:
            # Initialize state
            state_bookmarks = gr.State([])
            # General Overview
            gr.Markdown("""
# 📚 SmartMarks - AI Browser Bookmarks Manager
Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
---
## 🚀 **How to Use SmartMarks**
SmartMarks is divided into three main sections:
1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
Navigate through the tabs to explore each feature in detail.
""")
            # Upload and Process Bookmarks Tab
            with gr.Tab("Upload and Process Bookmarks"):
                gr.Markdown("""
## 📂 **Upload and Process Bookmarks**
### 📝 **Steps to Upload and Process:**
1. **Upload Bookmarks File:**
   - Click on the **"📁 Upload Bookmarks HTML File"** button.
   - Select your browser's exported bookmarks HTML file from your device.
2. **Process Bookmarks:**
   - After uploading, click on the **"⚙️ Process Bookmarks"** button.
   - SmartMarks will parse your bookmarks, fetch additional information, generate summaries, and categorize each link based on predefined categories.
3. **View Processed Bookmarks:**
   - Once processing is complete, your bookmarks will be displayed in an organized and visually appealing format below.
""")
                upload = gr.File(label="📁 Upload Bookmarks HTML File", type='binary')
                process_button = gr.Button("⚙️ Process Bookmarks")
                output_text = gr.Textbox(label="✅ Output", interactive=False)
                bookmark_display = gr.HTML(label="📄 Processed Bookmarks")
            # Chat with Bookmarks Tab
            with gr.Tab("Chat with Bookmarks"):
                gr.Markdown("""
## 💬 **Chat with Bookmarks**
### 🤖 **How to Interact:**
1. **Enter Your Query:**
   - In the **"✍️ Ask about your bookmarks"** textbox, type your question or keyword related to your bookmarks.
2. **Submit Your Query:**
   - Click the **"📨 Send"** button to submit your query.
3. **Receive AI-Driven Responses:**
   - SmartMarks will analyze your query and provide relevant bookmarks that match your request.
4. **View Chat History:**
   - All your queries and the corresponding AI responses are displayed in the chat history.
""")
                chatbot = gr.Chatbot(label="💬 Chat with SmartMarks", type='messages')
                user_input = gr.Textbox(
                    label="✍️ Ask about your bookmarks",
                    placeholder="e.g., Do I have any bookmarks about AI?"
                )
                chat_button = gr.Button("📨 Send")
                chat_button.click(
                    chatbot_response,
                    inputs=[user_input, chatbot],
                    outputs=chatbot
                )
            # Manage Bookmarks Tab
            with gr.Tab("Manage Bookmarks"):
                gr.Markdown("""
## 🛠️ **Manage Bookmarks**
### 🗂️ **Features:**
1. **View Bookmarks:**
   - All your processed bookmarks are displayed here with their respective categories and summaries.
2. **Select Bookmarks:**
   - Use the checkboxes next to each bookmark to select one, multiple, or all bookmarks you wish to manage.
3. **Delete Selected Bookmarks:**
   - After selecting the desired bookmarks, click the **"🗑️ Delete Selected"** button to remove them from your list.
4. **Edit Categories:**
   - Select the bookmarks you want to re-categorize.
   - Choose a new category from the dropdown menu labeled **"🆕 New Category"**.
   - Click the **"✏️ Edit Category"** button to update their categories.
5. **Export Bookmarks:**
   - Click the **"💾 Export"** button to download your updated bookmarks as an HTML file.
6. **Refresh Bookmarks:**
   - Click the **"🔄 Refresh Bookmarks"** button to ensure the latest state is reflected in the display.
""")
                manage_output = gr.Textbox(label="🔄 Status", interactive=False)
                # CheckboxGroup for selecting bookmarks
                bookmark_selector = gr.CheckboxGroup(
                    label="✅ Select Bookmarks",
                    choices=[]
                )
                new_category = gr.Dropdown(
                    label="🆕 New Category",
                    choices=CATEGORIES,
                    value="Uncategorized"
                )
                bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
                with gr.Row():
                    delete_button = gr.Button("🗑️ Delete Selected")
                    edit_category_button = gr.Button("✏️ Edit Category")
                    export_button = gr.Button("💾 Export")
                    refresh_button = gr.Button("🔄 Refresh Bookmarks")
                download_link = gr.File(label="📥 Download Exported Bookmarks")
                # Connect all the button actions
                # The fourth output used to be `bookmark_display` a second
                # time, which left the Manage tab's HTML view empty until the
                # user pressed Refresh. Route it to `bookmark_display_manage`
                # so both views are filled by one processing run.
                # NOTE(review): assumes process_uploaded_file returns the
                # rendered bookmark HTML in positions 2 and 4 - confirm.
                process_button.click(
                    process_uploaded_file,
                    inputs=[upload, state_bookmarks],
                    outputs=[
                        output_text,
                        bookmark_display,
                        state_bookmarks,
                        bookmark_display_manage,
                        bookmark_selector,
                    ]
                )
                # NOTE(review): unlike the edit handler below, this wiring
                # does not write back to `state_bookmarks`; confirm the return
                # arity of delete_selected_bookmarks before changing it.
                delete_button.click(
                    delete_selected_bookmarks,
                    inputs=[bookmark_selector, state_bookmarks],
                    outputs=[manage_output, bookmark_selector, bookmark_display_manage]
                )
                edit_category_button.click(
                    edit_selected_bookmarks_category,
                    inputs=[bookmark_selector, new_category, state_bookmarks],
                    outputs=[manage_output, bookmark_selector, bookmark_display_manage, state_bookmarks]
                )
                export_button.click(
                    export_bookmarks,
                    outputs=download_link
                )
                # Rebuild the checkbox labels from the session state and
                # re-render the HTML list.
                refresh_button.click(
                    lambda state_bookmarks: (
                        [
                            f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
                            for i, bookmark in enumerate(state_bookmarks)
                        ],
                        display_bookmarks()
                    ),
                    inputs=[state_bookmarks],
                    outputs=[bookmark_selector, bookmark_display_manage]
                )
        logger.info("Launching Gradio app")
        demo.launch(debug=True)
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing silently.
        logger.error("Error building Gradio app: %s", e, exc_info=True)
        print(f"Error building Gradio app: {e}")
if __name__ == "__main__":
    # Positional arguments for llm_worker, one tuple per worker:
    # (queue, model, API key, rpm bucket, tpm bucket, batch size).
    _basic_worker_args = (
        llm_queue_basic, MODEL_BASIC, GROQ_API_KEY_BASIC,
        rpm_bucket_basic, tpm_bucket_basic, BATCH_SIZE_BASIC,
    )
    _advanced_worker_args = (
        llm_queue_advanced, MODEL_ADVANCED, GROQ_API_KEY_ADVANCED,
        rpm_bucket_advanced, tpm_bucket_advanced, BATCH_SIZE_ADVANCED,
    )
    llm_thread_basic = threading.Thread(target=llm_worker, args=_basic_worker_args, daemon=True)
    llm_thread_advanced = threading.Thread(target=llm_worker, args=_advanced_worker_args, daemon=True)
    # Both daemon workers are started before the app is built and launched.
    llm_thread_basic.start()
    llm_thread_advanced.start()
    build_app()

+import os
+import time
+import threading
+import requests
 from bs4 import BeautifulSoup
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
+import gradio as gr
 from concurrent.futures import ThreadPoolExecutor
+import logging
+# Suppress warnings from urllib3
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+# Logging setup
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
# Environment variable keys for API access
GROQ_API_KEY_BASIC = os.getenv('GROQ_API_KEY_BASIC')
GROQ_API_KEY_ADVANCED = os.getenv('GROQ_API_KEY_ADVANCED')

# LLM Models
MODEL_BASIC = 'llama-3.1-8b-instant'
# NOTE(review): confirm this model id is still served by the provider.
MODEL_ADVANCED = 'llama-3.1-70b-versatile'

# Verify API keys
if not GROQ_API_KEY_BASIC or not GROQ_API_KEY_ADVANCED:
    logger.error("Both GROQ_API_KEY_BASIC and GROQ_API_KEY_ADVANCED must be set.")
    # `exit()` is the interactive helper injected by the `site` module and,
    # called without arguments, reports success (status 0). A missing
    # configuration is a failure, so terminate with a non-zero status.
    raise SystemExit(1)

# Embedding model and FAISS index initialization
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
faiss_index = None  # assigned by vectorize_and_index()
bookmarks = []  # reassigned by process_bookmarks()

# Define categories
CATEGORIES = [
    "Social Media",
    "News and Media",
    "Education and Learning",
    "Entertainment",
    "Shopping and E-commerce",
    "Finance and Banking",
    "Technology",
    "Health and Fitness",
    "Travel and Tourism",
    "Food and Recipes",
    "Sports",
    "Arts and Culture",
    "Government and Politics",
    "Business and Economy",
    "Science and Research",
    "Personal Blogs and Journals",
    "Job Search and Careers",
    "Music and Audio",
    "Videos and Movies",
    "Reference and Knowledge Bases",
    "Dead Link",
    "Uncategorized",
]
# Task routing logic
def select_model_for_task(content_length):
    """Pick the ``(api_key, model_name)`` pair for a piece of content.

    A ``content_length`` below 500 is treated as a simple task and gets the
    basic key/model; everything else gets the advanced key/model.
    """
    is_simple_task = content_length < 500
    return (
        (GROQ_API_KEY_BASIC, MODEL_BASIC)
        if is_simple_task
        else (GROQ_API_KEY_ADVANCED, MODEL_ADVANCED)
    )
# Fetch URL info function
def fetch_url_info(bookmark):
    """Download the bookmarked page and record the outcome on ``bookmark``.

    On success ``bookmark['html_content']`` receives the response text and
    ``bookmark['status_code']`` the HTTP status code. If anything raises, the
    failure is logged and the two keys are set to ``''`` and ``'Error'``.
    The request uses a 10-second timeout and is sent with ``verify=False``,
    i.e. without TLS certificate verification.
    """
    try:
        page = requests.get(bookmark['url'], timeout=10, verify=False)
        bookmark.update(html_content=page.text, status_code=page.status_code)
    except Exception as e:
        logger.error(f"Failed to fetch URL info for {bookmark['url']}: {e}")
        bookmark.update(html_content='', status_code='Error')
# Generate summary and assign category
# The API keys and model ids used by this app are Groq ones. Groq exposes its
# OpenAI-compatible chat API under /openai/v1 on its own host; sending them
# to api.openai.com only ever produces an authentication error.
_GROQ_CHAT_URL = "https://api.groq.com/openai/v1/chat/completions"
_LLM_TIMEOUT_SECONDS = 30  # a stalled API call must not pin a worker thread
_MAX_PROMPT_CHARS = 6000  # cap on page text pasted into the prompt
_DEFAULT_SUMMARY = 'No summary available.'
_DEFAULT_CATEGORY = 'Uncategorized'


def _extract_page_text(html, limit=_MAX_PROMPT_CHARS):
    """Return the text of *html* without script/style, capped at *limit* chars."""
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup(['script', 'style', 'noscript']):
        tag.decompose()
    # Collapse all whitespace runs so the character budget is spent on words.
    return ' '.join(soup.get_text(separator=' ').split())[:limit]


def _parse_llm_reply(content):
    """Split a ``Summary: ... Category: ...`` reply into ``(summary, category)``."""
    body, _, category_part = content.partition("Category:")
    summary = body.partition("Summary:")[2] if "Summary:" in body else body
    summary = summary.strip() or _DEFAULT_SUMMARY

    # Only the first line after "Category:" counts; models sometimes echo the
    # brackets from the format template or add markdown emphasis.
    category_lines = category_part.strip().splitlines()
    wanted = category_lines[0].strip('[]*"\'. \t').casefold() if category_lines else ''
    category = next(
        (known for known in CATEGORIES if known.casefold() == wanted),
        _DEFAULT_CATEGORY,
    )
    return summary, category


def generate_summary_and_assign_category(bookmark):
    """Ask the LLM for a summary and category and store them on ``bookmark``.

    Reads ``bookmark['html_content']`` and always leaves ``bookmark['summary']``
    and ``bookmark['category']`` set. When there is no page content, or the
    request or reply parsing fails, they hold ``'No summary available.'`` and
    ``'Uncategorized'``. A category that is not one of ``CATEGORIES`` is
    replaced by ``'Uncategorized'``.

    Returns:
        None. The bookmark dict is updated in place.
    """
    html = bookmark.get('html_content') or ''
    # Set the fallbacks first so every exit path leaves both keys populated.
    bookmark['summary'] = _DEFAULT_SUMMARY
    bookmark['category'] = _DEFAULT_CATEGORY
    if not html.strip():
        # Nothing was fetched: there is nothing for the model to summarize.
        return

    try:
        page_text = _extract_page_text(html)
        if not page_text:
            return
        # Routing still keys off the raw HTML length, as before.
        api_key, model_name = select_model_for_task(len(html))
        # Prepare the prompt
        prompt = f"""
You are an assistant. Summarize the following webpage content:
{page_text}
Assign one category from this list: {', '.join(CATEGORIES)}.
Respond in the format:
Summary: [Your summary]
Category: [One category]
    """
        response = requests.post(
            _GROQ_CHAT_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": model_name,
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": 150,
                "temperature": 0.7,
            },
            timeout=_LLM_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors directly instead of a KeyError on 'choices'.
        response.raise_for_status()
        content = response.json()['choices'][0]['message']['content']
        bookmark['summary'], bookmark['category'] = _parse_llm_reply(content)
    except Exception as e:
        # Worker boundary: one failing bookmark must not abort the batch, so
        # log it and keep the fallback values assigned above.
        logger.error("Error processing LLM response for %s: %s", bookmark.get('url'), e)
# Vectorize summaries and build FAISS index
def vectorize_and_index(bookmarks):
    """Embed every bookmark summary and rebuild the global FAISS index.

    Each bookmark's id in the index is its position in ``bookmarks``. With an
    empty list the global ``faiss_index`` is reset to ``None`` so a stale
    index cannot be searched against bookmarks that no longer exist.

    Args:
        bookmarks: list of bookmark dicts; the ``'summary'`` value of each is
            embedded (a missing summary is embedded as an empty string).

    Returns:
        None. The result is stored in the module-level ``faiss_index``.
    """
    global faiss_index
    if not bookmarks:
        # Encoding nothing yields no second axis to read the dimension from.
        faiss_index = None
        return

    summaries = [entry.get('summary', '') for entry in bookmarks]
    # FAISS takes C-contiguous float32 vectors and int64 ids; make both
    # explicit rather than relying on platform or library defaults.
    embeddings = np.ascontiguousarray(embedding_model.encode(summaries), dtype=np.float32)
    dimension = embeddings.shape[1]
    index = faiss.IndexIDMap(faiss.IndexFlatL2(dimension))
    ids = np.arange(len(bookmarks), dtype=np.int64)
    index.add_with_ids(embeddings, ids)
    faiss_index = index
# Gradio interface setup
def _read_upload(file):
    """Return the uploaded bookmarks file as text, whatever form it arrives in."""
    if file is None:
        return ''
    if isinstance(file, (bytes, bytearray)):
        # gr.File(type="binary") hands the handler the raw bytes.
        raw = bytes(file)
    elif isinstance(file, str):
        # A path on disk (gr.File(type="filepath")).
        with open(file, 'rb') as handle:
            raw = handle.read()
    else:
        raw = file.read()
    if isinstance(raw, str):
        return raw
    # Do not fail the whole import over a few undecodable bytes.
    return raw.decode('utf-8', errors='replace')


def process_bookmarks(file):
    """Parse an exported bookmarks HTML file and enrich every link.

    Every ``<a>`` element with an ``href`` becomes a bookmark dict. The pages
    are then fetched, summarized and categorized in thread pools, and the
    FAISS index is rebuilt from the results.

    Args:
        file: the upload as ``bytes``, a file-like object with ``read()``,
            a filesystem path, or ``None`` when nothing was uploaded.

    Returns:
        The list of bookmark dicts, also stored in the global ``bookmarks``.
        The list is empty when the upload is missing or contains no links.
    """
    global bookmarks, faiss_index
    file_content = _read_upload(file)
    if not file_content.strip():
        bookmarks = []
        faiss_index = None
        return bookmarks

    soup = BeautifulSoup(file_content, 'html.parser')
    # Parse bookmarks
    bookmarks = [
        {'url': link.get('href'), 'title': link.text, 'html_content': ''}
        for link in soup.find_all('a') if link.get('href')
    ]
    if not bookmarks:
        # Nothing to fetch or index; drop any index built for older data.
        faiss_index = None
        return bookmarks

    # Fetch URLs concurrently. The results are consumed so that an exception
    # escaping a worker is raised here instead of being silently discarded.
    with ThreadPoolExecutor() as executor:
        list(executor.map(fetch_url_info, bookmarks))
    # Process bookmarks with LLM
    with ThreadPoolExecutor() as executor:
        list(executor.map(generate_summary_and_assign_category, bookmarks))
    # Build FAISS index
    vectorize_and_index(bookmarks)
    return bookmarks
# Build Gradio app
with gr.Blocks() as demo:
    gr.Markdown("# Smart Bookmark Manager")
    file_input = gr.File(label="Upload Bookmark File", type="binary")
    submit_button = gr.Button("Process")
    output = gr.Textbox(label="Output")

    def handle_submit(file):
        """Process the upload and return one ``title - category`` line per bookmark."""
        if file is None:
            # The button was pressed before a file was chosen.
            return "Please upload a bookmarks HTML file first."
        processed = process_bookmarks(file)
        return "\n".join(
            f"{b['title']} - {b.get('category', 'Uncategorized')}" for b in processed
        )

    submit_button.click(handle_submit, inputs=file_input, outputs=output)

# Launch only when executed as a script, so that importing this module
# (tests, tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()