siddhartharya committed on
Commit
813f784
•
1 Parent(s): 3c4e128

Update app.py

Files changed (1)
  1. app.py +399 -650
app.py CHANGED
@@ -12,7 +12,6 @@ import base64
12
  import logging
13
  import os
14
  import sys
15
- import urllib.parse
16
 
17
  # Import OpenAI library
18
  import openai
@@ -75,121 +74,37 @@ if not GROQ_API_KEY:
75
  openai.api_key = GROQ_API_KEY
76
  openai.api_base = "https://api.groq.com/openai/v1"
77
 
78
- def determine_page_type(soup, url):
79
  """
80
- Determine the type of webpage for better content extraction.
81
- """
82
- url_lower = url.lower()
83
-
84
- # Check for common platforms
85
- if 'facebook.com' in url_lower:
86
- return 'social_media_profile'
87
- elif 'wikipedia.org' in url_lower:
88
- return 'wiki_article'
89
- elif any(domain in url_lower for domain in ['news', 'huffpost', 'times']):
90
- return 'news_article'
91
- elif 'youtube.com' in url_lower:
92
- return 'video_platform'
93
- elif '.gov' in url_lower or 'government' in url_lower:
94
- return 'government_site'
95
- elif 'x.com' in url_lower or 'twitter.com' in url_lower:
96
- return 'social_media_platform'
97
-
98
- # Check page structure
99
- if soup.find('article'):
100
- return 'article'
101
- elif soup.find(['shop', 'product', 'price']):
102
- return 'ecommerce'
103
- elif soup.find(['forum', 'comment', 'discussion']):
104
- return 'forum'
105
-
106
- return 'general'
107
-
108
- def extract_main_content_by_type(soup, page_type):
109
- """
110
- Extract content based on page type for better relevance.
111
  """
112
  if not soup:
113
  return ""
114
 
115
- content = ""
116
-
117
- if page_type == 'news_article':
118
- # Try to find the main article content
119
- article_body = soup.find(['article', 'main', 'div'],
120
- class_=lambda x: x and any(c in str(x).lower()
121
- for c in ['article', 'story', 'content', 'body']))
122
- if article_body:
123
- # Get first few paragraphs
124
- paragraphs = article_body.find_all('p')
125
- content = ' '.join(p.get_text() for p in paragraphs[:5])
126
-
127
- elif page_type == 'wiki_article':
128
- # For Wikipedia articles
129
- content_div = soup.find('div', {'id': 'mw-content-text'})
130
- if content_div:
131
- paragraphs = content_div.find_all('p')
132
- content = ' '.join(p.get_text() for p in paragraphs[:3])
133
-
134
- elif page_type in ['social_media_profile', 'social_media_platform']:
135
- # For social media pages
136
- about_section = soup.find(['div', 'section'],
137
- class_=lambda x: x and any(c in str(x).lower()
138
- for c in ['about', 'bio', 'profile', 'description']))
139
- if about_section:
140
- content = about_section.get_text()
141
- else:
142
- # Try to get main content area
143
- content = soup.find(['div', 'main'],
144
- class_=lambda x: x and 'content' in str(x).lower())
145
- if content:
146
- content = content.get_text()
147
-
148
- # If no content found using specific extractors, use general extraction
149
- if not content.strip():
150
- # Remove unwanted elements
151
- for element in soup(['script', 'style', 'nav', 'footer', 'header']):
152
- element.decompose()
153
-
154
- # Try to find main content area
155
- main_content = soup.find(['main', 'article', 'div'],
156
- class_=lambda x: x and 'content' in str(x).lower())
157
- if main_content:
158
- # Get all text from paragraphs
159
- paragraphs = main_content.find_all('p')
160
- content = ' '.join(p.get_text() for p in paragraphs)
161
  else:
162
  # Fallback to body content
163
- content = soup.get_text()
164
 
165
- # Clean the extracted content
166
- content = clean_text(content)
 
 
167
 
168
- return content[:5000] # Limit content length
169
-
170
- def clean_text(text):
171
- """
172
- Clean extracted text content.
173
- """
174
- if not text:
175
- return ""
176
-
177
- # Convert to string if necessary
178
- text = str(text)
179
-
180
- # Remove extra whitespace
181
- text = re.sub(r'\s+', ' ', text)
182
-
183
- # Remove special characters but keep basic punctuation
184
- text = re.sub(r'[^\w\s.,!?-]', '', text)
185
-
186
- # Remove multiple punctuation
187
- text = re.sub(r'([.,!?])\1+', r'\1', text)
188
-
189
- # Remove very short words (likely garbage)
190
- text = ' '.join(word for word in text.split() if len(word) > 1)
191
-
192
- return text.strip()
193
 
194
  def get_page_metadata(soup):
195
  """
@@ -204,213 +119,170 @@ def get_page_metadata(soup):
204
  if not soup:
205
  return metadata
206
 
207
- # Get title (try multiple sources)
208
  title_tag = soup.find('title')
209
- og_title = soup.find('meta', {'property': 'og:title'})
210
- twitter_title = soup.find('meta', {'name': 'twitter:title'})
211
-
212
  if title_tag and title_tag.string:
213
  metadata['title'] = title_tag.string.strip()
214
- elif og_title and og_title.get('content'):
215
- metadata['title'] = og_title.get('content').strip()
216
- elif twitter_title and twitter_title.get('content'):
217
- metadata['title'] = twitter_title.get('content').strip()
218
-
219
- # Get meta description (try multiple sources)
220
- desc_sources = [
221
- ('meta', {'name': 'description'}),
222
- ('meta', {'property': 'og:description'}),
223
- ('meta', {'name': 'twitter:description'}),
224
- ]
225
 
226
- for tag, attrs in desc_sources:
227
- desc = soup.find(tag, attrs=attrs)
228
- if desc and desc.get('content'):
229
- metadata['description'] = desc.get('content').strip()
230
- break
 
 
 
231
 
232
  # Get meta keywords
233
- keywords_tag = soup.find('meta', {'name': 'keywords'})
234
- if keywords_tag and keywords_tag.get('content'):
235
- metadata['keywords'] = keywords_tag.get('content').strip()
236
 
237
- return metadata
238
-
239
- def generate_contextual_summary(context):
240
- """
241
- Generate summary with context awareness using LLM.
242
- """
243
- page_type = context['page_type']
244
-
245
- # Customize prompt based on page type
246
- type_specific_prompts = {
247
- 'news_article': "This is a news article. Focus on the main news event, key facts, and significance.",
248
- 'wiki_article': "This is a Wikipedia article. Focus on the main topic, key facts, and historical context.",
249
- 'social_media_profile': "This is a social media profile. Focus on the platform's purpose and key features.",
250
- 'social_media_platform': "This is a social media platform. Describe its main purpose and unique features.",
251
- 'ecommerce': "This is an e-commerce site. Focus on what products/services are offered and target audience.",
252
- 'government_site': "This is a government website. Focus on services offered and public information provided.",
253
- 'video_platform': "This is a video platform. Describe its main purpose and content sharing features.",
254
- 'general': "Describe the main purpose and key features of this webpage."
255
- }
256
-
257
- prompt = f"""
258
- Analyze this webpage and create a clear, factual summary:
259
-
260
- Title: {context['title']}
261
- Type: {page_type}
262
- Description: {context['description']}
263
- Keywords: {context['keywords']}
264
-
265
- Additional Content:
266
- {context['content'][:3000]}
267
-
268
- {type_specific_prompts.get(page_type, type_specific_prompts['general'])}
269
-
270
- Create a natural, informative 2-3 sentence summary that:
271
- 1. States the primary purpose/main topic
272
- 2. Mentions key features or information
273
- 3. Indicates target audience or use case (if clear)
274
-
275
- Keep the tone professional and factual.
276
- """
277
 
278
- try:
279
- response = openai.ChatCompletion.create(
280
- model='llama3-8b-8192',
281
- messages=[
282
- {"role": "system", "content": "You are a precise webpage summarizer that creates clear, accurate summaries."},
283
- {"role": "user", "content": prompt}
284
- ],
285
- max_tokens=150,
286
- temperature=0.3,
287
- )
288
-
289
- return response['choices'][0]['message']['content'].strip()
290
- except Exception as e:
291
- logger.error(f"Error generating LLM summary: {e}")
292
- return None
293
 
294
  def generate_summary(bookmark):
295
  """
296
  Generate a comprehensive summary for a bookmark using available content and LLM.
297
  """
298
- logger.info(f"Generating summary for {bookmark.get('url')}")
299
 
300
  try:
 
301
  soup = BeautifulSoup(bookmark.get('html_content', ''), 'html.parser')
302
 
303
- # 1. Extract all available metadata
304
  metadata = get_page_metadata(soup)
 
305
 
306
- # 2. Determine page type and context
307
- page_type = determine_page_type(soup, bookmark['url'])
308
-
309
- # 3. Extract relevant content based on page type
310
- main_content = extract_main_content_by_type(soup, page_type)
311
-
312
- # 4. Generate summary using LLM with contextual awareness
313
  try:
314
- context = {
315
- 'title': metadata['title'] or bookmark.get('title', ''),
316
- 'description': metadata['description'],
317
- 'keywords': metadata['keywords'],
318
- 'page_type': page_type,
319
- 'content': main_content
320
- }
 
 
 
321
 
322
- summary = generate_contextual_summary(context)
323
- if summary:
324
- bookmark['summary'] = summary
325
  return bookmark
326
327
  except Exception as e:
328
- logger.error(f"Error in LLM summary generation: {e}")
329
-
330
- # Fallback mechanism
331
- if metadata['description']:
332
- bookmark['summary'] = metadata['description']
333
- elif main_content:
334
- bookmark['summary'] = ' '.join(main_content.split()[:50]) + '...'
335
- else:
336
- bookmark['summary'] = metadata.get('title', bookmark.get('title', 'No summary available.'))
337
 
338
  except Exception as e:
339
  logger.error(f"Error in generate_summary: {e}")
340
  bookmark['summary'] = bookmark.get('title', 'No summary available.')
341
-
342
- return bookmark
343
 
344
  async def fetch_url_info(session, bookmark):
345
  """
346
- Enhanced URL fetching with better error handling and request configuration.
347
  """
348
  url = bookmark['url']
349
  if url in fetch_cache:
350
  bookmark.update(fetch_cache[url])
351
  return bookmark
352
 
353
- headers = {
354
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
355
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
356
- 'Accept-Language': 'en-US,en;q=0.5',
357
- 'Accept-Encoding': 'gzip, deflate, br',
358
- 'Connection': 'keep-alive',
359
- 'Upgrade-Insecure-Requests': '1',
360
- 'Sec-Fetch-Dest': 'document',
361
- 'Sec-Fetch-Mode': 'navigate',
362
- 'Sec-Fetch-Site': 'none',
363
- 'Sec-Fetch-User': '?1',
364
- 'Cache-Control': 'max-age=0'
365
- }
366
-
367
  try:
368
  logger.info(f"Fetching URL info for: {url}")
369
- timeout = aiohttp.ClientTimeout(total=30)
370
- async with session.get(
371
- url,
372
- timeout=timeout,
373
- headers=headers,
374
- ssl=False,
375
- allow_redirects=True
376
- ) as response:
377
-
378
- status = response.status
379
- bookmark['status_code'] = status
380
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
381
-
382
- # Handle different status codes
383
- if status == 200:
384
- content = await response.text()
385
- bookmark['html_content'] = content
386
- bookmark['dead_link'] = False
387
- bookmark['description'] = '' # Will be set by generate_summary
388
- logger.info(f"Successfully fetched content for {url}")
389
- elif status in [301, 302, 307, 308]:
390
- # Handle redirects manually if needed
391
- bookmark['dead_link'] = False
392
- bookmark['html_content'] = ''
393
- logger.info(f"Redirect detected for {url}")
394
- else:
395
  bookmark['dead_link'] = True
 
396
  bookmark['html_content'] = ''
397
- logger.warning(f"Non-success status {status} for {url}")
398
-
399
- except asyncio.TimeoutError:
400
- logger.warning(f"Timeout while fetching {url}")
401
- bookmark['dead_link'] = False # Don't mark as dead just because of timeout
402
- bookmark['status_code'] = 'Timeout'
 
403
  except Exception as e:
404
- logger.error(f"Error fetching {url}: {str(e)}")
405
- bookmark['dead_link'] = False # Don't mark as dead for other errors
406
- bookmark['status_code'] = str(e)
 
 
 
407
  finally:
408
- # Ensure all required fields are present
409
- bookmark.setdefault('html_content', '')
410
- bookmark.setdefault('description', '')
411
- bookmark.setdefault('etag', 'N/A')
412
-
413
- # Update cache
414
  fetch_cache[url] = {
415
  'etag': bookmark.get('etag'),
416
  'status_code': bookmark.get('status_code'),
@@ -418,80 +290,76 @@ async def fetch_url_info(session, bookmark):
418
  'description': bookmark.get('description'),
419
  'html_content': bookmark.get('html_content', '')
420
  }
421
-
422
  return bookmark
423
 
424
  async def process_bookmarks_async(bookmarks_list):
425
  """
426
- Process all bookmarks asynchronously with improved error handling.
427
  """
428
  logger.info("Processing bookmarks asynchronously")
429
  try:
430
- # Configure connection pool and timeout
431
- tcp_connector = aiohttp.TCPConnector(
432
- limit=5, # Limit concurrent connections
433
- force_close=True, # Force close connections
434
- enable_cleanup_closed=True, # Clean up closed connections
435
- ssl=False # Disable SSL verification
436
- )
437
-
438
- timeout = aiohttp.ClientTimeout(total=30)
439
-
440
- async with aiohttp.ClientSession(
441
- connector=tcp_connector,
442
- timeout=timeout,
443
- raise_for_status=False # Don't raise exceptions for non-200 status
444
- ) as session:
445
  tasks = []
446
  for bookmark in bookmarks_list:
447
  task = asyncio.ensure_future(fetch_url_info(session, bookmark))
448
  tasks.append(task)
449
-
450
- # Process bookmarks in batches to avoid overwhelming servers
451
- batch_size = 5
452
- for i in range(0, len(tasks), batch_size):
453
- batch = tasks[i:i + batch_size]
454
- await asyncio.gather(*batch)
455
- await asyncio.sleep(1) # Small delay between batches
456
-
457
  logger.info("Completed processing bookmarks asynchronously")
458
  except Exception as e:
459
  logger.error(f"Error in asynchronous processing of bookmarks: {e}")
460
  raise
461
 
462
- def parse_bookmarks(file_content):
463
  """
464
- Parse bookmarks from HTML file with enhanced error handling.
465
  """
466
- logger.info("Parsing bookmarks")
467
- try:
468
- soup = BeautifulSoup(file_content, 'html.parser')
469
- extracted_bookmarks = []
470
-
471
- # Find all bookmark links
472
- for link in soup.find_all('a'):
473
- url = link.get('href', '').strip()
474
- title = link.text.strip()
475
-
476
- # Validate URL and title
477
- if url and title and url.startswith(('http://', 'https://')):
478
- # Clean and normalize URL
479
- parsed_url = urllib.parse.urlparse(url)
480
- normalized_url = urllib.parse.urlunparse(parsed_url)
481
-
482
- bookmark = {
483
- 'url': normalized_url,
484
- 'title': title,
485
- 'add_date': link.get('add_date', ''),
486
- 'icon': link.get('icon', '')
487
- }
488
- extracted_bookmarks.append(bookmark)
489
-
490
- logger.info(f"Extracted {len(extracted_bookmarks)} valid bookmarks")
491
- return extracted_bookmarks
492
- except Exception as e:
493
- logger.error(f"Error parsing bookmarks: {e}")
494
- raise
495
 
496
  def vectorize_and_index(bookmarks_list):
497
  """
@@ -499,25 +367,11 @@ def vectorize_and_index(bookmarks_list):
499
  """
500
  logger.info("Vectorizing summaries and building FAISS index")
501
  try:
502
- # Prepare summaries for vectorization
503
- summaries = []
504
- for bookmark in bookmarks_list:
505
- summary = bookmark.get('summary', '').strip()
506
- title = bookmark.get('title', '').strip()
507
- # Combine title and summary for better embedding
508
- text = f"{title} {summary}".strip()
509
- summaries.append(text if text else "No content available")
510
-
511
- # Generate embeddings
512
  embeddings = embedding_model.encode(summaries)
513
-
514
- # Create and configure FAISS index
515
  dimension = embeddings.shape[1]
516
  faiss_idx = faiss.IndexFlatL2(dimension)
517
-
518
- # Add vectors to index
519
  faiss_idx.add(np.array(embeddings))
520
-
521
  logger.info("FAISS index built successfully")
522
  return faiss_idx, embeddings
523
  except Exception as e:
@@ -526,7 +380,7 @@ def vectorize_and_index(bookmarks_list):
526
 
527
  def display_bookmarks():
528
  """
529
- Generate HTML display for bookmarks with enhanced styling.
530
  """
531
  logger.info("Generating HTML display for bookmarks")
532
  cards = ''
@@ -538,350 +392,201 @@ def display_bookmarks():
538
  etag = bookmark.get('etag', 'N/A')
539
  summary = bookmark.get('summary', '')
540
  category = bookmark.get('category', 'Uncategorized')
541
- status_code = bookmark.get('status_code', 'N/A')
542
 
543
- # Enhanced styling based on status
544
  if bookmark.get('dead_link'):
545
- card_style = "border: 2px solid #ff4444; background-color: rgba(255, 68, 68, 0.1);"
546
- text_style = "color: #ff4444;"
547
  else:
548
- card_style = "border: 2px solid #00C851; background-color: rgba(0, 200, 81, 0.1);"
549
  text_style = "color: var(--text-color);"
550
 
551
- # Properly escape any backslashes if present in summary or other fields
552
- # (Not strictly necessary here, but good practice)
553
- summary_escaped = summary.replace('\\', '\\\\')
554
-
555
  card_html = f'''
556
- <div class="card" style="{card_style} padding: 15px; margin: 15px 0; border-radius: 8px; box-shadow: 0 2px 5px rgba(0,0,0,0.1);">
557
  <div class="card-content">
558
- <h3 style="{text_style} margin-bottom: 10px; font-size: 1.2em;">
559
- {index}. {title} {status}
560
- {f'<span style="font-size: 0.8em; color: #666;">({status_code})</span>' if status_code != 'N/A' else ''}
561
- </h3>
562
  <p style="{text_style}"><strong>Category:</strong> {category}</p>
563
  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
564
  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
565
- <p style="{text_style}"><strong>Summary:</strong> {summary_escaped}</p>
566
  </div>
567
  </div>
568
  '''
569
  cards += card_html
570
-
571
- # Add container with max width and padding
572
- display_html = f'''
573
- <div style="max-width: 1200px; margin: 0 auto; padding: 20px;">
574
- {cards}
575
- </div>
576
- '''
577
-
578
  logger.info("HTML display generated")
579
- return display_html
580
 
581
- def assign_category(bookmark):
582
  """
583
- Assign a category to a bookmark based on its title or summary.
584
- This is a simple implementation and can be enhanced with more sophisticated methods.
585
- """
586
- title = bookmark.get('title', '').lower()
587
- summary = bookmark.get('summary', '').lower()
588
-
589
- # Simple keyword-based categorization
590
- if any(keyword in title or keyword in summary for keyword in ['facebook', 'twitter', 'instagram']):
591
- bookmark['category'] = 'Social Media'
592
- elif any(keyword in title or keyword in summary for keyword in ['news', 'media', 'huffpost', 'times']):
593
- bookmark['category'] = 'News and Media'
594
- elif any(keyword in title or keyword in summary for keyword in ['course', 'learning', 'education']):
595
- bookmark['category'] = 'Education and Learning'
596
- elif any(keyword in title or keyword in summary for keyword in ['movie', 'music', 'audio', 'video']):
597
- bookmark['category'] = 'Entertainment'
598
- elif any(keyword in title or keyword in summary for keyword in ['shop', 'e-commerce', 'buy', 'purchase']):
599
- bookmark['category'] = 'Shopping and E-commerce'
600
- elif any(keyword in title or keyword in summary for keyword in ['finance', 'banking', 'investment']):
601
- bookmark['category'] = 'Finance and Banking'
602
- elif any(keyword in title or keyword in summary for keyword in ['tech', 'technology', 'software']):
603
- bookmark['category'] = 'Technology'
604
- elif any(keyword in title or keyword in summary for keyword in ['health', 'fitness', 'wellness']):
605
- bookmark['category'] = 'Health and Fitness'
606
- elif any(keyword in title or keyword in summary for keyword in ['travel', 'tourism', 'flight', 'hotel']):
607
- bookmark['category'] = 'Travel and Tourism'
608
- elif any(keyword in title or keyword in summary for keyword in ['recipe', 'food', 'cooking']):
609
- bookmark['category'] = 'Food and Recipes'
610
- elif any(keyword in title or keyword in summary for keyword in ['sport', 'game', 'fitness']):
611
- bookmark['category'] = 'Sports'
612
- elif any(keyword in title or keyword in summary for keyword in ['art', 'culture', 'museum']):
613
- bookmark['category'] = 'Arts and Culture'
614
- elif any(keyword in title or keyword in summary for keyword in ['gov', 'government', 'politics']):
615
- bookmark['category'] = 'Government and Politics'
616
- elif any(keyword in title or keyword in summary for keyword in ['business', 'economy', 'market']):
617
- bookmark['category'] = 'Business and Economy'
618
- elif any(keyword in title or keyword in summary for keyword in ['science', 'research', 'study']):
619
- bookmark['category'] = 'Science and Research'
620
- elif any(keyword in title or keyword in summary for keyword in ['blog', 'journal']):
621
- bookmark['category'] = 'Personal Blogs and Journals'
622
- elif any(keyword in title or keyword in summary for keyword in ['job', 'career', 'employment']):
623
- bookmark['category'] = 'Job Search and Careers'
624
- elif any(keyword in title or keyword in summary for keyword in ['audio', 'music']):
625
- bookmark['category'] = 'Music and Audio'
626
- elif any(keyword in title or keyword in summary for keyword in ['video', 'movie']):
627
- bookmark['category'] = 'Videos and Movies'
628
- elif any(keyword in title or keyword in summary for keyword in ['reference', 'knowledge', 'wiki']):
629
- bookmark['category'] = 'Reference and Knowledge Bases'
630
- elif bookmark.get('dead_link'):
631
- bookmark['category'] = 'Dead Link'
632
- else:
633
- bookmark['category'] = 'Uncategorized'
634
-
635
- def process_uploaded_file(file, delete_checkbox, edit_checkbox):
636
- """
637
- Process the uploaded bookmarks file with enhanced error handling and user feedback.
638
  """
639
  global bookmarks, faiss_index
640
  logger.info("Processing uploaded file")
641
 
642
  if file is None:
643
- return "⚠️ Please upload a bookmarks HTML file.", '', gr.Dropdown.update(choices=[]), gr.Dropdown.update(choices=[])
644
-
 
645
  try:
646
  file_content = file.decode('utf-8')
647
  except UnicodeDecodeError as e:
648
- logger.error(f"Error decoding file: {e}")
649
- return "⚠️ Error decoding file. Please ensure it's a valid HTML file.", '', gr.Dropdown.update(choices=[]), gr.Dropdown.update(choices=[])
650
-
651
  try:
652
  bookmarks = parse_bookmarks(file_content)
653
  except Exception as e:
654
  logger.error(f"Error parsing bookmarks: {e}")
655
- return "⚠️ Error parsing the bookmarks file.", '', gr.Dropdown.update(choices=[]), gr.Dropdown.update(choices=[])
656
-
657
  if not bookmarks:
658
- return "⚠️ No valid bookmarks found in the file.", '', gr.Dropdown.update(choices=[]), gr.Dropdown.update(choices=[])
659
-
 
 
660
  try:
661
- logger.info("Processing bookmarks...")
662
  asyncio.run(process_bookmarks_async(bookmarks))
663
-
664
- # Process in batches for progress tracking
665
- total = len(bookmarks)
666
- for i, bookmark in enumerate(bookmarks, 1):
667
- generate_summary(bookmark)
668
- assign_category(bookmark)
669
- logger.info(f"Processed bookmark {i}/{total}")
670
 
671
- faiss_index, embeddings = vectorize_and_index(bookmarks)
672
-
673
- message = f"βœ… Successfully processed {len(bookmarks)} bookmarks!"
674
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
675
- for i, bookmark in enumerate(bookmarks)]
676
-
677
- bookmark_html = display_bookmarks()
678
- return message, bookmark_html, gr.CheckboxGroup.update(choices=choices), gr.CheckboxGroup.update(choices=choices)
679
 
 
 
680
  except Exception as e:
681
- logger.error(f"Error processing bookmarks: {e}")
682
- return "⚠️ Error processing bookmarks. Please try again.", '', gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[])
683
 
684
- def delete_selected_bookmarks(selected_indices, delete_checkbox, edit_checkbox):
685
  """
686
- Delete selected bookmarks with enhanced error handling.
687
  """
688
  global bookmarks, faiss_index
689
-
690
  if not selected_indices:
691
- return "⚠️ No bookmarks selected.", gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[]), display_bookmarks()
692
 
693
- try:
694
- indices = [int(s.split('.')[0])-1 for s in selected_indices]
695
- indices = sorted(indices, reverse=True)
696
- deleted_count = 0
697
-
698
- for idx in indices:
699
- if 0 <= idx < len(bookmarks):
700
- logger.info(f"Deleting bookmark: {bookmarks[idx]['title']}")
701
- bookmarks.pop(idx)
702
- deleted_count += 1
703
-
704
- if bookmarks:
705
- faiss_index, embeddings = vectorize_and_index(bookmarks)
706
- else:
707
- faiss_index = None
708
 
709
- message = f"βœ… Successfully deleted {deleted_count} bookmark{'s' if deleted_count != 1 else ''}."
710
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
711
- for i, bookmark in enumerate(bookmarks)]
712
-
713
- return message, gr.CheckboxGroup.update(choices=choices), gr.CheckboxGroup.update(choices=choices), display_bookmarks()
714
- except Exception as e:
715
- logger.error(f"Error deleting bookmarks: {e}")
716
- return "⚠️ Error deleting bookmarks.", gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[]), display_bookmarks()
717
 
718
- def edit_selected_bookmarks_category(selected_indices, new_category, delete_checkbox, edit_checkbox):
719
  """
720
- Edit category of selected bookmarks with enhanced error handling.
721
  """
722
  if not selected_indices:
723
- return "⚠️ No bookmarks selected.", gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[]), display_bookmarks()
724
  if not new_category:
725
- return "⚠️ No new category selected.", gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[]), display_bookmarks()
726
 
727
- try:
728
- indices = [int(s.split('.')[0])-1 for s in selected_indices]
729
- updated_count = 0
730
-
731
- for idx in indices:
732
- if 0 <= idx < len(bookmarks):
733
- old_category = bookmarks[idx]['category']
734
- bookmarks[idx]['category'] = new_category
735
- logger.info(f"Updated category for '{bookmarks[idx]['title']}' from '{old_category}' to '{new_category}'")
736
- updated_count += 1
737
-
738
- message = f"βœ… Updated category for {updated_count} bookmark{'s' if updated_count != 1 else ''}."
739
- choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
740
- for i, bookmark in enumerate(bookmarks)]
741
-
742
- return message, gr.CheckboxGroup.update(choices=choices), gr.CheckboxGroup.update(choices=choices), display_bookmarks()
743
- except Exception as e:
744
- logger.error(f"Error updating categories: {e}")
745
- return "⚠️ Error updating categories.", gr.CheckboxGroup.update(choices=[]), gr.CheckboxGroup.update(choices=[]), display_bookmarks()
746
 
747
  def export_bookmarks():
748
  """
749
- Export bookmarks to HTML file with enhanced formatting.
750
  """
751
  if not bookmarks:
 
752
  return "⚠️ No bookmarks to export."
753
 
754
  try:
755
- logger.info("Exporting bookmarks")
756
- soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1>", 'html.parser')
757
-
758
- # Add metadata
759
- meta = soup.new_tag('META')
760
- meta['HTTP-EQUIV'] = 'Content-Type'
761
- meta['CONTENT'] = 'text/html; charset=UTF-8'
762
- soup.append(meta)
763
-
764
- # Add title
765
- title = soup.new_tag('TITLE')
766
- title.string = 'Bookmarks'
767
- soup.append(title)
768
-
769
- # Add heading
770
- h1 = soup.new_tag('H1')
771
- h1.string = 'Bookmarks'
772
- soup.append(h1)
773
-
774
- # Create main bookmark list
775
  dl = soup.new_tag('DL')
776
- soup.append(dl)
777
-
778
- # Add bookmarks with categories
779
- current_category = None
780
  for bookmark in bookmarks:
781
- category = bookmark.get('category', 'Uncategorized')
782
-
783
- # Create category folder if needed
784
- if category != current_category:
785
- current_category = category
786
- dt_cat = soup.new_tag('DT')
787
- h3_cat = soup.new_tag('H3')
788
- h3_cat.string = category
789
- dt_cat.append(h3_cat)
790
- dl_cat = soup.new_tag('DL')
791
- dt_cat.append(dl_cat)
792
- dl.append(dt_cat)
793
-
794
- # Add bookmark
795
  dt = soup.new_tag('DT')
796
  a = soup.new_tag('A', href=bookmark['url'])
797
- if 'add_date' in bookmark and bookmark['add_date']:
798
- a['ADD_DATE'] = bookmark['add_date']
799
- if 'icon' in bookmark and bookmark['icon']:
800
- a['ICON'] = bookmark['icon']
801
  a.string = bookmark['title']
802
  dt.append(a)
803
- dl_cat.append(dt)
804
-
805
  html_content = str(soup)
806
  b64 = base64.b64encode(html_content.encode()).decode()
807
  href = f'data:text/html;base64,{b64}'
808
-
809
  logger.info("Bookmarks exported successfully")
810
- return f'''
811
- <div style="text-align: center;">
812
- <a href="{href}"
813
- download="bookmarks.html"
814
- style="display: inline-block;
815
- padding: 10px 20px;
816
- background-color: #4CAF50;
817
- color: white;
818
- text-decoration: none;
819
- border-radius: 5px;
820
- margin: 10px;">
821
- 💾 Download Exported Bookmarks
822
- </a>
823
- </div>
824
- '''
825
  except Exception as e:
826
  logger.error(f"Error exporting bookmarks: {e}")
827
  return "⚠️ Error exporting bookmarks."
828
 
829
  def chatbot_response(user_query):
830
  """
831
- Generate chatbot response with enhanced context understanding.
832
  """
833
  if not GROQ_API_KEY:
 
834
  return "⚠️ API key not set. Please set the GROQ_API_KEY environment variable."
835
 
836
  if not bookmarks:
 
837
  return "⚠️ No bookmarks available. Please upload and process your bookmarks first."
838
 
839
- logger.info(f"Processing query: {user_query}")
840
 
841
  try:
842
- # Get relevant bookmarks using FAISS
843
- query_embedding = embedding_model.encode([user_query]).astype('float32')
844
- k = min(5, len(bookmarks)) # Get top 5 or all if less than 5
845
- D, I = faiss_index.search(query_embedding, k)
846
-
847
- relevant_bookmarks = []
848
- for idx in I[0]:
849
- if idx != -1: # Valid index
850
- bookmark_data = bookmarks[idx]
851
- relevant_bookmarks.append({
852
- 'title': bookmark_data['title'],
853
- 'url': bookmark_data['url'],
854
- 'summary': bookmark_data['summary'],
855
- 'category': bookmark_data['category']
856
- })
857
-
858
- # Prepare context for LLM
859
- bookmark_descriptions = []
860
- for i, bm in enumerate(relevant_bookmarks, 1):
861
- desc = f"{i}. Title: {bm['title']}\n URL: {bm['url']}\n Category: {bm['category']}\n Summary: {bm['summary']}"
862
- bookmark_descriptions.append(desc)
863
-
864
- # Precompute the joined descriptions to avoid backslashes in f-string expressions
865
- joined_bookmark_descriptions = '\\n\\n'.join(bookmark_descriptions)
866
 
867
  prompt = f"""
868
- User Query: {user_query}
869
 
870
- Relevant Bookmarks:
871
- {joined_bookmark_descriptions}
872
 
873
- Please provide a helpful response that:
874
- 1. Identifies the most relevant bookmarks for the query
875
- 2. Explains why each bookmark might be useful
876
- 3. Suggests how the user might use these resources
877
 
878
- Format the response in a clear, readable way with appropriate spacing and structure.
879
  """
880
 
881
  response = openai.ChatCompletion.create(
882
  model='llama3-8b-8192',
883
  messages=[
884
- {"role": "system", "content": "You are a helpful assistant that finds and explains relevant bookmarks."},
885
  {"role": "user", "content": prompt}
886
  ],
887
  max_tokens=500,
@@ -889,7 +594,7 @@ def chatbot_response(user_query):
889
  )
890
 
891
  answer = response['choices'][0]['message']['content'].strip()
892
- logger.info("Generated response successfully")
893
  return answer
894
 
895
  except Exception as e:
@@ -899,69 +604,113 @@ def chatbot_response(user_query):
899
 
900
  def build_app():
901
  """
902
- Build and launch the Gradio app with enhanced UI and functionality.
903
  """
904
  try:
905
  logger.info("Building Gradio app")
906
  with gr.Blocks(css="app.css") as demo:
907
- gr.Markdown("# πŸ“š Bookmark Manager")
908
-
909
- with gr.Row():
910
- with gr.Column():
911
- file_input = gr.File(label="Upload Bookmarks HTML File", file_types=["file"])
912
- process_button = gr.Button("Process Bookmarks")
913
- process_message = gr.Markdown("")
914
-
915
- category_dropdown = gr.Dropdown(choices=CATEGORIES, label="New Category")
916
- edit_button = gr.Button("Edit Selected Bookmarks Category")
917
-
918
- delete_button = gr.Button("Delete Selected Bookmarks")
919
- export_button = gr.Button("Export Bookmarks")
920
-
921
- # Define CheckboxGroups and assign to variables
922
- delete_checkbox = gr.CheckboxGroup(label="Select Bookmarks to Delete", choices=[])
923
- edit_checkbox = gr.CheckboxGroup(label="Select Bookmarks to Edit", choices=[])
924
-
925
- with gr.Column():
926
- bookmarks_display = gr.HTML(label="Bookmarks")
927
-
928
- with gr.Row():
929
- chatbot_input = gr.Textbox(label="Ask about your bookmarks", placeholder="Enter your query here...")
930
- chatbot_output = gr.Textbox(label="Chatbot Response", interactive=False)
931
-
932
- # Processing File
933
  process_button.click(
934
- fn=process_uploaded_file,
935
- inputs=[file_input, delete_checkbox, edit_checkbox],
936
- outputs=[process_message, bookmarks_display, delete_checkbox, edit_checkbox]
937
  )
938
-
939
- # Deleting Bookmarks
940
  delete_button.click(
941
- fn=delete_selected_bookmarks,
942
- inputs=[delete_checkbox, edit_checkbox],
943
- outputs=[process_message, delete_checkbox, edit_checkbox, bookmarks_display]
944
  )
945
-
946
- # Editing Categories
947
- edit_button.click(
948
- fn=edit_selected_bookmarks_category,
949
- inputs=[edit_checkbox, category_dropdown, delete_checkbox, edit_checkbox],
950
- outputs=[process_message, delete_checkbox, edit_checkbox, bookmarks_display]
951
  )
952
-
953
- # Exporting Bookmarks
954
  export_button.click(
955
- fn=export_bookmarks,
956
- inputs=None,
957
- outputs=gr.HTML(label="Export")
958
- )
959
-
960
- # Chatbot
961
- chatbot_input.submit(
962
- fn=chatbot_response,
963
- inputs=chatbot_input,
964
- outputs=chatbot_output
965
  )
966
 
967
  logger.info("Launching Gradio app")
@@ -971,4 +720,4 @@ def build_app():
971
  print(f"Error building the app: {e}")
972
 
973
  if __name__ == "__main__":
974
- build_app()
 
12
  import logging
13
  import os
14
  import sys
 
15
 
16
  # Import OpenAI library
17
  import openai
 
74
  openai.api_key = GROQ_API_KEY
75
  openai.api_base = "https://api.groq.com/openai/v1"
76
 
77
+ def extract_main_content(soup):
78
  """
79
+ Extract the main content from a webpage while filtering out boilerplate content.
80
  """
81
  if not soup:
82
  return ""
83
 
84
+ # Remove script and style elements
85
+ for element in soup(['script', 'style', 'header', 'footer', 'nav', 'ads', 'sidebar']):
86
+ element.decompose()
87
+
88
+ # First try to find content in main content areas
89
+ main_content_tags = soup.find_all(['article', 'main', 'div.content', 'div.post', 'div.entry-content'])
90
+ if main_content_tags:
91
+ content = ' '.join([tag.get_text(strip=True, separator=' ') for tag in main_content_tags])
92
+ else:
93
+ # Try to find content in <p> tags
94
+ p_tags = soup.find_all('p')
95
+ if p_tags:
96
+ content = ' '.join([p.get_text(strip=True, separator=' ') for p in p_tags])
97
  else:
98
  # Fallback to body content
99
+ content = soup.body.get_text(strip=True, separator=' ') if soup.body else soup.get_text(strip=True, separator=' ')
100
 
101
+ # Clean up the text
102
+ content = ' '.join(content.split())
103
+ content = re.sub(r'\s+', ' ', content) # Remove multiple spaces
104
+ content = re.sub(r'[\n\r\t]', ' ', content) # Remove newlines and tabs
105
 
106
+ # Limit content length to avoid token limits (adjust as needed)
107
+ return content[:5000]
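A minimal sketch of how the new extract_main_content behaves (illustrative only, not part of this commit; assumes the function above and app.py's bs4/re imports are in scope). One observation: find_all with a list matches literal tag names, so the 'div.content'-style entries never match and such pages fall through to the <p> and body fallbacks.

# Editorial sketch -- not part of this commit.
from bs4 import BeautifulSoup

sample_html = (
    "<html><body>"
    "<nav>Site menu</nav>"
    "<article><p>SmartMarks organizes bookmarks.</p>"
    "<p>It uses an LLM to summarize pages.</p></article>"
    "<footer>Copyright</footer>"
    "</body></html>"
)
sample_soup = BeautifulSoup(sample_html, "html.parser")
print(extract_main_content(sample_soup))
# Expected (roughly): "SmartMarks organizes bookmarks. It uses an LLM to summarize pages."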
108
 
109
  def get_page_metadata(soup):
110
  """
 
119
  if not soup:
120
  return metadata
121
 
122
+ # Get title
123
  title_tag = soup.find('title')
 
 
 
124
  if title_tag and title_tag.string:
125
  metadata['title'] = title_tag.string.strip()
126
 
127
+ # Get meta description (try multiple variants)
128
+ meta_desc = (
129
+ soup.find('meta', attrs={'name': 'description'}) or
130
+ soup.find('meta', attrs={'property': 'og:description'}) or
131
+ soup.find('meta', attrs={'name': 'twitter:description'})
132
+ )
133
+ if meta_desc:
134
+ metadata['description'] = meta_desc.get('content', '').strip()
135
 
136
  # Get meta keywords
137
+ meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
138
+ if meta_keywords:
139
+ metadata['keywords'] = meta_keywords.get('content', '').strip()
140
 
141
+ # Get OG title if main title is empty
142
+ if not metadata['title']:
143
+ og_title = soup.find('meta', attrs={'property': 'og:title'})
144
+ if og_title:
145
+ metadata['title'] = og_title.get('content', '').strip()
146
 
147
+ return metadata
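A small illustration of the metadata fallbacks (illustrative only, not part of this commit; assumes the elided initialization of `metadata` defaults each field to an empty string): the description is taken from name=description, then og:description, then twitter:description, while og:title fills in only when <title> yields nothing.

# Editorial sketch -- not part of this commit.
from bs4 import BeautifulSoup

head_only = (
    '<head>'
    '<meta property="og:title" content="SmartMarks">'
    '<meta property="og:description" content="AI-assisted bookmark manager">'
    '</head>'
)
print(get_page_metadata(BeautifulSoup(head_only, "html.parser")))
# Expected (roughly): {'title': 'SmartMarks', 'description': 'AI-assisted bookmark manager', 'keywords': ''}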
148
 
149
  def generate_summary(bookmark):
150
  """
151
  Generate a comprehensive summary for a bookmark using available content and LLM.
152
  """
153
+ logger.info(f"Generating summary for bookmark: {bookmark.get('url')}")
154
 
155
  try:
156
+ # Get the HTML soup object from the bookmark if it exists
157
  soup = BeautifulSoup(bookmark.get('html_content', ''), 'html.parser')
158
 
159
+ # Step 1: Extract all available information
160
  metadata = get_page_metadata(soup)
161
+ main_content = extract_main_content(soup)
162
 
163
+ # Step 2: Generate summary using LLM with all available content
164
  try:
165
+ # Prepare comprehensive context for LLM
166
+ available_content = []
167
+ if metadata['title']:
168
+ available_content.append(f"Title: {metadata['title']}")
169
+ if metadata['description']:
170
+ available_content.append(f"Description: {metadata['description']}")
171
+ if metadata['keywords']:
172
+ available_content.append(f"Keywords: {metadata['keywords']}")
173
+ if main_content:
174
+ available_content.append(f"Main Content: {main_content}")
175
 
176
+ if not available_content:
177
+ logger.warning("No content available for summary generation")
178
+ bookmark['summary'] = bookmark.get('title', 'No summary available.')
179
  return bookmark
180
 
181
+ prompt = f"""
182
+ Analyze and summarize this webpage based on the following information:
183
+
184
+ {' | '.join(available_content)}
185
+
186
+ Please provide a concise summary (2-3 sentences) focusing on:
187
+ 1. The main purpose or topic of the page
188
+ 2. Key information or features
189
+ 3. Target audience or use case (if apparent)
190
+
191
+ Be factual and objective.
192
+ """
193
+
194
+ response = openai.ChatCompletion.create(
195
+ model='llama3-8b-8192',
196
+ messages=[
197
+ {"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
198
+ {"role": "user", "content": prompt}
199
+ ],
200
+ max_tokens=150,
201
+ temperature=0.5,
202
+ )
203
+
204
+ summary = response['choices'][0]['message']['content'].strip()
205
+ logger.info("Successfully generated LLM summary")
206
+ bookmark['summary'] = summary
207
+ return bookmark
208
+
209
  except Exception as e:
210
+ logger.error(f"Error generating LLM summary: {e}")
211
+ # Fallback mechanisms in order of preference
212
+ if metadata['description']:
213
+ logger.info("Falling back to meta description")
214
+ bookmark['summary'] = metadata['description']
215
+ elif main_content:
216
+ logger.info("Falling back to truncated main content")
217
+ bookmark['summary'] = ' '.join(main_content.split()[:50]) + '...'
218
+ elif metadata['title']:
219
+ logger.info("Falling back to title")
220
+ bookmark['summary'] = metadata['title']
221
+ else:
222
+ bookmark['summary'] = bookmark.get('title', 'No summary available.')
223
+ return bookmark
224
 
225
  except Exception as e:
226
  logger.error(f"Error in generate_summary: {e}")
227
  bookmark['summary'] = bookmark.get('title', 'No summary available.')
228
+ return bookmark
229
+
230
+ def parse_bookmarks(file_content):
231
+ """
232
+ Parse bookmarks from HTML file.
233
+ """
234
+ logger.info("Parsing bookmarks")
235
+ try:
236
+ soup = BeautifulSoup(file_content, 'html.parser')
237
+ extracted_bookmarks = []
238
+ for link in soup.find_all('a'):
239
+ url = link.get('href')
240
+ title = link.text.strip()
241
+ if url and title:
242
+ extracted_bookmarks.append({'url': url, 'title': title})
243
+ logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
244
+ return extracted_bookmarks
245
+ except Exception as e:
246
+ logger.error("Error parsing bookmarks: %s", e)
247
+ raise
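For reference, the simplified parse_bookmarks only needs the <a> tags of a Netscape-format export; an href plus non-empty link text is enough (illustrative sketch, not part of this commit):

# Editorial sketch -- not part of this commit.
netscape_sample = """<!DOCTYPE NETSCAPE-Bookmark-file-1>
<DL><p>
    <DT><A HREF="https://huggingface.co" ADD_DATE="1700000000">Hugging Face</A>
    <DT><A HREF="https://groq.com">Groq</A>
</DL><p>"""
print(parse_bookmarks(netscape_sample))
# Expected: [{'url': 'https://huggingface.co', 'title': 'Hugging Face'},
#            {'url': 'https://groq.com', 'title': 'Groq'}]

Note that the ADD_DATE/ICON attributes kept by the previous parser are dropped here, which is also why the simplified export function below writes only the HREF and title.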
248
 
249
  async def fetch_url_info(session, bookmark):
250
  """
251
+ Fetch information about a URL asynchronously.
252
  """
253
  url = bookmark['url']
254
  if url in fetch_cache:
255
  bookmark.update(fetch_cache[url])
256
  return bookmark
257
 
 
258
  try:
259
  logger.info(f"Fetching URL info for: {url}")
260
+ headers = {
261
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
262
+ }
263
+ async with session.get(url, timeout=10, headers=headers) as response:
264
  bookmark['etag'] = response.headers.get('ETag', 'N/A')
265
+ bookmark['status_code'] = response.status
266
+
267
+ if response.status >= 400:
268
  bookmark['dead_link'] = True
269
+ bookmark['description'] = ''
270
  bookmark['html_content'] = ''
271
+ logger.warning(f"Dead link detected: {url} with status {response.status}")
272
+ else:
273
+ bookmark['dead_link'] = False
274
+ content = await response.text()
275
+ bookmark['html_content'] = content # Store full HTML for summary generation
276
+ bookmark['description'] = '' # Will be set by generate_summary function
277
+ logger.info(f"Fetched information for {url}")
278
  except Exception as e:
279
+ bookmark['dead_link'] = True
280
+ bookmark['etag'] = 'N/A'
281
+ bookmark['status_code'] = 'N/A'
282
+ bookmark['description'] = ''
283
+ bookmark['html_content'] = ''
284
+ logger.error(f"Error fetching URL info for {url}: {e}")
285
  finally:
286
  fetch_cache[url] = {
287
  'etag': bookmark.get('etag'),
288
  'status_code': bookmark.get('status_code'),
 
290
  'description': bookmark.get('description'),
291
  'html_content': bookmark.get('html_content', '')
292
  }
 
293
  return bookmark
294
 
295
  async def process_bookmarks_async(bookmarks_list):
296
  """
297
+ Process all bookmarks asynchronously.
298
  """
299
  logger.info("Processing bookmarks asynchronously")
300
  try:
301
+ connector = aiohttp.TCPConnector(limit=5) # Limit concurrent connections
302
+ timeout = aiohttp.ClientTimeout(total=30) # Set timeout
303
+ async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
304
  tasks = []
305
  for bookmark in bookmarks_list:
306
  task = asyncio.ensure_future(fetch_url_info(session, bookmark))
307
  tasks.append(task)
308
+ await asyncio.gather(*tasks)
309
  logger.info("Completed processing bookmarks asynchronously")
310
  except Exception as e:
311
  logger.error(f"Error in asynchronous processing of bookmarks: {e}")
312
  raise
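Driving the async fetch outside Gradio looks like the asyncio.run call in process_uploaded_file further down (illustrative sketch, not part of this commit; the URLs are only examples and this performs live HTTP requests):

# Editorial sketch -- not part of this commit.
import asyncio

sample_bookmarks = [
    {"url": "https://example.com", "title": "Example"},
    {"url": "https://example.com/definitely-missing", "title": "Probably a 404"},
]
asyncio.run(process_bookmarks_async(sample_bookmarks))
for bm in sample_bookmarks:
    print(bm["url"], bm["status_code"], "dead" if bm["dead_link"] else "ok")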
313
 
314
+ def assign_category(bookmark):
315
  """
316
+ Assign a category to a bookmark based on its content.
317
  """
318
+ if bookmark.get('dead_link'):
319
+ bookmark['category'] = 'Dead Link'
320
+ logger.info(f"Assigned category 'Dead Link' to bookmark: {bookmark.get('url')}")
321
+ return bookmark
322
+
323
+ summary = bookmark.get('summary', '').lower()
324
+ assigned_category = 'Uncategorized'
325
+
326
+ # Keywords associated with each category
327
+ category_keywords = {
328
+ "Social Media": ["social media", "networking", "friends", "connect", "posts", "profile"],
329
+ "News and Media": ["news", "journalism", "media", "headlines", "breaking news"],
330
+ "Education and Learning": ["education", "learning", "courses", "tutorial", "university", "academy", "study"],
331
+ "Entertainment": ["entertainment", "movies", "tv shows", "games", "comics", "fun"],
332
+ "Shopping and E-commerce": ["shopping", "e-commerce", "buy", "sell", "marketplace", "deals", "store"],
333
+ "Finance and Banking": ["finance", "banking", "investment", "money", "economy", "stock", "trading"],
334
+ "Technology": ["technology", "tech", "gadgets", "software", "computers", "innovation"],
335
+ "Health and Fitness": ["health", "fitness", "medical", "wellness", "exercise", "diet"],
336
+ "Travel and Tourism": ["travel", "tourism", "destinations", "hotels", "flights", "vacation"],
337
+ "Food and Recipes": ["food", "recipes", "cooking", "cuisine", "restaurant", "dining"],
338
+ "Sports": ["sports", "scores", "teams", "athletics", "matches", "leagues"],
339
+ "Arts and Culture": ["arts", "culture", "museum", "gallery", "exhibition", "artistic"],
340
+ "Government and Politics": ["government", "politics", "policy", "election", "public service"],
341
+ "Business and Economy": ["business", "corporate", "industry", "economy", "markets"],
342
+ "Science and Research": ["science", "research", "experiment", "laboratory", "study", "scientific"],
343
+ "Personal Blogs and Journals": ["blog", "journal", "personal", "diary", "thoughts", "opinions"],
344
+ "Job Search and Careers": ["jobs", "careers", "recruitment", "resume", "employment", "hiring"],
345
+ "Music and Audio": ["music", "audio", "songs", "albums", "artists", "bands"],
346
+ "Videos and Movies": ["video", "movies", "film", "clips", "trailers", "cinema"],
347
+ "Reference and Knowledge Bases": ["reference", "encyclopedia", "dictionary", "wiki", "knowledge", "information"],
348
+ }
349
+
350
+ for category, keywords in category_keywords.items():
351
+ for keyword in keywords:
352
+ if re.search(r'\b' + re.escape(keyword) + r'\b', summary):
353
+ assigned_category = category
354
+ logger.info(f"Assigned category '{assigned_category}' to bookmark: {bookmark.get('url')}")
355
+ break
356
+ if assigned_category != 'Uncategorized':
357
+ break
358
+
359
+ bookmark['category'] = assigned_category
360
+ if assigned_category == 'Uncategorized':
361
+ logger.info(f"No matching category found for bookmark: {bookmark.get('url')}")
362
+ return bookmark
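The new assign_category matches keywords as whole words against the lower-cased summary, so "technology" matches while "biotech" does not trip the "tech" keyword, and the first matching category in the dict wins (illustrative sketch, not part of this commit):

# Editorial sketch -- not part of this commit.
bm = {"url": "https://example.com", "summary": "A blog about technology and gadgets.", "dead_link": False}
assign_category(bm)
print(bm["category"])    # -> Technology (checked before Personal Blogs and Journals)

bm2 = {"url": "https://example.com", "summary": "Biotech funding news roundup.", "dead_link": False}
assign_category(bm2)
print(bm2["category"])   # -> News and Media ("news" matches as a whole word)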
363
 
364
  def vectorize_and_index(bookmarks_list):
365
  """
 
367
  """
368
  logger.info("Vectorizing summaries and building FAISS index")
369
  try:
370
+ summaries = [bookmark['summary'] for bookmark in bookmarks_list]
371
  embeddings = embedding_model.encode(summaries)
 
 
372
  dimension = embeddings.shape[1]
373
  faiss_idx = faiss.IndexFlatL2(dimension)
 
 
374
  faiss_idx.add(np.array(embeddings))
 
375
  logger.info("FAISS index built successfully")
376
  return faiss_idx, embeddings
377
  except Exception as e:
 
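Querying the index returned by vectorize_and_index is a plain FAISS nearest-neighbour search (illustrative sketch, not part of this commit; assumes the bookmarks already carry summaries and that embedding_model is the module-level SentenceTransformer defined earlier in app.py):

# Editorial sketch -- not part of this commit.
faiss_idx, embeddings = vectorize_and_index(bookmarks)
query_vec = embedding_model.encode(["machine learning tutorials"]).astype("float32")
distances, indices = faiss_idx.search(query_vec, min(3, len(bookmarks)))
for rank, idx in enumerate(indices[0], start=1):
    print(rank, bookmarks[idx]["title"], "-", bookmarks[idx]["category"])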
380
 
381
  def display_bookmarks():
382
  """
383
+ Generate HTML display for bookmarks.
384
  """
385
  logger.info("Generating HTML display for bookmarks")
386
  cards = ''
 
392
  etag = bookmark.get('etag', 'N/A')
393
  summary = bookmark.get('summary', '')
394
  category = bookmark.get('category', 'Uncategorized')
 
395
 
 
396
  if bookmark.get('dead_link'):
397
+ card_style = "border: 2px solid var(--error-color);"
398
+ text_style = "color: var(--error-color);"
399
  else:
400
+ card_style = "border: 2px solid var(--success-color);"
401
  text_style = "color: var(--text-color);"
402
 
 
 
 
 
403
  card_html = f'''
404
+ <div class="card" style="{card_style}; padding: 10px; margin: 10px; border-radius: 5px;">
405
  <div class="card-content">
406
+ <h3 style="{text_style}">{index}. {title} {status}</h3>
 
 
 
407
  <p style="{text_style}"><strong>Category:</strong> {category}</p>
408
  <p style="{text_style}"><strong>URL:</strong> <a href="{url}" target="_blank" style="{text_style}">{url}</a></p>
409
  <p style="{text_style}"><strong>ETag:</strong> {etag}</p>
410
+ <p style="{text_style}"><strong>Summary:</strong> {summary}</p>
411
  </div>
412
  </div>
413
  '''
414
  cards += card_html
415
  logger.info("HTML display generated")
416
+ return cards
417
 
418
+ def process_uploaded_file(file):
419
  """
420
+ Process the uploaded bookmarks file.
421
  """
422
  global bookmarks, faiss_index
423
  logger.info("Processing uploaded file")
424
 
425
  if file is None:
426
+ logger.warning("No file uploaded")
427
+ return "Please upload a bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()
428
+
429
  try:
430
  file_content = file.decode('utf-8')
431
  except UnicodeDecodeError as e:
432
+ logger.error(f"Error decoding the file: {e}")
433
+ return "Error decoding the file. Please ensure it's a valid HTML file.", '', gr.update(choices=[]), display_bookmarks()
434
+
435
  try:
436
  bookmarks = parse_bookmarks(file_content)
437
  except Exception as e:
438
  logger.error(f"Error parsing bookmarks: {e}")
439
+ return "Error parsing the bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()
440
+
441
  if not bookmarks:
442
+ logger.warning("No bookmarks found in the uploaded file")
443
+ return "No bookmarks found in the uploaded file.", '', gr.update(choices=[]), display_bookmarks()
444
+
445
+ # Asynchronously fetch bookmark info
446
  try:
 
447
  asyncio.run(process_bookmarks_async(bookmarks))
448
+ except Exception as e:
449
+ logger.error(f"Error processing bookmarks asynchronously: {e}")
450
+ return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
 
 
 
 
451
 
452
+ # Generate summaries and assign categories
453
+ for bookmark in bookmarks:
454
+ generate_summary(bookmark)
455
+ assign_category(bookmark)
 
 
 
 
456
 
457
+ try:
458
+ faiss_index, embeddings = vectorize_and_index(bookmarks)
459
  except Exception as e:
460
+ logger.error(f"Error building FAISS index: {e}")
461
+ return "Error building search index.", '', gr.update(choices=[]), display_bookmarks()
462
+
463
+ message = f"βœ… Successfully processed {len(bookmarks)} bookmarks."
464
+ logger.info(message)
465
+
466
+ # Generate displays and updates
467
+ bookmark_html = display_bookmarks()
468
+ choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
469
+ for i, bookmark in enumerate(bookmarks)]
470
+
471
+ return message, bookmark_html, gr.update(choices=choices), bookmark_html
472
 
473
+ def delete_selected_bookmarks(selected_indices):
474
  """
475
+ Delete selected bookmarks.
476
  """
477
  global bookmarks, faiss_index
 
478
  if not selected_indices:
479
+ return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
480
 
481
+ indices = [int(s.split('.')[0])-1 for s in selected_indices]
482
+ indices = sorted(indices, reverse=True)
483
+ for idx in indices:
484
+ if 0 <= idx < len(bookmarks):
485
+ logger.info(f"Deleting bookmark at index {idx + 1}")
486
+ bookmarks.pop(idx)
487
 
488
+ if bookmarks:
489
+ faiss_index, embeddings = vectorize_and_index(bookmarks)
490
+ else:
491
+ faiss_index = None
 
 
 
 
492
 
493
+ message = "πŸ—‘οΈ Selected bookmarks deleted successfully."
494
+ logger.info(message)
495
+
496
+ # Update choices and display
497
+ choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
498
+ for i, bookmark in enumerate(bookmarks)]
499
+
500
+ return message, gr.update(choices=choices), display_bookmarks()
501
+
502
+ def edit_selected_bookmarks_category(selected_indices, new_category):
503
  """
504
+ Edit category of selected bookmarks.
505
  """
506
  if not selected_indices:
507
+ return "⚠️ No bookmarks selected.", gr.update(choices=[]), display_bookmarks()
508
  if not new_category:
509
+ return "⚠️ No new category selected.", gr.update(choices=[]), display_bookmarks()
510
 
511
+ indices = [int(s.split('.')[0])-1 for s in selected_indices]
512
+ for idx in indices:
513
+ if 0 <= idx < len(bookmarks):
514
+ bookmarks[idx]['category'] = new_category
515
+ logger.info(f"Updated category for bookmark {idx + 1} to {new_category}")
516
+
517
+ message = "✏️ Category updated for selected bookmarks."
518
+ logger.info(message)
519
+
520
+ # Update choices and display
521
+ choices = [f"{i+1}. {bookmark['title']} (Category: {bookmark['category']})"
522
+ for i, bookmark in enumerate(bookmarks)]
523
+
524
+ return message, gr.update(choices=choices), display_bookmarks()
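Both manage handlers recover the zero-based list index from the leading number of each CheckboxGroup label (illustrative sketch, not part of this commit):

# Editorial sketch -- not part of this commit.
selected = ["3. Some Title (Category: Technology)", "1. Another Page (Category: News and Media)"]
indices = sorted((int(s.split(".")[0]) - 1 for s in selected), reverse=True)
print(indices)  # -> [2, 0]; deletion walks highest-first so earlier positions stay valid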
525
 
526
  def export_bookmarks():
527
  """
528
+ Export bookmarks to HTML file.
529
  """
530
  if not bookmarks:
531
+ logger.warning("No bookmarks to export")
532
  return "⚠️ No bookmarks to export."
533
 
534
  try:
535
+ logger.info("Exporting bookmarks to HTML")
536
+ soup = BeautifulSoup("<!DOCTYPE NETSCAPE-Bookmark-file-1><Title>Bookmarks</Title><H1>Bookmarks</H1>", 'html.parser')
537
  dl = soup.new_tag('DL')
 
 
 
 
538
  for bookmark in bookmarks:
539
  dt = soup.new_tag('DT')
540
  a = soup.new_tag('A', href=bookmark['url'])
 
 
 
 
541
  a.string = bookmark['title']
542
  dt.append(a)
543
+ dl.append(dt)
544
+ soup.append(dl)
545
  html_content = str(soup)
546
  b64 = base64.b64encode(html_content.encode()).decode()
547
  href = f'data:text/html;base64,{b64}'
 
548
  logger.info("Bookmarks exported successfully")
549
+ return f'<a href="{href}" download="bookmarks.html">πŸ’Ύ Download Exported Bookmarks</a>'
550
  except Exception as e:
551
  logger.error(f"Error exporting bookmarks: {e}")
552
  return "⚠️ Error exporting bookmarks."
553
 
554
  def chatbot_response(user_query):
555
  """
556
+ Generate chatbot response using Groq Cloud API.
557
  """
558
  if not GROQ_API_KEY:
559
+ logger.warning("GROQ_API_KEY not set.")
560
  return "⚠️ API key not set. Please set the GROQ_API_KEY environment variable."
561
 
562
  if not bookmarks:
563
+ logger.warning("No bookmarks available for chatbot")
564
  return "⚠️ No bookmarks available. Please upload and process your bookmarks first."
565
 
566
+ logger.info(f"Chatbot received query: {user_query}")
567
 
568
  try:
569
+ max_bookmarks = 50
570
+ bookmark_data = ""
571
+ for idx, bookmark in enumerate(bookmarks[:max_bookmarks]):
572
+ bookmark_data += f"{idx+1}. Title: {bookmark['title']}\nURL: {bookmark['url']}\nSummary: {bookmark['summary']}\n\n"
 
573
 
574
  prompt = f"""
575
+ You are an assistant that helps users find relevant bookmarks from their collection based on their queries.
576
 
577
+ User Query:
578
+ {user_query}
579
 
580
+ Bookmarks:
581
+ {bookmark_data}
 
 
582
 
583
+ Please identify the most relevant bookmarks that match the user's query. Provide a concise list including the index, title, URL, and a brief summary.
584
  """
585
 
586
  response = openai.ChatCompletion.create(
587
  model='llama3-8b-8192',
588
  messages=[
589
+ {"role": "system", "content": "You help users find relevant bookmarks based on their queries."},
590
  {"role": "user", "content": prompt}
591
  ],
592
  max_tokens=500,
 
594
  )
595
 
596
  answer = response['choices'][0]['message']['content'].strip()
597
+ logger.info("Chatbot response generated using Groq Cloud API")
598
  return answer
599
 
600
  except Exception as e:
 
604
 
605
  def build_app():
606
  """
607
+ Build and launch the Gradio app.
608
  """
609
  try:
610
  logger.info("Building Gradio app")
611
  with gr.Blocks(css="app.css") as demo:
612
+ # General Overview
613
+ gr.Markdown("""
614
+ # 📚 SmartMarks - AI Browser Bookmarks Manager
615
+
616
+ Welcome to **SmartMarks**, your intelligent assistant for managing browser bookmarks. SmartMarks leverages AI to help you organize, search, and interact with your bookmarks seamlessly.
617
+
618
+ ---
619
+
620
+ ## 🚀 **How to Use SmartMarks**
621
+
622
+ SmartMarks is divided into three main sections:
623
+
624
+ 1. **📂 Upload and Process Bookmarks:** Import your existing bookmarks and let SmartMarks analyze and categorize them for you.
625
+ 2. **💬 Chat with Bookmarks:** Interact with your bookmarks using natural language queries to find relevant links effortlessly.
626
+ 3. **🛠️ Manage Bookmarks:** View, edit, delete, and export your bookmarks with ease.
627
+ """)
628
+
629
+ # Upload and Process Bookmarks Tab
630
+ with gr.Tab("Upload and Process Bookmarks"):
631
+ gr.Markdown("""
632
+ ## 📂 **Upload and Process Bookmarks**
633
+
634
+ ### 📝 **Steps:**
635
+ 1. Click on the "Upload Bookmarks HTML File" button
636
+ 2. Select your bookmarks file
637
+ 3. Click "Process Bookmarks" to analyze and organize your bookmarks
638
+ """)
639
+
640
+ upload = gr.File(label="πŸ“ Upload Bookmarks HTML File", type='binary')
641
+ process_button = gr.Button("βš™οΈ Process Bookmarks")
642
+ output_text = gr.Textbox(label="βœ… Output", interactive=False)
643
+ bookmark_display = gr.HTML(label="πŸ“„ Processed Bookmarks")
644
+
645
+ # Chat with Bookmarks Tab
646
+ with gr.Tab("Chat with Bookmarks"):
647
+ gr.Markdown("""
648
+ ## 💬 **Chat with Bookmarks**
649
+
650
+ Ask questions about your bookmarks and get relevant results.
651
+ """)
652
+
653
+ user_input = gr.Textbox(
654
+ label="✍️ Ask about your bookmarks",
655
+ placeholder="e.g., Do I have any bookmarks about AI?"
656
+ )
657
+ chat_button = gr.Button("πŸ“¨ Send")
658
+ chat_output = gr.Textbox(label="πŸ’¬ Response", interactive=False)
659
+
660
+ # Manage Bookmarks Tab
661
+ with gr.Tab("Manage Bookmarks"):
662
+ gr.Markdown("""
663
+ ## 🛠️ **Manage Bookmarks**
664
+ Select bookmarks to delete or edit their categories.
665
+ """)
666
+
667
+ manage_output = gr.Textbox(label="πŸ”„ Status", interactive=False)
668
+ bookmark_selector = gr.CheckboxGroup(
669
+ label="βœ… Select Bookmarks",
670
+ choices=[]
671
+ )
672
+ new_category = gr.Dropdown(
673
+ label="πŸ†• New Category",
674
+ choices=CATEGORIES,
675
+ value="Uncategorized"
676
+ )
677
+ bookmark_display_manage = gr.HTML(label="📄 Bookmarks")
678
+
679
+ with gr.Row():
680
+ delete_button = gr.Button("πŸ—‘οΈ Delete Selected")
681
+ edit_category_button = gr.Button("✏️ Edit Category")
682
+ export_button = gr.Button("πŸ’Ύ Export")
683
+
684
+ download_link = gr.HTML(label="πŸ“₯ Download")
685
+
686
+ # Set up event handlers
687
  process_button.click(
688
+ process_uploaded_file,
689
+ inputs=upload,
690
+ outputs=[output_text, bookmark_display, bookmark_selector, bookmark_display_manage]
691
  )
692
+
693
+ chat_button.click(
694
+ chatbot_response,
695
+ inputs=user_input,
696
+ outputs=chat_output
697
+ )
698
+
699
  delete_button.click(
700
+ delete_selected_bookmarks,
701
+ inputs=bookmark_selector,
702
+ outputs=[manage_output, bookmark_selector, bookmark_display_manage]
703
  )
704
+
705
+ edit_category_button.click(
706
+ edit_selected_bookmarks_category,
707
+ inputs=[bookmark_selector, new_category],
708
+ outputs=[manage_output, bookmark_selector, bookmark_display_manage]
 
709
  )
710
+
 
711
  export_button.click(
712
+ export_bookmarks,
713
+ outputs=download_link
714
  )
715
 
716
  logger.info("Launching Gradio app")
 
720
  print(f"Error building the app: {e}")
721
 
722
  if __name__ == "__main__":
723
+ build_app()