Spaces:

siddhartharya
/

Bookmark-Manager

Running

App Files Community

siddhartharya committed on Nov 25, 2024

Commit

b8183dd

verified ·

1 Parent(s): 3f6cb23

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -39

app.py CHANGED Viewed

@@ -171,27 +171,39 @@ def generate_summary(bookmark):
         if main_content:
             available_content.append(f"Main Content: {main_content}")
-        if not available_content:
-            logger.warning("No content available for summary generation")
-            bookmark['summary'] = bookmark.get('title', 'No summary available.')
-            return bookmark
-        # Estimate token count and trim content if necessary
-        max_total_tokens = 8000  # Adjust based on model's maximum context length
-        prompt_tokens_estimate = len(' '.join(available_content).split()) + 200  # 200 tokens reserved for response
-        if prompt_tokens_estimate > max_total_tokens:
-            # Trim main content
-            allowable_content_tokens = max_total_tokens - 200  # Reserve 200 tokens for response
-            main_content_tokens = len(main_content.split())
-            if main_content_tokens > allowable_content_tokens:
-                main_content = ' '.join(main_content.split()[:allowable_content_tokens])
-                logger.info("Trimmed main content to fit within token limits.")
-            # Update available content
-            available_content[-1] = f"Main Content: {main_content}"
-        # Construct the prompt
-        prompt = f"""
 Analyze and summarize the following webpage content:
 {' '.join(available_content)}
@@ -221,7 +233,7 @@ Be factual and objective.
         return bookmark
     except Exception as e:
-        logger.error(f"Error generating summary: {e}")
         # Fallback mechanisms
         if metadata['description']:
             logger.info("Falling back to meta description")
@@ -233,7 +245,37 @@ Be factual and objective.
             logger.info("Falling back to title")
             bookmark['summary'] = metadata['title']
         else:
-            bookmark['summary'] = 'No summary available.'
         return bookmark
 def parse_bookmarks(file_content):
@@ -252,7 +294,7 @@ def parse_bookmarks(file_content):
         logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
         return extracted_bookmarks
     except Exception as e:
-        logger.error("Error parsing bookmarks: %s", e)
         raise
 async def fetch_url_info(session, bookmark):
@@ -267,13 +309,38 @@ async def fetch_url_info(session, bookmark):
     try:
         logger.info(f"Fetching URL info for: {url}")
         headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
         }
-        async with session.get(url, timeout=10, headers=headers) as response:
             bookmark['etag'] = response.headers.get('ETag', 'N/A')
             bookmark['status_code'] = response.status
-            if response.status >= 400:
                 bookmark['dead_link'] = True
                 bookmark['description'] = ''
                 bookmark['html_content'] = ''
@@ -282,7 +349,7 @@ async def fetch_url_info(session, bookmark):
                 bookmark['dead_link'] = False
                 content = await response.text()
                 bookmark['html_content'] = content  # Store full HTML for summary generation
-                bookmark['description'] = ''  # Will be set by generate_summary function
                 logger.info(f"Fetched information for {url}")
     except Exception as e:
         bookmark['dead_link'] = True
@@ -290,7 +357,7 @@ async def fetch_url_info(session, bookmark):
         bookmark['status_code'] = 'N/A'
         bookmark['description'] = ''
         bookmark['html_content'] = ''
-        logger.error(f"Error fetching URL info for {url}: {e}")
     finally:
         fetch_cache[url] = {
             'etag': bookmark.get('etag'),
@@ -317,7 +384,7 @@ async def process_bookmarks_async(bookmarks_list):
             await asyncio.gather(*tasks)
         logger.info("Completed processing bookmarks asynchronously")
     except Exception as e:
-        logger.error(f"Error in asynchronous processing of bookmarks: {e}")
         raise
 def assign_category(bookmark):
@@ -372,7 +439,7 @@ Respond with only the category name.
         return bookmark
     except Exception as e:
-        logger.error(f"Error assigning category: {e}")
         bookmark['category'] = 'Uncategorized'
         return bookmark
@@ -392,7 +459,7 @@ def vectorize_and_index(bookmarks_list):
         logger.info("FAISS index built successfully with IDs")
         return index
     except Exception as e:
-        logger.error(f"Error in vectorizing and indexing: {e}")
         raise
 def display_bookmarks():
@@ -453,13 +520,13 @@ def process_uploaded_file(file):
     try:
         file_content = file.decode('utf-8')
     except UnicodeDecodeError as e:
-        logger.error(f"Error decoding the file: {e}")
         return "Error decoding the file. Please ensure it's a valid HTML file.", '', gr.update(choices=[]), display_bookmarks()
     try:
         bookmarks = parse_bookmarks(file_content)
     except Exception as e:
-        logger.error(f"Error parsing bookmarks: {e}")
         return "Error parsing the bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()
     if not bookmarks:
@@ -474,7 +541,7 @@ def process_uploaded_file(file):
     try:
         asyncio.run(process_bookmarks_async(bookmarks))
     except Exception as e:
-        logger.error(f"Error processing bookmarks asynchronously: {e}")
         return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
     # Generate summaries and assign categories
@@ -485,7 +552,7 @@ def process_uploaded_file(file):
     try:
         faiss_index = vectorize_and_index(bookmarks)
     except Exception as e:
-        logger.error(f"Error building FAISS index: {e}")
         return "Error building search index.", '', gr.update(choices=[]), display_bookmarks()
     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
@@ -580,7 +647,7 @@ def export_bookmarks():
         logger.info("Bookmarks exported successfully")
         return f'<a href="{href}" download="bookmarks.html">💾 Download Exported Bookmarks</a>'
     except Exception as e:
-        logger.error(f"Error exporting bookmarks: {e}")
         return "⚠️ Error exporting bookmarks."
 def chatbot_response(user_query):
@@ -643,7 +710,7 @@ Provide a concise and helpful response.
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
-        logger.error(error_message)
         return error_message
 def build_app():
@@ -760,7 +827,7 @@ def build_app():
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
-        logger.error(f"Error building the app: {e}")
         print(f"Error building the app: {e}")
 if __name__ == "__main__":

         if main_content:
             available_content.append(f"Main Content: {main_content}")
+        # If content is insufficient, instruct the LLM to use prior knowledge
+        if not available_content or len(' '.join(available_content).split()) < 50:
+            prompt = f"""
+You are a knowledgeable assistant.
+The user provided a URL: {bookmark.get('url')}
+Please provide a concise summary (2-3 sentences) about this website based on your knowledge.
+Focus on:
+- The main purpose or topic of the website.
+- Key information or features.
+- Target audience or use case (if apparent).
+Be factual and objective.
+"""
+        else:
+            # Estimate token count and trim content if necessary
+            max_total_tokens = 8000  # Adjust based on model's maximum context length
+            prompt_tokens_estimate = len(' '.join(available_content).split()) + 200  # 200 tokens reserved for response
+            if prompt_tokens_estimate > max_total_tokens:
+                # Trim main content
+                allowable_content_tokens = max_total_tokens - 200  # Reserve 200 tokens for response
+                main_content_tokens = len(main_content.split())
+                if main_content_tokens > allowable_content_tokens:
+                    main_content = ' '.join(main_content.split()[:allowable_content_tokens])
+                    logger.info("Trimmed main content to fit within token limits.")
+                # Update available content
+                available_content[-1] = f"Main Content: {main_content}"
+            # Construct the prompt
+            prompt = f"""
 Analyze and summarize the following webpage content:
 {' '.join(available_content)}
         return bookmark
     except Exception as e:
+        logger.error(f"Error generating summary: {e}", exc_info=True)
         # Fallback mechanisms
         if metadata['description']:
             logger.info("Falling back to meta description")
             logger.info("Falling back to title")
             bookmark['summary'] = metadata['title']
         else:
+            # If all else fails, prompt the LLM to use prior knowledge
+            prompt = f"""
+You are a knowledgeable assistant.
+The user provided a URL: {bookmark.get('url')}
+Please provide a concise summary (2-3 sentences) about this website based on your knowledge.
+Focus on:
+- The main purpose or topic of the website.
+- Key information or features.
+- Target audience or use case (if apparent).
+Be factual and objective.
+"""
+            try:
+                response = openai.ChatCompletion.create(
+                    model='llama3-8b-8192',
+                    messages=[
+                        {"role": "system", "content": "You are a helpful assistant that creates concise webpage summaries."},
+                        {"role": "user", "content": prompt}
+                    ],
+                    max_tokens=200,
+                    temperature=0.5,
+                )
+                summary = response['choices'][0]['message']['content'].strip()
+                logger.info("Successfully generated LLM summary using prior knowledge")
+                bookmark['summary'] = summary
+            except Exception as e:
+                logger.error(f"Error generating summary using prior knowledge: {e}", exc_info=True)
+                bookmark['summary'] = 'No summary available.'
         return bookmark
 def parse_bookmarks(file_content):
         logger.info(f"Extracted {len(extracted_bookmarks)} bookmarks")
         return extracted_bookmarks
     except Exception as e:
+        logger.error("Error parsing bookmarks: %s", e, exc_info=True)
         raise
 async def fetch_url_info(session, bookmark):
     try:
         logger.info(f"Fetching URL info for: {url}")
         headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+                          'AppleWebKit/537.36 (KHTML, like Gecko) '
+                          'Chrome/91.0.4472.124 Safari/537.36',
+            'Accept-Language': 'en-US,en;q=0.9',
         }
+        async with session.get(url, timeout=10, headers=headers, allow_redirects=True) as response:
             bookmark['etag'] = response.headers.get('ETag', 'N/A')
             bookmark['status_code'] = response.status
+            if response.status >= 500:
+                # Server error, consider as dead link
+                bookmark['dead_link'] = True
+                bookmark['description'] = ''
+                bookmark['html_content'] = ''
+                logger.warning(f"Dead link detected: {url} with status {response.status}")
+            elif response.status == 403:
+                # Forbidden, but may be accessible with proper headers
+                logger.info(f"Received 403 for {url}, retrying with different headers")
+                # Try with different headers or methods if necessary
+                # For now, we'll proceed to read the content
+                content = await response.text()
+                bookmark['dead_link'] = False
+                bookmark['html_content'] = content
+                bookmark['description'] = ''
+            elif response.status == 400:
+                # Bad request, may be due to missing parameters
+                bookmark['dead_link'] = False
+                content = await response.text()
+                bookmark['html_content'] = content
+                bookmark['description'] = ''
+            elif response.status >= 400:
+                # Other client errors
                 bookmark['dead_link'] = True
                 bookmark['description'] = ''
                 bookmark['html_content'] = ''
                 bookmark['dead_link'] = False
                 content = await response.text()
                 bookmark['html_content'] = content  # Store full HTML for summary generation
+                bookmark['description'] = ''
                 logger.info(f"Fetched information for {url}")
     except Exception as e:
         bookmark['dead_link'] = True
         bookmark['status_code'] = 'N/A'
         bookmark['description'] = ''
         bookmark['html_content'] = ''
+        logger.error(f"Error fetching URL info for {url}: {e}", exc_info=True)
     finally:
         fetch_cache[url] = {
             'etag': bookmark.get('etag'),
             await asyncio.gather(*tasks)
         logger.info("Completed processing bookmarks asynchronously")
     except Exception as e:
+        logger.error(f"Error in asynchronous processing of bookmarks: {e}", exc_info=True)
         raise
 def assign_category(bookmark):
         return bookmark
     except Exception as e:
+        logger.error(f"Error assigning category: {e}", exc_info=True)
         bookmark['category'] = 'Uncategorized'
         return bookmark
         logger.info("FAISS index built successfully with IDs")
         return index
     except Exception as e:
+        logger.error(f"Error in vectorizing and indexing: {e}", exc_info=True)
         raise
 def display_bookmarks():
     try:
         file_content = file.decode('utf-8')
     except UnicodeDecodeError as e:
+        logger.error(f"Error decoding the file: {e}", exc_info=True)
         return "Error decoding the file. Please ensure it's a valid HTML file.", '', gr.update(choices=[]), display_bookmarks()
     try:
         bookmarks = parse_bookmarks(file_content)
     except Exception as e:
+        logger.error(f"Error parsing bookmarks: {e}", exc_info=True)
         return "Error parsing the bookmarks HTML file.", '', gr.update(choices=[]), display_bookmarks()
     if not bookmarks:
     try:
         asyncio.run(process_bookmarks_async(bookmarks))
     except Exception as e:
+        logger.error(f"Error processing bookmarks asynchronously: {e}", exc_info=True)
         return "Error processing bookmarks.", '', gr.update(choices=[]), display_bookmarks()
     # Generate summaries and assign categories
     try:
         faiss_index = vectorize_and_index(bookmarks)
     except Exception as e:
+        logger.error(f"Error building FAISS index: {e}", exc_info=True)
         return "Error building search index.", '', gr.update(choices=[]), display_bookmarks()
     message = f"✅ Successfully processed {len(bookmarks)} bookmarks."
         logger.info("Bookmarks exported successfully")
         return f'<a href="{href}" download="bookmarks.html">💾 Download Exported Bookmarks</a>'
     except Exception as e:
+        logger.error(f"Error exporting bookmarks: {e}", exc_info=True)
         return "⚠️ Error exporting bookmarks."
 def chatbot_response(user_query):
     except Exception as e:
         error_message = f"⚠️ Error processing your query: {str(e)}"
+        logger.error(error_message, exc_info=True)
         return error_message
 def build_app():
         logger.info("Launching Gradio app")
         demo.launch(debug=True)
     except Exception as e:
+        logger.error(f"Error building the app: {e}", exc_info=True)
         print(f"Error building the app: {e}")
 if __name__ == "__main__":