Update app.py
Updated the bot to feel like Sentry Assist.
app.py
CHANGED
@@ -1,3 +1,8 @@
@@ -6,199 +11,285 @@ import faiss  # FAISS for vector search
@@ -206,97 +297,153 @@ def generate_response(

[The removed side of this diff survives only as truncated fragments: the previous revision used a generic document-QA system prompt (ending "Provide comprehensive answers derived solely from the text."), plainer error messages, and the same overall structure of configuration, PDF text extraction, FAISS indexing, similarity search, and Flask routes. The updated app.py follows in full.]

# -*- coding: utf-8 -*-
"""
Flask App for SentryLabs Document Assistant using RAG.
"""

from openai import OpenAI
from os import getenv
from flask import Flask, request, jsonify, render_template
import fitz  # PyMuPDF, used for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
import os
from sentence_transformers import SentenceTransformer
from typing import List, Tuple
from io import BytesIO

# --- Flask App Setup ---
# Use the current directory for templates (where index.html is expected)
app = Flask(__name__, template_folder=os.getcwd())
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Suppress tokenizer parallelism warning

# --- Configuration ---
class ChatConfig:
    """Configuration settings for the chat assistant."""
    MODEL = "google/gemma-3-27b-it:free"  # OpenRouter model identifier
    DEFAULT_MAX_TOKENS = 768  # Max tokens for the LLM response
    DEFAULT_TEMP = 0.4  # Temperature for LLM generation (balance creativity/determinism)
    DEFAULT_TOP_P = 0.95  # Top-P nucleus sampling parameter

    # --- SentryLabs Persona System Prompt ---
    SYSTEM_PROMPT_SENTRY = """You are Sentry, an AI assistant representing SentryLabs. Your purpose is to act as a knowledgeable, trusted advisor and cybersecurity innovator, assisting users by analyzing the content of the uploaded PDF document.

**Your Core Directives:**
1. **Strict Document Scope:** Your knowledge is **strictly limited** to the content within the provided context sections of the uploaded PDF. You **must not** use any external knowledge, make assumptions beyond the text, or invent information.
2. **Persona Embodiment:** Consistently embody the SentryLabs voice:
   * **Authoritative but Approachable:** Be confident and expert, yet clear and supportive.
   * **Innovative & Forward-Thinking:** Frame answers with a proactive cybersecurity mindset where the text allows.
   * **Customer-Centric:** Focus on providing clear value and insights derived *from the document*.
   * **Professional & Clear:** Use precise, professional language. Employ technical terms from the document accurately, but strive for accessibility. Explain complex document concepts simply if possible. Use **active voice**.
3. **Tone:** Maintain a confident, informative, empathetic, and collaborative semi-formal tone. Avoid slang and overly casual language.
4. **Handling Missing Information:** If the provided document context **does not** contain the information needed to answer a question, state this clearly and professionally. Indicate that the answer is outside the scope of the analyzed document sections. Do not apologize excessively; simply state the limitation based on the provided text. Example: "Based on the document sections provided, specific details on [topic] are not covered." or "The analyzed text does not contain information regarding [topic]."
5. **Source Attribution:** When answering, subtly reference the document as the source of your information (e.g., "According to the document...", "The provided text indicates...", "Based on the analysis of the document sections...").
6. **Audience Awareness (Implied):** While interacting with one user, frame your analysis in a way that would be valuable to decision-makers (balancing technical detail found in the document with its potential strategic relevance, *if* the document provides such context).
7. **Focus:** Your primary goal is accurate information retrieval and synthesis *from the provided document text only*, presented through the SentryLabs persona.

Engage directly and professionally. If this is the start of the conversation (no prior history), you can offer a brief introductory sentence. Remember, accuracy and adherence to the document are paramount.
"""

    # --- Select the Sentry prompt ---
    SELECTED_SYSTEM_PROMPT = SYSTEM_PROMPT_SENTRY

# --- API Client Setup ---
OPENROUTER_API_KEY = getenv('OPENROUTER_API_KEY')
if not OPENROUTER_API_KEY:
    raise ValueError("FATAL: OPENROUTER_API_KEY environment variable not set.")

# Initialize OpenAI client to point to OpenRouter
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# --- Embedding Model and Vector Store Setup ---
# Define embedding model name and dimension
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
VECTOR_DIMENSION = 384  # Dimension for all-MiniLM-L6-v2

# Define cache directory for Sentence Transformers models
CACHE_DIR = getenv("SENTENCE_TRANSFORMERS_HOME", "/tmp/st_cache")
os.makedirs(CACHE_DIR, exist_ok=True)  # Ensure cache directory exists

# Initialize embedding model (will be loaded properly in __main__)
embed_model = None

# Initialize FAISS index (in-memory L2 distance index)
index = faiss.IndexFlatL2(VECTOR_DIMENSION)
# Store original text chunks corresponding to index entries
documents: List[str] = []

# --- Core Functions ---

def extract_text_from_pdf(pdf_stream: BytesIO) -> List[str]:
    """
    Extracts text from each page of a PDF provided as a BytesIO stream.
    Performs basic cleaning (stripping whitespace).

    Args:
        pdf_stream: A BytesIO object containing the PDF data.

    Returns:
        A list of strings, where each string is the text content of a page.
        Returns an empty list if no text could be extracted.
    """
    text_chunks = []
    try:
        # Ensure the stream is BytesIO
        if not isinstance(pdf_stream, BytesIO):
            pdf_stream = BytesIO(pdf_stream.read())  # Read if it's a file stream

        doc = fitz.open(stream=pdf_stream, filetype="pdf")
        # Simple chunking by page - consider more advanced chunking for better RAG
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            text = page.get_text("text").strip()
            if text:  # Only add non-empty pages
                text_chunks.append(text)
        doc.close()
        print(f"Extracted {len(text_chunks)} non-empty text chunks from PDF.")
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        # Optionally re-raise or handle specific fitz errors
    return text_chunks

def build_vector_index(text_chunks: List[str]):
    """
    Embeds text chunks using the sentence transformer model and builds/rebuilds
    the FAISS index.

    Args:
        text_chunks: A list of strings to be indexed.
    """
    global documents, index, VECTOR_DIMENSION, embed_model

    if embed_model is None:
        print("Error: Embedding model not loaded. Cannot build index.")
        raise RuntimeError("Embedding model is not initialized.")

    if not text_chunks:
        print("Warning: No text chunks provided to build the vector index.")
        documents = []
        # Reset index to an empty state
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        return

    print(f"Building vector index for {len(text_chunks)} chunks...")
    # Store the original text corresponding to the vectors
    documents = text_chunks

    # Reset the index before adding new embeddings
    index = faiss.IndexFlatL2(VECTOR_DIMENSION)

    try:
        # Encode the text chunks into embeddings
        print("Encoding text chunks...")
        embeddings = embed_model.encode(text_chunks, show_progress_bar=True)
        embeddings = np.array(embeddings, dtype=np.float32)  # Ensure correct dtype for FAISS

        # Validate embeddings shape
        if embeddings.ndim != 2 or embeddings.shape[1] != VECTOR_DIMENSION:
            raise ValueError(
                f"Embedding dimension mismatch or incorrect shape: "
                f"expected (n, {VECTOR_DIMENSION}), got {embeddings.shape}"
            )

        # Add embeddings to the FAISS index
        index.add(embeddings)
        print(f"FAISS index built successfully with {index.ntotal} vectors.")

    except Exception as e:
        print(f"Error during embedding or indexing: {e}")
        # Reset state in case of error to avoid a partially built index
        documents = []
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        raise  # Re-raise the exception to signal failure to the caller

def search_relevant_chunks(query: str, k: int = 3) -> str:
    """
    Finds the top 'k' most relevant text chunks for a given query using
    vector similarity search (FAISS).

    Args:
        query: The user's query string.
        k: The number of relevant chunks to retrieve.

    Returns:
        A single string containing the concatenated relevant text chunks,
        separated by newlines, or an empty string if no relevant chunks are
        found or if the index is empty.
    """
    global index, documents, embed_model

    if embed_model is None:
        print("Error: Embedding model not loaded. Cannot perform search.")
        return ""

    if index.ntotal == 0:
        print("Warning: Search attempted on an empty index.")
        return ""  # Return empty string if index is not ready

    if not query:
        print("Warning: Empty query provided for search.")
        return ""

    try:
        # Encode the query into an embedding
        query_embedding = embed_model.encode([query])
        query_embedding = np.array(query_embedding, dtype=np.float32)

        # Perform the similarity search
        # Ensure k is not greater than the number of items in the index
        k_search = min(k, index.ntotal)
        if k_search <= 0:
            return ""  # Should not happen if ntotal > 0, but safe check

        distances, indices = index.search(query_embedding, k=k_search)

        # Filter out potential invalid indices (-1) and ensure indices are within bounds
        valid_indices = [idx for idx in indices[0] if idx != -1 and 0 <= idx < len(documents)]

        if not valid_indices:
            print(f"No relevant chunks found for query: '{query[:50]}...'")
            return ""

        # Retrieve the actual text chunks based on the valid indices
        relevant_docs = [documents[i] for i in valid_indices]
        print(f"Retrieved {len(relevant_docs)} relevant chunks for query.")

        # Join the relevant documents with a clear separator
        return "\n\n---\n\n".join(relevant_docs)

    except Exception as e:
        print(f"Error during similarity search for query '{query[:50]}...': {e}")
        return ""  # Return empty string on error

def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = ChatConfig.SELECTED_SYSTEM_PROMPT,
    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
    temperature: float = ChatConfig.DEFAULT_TEMP,
    top_p: float = ChatConfig.DEFAULT_TOP_P
) -> str:
    """
    Generates a response from the LLM based on the user's message,
    retrieved context from the PDF, and conversation history.

    Args:
        message: The current user message.
        history: A list of past conversation turns as (user_message, assistant_response) tuples.
        system_message: The system prompt defining the AI's persona and rules.
        max_tokens: Maximum number of tokens for the response.
        temperature: Controls randomness in generation.
        top_p: Controls nucleus sampling.

    Returns:
        The generated response string from the AI assistant.
    """
    global index

    if index.ntotal == 0:  # Check if index is built (PDF uploaded and processed)
        # Use Sentry's voice for this initial state message
        return ("I am Sentry, your SentryLabs assistant. To begin our analysis, "
                "please upload a PDF document using the button above.")

    # 1. Retrieve relevant context from the PDF index
    context = search_relevant_chunks(message, k=3)  # Retrieve top 3 relevant chunks

    # 2. Prepare the message list for the LLM API
    messages = []

    # Add the system prompt (defines Sentry's persona and constraints)
    messages.append({"role": "system", "content": system_message})

    # 3. Add conversation history (if any)
    # Process history into the format expected by the API (alternating user/assistant roles)
    # and only include valid, non-empty turns
    processed_history = []
    for user_msg, assistant_msg in history:
        # Basic validation to avoid sending empty or placeholder turns
        if user_msg is not None and assistant_msg is not None and \
           str(user_msg).strip() != "" and str(assistant_msg).strip() != "":
            processed_history.append({"role": "user", "content": str(user_msg)})
            processed_history.append({"role": "assistant", "content": str(assistant_msg)})
    messages.extend(processed_history)

    # 4. Construct the final user prompt (current question + retrieved context)
    if context:
        # Provide the context clearly labeled
        user_prompt_content = (
            "Based on the following context from the document, please answer the question:\n\n"
            f"DOCUMENT CONTEXT:\n---\n{context}\n---\n\n"
            f"QUESTION:\n{message}"
        )
    else:
        # If no context was found, inform the LLM. The system prompt guides its response.
        user_prompt_content = (
            "Regarding the document, please address the following question (Note: Specific context sections "
            f"could not be retrieved via similarity search for this query):\n\nQUESTION:\n{message}"
        )

    # Add the final user message (including context) to the list
    messages.append({"role": "user", "content": user_prompt_content})

    # 5. Call the LLM API via OpenRouter
    try:
        print(f"--- Sending to {ChatConfig.MODEL} (Sentry Persona) ---")
        # print("Messages being sent:", messages)  # Uncomment for deep debugging

        completion = client.chat.completions.create(
            model=ChatConfig.MODEL,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            # Optional: add stop sequences if the model tends to hallucinate roles
            # stop=["\nUser:", "\nAssistant:", "\nSystem:"]
        )

        response = completion.choices[0].message.content
        print(f"--- Received Response from Sentry ({len(response or '')} chars) ---")
        # Ensure the response is not None before stripping
        return response.strip() if response else "Received an empty response."

    except Exception as e:
        print(f"Error generating response from LLM: {str(e)}")
        # Provide a professional, Sentry-like error message
        return ("I encountered an issue while processing your request with the language model. "
                "Please try again shortly. If the problem persists, please verify the document and query.")

# --- Flask Routes ---

@app.route('/')
def index_route():
    """Serves the main HTML page for the chat interface."""
    return render_template('index.html')

@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """
    Handles PDF file upload, extracts text, and builds the vector index.
    Resets the index and documents before processing a new file.
    """
    global documents, index, VECTOR_DIMENSION  # Ensure we modify the global state

    if 'pdf' not in request.files:
        return jsonify({"error": "No PDF file part in the request."}), 400

    file = request.files['pdf']
    if not file or file.filename == "":
        return jsonify({"error": "No file selected."}), 400
    if not file.filename.lower().endswith('.pdf'):
        return jsonify({"error": "Invalid file type. Please upload a PDF document."}), 400

    print(f"Received file for processing: {file.filename}")

    # Reset index and documents for the new file
    print("Resetting index and documents for new upload...")
    documents = []
    index = faiss.IndexFlatL2(VECTOR_DIMENSION)  # Reinitialize the index

    try:
        pdf_stream = BytesIO(file.read())  # Read file content into memory

        # Extract text chunks from the PDF
        text_chunks = extract_text_from_pdf(pdf_stream)
        if not text_chunks:
            return jsonify({"error": "Could not extract readable text content from the provided PDF."}), 400

        # Build the vector database (FAISS index) with the extracted text
        build_vector_index(text_chunks)  # Handles index creation and populates documents

        return jsonify({"message": f"Document '{file.filename}' processed successfully. Ready for analysis."}), 200

    except fitz.fitz.FileDataError:
        # Corrupted or invalid PDF format
        return jsonify({"error": "Invalid or corrupted PDF file. Please provide a valid PDF document."}), 400
    except RuntimeError as e:
        # Errors from build_vector_index (e.g. embedding model not loaded)
        print(f"Runtime Error during PDF processing: {e}")
        return jsonify({"error": f"A runtime error occurred during processing: {e}"}), 500
    except ValueError as e:
        # Potential value errors (e.g. embedding dimension mismatch)
        print(f"Value Error during PDF processing: {e}")
        return jsonify({"error": f"A configuration or value error occurred: {e}"}), 500
    except Exception as e:
        # Generic handler for unexpected issues
        print(f"Unexpected error processing PDF upload: {str(e)}")
        # Ensure state is clean even after unexpected errors
        documents = []
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        return jsonify({"error": f"An unexpected error occurred during PDF processing. Details: {str(e)}"}), 500

@app.route('/ask_question', methods=['POST'])
def ask_question():
    """
    Handles user questions, retrieves relevant context, generates a response
    using the LLM, and returns it.
    """
    data = request.get_json()
    if not data or 'message' not in data:
        return jsonify({"error": "Missing 'message' field in request body"}), 400

    message = data.get('message', '').strip()
    # History comes from the frontend as a list of [user_msg, assistant_msg] pairs
    history_raw = data.get('history', [])

    if not message:
        # Return a Sentry-like response for empty input
        return jsonify({"response": "Please provide a question or topic you'd like to discuss regarding the document."})

    # Validate and sanitize the history format
    validated_history: List[Tuple[str, str]] = []
    if isinstance(history_raw, list):
        for item in history_raw:
            # Each item must be a list/tuple of exactly two entries
            if isinstance(item, (list, tuple)) and len(item) == 2:
                user_msg = str(item[0] or "").strip()
                assistant_msg = str(item[1] or "").strip()
                # Only keep pairs where both messages have content after stripping
                if user_msg and assistant_msg:
                    validated_history.append((user_msg, assistant_msg))
            else:
                print(f"Warning: Invalid history item format received: {item}. Skipping.")

    try:
        # Generate the response using the core logic function
        response_text = generate_response(message, validated_history)
        return jsonify({"response": response_text})
    except Exception as e:
        # Catch potential errors during the generation process itself
        print(f"Error in /ask_question endpoint during response generation: {e}")
        return jsonify({"response": "Apologies, an internal error occurred while generating the response. Please try again."}), 500

# --- Main Execution Block ---
if __name__ == '__main__':
    print("--- SentryLabs Document Assistant Initializing ---")

    # Ensure the API key is set
    if not OPENROUTER_API_KEY:
        print("FATAL: OPENROUTER_API_KEY is not set. Please set the environment variable.")
        exit(1)
    else:
        print("OpenRouter API Key found.")

    # Load the embedding model during startup
    try:
        print(f"Loading embedding model '{EMBEDDING_MODEL_NAME}' from cache/hub...")
        print(f"Using cache directory: {CACHE_DIR}")
        embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR)
        # Perform a dummy encode to ensure the model is fully loaded and functional
        _ = embed_model.encode(["test sentence"])
        print("Embedding model loaded successfully.")
    except Exception as e:
        print(f"FATAL: Failed to load Sentence Transformer model '{EMBEDDING_MODEL_NAME}'. Error: {e}")
        print("Please check the model name, network connection, and cache permissions.")
        exit(1)  # Exit if the core embedding model fails to load

    # Start the Flask development server
    print("Starting Flask development server...")
    # Use host='0.0.0.0' to make the app accessible on the network; default is '127.0.0.1'
    app.run(debug=True, host='127.0.0.1', port=5000)
    print("--- Server Shutdown ---")