NanobotzAI committed
Commit f1790d7 · verified · 1 Parent(s): 9d2b0e2

Update app.py

updated the bot to feel like sentry assist

Files changed (1)
  1. app.py +298 -151
app.py CHANGED
@@ -1,3 +1,8 @@
 from openai import OpenAI
 from os import getenv
 from flask import Flask, request, jsonify, render_template
@@ -6,199 +11,285 @@ import faiss # FAISS for vector search
 import numpy as np
 import os
 from sentence_transformers import SentenceTransformer
-# from huggingface_hub import InferenceClient  # Not used in the current code, removed for clarity
 from typing import List, Tuple
-from io import BytesIO  # Added for BytesIO

 app = Flask(__name__, template_folder=os.getcwd())
-os.environ["TOKENIZERS_PARALLELISM"] = "false"

 # --- Configuration ---
 class ChatConfig:
-    MODEL = "google/gemma-3-27b-it:free"
-    DEFAULT_MAX_TOKENS = 512
-    DEFAULT_TEMP = 0.5  # Slightly increased for more natural variance
-    DEFAULT_TOP_P = 0.95
-
-    # --- NEW: System Prompt Options ---
-    # Option 1: Friendly & Helpful Assistant
-    SYSTEM_PROMPT_FRIENDLY = """You are a friendly and helpful AI assistant. Your knowledge is strictly limited to the content of the uploaded PDF document.
-Your primary goal is to answer user questions accurately based *only* on the provided context sections from the document.
-Engage in a natural, conversational tone. Be polite and helpful.
-If the provided context does not contain the answer to the question, clearly state that the information is not available in the relevant sections of the document you have access to.
-Do *not* invent answers or use any external knowledge.
-When answering, subtly weave in that your information comes from the document, e.g., "According to the document..." or "Based on the text provided...".
-Keep your answers concise but informative, directly addressing the user's query.
-"""
-
-    # Option 2: Knowledgeable Expert (More Formal)
-    SYSTEM_PROMPT_EXPERT = """You are a knowledgeable AI expert specializing in the content of the uploaded PDF document.
-You must answer user questions with precision, drawing *exclusively* from the provided context segments.
-Maintain a professional and informative tone.
-If the provided context does not contain the necessary information to answer the question, explicitly state that the information is not found within the scope of the provided text.
-Do not speculate, infer beyond the text, or utilize any external information sources.
-Clearly attribute your answers to the document, for instance, by starting with "The document indicates that..." or "Based on the provided context...".
-Provide comprehensive answers derived solely from the text.
     """

-    # --- Select the desired prompt ---
-    SELECTED_SYSTEM_PROMPT = SYSTEM_PROMPT_FRIENDLY  # Choose which personality to use

-# --- API Client & Embedding Setup ---
 OPENROUTER_API_KEY = getenv('OPENROUTER_API_KEY')
 if not OPENROUTER_API_KEY:
-    raise ValueError("OPENROUTER_API_KEY environment variable not set.")

 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key=OPENROUTER_API_KEY,
 )

-# Use a temporary cache directory if needed, or configure appropriately
-embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=getenv("SENTENCE_TRANSFORMERS_HOME", "/tmp/st_cache"))
-vector_dim = 384
-index = faiss.IndexFlatL2(vector_dim)
-documents = []  # Store original text chunks corresponding to index entries

 # --- Core Functions ---

 def extract_text_from_pdf(pdf_stream: BytesIO) -> List[str]:
-    """Extracts text from PDF stream"""
-    # Ensure the stream is BytesIO
-    if not isinstance(pdf_stream, BytesIO):
-        pdf_stream = BytesIO(pdf_stream.read())  # Read if it's a file stream
-
-    doc = fitz.open(stream=pdf_stream, filetype="pdf")
-    # Simple chunking by page - consider more advanced chunking (by paragraph, sentence, fixed size) for better RAG performance
-    text_chunks = [page.get_text("text").strip() for page in doc if page.get_text("text").strip()]
-    doc.close()
-    print(f"Extracted {len(text_chunks)} non-empty text chunks from PDF.")
     return text_chunks

-# Renamed for clarity, added error handling
 def build_vector_index(text_chunks: List[str]):
-    """Embeds text chunks and builds the FAISS index."""
-    global documents, index, vector_dim

     if not text_chunks:
         print("Warning: No text chunks provided to build the vector index.")
         documents = []
-        index = faiss.IndexFlatL2(vector_dim)  # Reinitialize empty index
         return

     print(f"Building vector index for {len(text_chunks)} chunks...")
-    documents = text_chunks  # Store the original text

-    # Reset the index
-    index = faiss.IndexFlatL2(vector_dim)

     try:
         embeddings = embed_model.encode(text_chunks, show_progress_bar=True)
-        embeddings = np.array(embeddings, dtype=np.float32)
-
-        if embeddings.ndim == 1:
-            embeddings = embeddings.reshape(1, -1)
-
-        if embeddings.shape[1] != vector_dim:
-            raise ValueError(f"Embedding dimension mismatch: expected {vector_dim}, got {embeddings.shape[1]}")

         index.add(embeddings)
         print(f"FAISS index built successfully with {index.ntotal} vectors.")

     except Exception as e:
         print(f"Error during embedding or indexing: {e}")
-        # Reset state in case of error
         documents = []
-        index = faiss.IndexFlatL2(vector_dim)
-        raise  # Re-raise the exception to signal failure

-
-# Renamed for clarity, added checks
 def search_relevant_chunks(query: str, k: int = 3) -> str:
-    """Finds the most relevant text chunks for the given query using FAISS."""
-    global index, documents

     if index.ntotal == 0:
         print("Warning: Search attempted on an empty index.")
         return ""  # Return empty string if index is not ready

     if not query:
         return ""

     try:
         query_embedding = embed_model.encode([query])
         query_embedding = np.array(query_embedding, dtype=np.float32)

-        # Perform the search
-        distances, indices = index.search(query_embedding, k=min(k, index.ntotal))  # Ensure k <= index size

-        # Filter out potential invalid indices (-1 can sometimes occur if k > ntotal, though min() handles it)
-        valid_indices = [idx for idx in indices[0] if idx != -1 and idx < len(documents)]

         if not valid_indices:
             print(f"No relevant chunks found for query: '{query[:50]}...'")
             return ""

-        # Retrieve the actual text chunks
         relevant_docs = [documents[i] for i in valid_indices]
-        print(f"Retrieved {len(relevant_docs)} relevant chunks.")
-        return "\n\n---\n\n".join(relevant_docs)  # Join with a clear separator

     except Exception as e:
-        print(f"Error during similarity search: {e}")
-        return ""  # Return empty on error

-# --- Improved Generation Function ---
 def generate_response(
     message: str,
     history: List[Tuple[str, str]],
-    system_message: str = ChatConfig.SELECTED_SYSTEM_PROMPT,  # Use the chosen system prompt
     max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
     temperature: float = ChatConfig.DEFAULT_TEMP,
     top_p: float = ChatConfig.DEFAULT_TOP_P
 ) -> str:

-    if index.ntotal == 0:  # Check if index is built
-        return "I need a PDF document to be uploaded and processed first before I can answer questions."

-    # 1. Retrieve Context
-    context = search_relevant_chunks(message, k=3)  # Retrieve top 3 chunks

-    # Prepare the prompt messages list
     messages = []

-    # 2. Add the System Prompt (Crucial Change)
     messages.append({"role": "system", "content": system_message})

     # 3. Add Conversation History (if any)
-    # Ensure alternating user/assistant roles, starting with user
     for user_msg, assistant_msg in history:
-        if user_msg:  # Add user message if not empty
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:  # Add assistant message if not empty
-            messages.append({"role": "assistant", "content": assistant_msg})
-
-    # 4. Construct the Final User Prompt with Context
-    # We include context here, clearly marked.
-    # The system prompt already told the AI *how* to use this context.
     if context:
-        user_prompt_content = f"Based on the following context from the document, please answer the question:\n\nCONTEXT:\n{context}\n\n---\n\nQUESTION:\n{message}"
     else:
-        # If no context found, still ask the question but the system prompt guides the "I don't know" response.
-        # Alternatively, you could return a hardcoded message here *before* calling the LLM if desired.
-        # Forcing the LLM to respond based on the prompt is generally better for natural language.
-        user_prompt_content = f"Regarding the document, I have the following question, although I couldn't retrieve specific context for it:\n\nQUESTION:\n{message}"
-        # Or, more simply:
-        # user_prompt_content = f"QUESTION: {message}\n\n(Note: No specific context sections were retrieved for this question based on similarity search.)"

     messages.append({"role": "user", "content": user_prompt_content})

-    # 5. Call the LLM API
     try:
-        print(f"--- Sending to {ChatConfig.MODEL} ---")
-        # print("System Prompt:", system_message)  # Optional: Debug logging
-        # print("History:", history)  # Optional: Debug logging
-        # print("User Prompt:", user_prompt_content)  # Optional: Debug logging

         completion = client.chat.completions.create(
             model=ChatConfig.MODEL,
@@ -206,97 +297,153 @@ def generate_response(
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-            # Consider adding stop sequences if needed, e.g., stop=["\nUSER:", "\nASSISTANT:"]
         )
         response = completion.choices[0].message.content
-        print(f"--- Received Response ({len(response)} chars) ---")
-        return response.strip()

     except Exception as e:
         print(f"Error generating response from LLM: {str(e)}")
-        # Provide a more user-friendly error message
-        return "I'm sorry, but I encountered an issue while trying to process your request. Please check the connection or try again later."

-
-# --- Flask Routes (Mostly Unchanged, added checks) ---

 @app.route('/')
-def index_route():  # Renamed to avoid conflict with faiss.Index object
-    """Serve the HTML page for the user interface"""
     return render_template('index.html')

 @app.route('/upload_pdf', methods=['POST'])
 def upload_pdf():
-    """Handle PDF upload, extract text, and build vector index."""
-    global documents, index  # Ensure we are modifying the global state

     if 'pdf' not in request.files:
         return jsonify({"error": "No PDF file part in the request."}), 400

     file = request.files['pdf']
-    if file.filename == "":
         return jsonify({"error": "No file selected."}), 400
     if not file.filename.lower().endswith('.pdf'):
-        return jsonify({"error": "Invalid file type. Please upload a PDF."}), 400

-    print(f"Received file: {file.filename}")

     try:
-        pdf_stream = BytesIO(file.read())  # Read file into memory

-        # Extract text
         text_chunks = extract_text_from_pdf(pdf_stream)
         if not text_chunks:
-            return jsonify({"error": "Could not extract any text from the PDF."}), 400

-        # Build vector database (index)
-        build_vector_index(text_chunks)  # This function now handles index creation

-        return jsonify({"message": f"PDF '{file.filename}' processed successfully. {len(documents)} chunks indexed."}), 200

     except fitz.fitz.FileDataError:
-        return jsonify({"error": "Invalid or corrupted PDF file."}), 400
     except Exception as e:
-        print(f"Error processing PDF upload: {str(e)}")
-        # Reset state on error
         documents = []
-        index = faiss.IndexFlatL2(vector_dim)
-        return jsonify({"error": f"An unexpected error occurred: {str(e)}"}), 500

 @app.route('/ask_question', methods=['POST'])
 def ask_question():
-    """Handle user question, retrieve context, and generate response."""
     data = request.get_json()
     if not data or 'message' not in data:
-        return jsonify({"error": "Missing 'message' in request body"}), 400

-    message = data['message'].strip()
-    history = data.get('history', [])  # Get history, default to empty list

     if not message:
-        return jsonify({"response": "Please enter a question."})  # Basic validation
-
-    # Ensure history format is correct (list of tuples/lists)
-    validated_history = []
-    if isinstance(history, list):
-        for item in history:
             if isinstance(item, (list, tuple)) and len(item) == 2:
-                validated_history.append((str(item[0]), str(item[1])))
-            # else: log potential format error?

     try:
-        response = generate_response(message, validated_history)
-        return jsonify({"response": response})
     except Exception as e:
-        # Catch potential errors during generation (though generate_response has its own try-except)
-        print(f"Error in /ask_question endpoint: {e}")
-        return jsonify({"response": "Sorry, an error occurred while generating the response."}), 500
-

 if __name__ == '__main__':
-    # Make sure OPENROUTER_API_KEY is checked before starting the app
     if not OPENROUTER_API_KEY:
-        print("ERROR: OPENROUTER_API_KEY environment variable is not set. Exiting.")
     else:
-        # Consider host='0.0.0.0' to make it accessible on your network
-        app.run(debug=True, host='127.0.0.1', port=5000)

+# -*- coding: utf-8 -*-
+"""
+Flask App for SentryLabs Document Assistant using RAG.
+"""
+
 from openai import OpenAI
 from os import getenv
 from flask import Flask, request, jsonify, render_template
 import numpy as np
 import os
 from sentence_transformers import SentenceTransformer
 from typing import List, Tuple
+from io import BytesIO

+# --- Flask App Setup ---
+# Use the current directory for templates (where index.html is expected)
 app = Flask(__name__, template_folder=os.getcwd())
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Suppress tokenizer parallelism warning

 # --- Configuration ---
 class ChatConfig:
+    """Configuration settings for the chat assistant."""
+    MODEL = "google/gemma-3-27b-it:free"  # OpenRouter model identifier
+    DEFAULT_MAX_TOKENS = 768  # Max tokens for the LLM response
+    DEFAULT_TEMP = 0.4  # Temperature for LLM generation (balance creativity/determinism)
+    DEFAULT_TOP_P = 0.95  # Top-P nucleus sampling parameter
+
+    # --- SentryLabs Persona System Prompt ---
+    SYSTEM_PROMPT_SENTRY = """You are Sentry, an AI assistant representing SentryLabs. Your purpose is to act as a knowledgeable, trusted advisor and cybersecurity innovator, assisting users by analyzing the content of the uploaded PDF document.
+
+**Your Core Directives:**
+1. **Strict Document Scope:** Your knowledge is **strictly limited** to the content within the provided context sections of the uploaded PDF. You **must not** use any external knowledge, make assumptions beyond the text, or invent information.
+2. **Persona Embodiment:** Consistently embody the SentryLabs voice:
+   * **Authoritative but Approachable:** Be confident and expert, yet clear and supportive.
+   * **Innovative & Forward-Thinking:** Frame answers with a proactive cybersecurity mindset where the text allows.
+   * **Customer-Centric:** Focus on providing clear value and insights derived *from the document*.
+   * **Professional & Clear:** Use precise, professional language. Employ technical terms from the document accurately, but strive for accessibility. Explain complex document concepts simply if possible. Use **active voice**.
+3. **Tone:** Maintain a confident, informative, empathetic, and collaborative semi-formal tone. Avoid slang and overly casual language.
+4. **Handling Missing Information:** If the provided document context **does not** contain the information needed to answer a question, state this clearly and professionally. Indicate that the answer is outside the scope of the analyzed document sections. Do not apologize excessively; simply state the limitation based on the provided text. Example: "Based on the document sections provided, specific details on [topic] are not covered." or "The analyzed text does not contain information regarding [topic]."
+5. **Source Attribution:** When answering, subtly reference the document as the source of your information (e.g., "According to the document...", "The provided text indicates...", "Based on the analysis of the document sections...").
+6. **Audience Awareness (Implied):** While interacting with one user, frame your analysis in a way that would be valuable to decision-makers (balancing technical detail found in the document with its potential strategic relevance, *if* the document provides such context).
+7. **Focus:** Your primary goal is accurate information retrieval and synthesis *from the provided document text only*, presented through the SentryLabs persona.
+
+Engage directly and professionally. If this is the start of the conversation (no prior history), you can offer a brief introductory sentence. Remember, accuracy and adherence to the document are paramount.
     """

+    # --- Select the Sentry prompt ---
+    SELECTED_SYSTEM_PROMPT = SYSTEM_PROMPT_SENTRY

+# --- API Client Setup ---
 OPENROUTER_API_KEY = getenv('OPENROUTER_API_KEY')
 if not OPENROUTER_API_KEY:
+    raise ValueError("FATAL: OPENROUTER_API_KEY environment variable not set.")

+# Initialize OpenAI client to point to OpenRouter
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key=OPENROUTER_API_KEY,
 )

+# --- Embedding Model and Vector Store Setup ---
+# Define embedding model name and dimension
+EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+VECTOR_DIMENSION = 384  # Dimension for all-MiniLM-L6-v2
+
+# Define cache directory for Sentence Transformers models
+CACHE_DIR = getenv("SENTENCE_TRANSFORMERS_HOME", "/tmp/st_cache")
+os.makedirs(CACHE_DIR, exist_ok=True)  # Ensure cache directory exists
+
+# Initialize embedding model (will be loaded properly in __main__)
+embed_model = None
+
+# Initialize FAISS index (in-memory L2 distance index)
+index = faiss.IndexFlatL2(VECTOR_DIMENSION)
+# Store original text chunks corresponding to index entries
+documents: List[str] = []

 # --- Core Functions ---

 def extract_text_from_pdf(pdf_stream: BytesIO) -> List[str]:
+    """
+    Extracts text from each page of a PDF provided as a BytesIO stream.
+    Performs basic cleaning (stripping whitespace).
+
+    Args:
+        pdf_stream: A BytesIO object containing the PDF data.
+
+    Returns:
+        A list of strings, where each string is the text content of a page.
+        Returns an empty list if no text could be extracted.
+    """
+    text_chunks = []
+    try:
+        # Ensure the stream is BytesIO
+        if not isinstance(pdf_stream, BytesIO):
+            pdf_stream = BytesIO(pdf_stream.read())  # Read if it's a file stream
+
+        doc = fitz.open(stream=pdf_stream, filetype="pdf")
+        # Simple chunking by page - consider more advanced chunking for better RAG
+        for page_num in range(len(doc)):
+            page = doc.load_page(page_num)
+            text = page.get_text("text").strip()
+            if text:  # Only add non-empty pages
+                text_chunks.append(text)
+        doc.close()
+        print(f"Extracted {len(text_chunks)} non-empty text chunks from PDF.")
+    except Exception as e:
+        print(f"Error extracting text from PDF: {e}")
+        # Optionally re-raise or handle specific fitz errors
     return text_chunks

 def build_vector_index(text_chunks: List[str]):
+    """
+    Embeds text chunks using the sentence transformer model and builds/rebuilds
+    the FAISS index.
+
+    Args:
+        text_chunks: A list of strings to be indexed.
+    """
+    global documents, index, VECTOR_DIMENSION, embed_model
+
+    if embed_model is None:
+        print("Error: Embedding model not loaded. Cannot build index.")
+        raise RuntimeError("Embedding model is not initialized.")

     if not text_chunks:
         print("Warning: No text chunks provided to build the vector index.")
         documents = []
+        # Reset index to an empty state
+        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
         return

     print(f"Building vector index for {len(text_chunks)} chunks...")
+    # Store the original text corresponding to the vectors
+    documents = text_chunks

+    # Reset the index before adding new embeddings
+    index = faiss.IndexFlatL2(VECTOR_DIMENSION)

     try:
+        # Encode the text chunks into embeddings
+        print("Encoding text chunks...")
         embeddings = embed_model.encode(text_chunks, show_progress_bar=True)
+        embeddings = np.array(embeddings, dtype=np.float32)  # Ensure correct dtype for FAISS

+        # Validate embeddings shape
+        if embeddings.ndim != 2 or embeddings.shape[1] != VECTOR_DIMENSION:
+            raise ValueError(f"Embedding dimension mismatch or incorrect shape: expected (n, {VECTOR_DIMENSION}), got {embeddings.shape}")

+        # Add embeddings to the FAISS index
         index.add(embeddings)
         print(f"FAISS index built successfully with {index.ntotal} vectors.")

     except Exception as e:
         print(f"Error during embedding or indexing: {e}")
+        # Reset state in case of error to avoid partial indexing
         documents = []
+        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
+        raise  # Re-raise the exception to signal failure to the caller

 def search_relevant_chunks(query: str, k: int = 3) -> str:
+    """
+    Finds the top 'k' most relevant text chunks for a given query using
+    vector similarity search (FAISS).
+
+    Args:
+        query: The user's query string.
+        k: The number of relevant chunks to retrieve.
+
+    Returns:
+        A single string containing the concatenated relevant text chunks,
+        separated by newlines, or an empty string if no relevant chunks are found
+        or if the index is empty.
+    """
+    global index, documents, embed_model
+
+    if embed_model is None:
+        print("Error: Embedding model not loaded. Cannot perform search.")
+        return ""

     if index.ntotal == 0:
         print("Warning: Search attempted on an empty index.")
         return ""  # Return empty string if index is not ready

     if not query:
+        print("Warning: Empty query provided for search.")
         return ""

     try:
+        # Encode the query into an embedding
         query_embedding = embed_model.encode([query])
         query_embedding = np.array(query_embedding, dtype=np.float32)

+        # Perform the similarity search
+        # Ensure k is not greater than the number of items in the index
+        k_search = min(k, index.ntotal)
+        if k_search <= 0: return ""  # Should not happen if ntotal > 0, but safe check
+
+        distances, indices = index.search(query_embedding, k=k_search)

+        # Filter out potential invalid indices (-1) and ensure indices are within bounds
+        valid_indices = [idx for idx in indices[0] if idx != -1 and 0 <= idx < len(documents)]

         if not valid_indices:
             print(f"No relevant chunks found for query: '{query[:50]}...'")
             return ""

+        # Retrieve the actual text chunks based on the valid indices
         relevant_docs = [documents[i] for i in valid_indices]
+        print(f"Retrieved {len(relevant_docs)} relevant chunks for query.")
+
+        # Join the relevant documents with a clear separator
+        return "\n\n---\n\n".join(relevant_docs)

     except Exception as e:
+        print(f"Error during similarity search for query '{query[:50]}...': {e}")
+        return ""  # Return empty string on error

 def generate_response(
     message: str,
     history: List[Tuple[str, str]],
+    system_message: str = ChatConfig.SELECTED_SYSTEM_PROMPT,
     max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
     temperature: float = ChatConfig.DEFAULT_TEMP,
     top_p: float = ChatConfig.DEFAULT_TOP_P
 ) -> str:
+    """
+    Generates a response from the LLM based on the user's message,
+    retrieved context from the PDF, and conversation history.
+
+    Args:
+        message: The current user message.
+        history: A list of past conversation turns as (user_message, assistant_response) tuples.
+        system_message: The system prompt defining the AI's persona and rules.
+        max_tokens: Maximum number of tokens for the response.
+        temperature: Controls randomness in generation.
+        top_p: Controls nucleus sampling.
+
+    Returns:
+        The generated response string from the AI assistant.
+    """
+    global index

+    if index.ntotal == 0:  # Check if index is built (PDF uploaded and processed)
+        # Use Sentry's voice for this initial state message
+        return "I am Sentry, your SentryLabs assistant. To begin our analysis, please upload a PDF document using the button above."

+    # 1. Retrieve Relevant Context from PDF Index
+    context = search_relevant_chunks(message, k=3)  # Retrieve top 3 relevant chunks

+    # 2. Prepare the Message List for the LLM API
     messages = []

+    # Add the System Prompt (Defines Sentry's persona and constraints)
     messages.append({"role": "system", "content": system_message})

     # 3. Add Conversation History (if any)
+    # Process history into the format expected by the API (alternating user/assistant roles)
+    # Ensure we only include valid, non-empty turns
+    processed_history = []
     for user_msg, assistant_msg in history:
+        # Basic validation to avoid sending empty or placeholder turns
+        if user_msg is not None and assistant_msg is not None and \
+           str(user_msg).strip() != "" and str(assistant_msg).strip() != "":
+            processed_history.append({"role": "user", "content": str(user_msg)})
+            processed_history.append({"role": "assistant", "content": str(assistant_msg)})
+    messages.extend(processed_history)
+
+    # 4. Construct the Final User Prompt (Current Question + Retrieved Context)
     if context:
+        # Provide context clearly labeled
+        user_prompt_content = (
+            "Based on the following context from the document, please answer the question:\n\n"
+            f"DOCUMENT CONTEXT:\n---\n{context}\n---\n\n"
+            f"QUESTION:\n{message}"
+        )
     else:
+        # If no context was found, inform the LLM. The system prompt guides its response.
+        user_prompt_content = (
+            "Regarding the document, please address the following question (Note: Specific context sections "
+            f"could not be retrieved via similarity search for this query):\n\nQUESTION:\n{message}"
+        )

+    # Add the final user message (including context) to the list
     messages.append({"role": "user", "content": user_prompt_content})

+    # 5. Call the LLM API via OpenRouter
     try:
+        print(f"--- Sending to {ChatConfig.MODEL} (Sentry Persona) ---")
+        # print("Messages being sent:", messages)  # Uncomment for deep debugging

         completion = client.chat.completions.create(
             model=ChatConfig.MODEL,
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
+            # Optional: Add stop sequences if the model tends to hallucinate roles
+            # stop=["\nUser:", "\nAssistant:", "\nSystem:"]
         )
+
         response = completion.choices[0].message.content
+        print(f"--- Received Response from Sentry ({len(response or '')} chars) ---")
+        # Ensure response is not None before stripping
+        return response.strip() if response else "Received an empty response."

     except Exception as e:
         print(f"Error generating response from LLM: {str(e)}")
+        # Provide a professional, Sentry-like error message
+        return "I encountered an issue while processing your request with the language model. Please try again shortly. If the problem persists, please verify the document and query."

+# --- Flask Routes ---

 @app.route('/')
+def index_route():
+    """Serves the main HTML page for the chat interface."""
     return render_template('index.html')

 @app.route('/upload_pdf', methods=['POST'])
 def upload_pdf():
+    """
+    Handles PDF file upload, extracts text, and builds the vector index.
+    Resets the index and documents before processing a new file.
+    """
+    global documents, index, VECTOR_DIMENSION  # Ensure we modify the global state

     if 'pdf' not in request.files:
         return jsonify({"error": "No PDF file part in the request."}), 400

     file = request.files['pdf']
+    if not file or file.filename == "":
         return jsonify({"error": "No file selected."}), 400
     if not file.filename.lower().endswith('.pdf'):
+        return jsonify({"error": "Invalid file type. Please upload a PDF document."}), 400
+
+    print(f"Received file for processing: {file.filename}")

+    # Reset index and documents for the new file
+    print("Resetting index and documents for new upload...")
+    documents = []
+    index = faiss.IndexFlatL2(VECTOR_DIMENSION)  # Reinitialize the index

     try:
+        pdf_stream = BytesIO(file.read())  # Read file content into memory

+        # Extract text chunks from the PDF
         text_chunks = extract_text_from_pdf(pdf_stream)
         if not text_chunks:
+            # Use a more professional error message
+            return jsonify({"error": "Could not extract readable text content from the provided PDF."}), 400

+        # Build the vector database (FAISS index) with the extracted text
+        build_vector_index(text_chunks)  # This function handles index creation and populating documents

+        # Use a professional success message
+        return jsonify({"message": f"Document '{file.filename}' processed successfully. Ready for analysis."}), 200

     except fitz.fitz.FileDataError:
+        # Error for corrupted or invalid PDF format
+        return jsonify({"error": "Invalid or corrupted PDF file. Please provide a valid PDF document."}), 400
+    except RuntimeError as e:
+        # Catch errors from build_vector_index (like model not loaded)
+        print(f"Runtime Error during PDF processing: {e}")
+        return jsonify({"error": f"A runtime error occurred during processing: {e}"}), 500
+    except ValueError as e:
+        # Catch potential value errors (e.g., dimension mismatch)
+        print(f"Value Error during PDF processing: {e}")
+        return jsonify({"error": f"A configuration or value error occurred: {e}"}), 500
     except Exception as e:
+        # Generic error handler for unexpected issues
+        print(f"Unexpected error processing PDF upload: {str(e)}")
+        # Ensure state is clean even after unexpected errors
         documents = []
+        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
+        return jsonify({"error": f"An unexpected error occurred during PDF processing. Details: {str(e)}"}), 500

 @app.route('/ask_question', methods=['POST'])
 def ask_question():
+    """
+    Handles user questions, retrieves relevant context, generates a response
+    using the LLM, and returns it.
+    """
     data = request.get_json()
     if not data or 'message' not in data:
+        return jsonify({"error": "Missing 'message' field in request body"}), 400

+    message = data.get('message', '').strip()
+    # History comes from frontend as list of lists/tuples: [[user_msg1, assistant_msg1], [user_msg2, assistant_msg2]]
+    history_raw = data.get('history', [])

     if not message:
+        # Return a Sentry-like response for empty input
+        return jsonify({"response": "Please provide a question or topic you'd like to discuss regarding the document."})
+
+    # Validate and sanitize history format
+    validated_history: List[Tuple[str, str]] = []
+    if isinstance(history_raw, list):
+        for item in history_raw:
+            # Ensure item is a list/tuple of exactly two strings
             if isinstance(item, (list, tuple)) and len(item) == 2:
+                user_msg = str(item[0] or "").strip()
+                assistant_msg = str(item[1] or "").strip()
+                # Only add pairs where both messages have content after stripping
+                if user_msg and assistant_msg:
+                    validated_history.append((user_msg, assistant_msg))
+            else:
+                print(f"Warning: Invalid history item format received: {item}. Skipping.")

     try:
+        # Generate response using the core logic function
+        response_text = generate_response(message, validated_history)
+        return jsonify({"response": response_text})
     except Exception as e:
+        # Catch potential errors during the generation process itself
+        print(f"Error in /ask_question endpoint during response generation: {e}")
+        # Return a professional error message
+        return jsonify({"response": "Apologies, an internal error occurred while generating the response. Please try again."}), 500

+# --- Main Execution Block ---
 if __name__ == '__main__':
+    print("--- SentryLabs Document Assistant Initializing ---")
+
+    # Ensure API key is set
     if not OPENROUTER_API_KEY:
+        print("FATAL: OPENROUTER_API_KEY is not set. Please set the environment variable.")
+        exit(1)
     else:
+        print("OpenRouter API Key found.")
+
+    # Load the embedding model during startup
+    try:
+        print(f"Loading embedding model '{EMBEDDING_MODEL_NAME}' from cache/hub...")
+        print(f"Using cache directory: {CACHE_DIR}")
+        embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR)
+        # Perform a dummy encode to ensure model is fully loaded/functional
+        _ = embed_model.encode(["test sentence"])
+        print("Embedding model loaded successfully.")
+    except Exception as e:
+        print(f"FATAL: Failed to load Sentence Transformer model '{EMBEDDING_MODEL_NAME}'. Error: {e}")
+        print("Please check model name, network connection, and cache permissions.")
+        exit(1)  # Exit if the core embedding model fails to load
+
+    # Start the Flask development server
+    print("Starting Flask development server...")
+    # Use host='0.0.0.0' to make accessible on the network, default is '127.0.0.1'
+    app.run(debug=True, host='127.0.0.1', port=5000)
+    print("--- Server Shutdown ---")