Update app.py
Updated the bot to feel like Sentry Assist.
app.py
CHANGED
@@ -1,3 +1,8 @@
@@ -6,199 +11,285 @@ import faiss  # FAISS for vector search
@@ -206,97 +297,153 @@ def generate_response(

[The removed side of this diff survives only as truncated fragments: the previous revision used a generic document-QA system prompt (ending "Provide comprehensive answers derived solely from the text."), plainer error messages, and the same overall structure of configuration, PDF text extraction, FAISS indexing, similarity search, and Flask routes. The updated app.py follows in full.]

# -*- coding: utf-8 -*-
"""
Flask App for SentryLabs Document Assistant using RAG.
"""

from openai import OpenAI
from os import getenv
from flask import Flask, request, jsonify, render_template
import fitz  # PyMuPDF, used for PDF text extraction
import faiss  # FAISS for vector search
import numpy as np
import os
from sentence_transformers import SentenceTransformer
from typing import List, Tuple
from io import BytesIO

# --- Flask App Setup ---
# Use the current directory for templates (where index.html is expected)
app = Flask(__name__, template_folder=os.getcwd())
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Suppress tokenizer parallelism warning

# --- Configuration ---
class ChatConfig:
    """Configuration settings for the chat assistant."""
    MODEL = "google/gemma-3-27b-it:free"  # OpenRouter model identifier
    DEFAULT_MAX_TOKENS = 768  # Max tokens for the LLM response
    DEFAULT_TEMP = 0.4  # Temperature for LLM generation (balance creativity/determinism)
    DEFAULT_TOP_P = 0.95  # Top-P nucleus sampling parameter

    # --- SentryLabs Persona System Prompt ---
    SYSTEM_PROMPT_SENTRY = """You are Sentry, an AI assistant representing SentryLabs. Your purpose is to act as a knowledgeable, trusted advisor and cybersecurity innovator, assisting users by analyzing the content of the uploaded PDF document.

**Your Core Directives:**
1. **Strict Document Scope:** Your knowledge is **strictly limited** to the content within the provided context sections of the uploaded PDF. You **must not** use any external knowledge, make assumptions beyond the text, or invent information.
2. **Persona Embodiment:** Consistently embody the SentryLabs voice:
   * **Authoritative but Approachable:** Be confident and expert, yet clear and supportive.
   * **Innovative & Forward-Thinking:** Frame answers with a proactive cybersecurity mindset where the text allows.
   * **Customer-Centric:** Focus on providing clear value and insights derived *from the document*.
   * **Professional & Clear:** Use precise, professional language. Employ technical terms from the document accurately, but strive for accessibility. Explain complex document concepts simply if possible. Use **active voice**.
3. **Tone:** Maintain a confident, informative, empathetic, and collaborative semi-formal tone. Avoid slang and overly casual language.
4. **Handling Missing Information:** If the provided document context **does not** contain the information needed to answer a question, state this clearly and professionally. Indicate that the answer is outside the scope of the analyzed document sections. Do not apologize excessively; simply state the limitation based on the provided text. Example: "Based on the document sections provided, specific details on [topic] are not covered." or "The analyzed text does not contain information regarding [topic]."
5. **Source Attribution:** When answering, subtly reference the document as the source of your information (e.g., "According to the document...", "The provided text indicates...", "Based on the analysis of the document sections...").
6. **Audience Awareness (Implied):** While interacting with one user, frame your analysis in a way that would be valuable to decision-makers (balancing technical detail found in the document with its potential strategic relevance, *if* the document provides such context).
7. **Focus:** Your primary goal is accurate information retrieval and synthesis *from the provided document text only*, presented through the SentryLabs persona.

Engage directly and professionally. If this is the start of the conversation (no prior history), you can offer a brief introductory sentence. Remember, accuracy and adherence to the document are paramount.
"""

    # --- Select the Sentry prompt ---
    SELECTED_SYSTEM_PROMPT = SYSTEM_PROMPT_SENTRY

# --- API Client Setup ---
OPENROUTER_API_KEY = getenv('OPENROUTER_API_KEY')
if not OPENROUTER_API_KEY:
    raise ValueError("FATAL: OPENROUTER_API_KEY environment variable not set.")

# Initialize OpenAI client to point to OpenRouter
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# --- Embedding Model and Vector Store Setup ---
# Define embedding model name and dimension
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
VECTOR_DIMENSION = 384  # Dimension for all-MiniLM-L6-v2

# Define cache directory for Sentence Transformers models
CACHE_DIR = getenv("SENTENCE_TRANSFORMERS_HOME", "/tmp/st_cache")
os.makedirs(CACHE_DIR, exist_ok=True)  # Ensure cache directory exists

# Initialize embedding model (will be loaded properly in __main__)
embed_model = None

# Initialize FAISS index (in-memory L2 distance index)
index = faiss.IndexFlatL2(VECTOR_DIMENSION)
# Store original text chunks corresponding to index entries
documents: List[str] = []

# --- Core Functions ---

def extract_text_from_pdf(pdf_stream: BytesIO) -> List[str]:
    """
    Extracts text from each page of a PDF provided as a BytesIO stream.
    Performs basic cleaning (stripping whitespace).

    Args:
        pdf_stream: A BytesIO object containing the PDF data.

    Returns:
        A list of strings, where each string is the text content of a page.
        Returns an empty list if no text could be extracted.
    """
    text_chunks = []
    try:
        # Ensure the stream is BytesIO
        if not isinstance(pdf_stream, BytesIO):
            pdf_stream = BytesIO(pdf_stream.read())  # Read if it's a file stream

        doc = fitz.open(stream=pdf_stream, filetype="pdf")
        # Simple chunking by page - consider more advanced chunking for better RAG
        for page_num in range(len(doc)):
            page = doc.load_page(page_num)
            text = page.get_text("text").strip()
            if text:  # Only add non-empty pages
                text_chunks.append(text)
        doc.close()
        print(f"Extracted {len(text_chunks)} non-empty text chunks from PDF.")
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        # Optionally re-raise or handle specific fitz errors
    return text_chunks

def build_vector_index(text_chunks: List[str]):
    """
    Embeds text chunks using the sentence transformer model and builds/rebuilds
    the FAISS index.

    Args:
        text_chunks: A list of strings to be indexed.
    """
    global documents, index, VECTOR_DIMENSION, embed_model

    if embed_model is None:
        print("Error: Embedding model not loaded. Cannot build index.")
        raise RuntimeError("Embedding model is not initialized.")

    if not text_chunks:
        print("Warning: No text chunks provided to build the vector index.")
        documents = []
        # Reset index to an empty state
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        return

    print(f"Building vector index for {len(text_chunks)} chunks...")
    # Store the original text corresponding to the vectors
    documents = text_chunks

    # Reset the index before adding new embeddings
    index = faiss.IndexFlatL2(VECTOR_DIMENSION)

    try:
        # Encode the text chunks into embeddings
        print("Encoding text chunks...")
        embeddings = embed_model.encode(text_chunks, show_progress_bar=True)
        embeddings = np.array(embeddings, dtype=np.float32)  # Ensure correct dtype for FAISS

        # Validate embeddings shape
        if embeddings.ndim != 2 or embeddings.shape[1] != VECTOR_DIMENSION:
            raise ValueError(
                f"Embedding dimension mismatch or incorrect shape: "
                f"expected (n, {VECTOR_DIMENSION}), got {embeddings.shape}"
            )

        # Add embeddings to the FAISS index
        index.add(embeddings)
        print(f"FAISS index built successfully with {index.ntotal} vectors.")

    except Exception as e:
        print(f"Error during embedding or indexing: {e}")
        # Reset state in case of error to avoid a partially built index
        documents = []
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        raise  # Re-raise the exception to signal failure to the caller

def search_relevant_chunks(query: str, k: int = 3) -> str:
    """
    Finds the top 'k' most relevant text chunks for a given query using
    vector similarity search (FAISS).

    Args:
        query: The user's query string.
        k: The number of relevant chunks to retrieve.

    Returns:
        A single string containing the concatenated relevant text chunks,
        separated by newlines, or an empty string if no relevant chunks are
        found or if the index is empty.
    """
    global index, documents, embed_model

    if embed_model is None:
        print("Error: Embedding model not loaded. Cannot perform search.")
        return ""

    if index.ntotal == 0:
        print("Warning: Search attempted on an empty index.")
        return ""  # Return empty string if index is not ready

    if not query:
        print("Warning: Empty query provided for search.")
        return ""

    try:
        # Encode the query into an embedding
        query_embedding = embed_model.encode([query])
        query_embedding = np.array(query_embedding, dtype=np.float32)

        # Perform the similarity search
        # Ensure k is not greater than the number of items in the index
        k_search = min(k, index.ntotal)
        if k_search <= 0:
            return ""  # Should not happen if ntotal > 0, but safe check

        distances, indices = index.search(query_embedding, k=k_search)

        # Filter out potential invalid indices (-1) and ensure indices are within bounds
        valid_indices = [idx for idx in indices[0] if idx != -1 and 0 <= idx < len(documents)]

        if not valid_indices:
            print(f"No relevant chunks found for query: '{query[:50]}...'")
            return ""

        # Retrieve the actual text chunks based on the valid indices
        relevant_docs = [documents[i] for i in valid_indices]
        print(f"Retrieved {len(relevant_docs)} relevant chunks for query.")

        # Join the relevant documents with a clear separator
        return "\n\n---\n\n".join(relevant_docs)

    except Exception as e:
        print(f"Error during similarity search for query '{query[:50]}...': {e}")
        return ""  # Return empty string on error

def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_message: str = ChatConfig.SELECTED_SYSTEM_PROMPT,
    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
    temperature: float = ChatConfig.DEFAULT_TEMP,
    top_p: float = ChatConfig.DEFAULT_TOP_P
) -> str:
    """
    Generates a response from the LLM based on the user's message,
    retrieved context from the PDF, and conversation history.

    Args:
        message: The current user message.
        history: A list of past conversation turns as (user_message, assistant_response) tuples.
        system_message: The system prompt defining the AI's persona and rules.
        max_tokens: Maximum number of tokens for the response.
        temperature: Controls randomness in generation.
        top_p: Controls nucleus sampling.

    Returns:
        The generated response string from the AI assistant.
    """
    global index

    if index.ntotal == 0:  # Check if index is built (PDF uploaded and processed)
        # Use Sentry's voice for this initial state message
        return ("I am Sentry, your SentryLabs assistant. To begin our analysis, "
                "please upload a PDF document using the button above.")

    # 1. Retrieve relevant context from the PDF index
    context = search_relevant_chunks(message, k=3)  # Retrieve top 3 relevant chunks

    # 2. Prepare the message list for the LLM API
    messages = []

    # Add the system prompt (defines Sentry's persona and constraints)
    messages.append({"role": "system", "content": system_message})

    # 3. Add conversation history (if any)
    # Process history into the format expected by the API (alternating user/assistant roles)
    # and only include valid, non-empty turns
    processed_history = []
    for user_msg, assistant_msg in history:
        # Basic validation to avoid sending empty or placeholder turns
        if user_msg is not None and assistant_msg is not None and \
           str(user_msg).strip() != "" and str(assistant_msg).strip() != "":
            processed_history.append({"role": "user", "content": str(user_msg)})
            processed_history.append({"role": "assistant", "content": str(assistant_msg)})
    messages.extend(processed_history)

    # 4. Construct the final user prompt (current question + retrieved context)
    if context:
        # Provide the context clearly labeled
        user_prompt_content = (
            "Based on the following context from the document, please answer the question:\n\n"
            f"DOCUMENT CONTEXT:\n---\n{context}\n---\n\n"
            f"QUESTION:\n{message}"
        )
    else:
        # If no context was found, inform the LLM. The system prompt guides its response.
        user_prompt_content = (
            "Regarding the document, please address the following question (Note: Specific context sections "
            f"could not be retrieved via similarity search for this query):\n\nQUESTION:\n{message}"
        )

    # Add the final user message (including context) to the list
    messages.append({"role": "user", "content": user_prompt_content})

    # 5. Call the LLM API via OpenRouter
    try:
        print(f"--- Sending to {ChatConfig.MODEL} (Sentry Persona) ---")
        # print("Messages being sent:", messages)  # Uncomment for deep debugging

        completion = client.chat.completions.create(
            model=ChatConfig.MODEL,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            # Optional: add stop sequences if the model tends to hallucinate roles
            # stop=["\nUser:", "\nAssistant:", "\nSystem:"]
        )

        response = completion.choices[0].message.content
        print(f"--- Received Response from Sentry ({len(response or '')} chars) ---")
        # Ensure the response is not None before stripping
        return response.strip() if response else "Received an empty response."

    except Exception as e:
        print(f"Error generating response from LLM: {str(e)}")
        # Provide a professional, Sentry-like error message
        return ("I encountered an issue while processing your request with the language model. "
                "Please try again shortly. If the problem persists, please verify the document and query.")

# --- Flask Routes ---

@app.route('/')
def index_route():
    """Serves the main HTML page for the chat interface."""
    return render_template('index.html')

@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """
    Handles PDF file upload, extracts text, and builds the vector index.
    Resets the index and documents before processing a new file.
    """
    global documents, index, VECTOR_DIMENSION  # Ensure we modify the global state

    if 'pdf' not in request.files:
        return jsonify({"error": "No PDF file part in the request."}), 400

    file = request.files['pdf']
    if not file or file.filename == "":
        return jsonify({"error": "No file selected."}), 400
    if not file.filename.lower().endswith('.pdf'):
        return jsonify({"error": "Invalid file type. Please upload a PDF document."}), 400

    print(f"Received file for processing: {file.filename}")

    # Reset index and documents for the new file
    print("Resetting index and documents for new upload...")
    documents = []
    index = faiss.IndexFlatL2(VECTOR_DIMENSION)  # Reinitialize the index

    try:
        pdf_stream = BytesIO(file.read())  # Read file content into memory

        # Extract text chunks from the PDF
        text_chunks = extract_text_from_pdf(pdf_stream)
        if not text_chunks:
            return jsonify({"error": "Could not extract readable text content from the provided PDF."}), 400

        # Build the vector database (FAISS index) with the extracted text
        build_vector_index(text_chunks)  # Handles index creation and populates documents

        return jsonify({"message": f"Document '{file.filename}' processed successfully. Ready for analysis."}), 200

    except fitz.fitz.FileDataError:
        # Corrupted or invalid PDF format
        return jsonify({"error": "Invalid or corrupted PDF file. Please provide a valid PDF document."}), 400
    except RuntimeError as e:
        # Errors from build_vector_index (e.g. embedding model not loaded)
        print(f"Runtime Error during PDF processing: {e}")
        return jsonify({"error": f"A runtime error occurred during processing: {e}"}), 500
    except ValueError as e:
        # Potential value errors (e.g. embedding dimension mismatch)
        print(f"Value Error during PDF processing: {e}")
        return jsonify({"error": f"A configuration or value error occurred: {e}"}), 500
    except Exception as e:
        # Generic handler for unexpected issues
        print(f"Unexpected error processing PDF upload: {str(e)}")
        # Ensure state is clean even after unexpected errors
        documents = []
        index = faiss.IndexFlatL2(VECTOR_DIMENSION)
        return jsonify({"error": f"An unexpected error occurred during PDF processing. Details: {str(e)}"}), 500

@app.route('/ask_question', methods=['POST'])
def ask_question():
    """
    Handles user questions, retrieves relevant context, generates a response
    using the LLM, and returns it.
    """
    data = request.get_json()
    if not data or 'message' not in data:
        return jsonify({"error": "Missing 'message' field in request body"}), 400

    message = data.get('message', '').strip()
    # History comes from the frontend as a list of [user_msg, assistant_msg] pairs
    history_raw = data.get('history', [])

    if not message:
        # Return a Sentry-like response for empty input
        return jsonify({"response": "Please provide a question or topic you'd like to discuss regarding the document."})

    # Validate and sanitize the history format
    validated_history: List[Tuple[str, str]] = []
    if isinstance(history_raw, list):
        for item in history_raw:
            # Each item must be a list/tuple of exactly two entries
            if isinstance(item, (list, tuple)) and len(item) == 2:
                user_msg = str(item[0] or "").strip()
                assistant_msg = str(item[1] or "").strip()
                # Only keep pairs where both messages have content after stripping
                if user_msg and assistant_msg:
                    validated_history.append((user_msg, assistant_msg))
            else:
                print(f"Warning: Invalid history item format received: {item}. Skipping.")

    try:
        # Generate the response using the core logic function
        response_text = generate_response(message, validated_history)
        return jsonify({"response": response_text})
    except Exception as e:
        # Catch potential errors during the generation process itself
        print(f"Error in /ask_question endpoint during response generation: {e}")
        return jsonify({"response": "Apologies, an internal error occurred while generating the response. Please try again."}), 500

# --- Main Execution Block ---
if __name__ == '__main__':
    print("--- SentryLabs Document Assistant Initializing ---")

    # Ensure the API key is set
    if not OPENROUTER_API_KEY:
        print("FATAL: OPENROUTER_API_KEY is not set. Please set the environment variable.")
        exit(1)
    else:
        print("OpenRouter API Key found.")

    # Load the embedding model during startup
    try:
        print(f"Loading embedding model '{EMBEDDING_MODEL_NAME}' from cache/hub...")
        print(f"Using cache directory: {CACHE_DIR}")
        embed_model = SentenceTransformer(EMBEDDING_MODEL_NAME, cache_folder=CACHE_DIR)
        # Perform a dummy encode to ensure the model is fully loaded and functional
        _ = embed_model.encode(["test sentence"])
        print("Embedding model loaded successfully.")
    except Exception as e:
        print(f"FATAL: Failed to load Sentence Transformer model '{EMBEDDING_MODEL_NAME}'. Error: {e}")
        print("Please check the model name, network connection, and cache permissions.")
        exit(1)  # Exit if the core embedding model fails to load

    # Start the Flask development server
    print("Starting Flask development server...")
    # Use host='0.0.0.0' to make the app accessible on the network; default is '127.0.0.1'
    app.run(debug=True, host='127.0.0.1', port=5000)
    print("--- Server Shutdown ---")