Rulga committed
Commit a38fa3f · 1 Parent(s): 0b14128

Add logging and knowledge base configuration to app.py

Files changed (1)
  1. app.py +109 -35
app.py CHANGED
@@ -6,6 +6,14 @@ import traceback
 import warnings
 from datetime import datetime
 from typing import Optional, List, Dict
+import logging
+
+# Logger setup
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
 
 import requests
 from bs4 import BeautifulSoup
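
For reference, the logging setup this hunk introduces, shown standalone. This is a minimal sketch; the example message and output line are hypothetical, but the format string is taken from the diff:

import logging

logger = logging.getLogger(__name__)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Hypothetical message; with this config it renders as e.g.:
# 2025-01-01 12:00:00,000 - INFO - Knowledge base loaded
logger.info("Knowledge base loaded")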
@@ -32,6 +40,24 @@ load_dotenv()
 # Initialize FastAPI app
 app = FastAPI(title="Status Law Assistant API")
 
+# Knowledge base configuration
+KB_CONFIG_PATH = "vector_store/kb_config.json"
+
+def get_kb_config():
+    if os.path.exists(KB_CONFIG_PATH):
+        with open(KB_CONFIG_PATH, 'r') as f:
+            return json.load(f)
+    return {
+        "version": 1,
+        "processed_urls": [],
+        "last_update": None
+    }
+
+def save_kb_config(config):
+    os.makedirs(os.path.dirname(KB_CONFIG_PATH), exist_ok=True)
+    with open(KB_CONFIG_PATH, 'w') as f:
+        json.dump(config, f)
+
 # Models for request/response
 class ChatRequest(BaseModel):
     message: str
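
The two helpers above persist knowledge-base state as JSON at vector_store/kb_config.json. A minimal usage sketch of the round trip (the URL is a hypothetical placeholder, not taken from the diff):

config = get_kb_config()  # first run returns {"version": 1, "processed_urls": [], "last_update": None}
config["processed_urls"].append("https://example.com/legal")  # hypothetical URL
save_kb_config(config)
print(get_kb_config()["version"])  # still 1; build_knowledge_base bumps the version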
@@ -144,70 +170,105 @@ def load_url_content(url: str) -> List[Document]:
 
 def build_knowledge_base(embeddings):
     try:
+        logger.info("Starting knowledge base construction...")
+        kb_config = get_kb_config()
         documents = []
         os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
 
-        print("Starting to load documents...")
-
-        # First check which URLs are available
-        available_urls = [url for url in URLS if check_url_availability(url)]
-        print(f"\nAccessible URLs: {len(available_urls)} out of {len(URLS)}")
+        # Determine which URLs to process
+        urls_to_process = [url for url in URLS if url not in kb_config["processed_urls"]]
+
+        if not urls_to_process:
+            logger.info("No new URLs to process")
+            return FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
+
+        logger.info(f"Processing {len(urls_to_process)} new URLs")
+
+        available_urls = [url for url in urls_to_process if check_url_availability(url)]
+        logger.info(f"Accessible URLs: {len(available_urls)} out of {len(urls_to_process)}")
 
-        # Load content from available URLs
         for url in available_urls:
             try:
-                print(f"\nProcessing {url}")
+                logger.info(f"Processing {url}")
                 docs = load_url_content(url)
                 if docs:
                     documents.extend(docs)
-                    print(f"Successfully loaded content from {url}")
+                    kb_config["processed_urls"].append(url)
+                    logger.info(f"Successfully loaded content from {url}")
                 else:
-                    print(f"No content extracted from {url}")
+                    logger.warning(f"No content extracted from {url}")
             except Exception as e:
-                print(f"Failed to process {url}: {str(e)}")
+                logger.error(f"Failed to process {url}: {str(e)}")
                 continue
 
         if not documents:
+            if kb_config["processed_urls"]:
+                logger.info("No new documents to add, loading existing vector store")
+                return FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
             raise Exception("No documents were successfully loaded!")
 
-        print(f"\nTotal documents loaded: {len(documents)}")
+        logger.info(f"Total new documents loaded: {len(documents)}")
 
         text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=500,
-            chunk_overlap=100
+            chunk_size=1000,
+            chunk_overlap=50
        )
-        print("Splitting documents into chunks...")
+        logger.info("Splitting documents into chunks...")
         chunks = text_splitter.split_documents(documents)
-        print(f"Created {len(chunks)} chunks")
+        logger.info(f"Created {len(chunks)} chunks")
 
-        print("Creating vector store...")
-        vector_store = FAISS.from_documents(chunks, embeddings)
+        # If a knowledge base already exists, add to it
+        if os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
+            logger.info("Loading existing vector store...")
+            vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
+            logger.info("Adding new documents to existing vector store...")
+            vector_store.add_documents(chunks)
+        else:
+            logger.info("Creating new vector store...")
+            vector_store = FAISS.from_documents(chunks, embeddings)
 
-        print("Saving vector store...")
+        logger.info("Saving vector store...")
         vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")
 
+        # Update the configuration
+        kb_config["version"] += 1
+        kb_config["last_update"] = datetime.now().isoformat()
+        save_kb_config(kb_config)
+
+        logger.info(f"Knowledge base updated to version {kb_config['version']}")
         return vector_store
+
     except Exception as e:
-        print(f"Error in build_knowledge_base: {str(e)}")
+        logger.error(f"Error in build_knowledge_base: {str(e)}")
         traceback.print_exc()
         raise Exception(f"Knowledge base creation failed: {str(e)}")
 
 # Initialize models and knowledge base on startup
-llm, embeddings = init_models()
-vector_store = None
+try:
+    llm, embeddings = init_models()
+    vector_store = None
 
-if os.path.exists(VECTOR_STORE_PATH):
-    try:
-        vector_store = FAISS.load_local(
-            VECTOR_STORE_PATH,
-            embeddings,
-            allow_dangerous_deserialization=True
-        )
-    except Exception as e:
-        print(f"Failed to load existing knowledge base: {str(e)}")
+    if os.path.exists(VECTOR_STORE_PATH):
+        try:
+            vector_store = FAISS.load_local(
+                VECTOR_STORE_PATH,
+                embeddings,
+                allow_dangerous_deserialization=True
+            )
+            logger.info("Successfully loaded existing knowledge base")
+        except Exception as e:
+            logger.error(f"Failed to load existing knowledge base: {str(e)}")
+            logger.error(traceback.format_exc())
 
-if vector_store is None:
-    vector_store = build_knowledge_base(embeddings)
+    if vector_store is None:
+        logger.info("Building new knowledge base...")
+        vector_store = build_knowledge_base(embeddings)
+        logger.info("Knowledge base built successfully")
+
+except Exception as e:
+    logger.error(f"Critical initialization error: {str(e)}")
+    logger.error(traceback.format_exc())
+    raise
 
 # API endpoints
 # API endpoints
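
The core change in build_knowledge_base is the incremental branch: when an index is already on disk, new chunks are appended instead of rebuilding from scratch. The pattern in isolation, assuming the LangChain FAISS class app.py already imports (the exact import path is not visible in this diff and is an assumption):

import os
from langchain_community.vectorstores import FAISS  # assumed import path

index_file = os.path.join(VECTOR_STORE_PATH, "index.faiss")
if os.path.exists(index_file):
    # Extend the saved index with the newly split chunks
    vector_store = FAISS.load_local(VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True)
    vector_store.add_documents(chunks)
else:
    # No index yet: build one from scratch
    vector_store = FAISS.from_documents(chunks, embeddings)
vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")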
@@ -260,14 +321,27 @@ async def rebuild_knowledge_base():
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+@app.get("/kb-status")
+async def get_kb_status():
+    """Get current knowledge base status"""
+    kb_config = get_kb_config()
+    return {
+        "version": kb_config["version"],
+        "total_urls": len(URLS),
+        "processed_urls": len(kb_config["processed_urls"]),
+        "pending_urls": len([url for url in URLS if url not in kb_config["processed_urls"]]),
+        "last_update": kb_config["last_update"]
+    }
+
 def log_interaction(user_input: str, bot_response: str, context: str):
     try:
+        kb_config = get_kb_config()
         log_entry = {
             "timestamp": datetime.now().isoformat(),
             "user_input": user_input,
             "bot_response": bot_response,
             "context": context[:500],
-            "kb_version": "1.1"  # You might want to implement version tracking
+            "kb_version": kb_config["version"]  # Use the current version
         }
 
         os.makedirs("chat_history", exist_ok=True)
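
With the service running, the new /kb-status endpoint can be queried as below. localhost:8000 matches the uvicorn.run call at the bottom of app.py; the response values are illustrative:

import requests

resp = requests.get("http://localhost:8000/kb-status")
resp.raise_for_status()
print(resp.json())
# Illustrative output:
# {"version": 2, "total_urls": 5, "processed_urls": 4, "pending_urls": 1, "last_update": "2025-01-01T12:00:00"}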
@@ -275,9 +349,9 @@ def log_interaction(user_input: str, bot_response: str, context: str):
         f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
 
     except Exception as e:
-        print(f"Logging error: {str(e)}")
-        print(traceback.format_exc())
+        logger.error(f"Logging error: {str(e)}")
+        logger.error(traceback.format_exc())
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
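
After this change, log_interaction stamps each chat-history line with the live knowledge-base version instead of the hard-coded "1.1". A representative JSONL entry (all values illustrative):

{"timestamp": "2025-01-01T12:00:00", "user_input": "...", "bot_response": "...", "context": "...", "kb_version": 2}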
 