Refactor app.py to enhance logging, add status endpoints, and improve knowledge base management
Browse files
app.py
CHANGED
@@ -28,121 +28,124 @@ from rich.table import Table
|
|
28 |
|
29 |
console = Console()
|
30 |
|
31 |
-
#
|
32 |
-
logging.basicConfig(
|
33 |
-
level=logging.DEBUG,
|
34 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
35 |
-
)
|
36 |
logger = logging.getLogger(__name__)
|
37 |
|
38 |
-
#
|
39 |
-
load_dotenv()
|
40 |
-
logger.debug("Environment variables loaded")
|
41 |
-
|
42 |
-
# Define constants for directory paths
|
43 |
VECTOR_STORE_PATH = os.path.join(os.getcwd(), "vector_store")
|
44 |
CHAT_HISTORY_PATH = os.path.join(os.getcwd(), "chat_history")
|
45 |
-
logger.debug(f"Paths initialized: VECTOR_STORE_PATH={VECTOR_STORE_PATH}, CHAT_HISTORY_PATH={CHAT_HISTORY_PATH}")
|
46 |
-
|
47 |
-
def create_required_directories():
    """Ensure the vector-store and chat-history directories exist.

    Each directory is created when missing and receives an empty
    ``.gitkeep`` placeholder file if it does not already contain one.

    Raises:
        Exception: Any error raised while creating a directory or a
            placeholder is logged and re-raised unchanged.
    """
    required_dirs = (VECTOR_STORE_PATH, CHAT_HISTORY_PATH)
    try:
        for directory in required_dirs:
            os.makedirs(directory, exist_ok=True)
            placeholder = os.path.join(directory, '.gitkeep')
            if not os.path.exists(placeholder):
                # Opening for writing and closing right away leaves an empty file.
                with open(placeholder, 'w'):
                    pass
            logger.debug(f"Directory created/verified: {directory}")
    except Exception as err:
        logger.error(f"Error creating directories: {str(err)}")
        raise
|
60 |
-
|
61 |
-
# Create directories before initializing the app
|
62 |
-
create_required_directories()
|
63 |
|
64 |
app = FastAPI(title="Status Law Assistant API")
|
65 |
-
app.include_router(analysis_router)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
async def startup_event():
    """Validate the runtime environment when the application starts.

    Checks that the ``GROQ_API_KEY`` environment variable is set and that
    both the vector-store and chat-history directories exist.

    Raises:
        ValueError: If the API key is missing or a required directory is
            not found.
        Exception: Any failure is logged as "Startup failed" and re-raised.
    """
    try:
        logger.info("Starting application...")

        # Verify that the required environment variables are present.
        api_key = os.getenv("GROQ_API_KEY")
        if not api_key:
            logger.error("GROQ_API_KEY not found in environment variables")
            raise ValueError("GROQ_API_KEY is required")

        # Verify that the required directories are available; stop at the
        # first one that is missing.
        missing = next(
            (
                path
                for path in (VECTOR_STORE_PATH, CHAT_HISTORY_PATH)
                if not os.path.exists(path)
            ),
            None,
        )
        if missing is not None:
            logger.error(f"Required directory not found: {missing}")
            raise ValueError(f"Required directory not found: {missing}")

        logger.info("Application startup completed successfully")
    except Exception as err:
        logger.error(f"Startup failed: {str(err)}")
        raise
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
93 |
|
94 |
-
# Базовый маршрут для проверки
|
95 |
@app.get("/")
|
96 |
async def root():
|
97 |
-
|
98 |
return {
|
99 |
"status": "ok",
|
100 |
-
"
|
101 |
-
"
|
102 |
-
"GROQ_API_KEY": "configured" if os.getenv("GROQ_API_KEY") else "missing",
|
103 |
-
"vector_store": os.path.exists(VECTOR_STORE_PATH),
|
104 |
-
"chat_history": os.path.exists(CHAT_HISTORY_PATH)
|
105 |
-
}
|
106 |
}
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
return
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
"type": "network_error"
|
117 |
-
}
|
118 |
-
)
|
119 |
|
120 |
-
@app.
|
121 |
-
async def
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
}
|
129 |
-
|
|
|
|
|
|
|
|
|
|
|
130 |
|
131 |
-
|
132 |
-
def
|
133 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
try:
|
|
|
135 |
llm = ChatGroq(
|
136 |
model_name="llama-3.3-70b-versatile",
|
137 |
temperature=0.6,
|
138 |
api_key=os.getenv("GROQ_API_KEY")
|
139 |
)
|
|
|
140 |
embeddings = HuggingFaceEmbeddings(
|
141 |
model_name="sentence-transformers/all-MiniLM-L6-v2"
|
142 |
)
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
except Exception as e:
|
145 |
-
|
|
|
|
|
|
|
|
|
146 |
|
147 |
# --------------- Knowledge Base Management ---------------
|
148 |
URLS = [
|
|
|
28 |
|
29 |
console = Console()

# Basic logging configuration: everything from DEBUG upwards is emitted.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Storage locations, resolved against the current working directory.
VECTOR_STORE_PATH = os.path.join(os.getcwd(), "vector_store")
CHAT_HISTORY_PATH = os.path.join(os.getcwd(), "chat_history")

app = FastAPI(title="Status Law Assistant API")
|
|
|
40 |
|
41 |
+
class ChatRequest(BaseModel):
    """Request body accepted by the ``/chat`` endpoint."""

    # Text of the user's question; used both for the similarity search
    # and as the ``question`` value of the prompt.
    message: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
class ChatResponse(BaseModel):
    """Response body returned by the ``/chat`` endpoint."""

    # Answer text produced by the prompt | LLM | output-parser chain.
    response: str
|
46 |
+
|
47 |
+
def check_vector_store() -> bool:
    """Report whether a loadable FAISS vector store is present on disk.

    The chat endpoint loads the store with ``FAISS.load_local`` using the
    default index name, which reads both ``index.faiss`` and ``index.pkl``
    from ``VECTOR_STORE_PATH``. Checking only one of them would report a
    half-written store as ready and defer the failure to load time, so both
    must exist as regular files.

    Returns:
        True when both index files exist as regular files, False otherwise.
    """
    required_files = ("index.faiss", "index.pkl")
    return all(
        os.path.isfile(os.path.join(VECTOR_STORE_PATH, file_name))
        for file_name in required_files
    )
|
51 |
|
|
|
52 |
@app.get("/")
async def root():
    """Basic endpoint reporting the service state.

    Returns:
        A dict with a constant ``"status"`` of ``"ok"``, the result of
        ``check_vector_store()`` under ``"vector_store_ready"``, and the
        ISO-formatted value of ``datetime.now()`` under ``"timestamp"``.
    """
    store_ready = check_vector_store()
    current_time = datetime.now()
    return {
        "status": "ok",
        "vector_store_ready": store_ready,
        "timestamp": current_time.isoformat(),
    }
|
60 |
|
61 |
+
@app.get("/status")
async def get_status():
    """Return the status of the vector store.

    Returns:
        A dict with ``"vector_store_exists"`` and ``"can_chat"`` (both the
        result of ``check_vector_store()``) and ``"vector_store_path"``
        (the configured store directory).
    """
    # Probe the filesystem once: two separate calls could disagree if the
    # store is created or removed between them, and would double the I/O.
    store_ready = check_vector_store()
    return {
        "vector_store_exists": store_ready,
        "can_chat": store_ready,
        "vector_store_path": VECTOR_STORE_PATH,
    }
|
|
|
|
|
|
|
69 |
|
70 |
+
@app.post("/build-knowledge-base")
async def build_kb():
    """Build the knowledge base unless one already exists.

    Returns:
        ``{"status": "exists", ...}`` when ``check_vector_store()`` reports
        a store is already present; otherwise ``{"status": "success", ...}``
        after ``build_knowledge_base`` completes.

    Raises:
        HTTPException: With status 500 when any step fails; the original
            exception is attached as the cause.
    """
    try:
        if check_vector_store():
            return {
                "status": "exists",
                "message": "Knowledge base already exists"
            }

        # Create the embeddings model only when a build is actually needed.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # The returned store is not needed here; only the build itself matters.
        build_knowledge_base(embeddings)

        return {
            "status": "success",
            "message": "Knowledge base built successfully"
        }
    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception("Failed to build knowledge base: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to build knowledge base: {str(e)}"
        ) from e
|
96 |
|
97 |
+
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a user question using the stored knowledge base.

    Args:
        request: Chat request whose ``message`` is used both as the
            similarity-search query and as the question in the prompt.

    Returns:
        ChatResponse carrying the text produced by the LLM chain.

    Raises:
        HTTPException: With status 400 when ``check_vector_store()`` reports
            no knowledge base, or status 500 when any later step fails (the
            original exception is attached as the cause).
    """
    if not check_vector_store():
        raise HTTPException(
            status_code=400,
            detail="Knowledge base not found. Please build it first using /build-knowledge-base endpoint"
        )

    try:
        # Components are created per request, only once they are needed.
        llm = ChatGroq(
            model_name="llama-3.3-70b-versatile",
            temperature=0.6,
            api_key=os.getenv("GROQ_API_KEY")
        )

        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # SECURITY NOTE(review): allow_dangerous_deserialization=True makes
        # load_local unpickle index.pkl, which can execute arbitrary code.
        # This is only safe while VECTOR_STORE_PATH holds files written by
        # this application itself; confirm nothing untrusted can write there.
        vector_store = FAISS.load_local(
            VECTOR_STORE_PATH,
            embeddings,
            allow_dangerous_deserialization=True
        )

        # Retrieve the documents closest to the question and join their
        # text into one context string.
        context_docs = vector_store.similarity_search(request.message)
        context_text = "\n".join(doc.page_content for doc in context_docs)

        prompt_template = PromptTemplate.from_template('''
You are a helpful and polite legal assistant at Status Law.
Answer the question based on the context provided.
Context: {context}
Question: {question}
''')

        chain = prompt_template | llm | StrOutputParser()
        response = chain.invoke({
            "context": context_text,
            "question": request.message
        })

        return ChatResponse(response=response)

    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception("Chat error: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"Chat error: {str(e)}"
        ) from e
|
149 |
|
150 |
# --------------- Knowledge Base Management ---------------
|
151 |
URLS = [
|