Rulga committed on
Commit
68b95db
·
1 Parent(s): 080d622

rebuild project

Browse files
Files changed (6) hide show
  1. Dockerfile +17 -23
  2. app - Copy.py +417 -0
  3. app.py +312 -322
  4. index.html +276 -0
  5. requirements.txt +14 -24
  6. space.yaml +9 -0
Dockerfile CHANGED
@@ -1,39 +1,33 @@
1
- FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
  # Install system dependencies
6
  RUN apt-get update && apt-get install -y \
7
  build-essential \
8
- curl \
9
- software-properties-common \
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
- # Create directories and set permissions
13
- RUN mkdir -p /app/vector_store /app/chat_history /app/.cache /app/logs && \
14
- chmod 777 /app/vector_store /app/chat_history /app/.cache /app/logs
15
-
16
- # Set environment variables
17
- ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
18
- ENV HF_HOME=/app/.cache/huggingface
19
- ENV XDG_CACHE_HOME=/app/.cache
20
- ENV PYTHONUNBUFFERED=1
21
-
22
- # Create cache directories with proper permissions
23
- RUN mkdir -p /app/.cache/huggingface && \
24
- chmod -R 777 /app/.cache
25
-
26
  COPY requirements.txt .
 
 
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
 
29
  COPY . .
30
 
31
- RUN chown -R 1000:1000 /app && \
32
- chmod -R 755 /app
 
33
 
34
- EXPOSE 8000
 
 
 
 
35
 
36
- USER 1000
 
37
 
38
- # Изменяем команду запуска для сохранения логов
39
- CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000 --log-level debug 2>&1 | tee /app/logs/app.log"]
 
1
# Image for the Status Law Assistant API (FastAPI served by uvicorn).
FROM python:3.9-slim

WORKDIR /app

# Install system dependencies. Recommended packages are skipped and the apt
# lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements file first so the dependency layer is cached
# independently of application code changes.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application
COPY . .

# Create the vector store, chat history and static directories in one layer
RUN mkdir -p vector_store chat_history static

# Copy the frontend to the static directory
COPY index.html static/

# Expose the port the app runs on
EXPOSE 8000

# Command to run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app - Copy.py ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ from dotenv import load_dotenv
4
+ from langchain_groq import ChatGroq
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_community.document_loaders import WebBaseLoader
9
+ from langchain_core.prompts import PromptTemplate
10
+ from langchain_core.output_parsers import StrOutputParser
11
+ from datetime import datetime
12
+ import json
13
+ import traceback
14
+ from fastapi import FastAPI, HTTPException, Request
15
+ from fastapi.responses import JSONResponse
16
+ from pydantic import BaseModel
17
+ from api import router as analysis_router
18
+ from utils import ChatAnalyzer, setup_chat_analysis
19
+ import requests.exceptions
20
+ import aiohttp
21
+ from typing import Union
22
+ import uvicorn
23
+ import logging
24
+ from rich import print as rprint
25
+ from rich.console import Console
26
+ from rich.panel import Panel
27
+ from rich.table import Table
28
+
29
# Load variables from a local .env file (e.g. GROQ_API_KEY). load_dotenv was
# imported but never called, so values defined only in .env were never seen.
# By default python-dotenv does not override variables that are already set.
load_dotenv()

console = Console()

# Basic logging configuration
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Storage locations, resolved against the working directory at import time
VECTOR_STORE_PATH = os.path.join(os.getcwd(), "vector_store")
CHAT_HISTORY_PATH = os.path.join(os.getcwd(), "chat_history")

app = FastAPI(title="Status Law Assistant API")
40
+
41
# Request body for POST /chat: carries the user's message text.
class ChatRequest(BaseModel):
    message: str
43
+
44
# Response body for POST /chat: carries the generated answer text.
class ChatResponse(BaseModel):
    response: str
46
+
47
def check_vector_store():
    """Report whether ``index.faiss`` exists inside VECTOR_STORE_PATH."""
    faiss_index_file = os.path.join(VECTOR_STORE_PATH, "index.faiss")
    index_present = os.path.exists(faiss_index_file)
    return index_present
51
+
52
@app.get("/")
async def root():
    """Return a basic status payload: an "ok" flag, index presence and the current time."""
    payload = {"status": "ok"}
    payload["vector_store_ready"] = check_vector_store()
    payload["timestamp"] = datetime.now().isoformat()
    return payload
60
+
61
@app.get("/status")
async def get_status():
    """Return the vector store status: whether the index exists, whether chat is possible, and its path."""
    status = {"vector_store_exists": check_vector_store()}
    # Chat availability is reported from a second, separate index check.
    status["can_chat"] = check_vector_store()
    status["vector_store_path"] = VECTOR_STORE_PATH
    return status
69
+
70
@app.post("/build-knowledge-base")
async def build_kb():
    """Build the knowledge base unless an index file already exists.

    Returns:
        A dict with ``status`` ("exists" or "success") and a ``message``.

    Raises:
        HTTPException: 500 when the build fails. An HTTPException raised by
            build_knowledge_base is logged and passed through unchanged
            rather than being wrapped in a second HTTPException.
    """
    try:
        if check_vector_store():
            return {
                "status": "exists",
                "message": "Knowledge base already exists"
            }

        # Create the embedding model only when a build is actually needed.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        build_knowledge_base(embeddings)

        return {
            "status": "success",
            "message": "Knowledge base built successfully"
        }
    except HTTPException as e:
        # Already carries a status code and detail; re-wrapping it would
        # nest one error message inside another.
        logger.error(f"Failed to build knowledge base: {e.detail}")
        raise
    except Exception as e:
        logger.error(f"Failed to build knowledge base: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to build knowledge base: {str(e)}"
        )
96
+
97
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a chat message using context retrieved from the saved FAISS index.

    Raises:
        HTTPException: 400 when no index file exists yet, 500 when any step
            of model setup, retrieval or generation fails.
    """
    if not check_vector_store():
        raise HTTPException(
            status_code=400,
            detail="Knowledge base not found. Please build it first using /build-knowledge-base endpoint"
        )

    try:
        # Components are created per request, only once the index is known to exist.
        chat_model = ChatGroq(
            model_name="llama-3.3-70b-versatile",
            temperature=0.6,
            api_key=os.getenv("GROQ_API_KEY")
        )
        embedder = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        # NOTE(review): allow_dangerous_deserialization=True is passed when loading
        # the saved index — confirm the index files only ever come from a trusted source.
        index = FAISS.load_local(
            VECTOR_STORE_PATH,
            embedder,
            allow_dangerous_deserialization=True
        )

        matches = index.similarity_search(request.message)
        context_text = "\n".join(doc.page_content for doc in matches)

        prompt = PromptTemplate.from_template('''
        You are a helpful and polite legal assistant at Status Law.
        Answer the question based on the context provided.
        Context: {context}
        Question: {question}
        ''')

        pipeline = prompt | chat_model | StrOutputParser()
        inputs = {
            "context": context_text,
            "question": request.message
        }
        answer = pipeline.invoke(inputs)

        return ChatResponse(response=answer)

    except Exception as e:
        logger.error(f"Chat error: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Chat error: {str(e)}"
        )
149
+
150
+ # --------------- Knowledge Base Management ---------------
151
# Pages that are scraped to build the knowledge base.
URLS = [
    "https://status.law",
    "https://status.law/about",
    "https://status.law/careers",
    "https://status.law/tariffs-for-services-against-extradition-en",
    "https://status.law/challenging-sanctions",
    # The comma after the next entry was missing, which silently joined it
    # with the following URL into one invalid address.
    "https://status.law/law-firm-contact-legal-protection",
    "https://status.law/cross-border-banking-legal-issues",
    "https://status.law/extradition-defense",
    "https://status.law/international-prosecution-protection",
    "https://status.law/interpol-red-notice-removal",
    "https://status.law/practice-areas",
    "https://status.law/reputation-protection",
    "https://status.law/faq",
]
166
+
167
def build_knowledge_base(_embeddings):
    """Load the pages in URLS, split them into chunks and save a FAISS index.

    Args:
        _embeddings: Embedding model handed to ``FAISS.from_documents``.

    Returns:
        The FAISS vector store that was built and saved to VECTOR_STORE_PATH.

    Raises:
        HTTPException: 500 if no page could be loaded, if ``index.faiss`` is
            missing after saving, or if any other step fails.
    """
    try:
        # exist_ok=True already tolerates an existing directory.
        os.makedirs(VECTOR_STORE_PATH, exist_ok=True)

        documents = []
        for url in URLS:
            try:
                documents.extend(WebBaseLoader(url).load())
            except Exception as e:
                # Best effort: one unreachable page must not abort the build.
                print(f"Failed to load {url}: {str(e)}")

        if not documents:
            raise HTTPException(status_code=500, detail="No documents loaded")

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(documents)

        vector_store = FAISS.from_documents(chunks, _embeddings)
        vector_store.save_local(
            folder_path=VECTOR_STORE_PATH,
            index_name="index"
        )

        if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
            raise HTTPException(status_code=500, detail="FAISS index file not created")

        return vector_store

    except HTTPException:
        # Keep the specific detail set above instead of wrapping it in a
        # second "Knowledge base creation failed" message.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Knowledge base creation failed: {str(e)}"
        ) from e
208
+
209
+ # --------------- API Models ---------------
210
# Request body for POST /chat.
# NOTE(review): a class with the same name and field is declared earlier in
# this file; this later declaration rebinds the name.
class ChatRequest(BaseModel):
    message: str
212
+
213
# Response body for POST /chat.
# NOTE(review): a class with the same name and field is declared earlier in
# this file; this later declaration rebinds the name.
class ChatResponse(BaseModel):
    response: str
215
+
216
+ # --------------- API Routes ---------------
217
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a chat message from the knowledge base, retrying on network errors.

    NOTE(review): a handler for POST /chat is already registered earlier in
    this file, and ``init_models`` is neither defined nor imported in the
    visible file — confirm which handler is meant to be live and where
    ``init_models`` comes from.

    Raises:
        HTTPException: 503 after ``max_retries`` consecutive network errors,
            500 for any other failure.
    """
    import asyncio  # needed for the retry delay; not imported at module level

    try:
        llm, embeddings = init_models()

        if not os.path.exists(VECTOR_STORE_PATH):
            vector_store = build_knowledge_base(embeddings)
        else:
            vector_store = FAISS.load_local(
                VECTOR_STORE_PATH,
                embeddings,
                allow_dangerous_deserialization=True
            )

        # The prompt does not change between attempts, so build it once.
        prompt_template = PromptTemplate.from_template('''
        You are a helpful and polite legal assistant at Status Law.
        You answer in the language in which the question was asked.
        Answer the question based on the context provided.

        # ... остальной текст промпта ...

        Context: {context}
        Question: {question}

        Response Guidelines:
        1. Answer in the user's language
        2. Cite sources when possible
        3. Offer contact options if unsure
        ''')

        # Retry the retrieval + generation step on network errors.
        max_retries = 3
        for attempt in range(1, max_retries + 1):
            try:
                context_docs = vector_store.similarity_search(request.message)
                context_text = "\n".join([d.page_content for d in context_docs])

                chain = prompt_template | llm | StrOutputParser()
                response = chain.invoke({
                    "context": context_text,
                    "question": request.message
                })

                log_interaction(request.message, response, context_text)
                return ChatResponse(response=response)

            except (requests.exceptions.RequestException, aiohttp.ClientError) as e:
                if attempt == max_retries:
                    raise HTTPException(
                        status_code=503,
                        detail={
                            "error": "Network error after maximum retries",
                            "detail": str(e),
                            "type": "network_error"
                        }
                    )
                # Linear back-off: wait 1s after the first failure, 2s after the second.
                await asyncio.sleep(1 * attempt)

    except HTTPException:
        # Without this clause the 503 raised above was caught by the generic
        # handler below and turned into a 500.
        raise
    except Exception as e:
        if isinstance(e, (requests.exceptions.RequestException, aiohttp.ClientError)):
            raise HTTPException(
                status_code=503,
                detail={
                    "error": "Network error occurred",
                    "detail": str(e),
                    "type": "network_error"
                }
            )
        raise HTTPException(status_code=500, detail=str(e))
289
+
290
+ # --------------- Logging ---------------
291
def log_interaction(user_input: str, bot_response: str, context: str):
    """Append one chat exchange as a JSON line to ``chat_history/chat_logs.json``.

    Only the first 500 characters of ``context`` are stored. Any failure is
    printed together with its traceback and then swallowed, so logging never
    breaks the caller.
    """
    try:
        history_dir = "chat_history"
        record = dict(
            timestamp=datetime.now().isoformat(),
            user_input=user_input,
            bot_response=bot_response,
            context=context[:500],
            kb_version=datetime.now().strftime("%Y%m%d-%H%M%S"),
        )

        os.makedirs(history_dir, exist_ok=True)
        target = os.path.join(history_dir, "chat_logs.json")

        with open(target, "a", encoding="utf-8") as log_file:
            serialized = json.dumps(record, ensure_ascii=False)
            log_file.write(serialized + "\n")

    except Exception as e:
        print(f"Logging error: {str(e)}")
        print(traceback.format_exc())
310
+
311
+ # Add health check endpoint
312
@app.get("/health")
async def health_check():
    """Report service health: model initialisation plus a trial load of the vector store.

    Returns a "healthy" payload on success, or a 503 JSON response carrying
    the error text when any step raises.

    NOTE(review): ``init_models`` is neither defined nor imported in the
    visible file — confirm where it comes from.
    """
    try:
        # Only the embeddings are needed below; the LLM handle is discarded.
        _, embeddings = init_models()

        # Loading the store proves it is readable; the result is not kept.
        if os.path.exists(VECTOR_STORE_PATH):
            FAISS.load_local(
                VECTOR_STORE_PATH,
                embeddings,
                allow_dangerous_deserialization=True
            )

        store_state = "not_found"
        if os.path.exists(VECTOR_STORE_PATH):
            store_state = "available"
        return {"status": "healthy", "vector_store": store_state}

    except Exception as e:
        failure = {"status": "unhealthy", "error": str(e)}
        return JSONResponse(status_code=503, content=failure)
339
+
340
+ # Add diagnostic endpoint
341
@app.get("/directory-status")
async def check_directory_status():
    """Describe the vector store and chat history directories: existence, absolute path, contents."""

    def describe(directory):
        # A missing directory is reported with an empty contents list.
        return {
            "exists": os.path.exists(directory),
            "path": os.path.abspath(directory),
            "contents": os.listdir(directory) if os.path.exists(directory) else [],
        }

    report = {}
    report["vector_store"] = describe(VECTOR_STORE_PATH)
    report["chat_history"] = describe(CHAT_HISTORY_PATH)
    return report
356
+
357
+ # Добавим функцию для вывода статуса
358
def print_startup_status():
    """Print a rich-formatted table of startup checks followed by the API docs links.

    The checks cover the two storage directories and the GROQ_API_KEY
    environment variable. Any error while rendering is printed in red
    instead of being raised.

    NOTE(review): no call to this function is visible in this file.
    """
    try:
        checks = Table(show_header=True, header_style="bold magenta")
        checks.add_column("Component", style="cyan")
        checks.add_column("Status", style="green")

        vector_store_exists = os.path.exists(VECTOR_STORE_PATH)
        chat_history_exists = os.path.exists(CHAT_HISTORY_PATH)

        rows = (
            ("Vector Store Directory", "✅ Created" if vector_store_exists else "❌ Missing"),
            ("Chat History Directory", "✅ Created" if chat_history_exists else "❌ Missing"),
            ("GROQ API Key", "✅ Set" if os.getenv("GROQ_API_KEY") else "❌ Missing"),
        )
        for component, state in rows:
            checks.add_row(component, state)

        status_panel = Panel(
            checks,
            title="[bold blue]Status Law Assistant API Status[/bold blue]",
            border_style="blue"
        )

        # Startup banner, status table, then documentation links.
        for item in (
            "\n",
            "[bold green]🚀 Server started successfully![/bold green]",
            status_panel,
            "\n[bold yellow]API Documentation:[/bold yellow]",
            "📚 Swagger UI: http://0.0.0.0:8000/docs",
            "📘 ReDoc: http://0.0.0.0:8000/redoc\n",
        ):
            console.print(item)

    except Exception as e:
        console.print(f"[bold red]Error printing status: {str(e)}[/bold red]")
402
+
403
if __name__ == "__main__":
    import uvicorn

    # The PORT environment variable overrides the default port 8000.
    port = int(os.getenv("PORT", 8000))
    logger.info(f"Starting server on port {port}")

    # Run the app on all interfaces with debug-level server logging.
    server = uvicorn.Server(
        uvicorn.Config(app, host="0.0.0.0", port=port, log_level="debug")
    )
    server.run()
app.py CHANGED
@@ -1,5 +1,11 @@
1
  import os
2
  import time
 
 
 
 
 
 
3
  from dotenv import load_dotenv
4
  from langchain_groq import ChatGroq
5
  from langchain_huggingface import HuggingFaceEmbeddings
@@ -11,407 +17,391 @@ from langchain_core.output_parsers import StrOutputParser
11
  from datetime import datetime
12
  import json
13
  import traceback
14
- from fastapi import FastAPI, HTTPException, Request
15
- from fastapi.responses import JSONResponse
16
  from pydantic import BaseModel
17
- from api import router as analysis_router
18
- from utils import ChatAnalyzer, setup_chat_analysis
19
- import requests.exceptions
20
- import aiohttp
21
- from typing import Union
22
- import uvicorn
23
- import logging
24
- from rich import print as rprint
25
- from rich.console import Console
26
- from rich.panel import Panel
27
- from rich.table import Table
28
 
29
- console = Console()
 
30
 
31
- # Базовая настройка логирования
32
- logging.basicConfig(level=logging.DEBUG)
33
- logger = logging.getLogger(__name__)
 
34
 
35
- # Определение путей
36
- VECTOR_STORE_PATH = os.path.join(os.getcwd(), "vector_store")
37
- CHAT_HISTORY_PATH = os.path.join(os.getcwd(), "chat_history")
 
 
 
 
 
 
 
 
 
 
 
 
38
 
 
39
  app = FastAPI(title="Status Law Assistant API")
40
 
 
 
 
 
 
 
 
 
 
 
41
  class ChatRequest(BaseModel):
42
  message: str
43
-
 
44
  class ChatResponse(BaseModel):
45
  response: str
 
 
 
 
 
 
46
 
47
- def check_vector_store():
48
- """Проверка наличия векторной базы"""
49
- index_path = os.path.join(VECTOR_STORE_PATH, "index.faiss")
50
- return os.path.exists(index_path)
51
-
52
- @app.get("/")
53
- async def root():
54
- """Базовый эндпоинт с информацией о состоянии"""
55
- return {
56
- "status": "ok",
57
- "vector_store_ready": check_vector_store(),
58
- "timestamp": datetime.now().isoformat()
59
- }
60
-
61
- @app.get("/status")
62
- async def get_status():
63
- """Получение статуса векторной базы"""
64
- return {
65
- "vector_store_exists": check_vector_store(),
66
- "can_chat": check_vector_store(),
67
- "vector_store_path": VECTOR_STORE_PATH
68
- }
69
 
70
- @app.post("/build-knowledge-base")
71
- async def build_kb():
72
- """Эндпоинт для построения базы знаний"""
73
  try:
74
- if check_vector_store():
75
- return {
76
- "status": "exists",
77
- "message": "Knowledge base already exists"
78
- }
79
-
80
- # Инициализируем embeddings только когда нужно построить базу
81
- embeddings = HuggingFaceEmbeddings(
82
- model_name="sentence-transformers/all-MiniLM-L6-v2"
83
- )
84
- vector_store = build_knowledge_base(embeddings)
 
 
 
 
 
85
 
86
- return {
87
- "status": "success",
88
- "message": "Knowledge base built successfully"
89
- }
 
 
90
  except Exception as e:
91
- logger.error(f"Failed to build knowledge base: {str(e)}")
92
- raise HTTPException(
93
- status_code=500,
94
- detail=f"Failed to build knowledge base: {str(e)}"
95
- )
96
 
97
- @app.post("/chat", response_model=ChatResponse)
98
- async def chat_endpoint(request: ChatRequest):
99
- """Эндпоинт чата"""
100
- if not check_vector_store():
101
- raise HTTPException(
102
- status_code=400,
103
- detail="Knowledge base not found. Please build it first using /build-knowledge-base endpoint"
104
- )
105
 
106
  try:
107
- # Инициализируем компоненты только при необходимости
108
- llm = ChatGroq(
109
- model_name="llama-3.3-70b-versatile",
110
- temperature=0.6,
111
- api_key=os.getenv("GROQ_API_KEY")
112
- )
113
-
114
- embeddings = HuggingFaceEmbeddings(
115
- model_name="sentence-transformers/all-MiniLM-L6-v2"
116
  )
117
 
118
- vector_store = FAISS.load_local(
119
- VECTOR_STORE_PATH,
120
- embeddings,
121
- allow_dangerous_deserialization=True
122
- )
123
-
124
- # Остальная логика чата...
125
- context_docs = vector_store.similarity_search(request.message)
126
- context_text = "\n".join([d.page_content for d in context_docs])
127
 
128
- prompt_template = PromptTemplate.from_template('''
129
- You are a helpful and polite legal assistant at Status Law.
130
- Answer the question based on the context provided.
131
- Context: {context}
132
- Question: {question}
133
- ''')
134
 
135
- chain = prompt_template | llm | StrOutputParser()
136
- response = chain.invoke({
137
- "context": context_text,
138
- "question": request.message
139
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
- return ChatResponse(response=response)
 
142
 
 
 
 
 
 
 
143
  except Exception as e:
144
- logger.error(f"Chat error: {str(e)}")
145
- raise HTTPException(
146
- status_code=500,
147
- detail=f"Chat error: {str(e)}"
148
- )
149
 
150
- # --------------- Knowledge Base Management ---------------
151
- URLS = [
152
- "https://status.law",
153
- "https://status.law/about",
154
- "https://status.law/careers",
155
- "https://status.law/tariffs-for-services-against-extradition-en",
156
- "https://status.law/challenging-sanctions",
157
- "https://status.law/law-firm-contact-legal-protection"
158
- "https://status.law/cross-border-banking-legal-issues",
159
- "https://status.law/extradition-defense",
160
- "https://status.law/international-prosecution-protection",
161
- "https://status.law/interpol-red-notice-removal",
162
- "https://status.law/practice-areas",
163
- "https://status.law/reputation-protection",
164
- "https://status.law/faq"
165
- ]
 
 
 
 
 
 
 
 
 
 
166
 
167
- def build_knowledge_base(_embeddings):
 
168
  """Build or update the knowledge base"""
 
 
 
 
169
  try:
170
  start_time = time.time()
171
  documents = []
172
 
173
- # Ensure vector store directory exists
174
- if not os.path.exists(VECTOR_STORE_PATH):
175
- os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
176
 
 
177
  for url in URLS:
178
  try:
179
  loader = WebBaseLoader(url)
180
  docs = loader.load()
181
  documents.extend(docs)
 
182
  except Exception as e:
183
  print(f"Failed to load {url}: {str(e)}")
184
  continue
185
-
186
  if not documents:
187
- raise HTTPException(status_code=500, detail="No documents loaded")
188
 
 
189
  text_splitter = RecursiveCharacterTextSplitter(
190
  chunk_size=500,
191
  chunk_overlap=100
192
  )
193
  chunks = text_splitter.split_documents(documents)
194
 
 
195
  vector_store = FAISS.from_documents(chunks, _embeddings)
196
  vector_store.save_local(
197
  folder_path=VECTOR_STORE_PATH,
198
  index_name="index"
199
  )
200
 
 
201
  if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
202
- raise HTTPException(status_code=500, detail="FAISS index file not created")
203
 
204
- return vector_store
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  except Exception as e:
207
- raise HTTPException(status_code=500, detail=f"Knowledge base creation failed: {str(e)}")
 
 
 
208
 
209
- # --------------- API Models ---------------
210
- class ChatRequest(BaseModel):
211
- message: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
- class ChatResponse(BaseModel):
214
- response: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- # --------------- API Routes ---------------
217
  @app.post("/chat", response_model=ChatResponse)
218
  async def chat_endpoint(request: ChatRequest):
 
 
 
 
 
 
 
 
 
 
 
219
  try:
220
- llm, embeddings = init_models()
 
 
221
 
222
- if not os.path.exists(VECTOR_STORE_PATH):
223
- vector_store = build_knowledge_base(embeddings)
224
- else:
225
- vector_store = FAISS.load_local(
226
- VECTOR_STORE_PATH,
227
- embeddings,
228
- allow_dangerous_deserialization=True
229
- )
230
-
231
- # Add retry logic for network operations
232
- max_retries = 3
233
- retry_count = 0
234
-
235
- while retry_count < max_retries:
236
- try:
237
- context_docs = vector_store.similarity_search(request.message)
238
- context_text = "\n".join([d.page_content for d in context_docs])
239
-
240
- prompt_template = PromptTemplate.from_template('''
241
- You are a helpful and polite legal assistant at Status Law.
242
- You answer in the language in which the question was asked.
243
- Answer the question based on the context provided.
244
-
245
- # ... остальной текст промпта ...
246
-
247
- Context: {context}
248
- Question: {question}
249
-
250
- Response Guidelines:
251
- 1. Answer in the user's language
252
- 2. Cite sources when possible
253
- 3. Offer contact options if unsure
254
- ''')
255
-
256
- chain = prompt_template | llm | StrOutputParser()
257
- response = chain.invoke({
258
- "context": context_text,
259
- "question": request.message
260
- })
261
-
262
- log_interaction(request.message, response, context_text)
263
- return ChatResponse(response=response)
264
-
265
- except (requests.exceptions.RequestException, aiohttp.ClientError) as e:
266
- retry_count += 1
267
- if retry_count == max_retries:
268
- raise HTTPException(
269
- status_code=503,
270
- detail={
271
- "error": "Network error after maximum retries",
272
- "detail": str(e),
273
- "type": "network_error"
274
- }
275
- )
276
- await asyncio.sleep(1 * retry_count) # Exponential backoff
277
-
278
- except Exception as e:
279
- if isinstance(e, (requests.exceptions.RequestException, aiohttp.ClientError)):
280
  raise HTTPException(
281
- status_code=503,
282
- detail={
283
- "error": "Network error occurred",
284
- "detail": str(e),
285
- "type": "network_error"
286
- }
287
  )
288
- raise HTTPException(status_code=500, detail=str(e))
289
-
290
- # --------------- Logging ---------------
291
- def log_interaction(user_input: str, bot_response: str, context: str):
292
- try:
293
- log_entry = {
294
- "timestamp": datetime.now().isoformat(),
295
- "user_input": user_input,
296
- "bot_response": bot_response,
297
- "context": context[:500],
298
- "kb_version": datetime.now().strftime("%Y%m%d-%H%M%S")
299
- }
300
 
301
- os.makedirs("chat_history", exist_ok=True)
302
- log_path = os.path.join("chat_history", "chat_logs.json")
 
303
 
304
- with open(log_path, "a", encoding="utf-8") as f:
305
- f.write(json.dumps(log_entry, ensure_ascii=False) + "\n")
306
-
307
- except Exception as e:
308
- print(f"Logging error: {str(e)}")
309
- print(traceback.format_exc())
 
 
 
 
310
 
311
- # Add health check endpoint
312
- @app.get("/health")
313
- async def health_check():
314
- try:
315
- # Check if models can be initialized
316
- llm, embeddings = init_models()
 
 
 
 
 
 
 
 
 
 
 
317
 
318
- # Check if vector store is accessible
319
- if os.path.exists(VECTOR_STORE_PATH):
320
- vector_store = FAISS.load_local(
321
- VECTOR_STORE_PATH,
322
- embeddings,
323
- allow_dangerous_deserialization=True
324
- )
 
325
 
326
  return {
327
- "status": "healthy",
328
- "vector_store": "available" if os.path.exists(VECTOR_STORE_PATH) else "not_found"
329
  }
330
-
331
  except Exception as e:
332
- return JSONResponse(
333
- status_code=503,
334
- content={
335
- "status": "unhealthy",
336
- "error": str(e)
337
- }
338
- )
339
-
340
- # Add diagnostic endpoint
341
- @app.get("/directory-status")
342
- async def check_directory_status():
343
- """Check status of required directories"""
344
- return {
345
- "vector_store": {
346
- "exists": os.path.exists(VECTOR_STORE_PATH),
347
- "path": os.path.abspath(VECTOR_STORE_PATH),
348
- "contents": os.listdir(VECTOR_STORE_PATH) if os.path.exists(VECTOR_STORE_PATH) else []
349
- },
350
- "chat_history": {
351
- "exists": os.path.exists(CHAT_HISTORY_PATH),
352
- "path": os.path.abspath(CHAT_HISTORY_PATH),
353
- "contents": os.listdir(CHAT_HISTORY_PATH) if os.path.exists(CHAT_HISTORY_PATH) else []
354
- }
355
- }
356
 
357
- # Добавим функцию для вывода статуса
358
- def print_startup_status():
359
- """Print application startup status with rich formatting"""
 
 
 
 
 
360
  try:
361
- # Create status table
362
- table = Table(show_header=True, header_style="bold magenta")
363
- table.add_column("Component", style="cyan")
364
- table.add_column("Status", style="green")
365
-
366
- # Check directories
367
- vector_store_exists = os.path.exists(VECTOR_STORE_PATH)
368
- chat_history_exists = os.path.exists(CHAT_HISTORY_PATH)
369
-
370
- table.add_row(
371
- "Vector Store Directory",
372
- "✅ Created" if vector_store_exists else "❌ Missing"
373
- )
374
- table.add_row(
375
- "Chat History Directory",
376
- "✅ Created" if chat_history_exists else "❌ Missing"
377
- )
378
-
379
- # Check environment variables
380
- table.add_row(
381
- "GROQ API Key",
382
- "✅ Set" if os.getenv("GROQ_API_KEY") else "❌ Missing"
383
- )
384
-
385
- # Create status panel
386
- status_panel = Panel(
387
- table,
388
- title="[bold blue]Status Law Assistant API Status[/bold blue]",
389
- border_style="blue"
390
- )
391
-
392
- # Print startup message and status
393
- console.print("\n")
394
- console.print("[bold green]🚀 Server started successfully![/bold green]")
395
- console.print(status_panel)
396
- console.print("\n[bold yellow]API Documentation:[/bold yellow]")
397
- console.print("📚 Swagger UI: http://0.0.0.0:8000/docs")
398
- console.print("📘 ReDoc: http://0.0.0.0:8000/redoc\n")
399
-
400
  except Exception as e:
401
- console.print(f"[bold red]Error printing status: {str(e)}[/bold red]")
402
 
 
403
  if __name__ == "__main__":
404
- import uvicorn
405
-
406
- port = int(os.getenv("PORT", 8000))
407
- logger.info(f"Starting server on port {port}")
408
-
409
- config = uvicorn.Config(
410
- app,
411
- host="0.0.0.0",
412
- port=port,
413
- log_level="debug"
414
- )
415
-
416
- server = uvicorn.Server(config)
417
- server.run()
 
1
  import os
2
  import time
3
+ import uvicorn
4
+ from fastapi import FastAPI, HTTPException, Request
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from fastapi.responses import HTMLResponse
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.templating import Jinja2Templates
9
  from dotenv import load_dotenv
10
  from langchain_groq import ChatGroq
11
  from langchain_huggingface import HuggingFaceEmbeddings
 
17
  from datetime import datetime
18
  import json
19
  import traceback
20
+ from typing import Dict, List, Optional
 
21
  from pydantic import BaseModel
22
+ from huggingface_hub import Repository, snapshot_download
 
 
 
 
 
 
 
 
 
 
23
 
24
# Read variables from a local .env file into the process environment
load_dotenv()

# Relative directory that holds the FAISS index
VECTOR_STORE_PATH: str = "vector_store"
# Hugging Face dataset repository used by the dataset integration functions
HF_DATASET_REPO: str = "Rulga/LS_chat"
# Relative directory name for locally stored chat history
LOCAL_CHAT_HISTORY_PATH: str = "chat_history"
31
 
32
# Site root plus the page slugs listed below; URLS holds the full addresses.
_SITE_ROOT = "https://status.law"
_PAGE_SLUGS = (
    "about",
    "careers",
    "tariffs-for-services-of-protection-against-extradition",
    "challenging-sanctions",
    "law-firm-contact-legal-protection",
    "cross-border-banking-legal-issues",
    "extradition-defense",
    "international-prosecution-protection",
    "interpol-red-notice-removal",
    "practice-areas",
    "reputation-protection",
    "faq",
)
URLS = [_SITE_ROOT] + [f"{_SITE_ROOT}/{slug}" for slug in _PAGE_SLUGS]
47
 
48
# Initialize the FastAPI app
app = FastAPI(title="Status Law Assistant API")

# Add CORS middleware. Any origin may call the API, so credentialed requests
# are switched off: wildcard origins must not be combined with
# allow_credentials=True.
# NOTE(review): if cookies or other credentials are ever needed cross-origin,
# list the allowed origins explicitly and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
59
+
60
# ---- Request and response models ----

# Chat request body: the message text plus an optional conversation id.
class ChatRequest(BaseModel):
    message: str
    conversation_id: Optional[str] = None


# Chat response body: the answer text and a conversation id.
class ChatResponse(BaseModel):
    response: str
    conversation_id: str


# Knowledge base build result: status, message and optional details.
class BuildKnowledgeBaseResponse(BaseModel):
    status: str
    message: str
    details: Optional[Dict] = None
73
 
74
# Module-level state for the models and the knowledge base; all three
# handles start out unset.
llm = embeddings = vector_store = None

# Knowledge base metadata: build time and size start unset, version is fixed.
kb_info = dict(build_time=None, size=None, version='1.1')
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ # --------------- Hugging Face Dataset Integration ---------------
85
def init_hf_dataset_integration():
    """Restore the persisted vector store from the Hugging Face dataset repo.

    Downloads a snapshot of ``HF_DATASET_REPO`` into ``./data_snapshot`` and,
    when that snapshot contains ``vector_store/index.faiss``, copies the
    snapshot's ``vector_store`` directory into ``VECTOR_STORE_PATH``.

    Returns:
        bool: ``True`` if a vector store was found and copied; ``False`` if
        the snapshot holds no index or any step failed (the error is printed,
        never raised, so application startup is not interrupted).
    """
    import shutil  # local import: only the persistence helpers need it

    snapshot_dir = "./data_snapshot"
    try:
        # An unset or empty HF_TOKEN is passed as None, which is the same as
        # omitting the argument, so a single call covers both cases.
        snapshot_download(
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            local_dir=snapshot_dir,
            token=os.getenv("HF_TOKEN") or None,
        )

        source_dir = os.path.join(snapshot_dir, "vector_store")
        if os.path.exists(os.path.join(source_dir, "index.faiss")):
            # copytree creates the destination if needed and raises on
            # failure, unlike a shelled-out `cp` whose exit status was ignored.
            shutil.copytree(source_dir, VECTOR_STORE_PATH, dirs_exist_ok=True)
            return True
    except Exception as e:
        print(f"Error downloading dataset: {e}")

    return False
 
 
115
 
116
def upload_to_hf_dataset():
    """Push the local vector store and chat history to the Hugging Face dataset.

    Clones ``HF_DATASET_REPO`` into ``./data_upload``, copies the contents of
    ``VECTOR_STORE_PATH`` (if it holds ``index.faiss``) and of
    ``LOCAL_CHAT_HISTORY_PATH`` (if it holds ``chat_logs.json``) into the
    clone, then commits and pushes.

    Returns:
        bool: ``True`` when the push succeeded; ``False`` when ``HF_TOKEN`` is
        not set or any step failed (the error is printed, never raised).
    """
    import shutil  # local import: only the persistence helpers need it

    token = os.getenv("HF_TOKEN")
    if not token:
        print("HF_TOKEN not set, cannot upload to Hugging Face")
        return False

    upload_dir = "./data_upload"
    try:
        # NOTE(review): huggingface_hub documents `Repository` as deprecated
        # in favour of the HTTP-based HfApi upload methods; consider migrating.
        repo = Repository(
            local_dir=upload_dir,
            clone_from=HF_DATASET_REPO,
            repo_type="dataset",
            token=token,
        )

        # copytree raises on failure, so a broken copy aborts the upload
        # instead of silently pushing stale data.
        if os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
            shutil.copytree(
                VECTOR_STORE_PATH,
                os.path.join(upload_dir, "vector_store"),
                dirs_exist_ok=True,
            )

        if os.path.exists(os.path.join(LOCAL_CHAT_HISTORY_PATH, "chat_logs.json")):
            shutil.copytree(
                LOCAL_CHAT_HISTORY_PATH,
                os.path.join(upload_dir, "chat_history"),
                dirs_exist_ok=True,
            )

        repo.push_to_hub(commit_message="Update vector store and chat history")
        return True
    except Exception as e:
        print(f"Error uploading to dataset: {e}")
        return False
147
+
148
+ # --------------- Enhanced Logging ---------------
149
def log_interaction(user_input: str, bot_response: str, context: str, conversation_id: str):
    """Append one chat exchange to the local log and sync it to Hugging Face.

    The record is written as a single JSON line to ``chat_logs.json`` inside
    ``LOCAL_CHAT_HISTORY_PATH``; the context is truncated to its first 500
    characters. Every failure is printed with its traceback and swallowed, so
    logging problems never break the chat request that triggered them.

    Args:
        user_input: The message the user sent.
        bot_response: The answer returned to the user.
        context: Retrieved knowledge-base text the answer was based on.
        conversation_id: Identifier of the conversation the exchange belongs to.
    """
    try:
        record = {
            "timestamp": datetime.now().isoformat(),
            "conversation_id": conversation_id,
            "user_input": user_input,
            "bot_response": bot_response,
            "context": context[:500] if context else "",
            "kb_version": kb_info['version'],
        }

        os.makedirs(LOCAL_CHAT_HISTORY_PATH, exist_ok=True)
        destination = os.path.join(LOCAL_CHAT_HISTORY_PATH, "chat_logs.json")

        # Append mode: one JSON document per line.
        with open(destination, "a", encoding="utf-8") as log_file:
            serialized = json.dumps(record, ensure_ascii=False)
            log_file.write(serialized + "\n")

        # Persist the updated log remotely right away.
        upload_to_hf_dataset()

    except Exception as e:
        print(f"Logging error: {str(e)}")
        print(traceback.format_exc())
 
 
 
173
 
174
+ # --------------- Model Initialization ---------------
175
def init_models():
    """Create the chat model and the embeddings on first use and return both.

    The instances are cached in the module-level ``llm`` and ``embeddings``
    globals, so later calls return the existing objects.

    Returns:
        tuple: ``(llm, embeddings)``.

    Raises:
        HTTPException: status 500 if either model cannot be constructed; the
            original exception is chained as the cause.
    """
    global llm, embeddings

    # Compare against None explicitly: the globals start as None and the
    # check should not depend on how the model objects define truthiness.
    if llm is None:
        try:
            llm = ChatGroq(
                model_name="llama-3.3-70b-versatile",
                temperature=0.6,
                api_key=os.getenv("GROQ_API_KEY")
            )
        except Exception as e:
            print(f"LLM initialization failed: {str(e)}")
            raise HTTPException(
                status_code=500,
                detail=f"LLM initialization failed: {str(e)}"
            ) from e

    if embeddings is None:
        try:
            embeddings = HuggingFaceEmbeddings(
                model_name="intfloat/multilingual-e5-large-instruct"
            )
        except Exception as e:
            print(f"Embeddings initialization failed: {str(e)}")
            raise HTTPException(
                status_code=500,
                detail=f"Embeddings initialization failed: {str(e)}"
            ) from e

    return llm, embeddings
200
 
201
+ # --------------- Knowledge Base Management ---------------
202
def build_knowledge_base():
    """Build or update the knowledge base.

    Loads every page listed in ``URLS``, splits the text into overlapping
    chunks, embeds them into a FAISS index saved under ``VECTOR_STORE_PATH``,
    refreshes ``kb_info`` and uploads the result to the Hugging Face dataset.
    Pages that fail to load are reported and skipped.

    Returns:
        dict: ``status``, ``message`` and ``details`` (the ``kb_info`` dict:
        ``build_time`` is the elapsed build duration in seconds, ``size`` the
        combined size of the index files in MiB, ``version`` a timestamp).

    Raises:
        HTTPException: status 500 if no page could be loaded, the index file
            was not written, or any other step failed.
    """
    global vector_store, kb_info

    _, _embeddings = init_models()

    try:
        start_time = time.time()
        documents = []

        # Create folder in advance
        os.makedirs(VECTOR_STORE_PATH, exist_ok=True)

        # Load documents
        for url in URLS:
            try:
                loader = WebBaseLoader(url)
                docs = loader.load()
                documents.extend(docs)
                print(f"Loaded {url}")
            except Exception as e:
                print(f"Failed to load {url}: {str(e)}")
                continue

        if not documents:
            raise HTTPException(status_code=500, detail="No documents loaded!")

        # Split into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(documents)

        # Create vector store
        vector_store = FAISS.from_documents(chunks, _embeddings)
        vector_store.save_local(
            folder_path=VECTOR_STORE_PATH,
            index_name="index"
        )

        # Verify file creation
        if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
            raise HTTPException(status_code=500, detail="FAISS index file not created!")

        # Update info
        kb_info.update({
            'build_time': time.time() - start_time,
            'size': sum(
                os.path.getsize(os.path.join(VECTOR_STORE_PATH, f))
                for f in ["index.faiss", "index.pkl"]
            ) / (1024 ** 2),
            'version': datetime.now().strftime("%Y%m%d-%H%M%S")
        })

        # Upload to Hugging Face
        upload_to_hf_dataset()

        return {
            "status": "success",
            "message": "Knowledge base successfully created!",
            "details": kb_info
        }

    except HTTPException as e:
        # Already a well-formed API error: pass it through unchanged instead
        # of letting the generic handler below re-wrap its message.
        print(f"Knowledge base creation failed: {e.detail}")
        raise
    except Exception as e:
        error_msg = f"Knowledge base creation failed: {str(e)}"
        print(error_msg)
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=error_msg) from e
271
 
272
def load_knowledge_base():
    """Return the FAISS vector store, reading it from disk on first use.

    The loaded store is cached in the module-level ``vector_store`` global.

    Returns:
        The vector store, or ``None`` if it could not be loaded from
        ``VECTOR_STORE_PATH`` (the error and traceback are printed).
    """
    global vector_store

    if not vector_store:
        _, embedder = init_models()

        try:
            # NOTE(review): allow_dangerous_deserialization=True presumably
            # permits unpickling the stored index metadata; the files may come
            # from the remote dataset snapshot, so confirm the source is trusted.
            vector_store = FAISS.load_local(
                VECTOR_STORE_PATH,
                embedder,
                allow_dangerous_deserialization=True
            )
        except Exception as e:
            failure = f"Failed to load knowledge base: {str(e)}"
            print(failure)
            print(traceback.format_exc())
            return None

    return vector_store
293
 
294
+ # --------------- API Endpoints ---------------
295
@app.get("/")
async def root():
    """Report service status and whether a knowledge base is available.

    Returns:
        dict: ``status`` (always ``"running"``), ``knowledge_base_exists``
        (whether ``index.faiss`` is present under ``VECTOR_STORE_PATH``) and
        ``kb_info`` (the build metadata, or ``None`` when no index exists).
    """
    index_file = os.path.join(VECTOR_STORE_PATH, "index.faiss")
    has_index = os.path.exists(index_file)

    status_report = {"status": "running", "knowledge_base_exists": has_index}
    status_report["kb_info"] = kb_info if has_index else None
    return status_report
305
+
306
@app.post("/build-kb", response_model=BuildKnowledgeBaseResponse)
async def build_kb_endpoint():
    """Build or rebuild the knowledge base and return the build report."""
    build_report = build_knowledge_base()
    return build_report
310
 
 
311
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Answer a user message using context retrieved from the knowledge base.

    Args:
        request: The user's message and, optionally, the id of an ongoing
            conversation.

    Returns:
        dict: ``response`` (the generated answer) and ``conversation_id``
        (the supplied id, or a newly generated timestamp-based one).

    Raises:
        HTTPException: status 400 when no FAISS index exists on disk; status
            500 when the models or the knowledge base cannot be loaded or
            when generating the answer fails.
    """
    # Check if knowledge base exists
    if not os.path.exists(os.path.join(VECTOR_STORE_PATH, "index.faiss")):
        raise HTTPException(
            status_code=400,
            detail="Knowledge base not found. Please build it first with /build-kb"
        )

    # Use provided conversation ID or generate a new one
    conversation_id = request.conversation_id or f"conv_{datetime.now().strftime('%Y%m%d%H%M%S')}"

    try:
        # Load models and knowledge base
        _llm, _ = init_models()
        _vector_store = load_knowledge_base()

        if not _vector_store:
            raise HTTPException(
                status_code=500,
                detail="Failed to load knowledge base"
            )

        # Retrieve context
        context_docs = _vector_store.similarity_search(request.message)
        context_text = "\n".join([d.page_content for d in context_docs])

        # Generate response
        prompt_template = PromptTemplate.from_template('''
        You are a helpful and polite legal assistant at Status Law.
        You answer in the language in which the question was asked.
        Answer the question based on the context provided.
        If you cannot answer based on the context, say so politely and offer to contact Status Law directly via the following channels:
        - For all users: +32465594521 (landline phone).
        - For English and Swedish speakers only: +46728495129 (available on WhatsApp, Telegram, Signal, IMO).
        - Provide a link to the contact form: [Contact Form](https://status.law/law-firm-contact-legal-protection/).
        If the user has questions about specific services and their costs, suggest they visit the page https://status.law/tariffs-for-services-of-protection-against-extradition-and-international-prosecution/ for detailed information.

        Ask the user additional questions to understand which service to recommend and provide an estimated cost. For example, clarify their situation and needs to suggest the most appropriate options.

        Also, offer free consultations if they are available and suitable for the user's request.
        Answer professionally but in a friendly manner.

        Example:
        Q: How can I challenge the sanctions?
        A: To challenge the sanctions, you should consult with our legal team, who specialize in this area. Please contact us directly for detailed advice. You can fill out our contact form here: [Contact Form](https://status.law/law-firm-contact-legal-protection/).

        Context: {context}
        Question: {question}

        Response Guidelines:
        1. Answer in the user's language
        2. Cite sources when possible
        3. Offer contact options if unsure
        ''')

        chain = prompt_template | _llm | StrOutputParser()
        response = chain.invoke({
            "context": context_text,
            "question": request.message
        })

        # Log the interaction
        log_interaction(request.message, response, context_text, conversation_id)

        return {
            "response": response,
            "conversation_id": conversation_id
        }

    except HTTPException:
        # Already a well-formed API error (model or knowledge-base loading):
        # pass it through instead of re-wrapping its message below.
        raise
    except Exception as e:
        error_msg = f"Error generating response: {str(e)}"
        print(error_msg)
        print(traceback.format_exc())
        raise HTTPException(status_code=500, detail=error_msg) from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ # Initialize dataset integration at startup
389
@app.on_event("startup")
async def startup_event():
    """Prepare persisted data and the embeddings model when the app starts."""
    global embeddings

    # Try to restore an existing knowledge base from Hugging Face.
    init_hf_dataset_integration()

    # Nothing to preload when an embeddings model is already present.
    if embeddings:
        return

    # Loading the model here keeps the first request from paying for it;
    # a failure is only reported, never fatal.
    try:
        embeddings = HuggingFaceEmbeddings(
            model_name="intfloat/multilingual-e5-large-instruct"
        )
    except Exception as e:
        print(f"Warning: Failed to preload embeddings: {e}")
404
 
405
+ # Run the application
406
  if __name__ == "__main__":
407
+ uvicorn.run("app:app", host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Status Law Assistant</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <style>
9
+ body {
10
+ background-color: #f8f9fa;
11
+ }
12
+ .chat-container {
13
+ max-width: 800px;
14
+ margin: 30px auto;
15
+ background: white;
16
+ border-radius: 10px;
17
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
18
+ overflow: hidden;
19
+ }
20
+ .chat-header {
21
+ padding: 15px 20px;
22
+ background: linear-gradient(135deg, #2B5876 0%, #4E4376 100%);
23
+ color: white;
24
+ border-bottom: 1px solid #e6e6e6;
25
+ }
26
+ .chat-area {
27
+ height: 400px;
28
+ overflow-y: auto;
29
+ padding: 20px;
30
+ background-color: #f8f9fa;
31
+ }
32
+ .user-message, .bot-message {
33
+ padding: 8px 15px;
34
+ margin-bottom: 10px;
35
+ border-radius: 18px;
36
+ max-width: 75%;
37
+ word-wrap: break-word;
38
+ }
39
+ .user-message {
40
+ background-color: #e2f0ff;
41
+ margin-left: auto;
42
+ border-bottom-right-radius: 5px;
43
+ }
44
+ .bot-message {
45
+ background-color: #f0f0f0;
46
+ margin-right: auto;
47
+ border-bottom-left-radius: 5px;
48
+ }
49
+ .input-area {
50
+ padding: 15px;
51
+ background-color: white;
52
+ border-top: 1px solid #e6e6e6;
53
+ }
54
+ .kb-status {
55
+ padding: 10px 20px;
56
+ background-color: #f8f9fa;
57
+ border-top: 1px solid #e6e6e6;
58
+ font-size: 0.9em;
59
+ color: #6c757d;
60
+ }
61
+ .kb-badge {
62
+ font-size: 0.8em;
63
+ padding: 5px 10px;
64
+ border-radius: 15px;
65
+ }
66
+ .loading {
67
+ display: inline-block;
68
+ width: 20px;
69
+ height: 20px;
70
+ border: 3px solid rgba(0,0,0,.1);
71
+ border-radius: 50%;
72
+ border-top-color: #2B5876;
73
+ animation: spin 1s ease-in-out infinite;
74
+ }
75
+ @keyframes spin {
76
+ to { transform: rotate(360deg); }
77
+ }
78
+ </style>
79
+ </head>
80
+ <body>
81
+ <div class="container">
82
+ <div class="chat-container">
83
+ <div class="chat-header d-flex justify-content-between align-items-center">
84
+ <h1 class="h4 mb-0">⚖️ Status Law Assistant</h1>
85
+ <span id="kb-status-badge" class="kb-badge bg-warning">Checking...</span>
86
+ </div>
87
+
88
+ <div id="kb-action-area" class="p-3 bg-light d-none">
89
+ <div class="alert alert-warning">
90
+ Knowledge base not found. You need to build it before chatting.
91
+ </div>
92
+ <button id="build-kb-btn" class="btn btn-primary">Build Knowledge Base</button>
93
+ </div>
94
+
95
+ <div id="chat-area" class="chat-area">
96
+ <div class="bot-message">
97
+ Hello! I'm the Status Law assistant. How can I help you with your legal questions?
98
+ </div>
99
+ </div>
100
+
101
+ <div class="input-area">
102
+ <div class="input-group">
103
+ <input
104
+ type="text"
105
+ id="user-input"
106
+ class="form-control"
107
+ placeholder="Type your message here..."
108
+ aria-label="Message"
109
+ >
110
+ <button id="send-btn" class="btn btn-primary">Send</button>
111
+ </div>
112
+ </div>
113
+
114
+ <div class="kb-status">
115
+ <small id="kb-info">Loading knowledge base info...</small>
116
+ </div>
117
+ </div>
118
+ </div>
119
+
120
+ <script>
121
+ // Global variables
122
+ let conversationId = null;
123
+
124
+ // DOM elements
125
+ const userInput = document.getElementById('user-input');
126
+ const sendBtn = document.getElementById('send-btn');
127
+ const chatArea = document.getElementById('chat-area');
128
+ const kbInfo = document.getElementById('kb-info');
129
+ const kbStatusBadge = document.getElementById('kb-status-badge');
130
+ const kbActionArea = document.getElementById('kb-action-area');
131
+ const buildKbBtn = document.getElementById('build-kb-btn');
132
+
133
+ // Check knowledge base status on load
134
+ checkKnowledgeBaseStatus();
135
+
136
+ // Event listeners
137
+ sendBtn.addEventListener('click', sendMessage);
138
+ userInput.addEventListener('keypress', function(e) {
139
+ if (e.key === 'Enter') {
140
+ sendMessage();
141
+ }
142
+ });
143
+
144
+ buildKbBtn.addEventListener('click', buildKnowledgeBase);
145
+
146
+ // Functions
147
/**
 * Query the backend status endpoint and update the knowledge-base badge,
 * the "build" call-to-action area and the info line accordingly.
 */
async function checkKnowledgeBaseStatus() {
    try {
        const response = await fetch('/');
        const data = await response.json();

        if (data.knowledge_base_exists) {
            kbStatusBadge.className = 'kb-badge bg-success';
            kbStatusBadge.textContent = 'Ready';
            kbActionArea.classList.add('d-none');

            if (data.kb_info) {
                // build_time is intentionally not shown: the backend stores the
                // build duration in seconds there, not a timestamp.
                kbInfo.textContent = `Knowledge base version: ${data.kb_info.version || 'Unknown'}, Size: ${data.kb_info.size ? data.kb_info.size.toFixed(2) + ' MB' : 'Unknown'}`;
            }
        } else {
            kbStatusBadge.className = 'kb-badge bg-danger';
            kbStatusBadge.textContent = 'Not Ready';
            kbActionArea.classList.remove('d-none');
            kbInfo.textContent = 'Knowledge base not found. Please build it first.';
        }
    } catch (error) {
        console.error('Error checking KB status:', error);
        kbStatusBadge.className = 'kb-badge bg-danger';
        kbStatusBadge.textContent = 'Error';
        kbInfo.textContent = 'Error checking knowledge base status.';
    }
}
175
+
176
/**
 * Ask the backend to build the knowledge base, showing progress on the badge
 * and the button, and report the outcome in the chat area.
 */
async function buildKnowledgeBase() {
    try {
        // Switch the UI into its "building" state.
        kbStatusBadge.className = 'kb-badge bg-warning';
        kbStatusBadge.textContent = 'Building...';
        buildKbBtn.disabled = true;
        buildKbBtn.innerHTML = '<span class="loading me-2"></span> Building...';

        const res = await fetch('/build-kb', { method: 'POST' });
        const payload = await res.json();

        // A failed build is routed to the catch block below.
        if (!res.ok) {
            throw new Error(payload.detail || 'Failed to build knowledge base');
        }

        kbStatusBadge.className = 'kb-badge bg-success';
        kbStatusBadge.textContent = 'Ready';
        kbActionArea.classList.add('d-none');

        const details = payload.details;
        if (details) {
            kbInfo.textContent = `Knowledge base version: ${details.version || 'Unknown'}, Size: ${details.size ? details.size.toFixed(2) + ' MB' : 'Unknown'}`;
        }

        // Let the user know chatting is possible now.
        addBotMessage("Knowledge base built successfully! You can now ask questions.");
    } catch (error) {
        console.error('Error building KB:', error);
        kbStatusBadge.className = 'kb-badge bg-danger';
        kbStatusBadge.textContent = 'Error';
        kbInfo.textContent = 'Error building knowledge base.';
        addBotMessage("There was an error building the knowledge base. Please try again later.");
    } finally {
        // Restore the button whatever the outcome was.
        buildKbBtn.disabled = false;
        buildKbBtn.textContent = 'Build Knowledge Base';
    }
}
214
+
215
/**
 * Send the text in the input box to the chat endpoint and show the answer.
 * A placeholder bubble with a spinner is displayed while waiting and is then
 * replaced by the answer or by an error notice.
 */
async function sendMessage() {
    const message = userInput.value.trim();
    if (!message) return;

    // Add user message to chat
    addUserMessage(message);
    userInput.value = '';

    // Add a temporary bot message with loading indicator
    const loadingMsgElement = addBotMessage('<span class="loading me-2"></span> Thinking...');

    try {
        const response = await fetch('/chat', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                message: message,
                conversation_id: conversationId
            }),
        });

        const data = await response.json();

        if (!response.ok) {
            throw new Error(data.detail || 'Failed to get response');
        }

        // Update conversation ID for future messages
        conversationId = data.conversation_id;

        // Replace the loading message with the answer. textContent is used
        // instead of innerHTML so that markup in the model output (which can
        // echo user-supplied text) is displayed, never interpreted as HTML.
        loadingMsgElement.textContent = data.response;
    } catch (error) {
        console.error('Error sending message:', error);
        loadingMsgElement.textContent = "Sorry, I encountered an error processing your request. Please try again later.";
    }

    // Scroll to bottom
    chatArea.scrollTop = chatArea.scrollHeight;
}
257
+
258
/**
 * Append a user bubble containing the given plain text to the chat area and
 * scroll the chat area to the bottom.
 */
function addUserMessage(text) {
    const bubble = document.createElement('div');
    bubble.className = 'user-message';
    bubble.textContent = text;

    chatArea.appendChild(bubble);
    chatArea.scrollTop = chatArea.scrollHeight;
}
265
+
266
/**
 * Append a bot bubble to the chat area, scroll to the bottom and return the
 * new element so the caller can update it later.
 * The argument is inserted as HTML, so pass only trusted markup.
 */
function addBotMessage(html) {
    const bubble = document.createElement('div');
    bubble.className = 'bot-message';
    bubble.innerHTML = html;

    chatArea.appendChild(bubble);
    chatArea.scrollTop = chatArea.scrollHeight;
    return bubble;
}
274
+ </script>
275
+ </body>
276
+ </html>
requirements.txt CHANGED
@@ -1,24 +1,14 @@
1
- langchain-community
2
- langchain-core
3
- langchain-huggingface
4
- langchain-groq
5
- python-dotenv
6
- beautifulsoup4
7
- faiss-cpu
8
- requests
9
- langgraph
10
- langchain-anthropic
11
- fastapi
12
- uvicorn[standard]
13
- pydantic
14
- python-multipart
15
- pandas
16
- langchain
17
- plotly
18
- pytest
19
- httpx
20
- pytest-asyncio
21
- aiohttp
22
- requests
23
- tenacity
24
- rich>=10.0.0
 
1
+ fastapi==0.109.2
2
+ uvicorn==0.27.1
3
+ langchain>=0.1.0
4
+ langchain_groq>=0.1.0
5
+ langchain_huggingface>=0.0.2
6
+ langchain_community>=0.0.13
7
+ langchain_text_splitters>=0.0.1
8
+ langchain_core>=0.1.10
9
+ faiss-cpu>=1.7.4
10
+ python-dotenv>=1.0.0
11
+ huggingface_hub>=0.19.0
12
+ jinja2>=3.0.0
13
+ aiofiles>=0.8.0
14
+ python-multipart>=0.0.6
 
 
 
 
 
 
 
 
 
 
space.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ sdk: docker
2
+ title: Status Law Assistant
3
+ emoji: ⚖️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk_version: 0.0.1
7
+ app_port: 8000
8
+ pinned: false
9
+ license: mit