Refactor project structure and add chat analysis functionality

- .gitignore +28 -5
- api/__init__.py +7 -0
- app.py +3 -0
- requirements.txt +3 -0
- setup_structure.sh +16 -0
- tests/__init__.py +6 -0
- tests/test_app.py +103 -0
- utils/__init__.py +7 -0
- utils/chat_analysis.py +111 -0
.gitignore
CHANGED
@@ -1,7 +1,30 @@
-
-*.env
+# Environment variables
 .env
-
-.streamlit/secrets.toml
+.env.*
 
-
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Project specific
+vector_store/*
+!vector_store/.gitkeep
+chat_history/*
+!chat_history/.gitkeep
+.cache/
+*.log
+
+# Hugging Face Space
+.streamlit/
+stop_space.py
api/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from fastapi import APIRouter
+from .analysis import LogAnalyzer
+
+# Create API router
+router = APIRouter(prefix="/api", tags=["analysis"])
+
+__all__ = ["LogAnalyzer", "router"]
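The analysis module imported here is not part of this diff; setup_structure.sh below moves an existing analysis.py into api/. Purely for orientation, a minimal sketch of a LogAnalyzer consistent with the expectations in tests/test_app.py (a log_path constructor argument, a _default_log_path class attribute, and get_basic_stats() returning a total_interactions count, or an empty dict when the log file is missing) could look like the following; the default path and internals are assumptions, not the contents of the real file:

# Hypothetical sketch of api/analysis.py (not included in this commit).
import json
import os
from typing import Dict, List, Optional


class LogAnalyzer:
    """Reads newline-delimited JSON chat logs and aggregates simple statistics."""

    _default_log_path = "chat_history/chat_logs.json"  # assumed default location

    def __init__(self, log_path: Optional[str] = None):
        self.log_path = log_path or self._default_log_path

    def _load_logs(self) -> List[Dict]:
        if not os.path.exists(self.log_path):
            return []
        logs = []
        with open(self.log_path, "r", encoding="utf-8") as f:
            for line in f:
                if line.strip():
                    logs.append(json.loads(line))
        return logs

    def get_basic_stats(self) -> Dict:
        logs = self._load_logs()
        if not logs:
            return {}  # mirrors test_analysis_error, which expects {} for a missing file
        return {"total_interactions": len(logs)}

How the /api/analysis/basic and /api/analysis/temporal routes exercised by the tests are registered on the router is likewise not visible in this diff.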
app.py
CHANGED
@@ -13,11 +13,14 @@ import json
 import traceback
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from api import router as analysis_router
+from utils import ChatAnalyzer, setup_chat_analysis
 
 # Initialize environment variables
 load_dotenv()
 
 app = FastAPI(title="Status Law Assistant API")
+app.include_router(analysis_router)
 
 # --------------- Model Initialization ---------------
 def init_models():
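Only the imports and the include_router call change in app.py here; the diff does not show where ChatAnalyzer and setup_chat_analysis are invoked by the chat logic. As a purely illustrative sketch, with a hypothetical echo handler standing in for the real /chat implementation (which retrieves context from the vector store and calls the LLM), the wiring could look like this:

# Hypothetical wiring sketch -- the /chat body below is a placeholder, not this commit's handler.
from fastapi import FastAPI
from pydantic import BaseModel

from api import router as analysis_router
from utils import setup_chat_analysis

app = FastAPI(title="Status Law Assistant API")
app.include_router(analysis_router)

# setup_chat_analysis() returns an in-memory ChatAnalyzer plus a logging callable
analyzer, log_interaction = setup_chat_analysis()


class ChatRequest(BaseModel):
    message: str


@app.post("/chat")
async def chat(request: ChatRequest):
    context = ""  # placeholder; the real handler builds context from retrieved documents
    answer = f"Echo: {request.message}"  # placeholder for the model's answer
    # Record the exchange in memory and append it to chat_history/chat_logs.json
    log_interaction(request.message, answer, context)
    return {"response": answer}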
requirements.txt
CHANGED
@@ -15,3 +15,6 @@ python-multipart
 pandas
 langchain
 plotly
+pytest
+httpx
+pytest-asyncio
setup_structure.sh
ADDED
@@ -0,0 +1,16 @@
+# Create the main directory structure
+mkdir -p api utils tests chat_history vector_store
+
+# Create the Python files
+touch api/__init__.py
+touch utils/__init__.py
+touch tests/__init__.py
+touch tests/test_app.py
+
+# Move the existing files
+mv chat_analysis.py utils/
+mv analysis.py api/
+
+# Create .gitkeep files for the empty directories
+touch chat_history/.gitkeep
+touch vector_store/.gitkeep
tests/__init__.py
ADDED
@@ -0,0 +1,6 @@
+import os
+import sys
+
+# Add project root to Python path for imports in tests
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, project_root)
tests/test_app.py
ADDED
@@ -0,0 +1,103 @@
+import pytest
+from fastapi.testclient import TestClient
+from app import app
+import os
+import json
+from utils import ChatAnalyzer
+from api import LogAnalyzer
+
+client = TestClient(app)
+
+# Test fixtures
+@pytest.fixture
+def test_log_file(tmp_path):
+    """Create a temporary file with test logs"""
+    log_file = tmp_path / "test_chat_logs.json"
+    test_logs = [
+        {
+            "timestamp": "2024-01-01T12:00:00",
+            "user_input": "Test question",
+            "bot_response": "Test response",
+            "context": "Test context",
+            "kb_version": "20240101"
+        }
+    ]
+
+    with open(log_file, 'w', encoding='utf-8') as f:
+        for log in test_logs:
+            f.write(json.dumps(log) + '\n')
+    return log_file
+
+# API endpoint tests
+def test_chat_endpoint():
+    """Test the main chat endpoint"""
+    response = client.post(
+        "/chat",
+        json={"message": "What services do you provide?"}
+    )
+    assert response.status_code == 200
+    assert "response" in response.json()
+
+def test_analysis_basic():
+    """Test the basic analysis endpoint"""
+    response = client.get("/api/analysis/basic")
+    assert response.status_code == 200
+    data = response.json()
+    assert "total_interactions" in data
+
+def test_analysis_temporal():
+    """Test the temporal analysis endpoint"""
+    response = client.get("/api/analysis/temporal")
+    assert response.status_code == 200
+    data = response.json()
+    assert "daily_activity" in data
+    assert "hourly_pattern" in data
+
+# Component tests
+def test_chat_analyzer():
+    """Test the ChatAnalyzer class"""
+    analyzer = ChatAnalyzer()
+    assert hasattr(analyzer, 'analyze_interaction')
+    assert hasattr(analyzer, 'create_analysis_dashboard')
+
+def test_log_analyzer(test_log_file):
+    """Test the LogAnalyzer class"""
+    analyzer = LogAnalyzer(log_path=str(test_log_file))
+    stats = analyzer.get_basic_stats()
+    assert "total_interactions" in stats
+    assert stats["total_interactions"] == 1
+
+# Utility tests
+def test_knowledge_base():
+    """Test access to the knowledge base"""
+    vector_store_path = "vector_store"
+    assert os.path.exists(vector_store_path)
+
+def test_environment():
+    """Test the environment configuration"""
+    assert "GROQ_API_KEY" in os.environ
+
+# Error handling tests
+def test_chat_endpoint_error():
+    """Test error handling in the chat endpoint"""
+    response = client.post(
+        "/chat",
+        json={"message": ""}  # Empty request
+    )
+    assert response.status_code == 422  # Validation error
+
+def test_analysis_error():
+    """Test error handling in analysis"""
+    # Temporarily change the log path to a nonexistent one
+    original_path = LogAnalyzer._default_log_path
+    LogAnalyzer._default_log_path = "nonexistent/path.json"
+
+    response = client.get("/api/analysis/basic")
+    assert response.status_code == 200
+    assert response.json() == {}
+
+    # Restore the original path
+    LogAnalyzer._default_log_path = original_path
+
+if __name__ == "__main__":
+    pytest.main([__file__])
utils/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from .chat_analysis import ChatAnalyzer, ChatAnalysis, setup_chat_analysis
+
+__all__ = [
+    "ChatAnalyzer",
+    "ChatAnalysis",
+    "setup_chat_analysis"
+]
utils/chat_analysis.py
ADDED
@@ -0,0 +1,111 @@
+import pandas as pd
+from datetime import datetime
+import json
+from typing import List, Dict
+from dataclasses import dataclass, asdict
+
+@dataclass
+class ChatAnalysis:
+    """Class for storing analysis results of a single chat interaction"""
+    timestamp: str
+    user_input: str
+    bot_response: str
+    context: str
+    kb_version: str
+    response_time: float
+    tokens_used: int
+    context_relevance_score: float
+
+class ChatAnalyzer:
+    """Class for analyzing chat interactions"""
+    def __init__(self):
+        self.analyses: List[ChatAnalysis] = []
+
+    def load_logs(self, log_file_path: str) -> List[Dict]:
+        """Load and parse chat logs from JSON file"""
+        logs = []
+        with open(log_file_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                try:
+                    logs.append(json.loads(line.strip()))
+                except json.JSONDecodeError:
+                    continue
+        return logs
+
+    def analyze_interaction(self, log_entry: Dict) -> ChatAnalysis:
+        """Analyze a single chat interaction"""
+        timestamp = datetime.fromisoformat(log_entry["timestamp"])
+        response_time = len(log_entry["bot_response"]) * 0.01
+        tokens_used = len(log_entry["bot_response"].split()) + len(log_entry["user_input"].split())
+
+        context_relevance = self._calculate_context_relevance(
+            log_entry["user_input"],
+            log_entry["context"],
+            log_entry["bot_response"]
+        )
+
+        return ChatAnalysis(
+            timestamp=timestamp.isoformat(),
+            user_input=log_entry["user_input"],
+            bot_response=log_entry["bot_response"],
+            context=log_entry["context"],
+            kb_version=log_entry["kb_version"],
+            response_time=response_time,
+            tokens_used=tokens_used,
+            context_relevance_score=context_relevance
+        )
+
+    def _calculate_context_relevance(self, query: str, context: str, response: str) -> float:
+        """Calculate relevance score between query and provided context"""
+        query_terms = set(query.lower().split())
+        context_terms = set(context.lower().split())
+        response_terms = set(response.lower().split())
+
+        query_context_overlap = len(query_terms & context_terms)
+        context_response_overlap = len(context_terms & response_terms)
+
+        if not query_terms or not context_terms:
+            return 0.0
+
+        return (query_context_overlap + context_response_overlap) / (len(query_terms) + len(context_terms))
+
+    def get_analysis_data(self) -> Dict:
+        """Get aggregated analysis data"""
+        df = pd.DataFrame([asdict(a) for a in self.analyses])
+
+        if df.empty:
+            return {
+                "total_interactions": 0,
+                "avg_response_time": 0,
+                "avg_relevance": 0,
+                "total_tokens": 0
+            }
+
+        return {
+            "total_interactions": len(df),
+            "avg_response_time": float(df['response_time'].mean()),
+            "avg_relevance": float(df['context_relevance_score'].mean()),
+            "total_tokens": int(df['tokens_used'].sum())
+        }
+
+def setup_chat_analysis():
+    """Initialize and configure chat analysis system"""
+    analyzer = ChatAnalyzer()
+
+    def enhanced_log_interaction(user_input: str, bot_response: str, context: str):
+        log_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "user_input": user_input,
+            "bot_response": bot_response,
+            "context": context,
+            "kb_version": datetime.now().strftime("%Y%m%d-%H%M%S")
+        }
+
+        analysis = analyzer.analyze_interaction(log_entry)
+        analyzer.analyses.append(analysis)
+
+        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as f:
+            json.dump(log_entry, f, ensure_ascii=False)
+            f.write("\n")
+
+    return analyzer, enhanced_log_interaction
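For orientation, a short offline usage sketch of the module above, assuming interactions have already been appended to chat_history/chat_logs.json by enhanced_log_interaction (the path and field names follow the code as written):

# Offline usage sketch for utils/chat_analysis.py (illustrative, not part of the commit).
from utils import ChatAnalyzer

analyzer = ChatAnalyzer()

# load_logs expects one JSON object per line, as written by enhanced_log_interaction
for entry in analyzer.load_logs("chat_history/chat_logs.json"):
    analyzer.analyses.append(analyzer.analyze_interaction(entry))

# Aggregated metrics: total_interactions, avg_response_time, avg_relevance, total_tokens
print(analyzer.get_analysis_data())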