Rulga committed on
Commit
8c4af83
·
1 Parent(s): f289b0a

Refactor project structure and add chat analysis functionality

Browse files
.gitignore CHANGED
@@ -1,7 +1,30 @@
1
- /.streamlit
2
- *.env
3
  .env
4
- venv
5
- .streamlit/secrets.toml
6
 
7
- stop_space.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Environment variables
 
2
  .env
3
+ .env.*
 
4
 
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ venv/
12
+ ENV/
13
+
14
+ # IDE
15
+ .idea/
16
+ .vscode/
17
+ *.swp
18
+ *.swo
19
+
20
+ # Project specific
21
+ vector_store/*
22
+ !vector_store/.gitkeep
23
+ chat_history/*
24
+ !chat_history/.gitkeep
25
+ .cache/
26
+ *.log
27
+
28
+ # Hugging Face Space
29
+ .streamlit/
30
+ stop_space.py
api/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""API package: exposes the analysis router and the LogAnalyzer class."""
from fastapi import APIRouter

from .analysis import LogAnalyzer

# One router for all analysis endpoints, mounted under the /api prefix.
router = APIRouter(prefix="/api", tags=["analysis"])

__all__ = ["LogAnalyzer", "router"]
app.py CHANGED
@@ -13,11 +13,14 @@ import json
13
  import traceback
14
  from fastapi import FastAPI, HTTPException
15
  from pydantic import BaseModel
 
 
16
 
17
  # Initialize environment variables
18
  load_dotenv()
19
 
20
  app = FastAPI(title="Status Law Assistant API")
 
21
 
22
  # --------------- Model Initialization ---------------
23
  def init_models():
 
13
  import traceback
14
  from fastapi import FastAPI, HTTPException
15
  from pydantic import BaseModel
16
+ from api import router as analysis_router
17
+ from utils import ChatAnalyzer, setup_chat_analysis
18
 
19
  # Initialize environment variables
20
  load_dotenv()
21
 
22
  app = FastAPI(title="Status Law Assistant API")
23
+ app.include_router(analysis_router)
24
 
25
  # --------------- Model Initialization ---------------
26
  def init_models():
requirements.txt CHANGED
@@ -15,3 +15,6 @@ python-multipart
15
  pandas
16
  langchain
17
  plotly
 
 
 
 
15
  pandas
16
  langchain
17
  plotly
18
+ pytest
19
+ httpx
20
+ pytest-asyncio
setup_structure.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Bootstrap the project directory layout.
# Fixes: added shebang and strict mode; guarded the `mv` commands so the
# script is safe to re-run after the files have already been moved.
set -euo pipefail

# Create the main directory structure
mkdir -p api utils tests chat_history vector_store

# Create the Python package and test files
touch api/__init__.py
touch utils/__init__.py
touch tests/__init__.py
touch tests/test_app.py

# Move existing modules into their packages (skip if already moved,
# otherwise a second run would abort under `set -e`)
if [ -f chat_analysis.py ]; then
    mv chat_analysis.py utils/
fi
if [ -f analysis.py ]; then
    mv analysis.py api/
fi

# Keep the otherwise-empty directories under version control
touch chat_history/.gitkeep
touch vector_store/.gitkeep
tests/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
"""Test package bootstrap.

Puts the repository root on ``sys.path`` so that ``import app`` and
friends resolve when the tests run from any working directory.
"""
import os
import sys

# tests/ sits one level below the project root.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, project_root)
tests/test_app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from fastapi.testclient import TestClient
3
+ from app import app
4
+ import os
5
+ import json
6
+ from utils import ChatAnalyzer
7
+ from api import LogAnalyzer
8
+
9
+ client = TestClient(app)
10
+
11
# Test fixtures
@pytest.fixture
def test_log_file(tmp_path):
    """Create a temporary JSONL file holding one test log entry."""
    entry = {
        "timestamp": "2024-01-01T12:00:00",
        "user_input": "Test question",
        "bot_response": "Test response",
        "context": "Test context",
        "kb_version": "20240101"
    }
    log_file = tmp_path / "test_chat_logs.json"
    # One JSON object per line, matching the application's log format.
    with open(log_file, 'w', encoding='utf-8') as f:
        f.write(json.dumps(entry) + '\n')
    return log_file
30
+
31
# API endpoint tests
def test_chat_endpoint():
    """The main chat endpoint answers a simple question with 200."""
    payload = {"message": "What services do you provide?"}
    resp = client.post("/chat", json=payload)
    assert resp.status_code == 200
    assert "response" in resp.json()
40
+
41
def test_analysis_basic():
    """The basic-analysis endpoint returns aggregate statistics."""
    resp = client.get("/api/analysis/basic")
    assert resp.status_code == 200
    assert "total_interactions" in resp.json()
47
+
48
def test_analysis_temporal():
    """The temporal-analysis endpoint reports daily and hourly activity."""
    resp = client.get("/api/analysis/temporal")
    assert resp.status_code == 200
    body = resp.json()
    for key in ("daily_activity", "hourly_pattern"):
        assert key in body
55
+
56
# Component tests
def test_chat_analyzer():
    """ChatAnalyzer exposes its expected public interface."""
    analyzer = ChatAnalyzer()
    # NOTE(review): the ChatAnalyzer in utils/chat_analysis.py added in
    # this commit does not define create_analysis_dashboard — confirm the
    # method exists somewhere, otherwise this test will fail.
    for attr in ("analyze_interaction", "create_analysis_dashboard"):
        assert hasattr(analyzer, attr)
62
+
63
def test_log_analyzer(test_log_file):
    """LogAnalyzer loads the fixture log and counts one interaction."""
    log_analyzer = LogAnalyzer(log_path=str(test_log_file))
    stats = log_analyzer.get_basic_stats()
    assert "total_interactions" in stats
    assert stats["total_interactions"] == 1
69
+
70
# Utility tests
def test_knowledge_base():
    """The vector store directory exists in the project root.

    Fix: resolve the path relative to this test file instead of the
    current working directory, so the test passes regardless of where
    pytest is invoked from.
    """
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    vector_store_path = os.path.join(project_root, "vector_store")
    assert os.path.exists(vector_store_path)
75
+
76
+ def test_environment():
77
+ """Тест настройки окружения"""
78
+ assert "GROQ_API_KEY" in os.environ
79
+
80
# Error-handling tests
def test_chat_endpoint_error():
    """An empty chat message is rejected with a validation error."""
    resp = client.post("/chat", json={"message": ""})
    assert resp.status_code == 422  # Validation error
88
+
89
def test_analysis_error():
    """The analysis endpoint degrades gracefully when the log file is missing.

    Fix: the class-attribute monkeypatch is now restored in a ``finally``
    block, so a failing request or assertion can no longer leak the fake
    log path into every subsequent test.
    """
    # Temporarily point the analyzer at a nonexistent log file.
    original_path = LogAnalyzer._default_log_path
    LogAnalyzer._default_log_path = "nonexistent/path.json"
    try:
        response = client.get("/api/analysis/basic")
        assert response.status_code == 200
        assert response.json() == {}
    finally:
        # Always restore the original path.
        LogAnalyzer._default_log_path = original_path
101
+
102
if __name__ == "__main__":
    # Allow running the suite directly: python tests/test_app.py
    pytest.main([__file__])
utils/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
"""Utility package: chat-analysis helpers."""
from .chat_analysis import ChatAnalyzer, ChatAnalysis, setup_chat_analysis

# Names re-exported as the package's public API.
__all__ = ["ChatAnalyzer", "ChatAnalysis", "setup_chat_analysis"]
utils/chat_analysis.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datetime import datetime
3
+ import json
4
+ from typing import List, Dict
5
+ from dataclasses import dataclass, asdict
6
+
7
@dataclass
class ChatAnalysis:
    """Analysis results for a single chat interaction."""

    timestamp: str                  # ISO-8601 time of the interaction
    user_input: str                 # raw user message
    bot_response: str               # generated answer
    context: str                    # knowledge-base context given to the model
    kb_version: str                 # knowledge-base version identifier
    response_time: float            # estimated response time (seconds)
    tokens_used: int                # rough token count (whitespace split)
    context_relevance_score: float  # overlap-based relevance score in [0, 1]
18
+
19
class ChatAnalyzer:
    """Collects per-interaction analyses and aggregates simple statistics."""

    def __init__(self):
        # Accumulated per-interaction analysis results.
        self.analyses: List[ChatAnalysis] = []

    def load_logs(self, log_file_path: str) -> List[Dict]:
        """Load chat logs from a JSONL file, skipping malformed lines."""
        logs = []
        with open(log_file_path, 'r', encoding='utf-8') as f:
            for raw in f:
                try:
                    logs.append(json.loads(raw.strip()))
                except json.JSONDecodeError:
                    # Silently ignore lines that are not valid JSON.
                    continue
        return logs

    def analyze_interaction(self, log_entry: Dict) -> ChatAnalysis:
        """Build a ChatAnalysis record for one log entry.

        Response time and token usage are rough text-length heuristics,
        not measured values.
        """
        parsed_ts = datetime.fromisoformat(log_entry["timestamp"])
        question = log_entry["user_input"]
        answer = log_entry["bot_response"]

        # Heuristics: 0.01 s per response character; tokens ~ whitespace words.
        estimated_response_time = len(answer) * 0.01
        token_estimate = len(answer.split()) + len(question.split())

        relevance = self._calculate_context_relevance(
            question,
            log_entry["context"],
            answer
        )

        return ChatAnalysis(
            timestamp=parsed_ts.isoformat(),
            user_input=question,
            bot_response=answer,
            context=log_entry["context"],
            kb_version=log_entry["kb_version"],
            response_time=estimated_response_time,
            tokens_used=token_estimate,
            context_relevance_score=relevance
        )

    def _calculate_context_relevance(self, query: str, context: str, response: str) -> float:
        """Score query/context/response word overlap; result lies in [0, 1]."""
        query_terms = set(query.lower().split())
        context_terms = set(context.lower().split())
        response_terms = set(response.lower().split())

        overlap_query_context = len(query_terms & context_terms)
        overlap_context_response = len(context_terms & response_terms)

        if not query_terms or not context_terms:
            return 0.0

        return (overlap_query_context + overlap_context_response) / (
            len(query_terms) + len(context_terms)
        )

    def get_analysis_data(self) -> Dict:
        """Return aggregate statistics over all collected analyses."""
        df = pd.DataFrame([asdict(a) for a in self.analyses])

        if df.empty:
            # Nothing recorded yet: report zeros with the same keys.
            return {
                "total_interactions": 0,
                "avg_response_time": 0,
                "avg_relevance": 0,
                "total_tokens": 0
            }

        return {
            "total_interactions": len(df),
            "avg_response_time": float(df['response_time'].mean()),
            "avg_relevance": float(df['context_relevance_score'].mean()),
            "total_tokens": int(df['tokens_used'].sum())
        }
90
+
91
def setup_chat_analysis():
    """Initialize and configure the chat analysis system.

    Returns:
        tuple: ``(analyzer, enhanced_log_interaction)`` — the shared
        ChatAnalyzer and a callback that analyzes one interaction and
        appends it to ``chat_history/chat_logs.json`` (JSONL).
    """
    import os  # local import; keeps the module's dependency list unchanged

    analyzer = ChatAnalyzer()

    def enhanced_log_interaction(user_input: str, bot_response: str, context: str):
        """Analyze one interaction and persist it to the JSONL log."""
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "bot_response": bot_response,
            "context": context,
            "kb_version": datetime.now().strftime("%Y%m%d-%H%M%S")
        }

        analysis = analyzer.analyze_interaction(log_entry)
        analyzer.analyses.append(analysis)

        # Fix: create the log directory if it is missing, so the first
        # write on a fresh checkout does not raise FileNotFoundError.
        os.makedirs("chat_history", exist_ok=True)
        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as f:
            json.dump(log_entry, f, ensure_ascii=False)
            f.write("\n")

    return analyzer, enhanced_log_interaction