Refactor project structure and add chat analysis functionality

- .gitignore +28 -5
- api/__init__.py +7 -0
- app.py +3 -0
- requirements.txt +3 -0
- setup_structure.sh +16 -0
- tests/__init__.py +6 -0
- tests/test_app.py +103 -0
- utils/__init__.py +7 -0
- utils/chat_analysis.py +111 -0
.gitignore
CHANGED
@@ -1,7 +1,30 @@
-
-*.env
+# Environment variables
 .env
-
-.streamlit/secrets.toml
+.env.*
 
-
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Project specific
+vector_store/*
+!vector_store/.gitkeep
+chat_history/*
+!chat_history/.gitkeep
+.cache/
+*.log
+
+# Hugging Face Space
+.streamlit/
+stop_space.py
api/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from fastapi import APIRouter
+from .analysis import LogAnalyzer
+
+# Create API router
+router = APIRouter(prefix="/api", tags=["analysis"])
+
+__all__ = ["LogAnalyzer", "router"]
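The analysis module imported here is not part of this diff; setup_structure.sh below moves an existing analysis.py into api/. Purely for orientation, a minimal sketch of a LogAnalyzer consistent with the expectations in tests/test_app.py (a log_path constructor argument, a _default_log_path class attribute, and get_basic_stats() returning a total_interactions count, or an empty dict when the log file is missing) could look like the following; the default path and internals are assumptions, not the contents of the real file:

# Hypothetical sketch of api/analysis.py (not included in this commit).
import json
import os
from typing import Dict, List, Optional


class LogAnalyzer:
    """Reads newline-delimited JSON chat logs and aggregates simple statistics."""

    _default_log_path = "chat_history/chat_logs.json"  # assumed default location

    def __init__(self, log_path: Optional[str] = None):
        self.log_path = log_path or self._default_log_path

    def _load_logs(self) -> List[Dict]:
        if not os.path.exists(self.log_path):
            return []
        logs = []
        with open(self.log_path, "r", encoding="utf-8") as f:
            for line in f:
                if line.strip():
                    logs.append(json.loads(line))
        return logs

    def get_basic_stats(self) -> Dict:
        logs = self._load_logs()
        if not logs:
            return {}  # mirrors test_analysis_error, which expects {} for a missing file
        return {"total_interactions": len(logs)}

How the /api/analysis/basic and /api/analysis/temporal routes exercised by the tests are registered on the router is likewise not visible in this diff.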
app.py
CHANGED
@@ -13,11 +13,14 @@ import json
 import traceback
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+from api import router as analysis_router
+from utils import ChatAnalyzer, setup_chat_analysis
 
 # Initialize environment variables
 load_dotenv()
 
 app = FastAPI(title="Status Law Assistant API")
+app.include_router(analysis_router)
 
 # --------------- Model Initialization ---------------
 def init_models():
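Only the imports and the include_router call change in app.py here; the diff does not show where ChatAnalyzer and setup_chat_analysis are invoked by the chat logic. As a purely illustrative sketch, with a hypothetical echo handler standing in for the real /chat implementation (which retrieves context from the vector store and calls the LLM), the wiring could look like this:

# Hypothetical wiring sketch -- the /chat body below is a placeholder, not this commit's handler.
from fastapi import FastAPI
from pydantic import BaseModel

from api import router as analysis_router
from utils import setup_chat_analysis

app = FastAPI(title="Status Law Assistant API")
app.include_router(analysis_router)

# setup_chat_analysis() returns an in-memory ChatAnalyzer plus a logging callable
analyzer, log_interaction = setup_chat_analysis()


class ChatRequest(BaseModel):
    message: str


@app.post("/chat")
async def chat(request: ChatRequest):
    context = ""  # placeholder; the real handler builds context from retrieved documents
    answer = f"Echo: {request.message}"  # placeholder for the model's answer
    # Record the exchange in memory and append it to chat_history/chat_logs.json
    log_interaction(request.message, answer, context)
    return {"response": answer}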
requirements.txt
CHANGED
@@ -15,3 +15,6 @@ python-multipart
 pandas
 langchain
 plotly
+pytest
+httpx
+pytest-asyncio
setup_structure.sh
ADDED
@@ -0,0 +1,16 @@
+# Create the main directory structure
+mkdir -p api utils tests chat_history vector_store
+
+# Create the Python files
+touch api/__init__.py
+touch utils/__init__.py
+touch tests/__init__.py
+touch tests/test_app.py
+
+# Move the existing files
+mv chat_analysis.py utils/
+mv analysis.py api/
+
+# Create .gitkeep files for the empty directories
+touch chat_history/.gitkeep
+touch vector_store/.gitkeep
tests/__init__.py
ADDED
@@ -0,0 +1,6 @@
+import os
+import sys
+
+# Add project root to Python path for imports in tests
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.insert(0, project_root)
tests/test_app.py
ADDED
@@ -0,0 +1,103 @@
+import pytest
+from fastapi.testclient import TestClient
+from app import app
+import os
+import json
+from utils import ChatAnalyzer
+from api import LogAnalyzer
+
+client = TestClient(app)
+
+# Test fixtures
+@pytest.fixture
+def test_log_file(tmp_path):
+    """Create a temporary file with test logs"""
+    log_file = tmp_path / "test_chat_logs.json"
+    test_logs = [
+        {
+            "timestamp": "2024-01-01T12:00:00",
+            "user_input": "Test question",
+            "bot_response": "Test response",
+            "context": "Test context",
+            "kb_version": "20240101"
+        }
+    ]
+
+    with open(log_file, 'w', encoding='utf-8') as f:
+        for log in test_logs:
+            f.write(json.dumps(log) + '\n')
+    return log_file
+
+# API endpoint tests
+def test_chat_endpoint():
+    """Test the main chat endpoint"""
+    response = client.post(
+        "/chat",
+        json={"message": "What services do you provide?"}
+    )
+    assert response.status_code == 200
+    assert "response" in response.json()
+
+def test_analysis_basic():
+    """Test the basic analysis endpoint"""
+    response = client.get("/api/analysis/basic")
+    assert response.status_code == 200
+    data = response.json()
+    assert "total_interactions" in data
+
+def test_analysis_temporal():
+    """Test the temporal analysis endpoint"""
+    response = client.get("/api/analysis/temporal")
+    assert response.status_code == 200
+    data = response.json()
+    assert "daily_activity" in data
+    assert "hourly_pattern" in data
+
+# Component tests
+def test_chat_analyzer():
+    """Test the ChatAnalyzer class"""
+    analyzer = ChatAnalyzer()
+    assert hasattr(analyzer, 'analyze_interaction')
+    assert hasattr(analyzer, 'create_analysis_dashboard')
+
+def test_log_analyzer(test_log_file):
+    """Test the LogAnalyzer class"""
+    analyzer = LogAnalyzer(log_path=str(test_log_file))
+    stats = analyzer.get_basic_stats()
+    assert "total_interactions" in stats
+    assert stats["total_interactions"] == 1
+
+# Utility tests
+def test_knowledge_base():
+    """Test access to the knowledge base"""
+    vector_store_path = "vector_store"
+    assert os.path.exists(vector_store_path)
+
+def test_environment():
+    """Test the environment configuration"""
+    assert "GROQ_API_KEY" in os.environ
+
+# Error handling tests
+def test_chat_endpoint_error():
+    """Test error handling in the chat endpoint"""
+    response = client.post(
+        "/chat",
+        json={"message": ""}  # Empty request
+    )
+    assert response.status_code == 422  # Validation error
+
+def test_analysis_error():
+    """Test error handling in analysis"""
+    # Temporarily change the log path to a nonexistent one
+    original_path = LogAnalyzer._default_log_path
+    LogAnalyzer._default_log_path = "nonexistent/path.json"
+
+    response = client.get("/api/analysis/basic")
+    assert response.status_code == 200
+    assert response.json() == {}
+
+    # Restore the original path
+    LogAnalyzer._default_log_path = original_path
+
+if __name__ == "__main__":
+    pytest.main([__file__])
utils/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from .chat_analysis import ChatAnalyzer, ChatAnalysis, setup_chat_analysis
+
+__all__ = [
+    "ChatAnalyzer",
+    "ChatAnalysis",
+    "setup_chat_analysis"
+]
utils/chat_analysis.py
ADDED
@@ -0,0 +1,111 @@
+import pandas as pd
+from datetime import datetime
+import json
+from typing import List, Dict
+from dataclasses import dataclass, asdict
+
+@dataclass
+class ChatAnalysis:
+    """Class for storing analysis results of a single chat interaction"""
+    timestamp: str
+    user_input: str
+    bot_response: str
+    context: str
+    kb_version: str
+    response_time: float
+    tokens_used: int
+    context_relevance_score: float
+
+class ChatAnalyzer:
+    """Class for analyzing chat interactions"""
+    def __init__(self):
+        self.analyses: List[ChatAnalysis] = []
+
+    def load_logs(self, log_file_path: str) -> List[Dict]:
+        """Load and parse chat logs from JSON file"""
+        logs = []
+        with open(log_file_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                try:
+                    logs.append(json.loads(line.strip()))
+                except json.JSONDecodeError:
+                    continue
+        return logs
+
+    def analyze_interaction(self, log_entry: Dict) -> ChatAnalysis:
+        """Analyze a single chat interaction"""
+        timestamp = datetime.fromisoformat(log_entry["timestamp"])
+        response_time = len(log_entry["bot_response"]) * 0.01
+        tokens_used = len(log_entry["bot_response"].split()) + len(log_entry["user_input"].split())
+
+        context_relevance = self._calculate_context_relevance(
+            log_entry["user_input"],
+            log_entry["context"],
+            log_entry["bot_response"]
+        )
+
+        return ChatAnalysis(
+            timestamp=timestamp.isoformat(),
+            user_input=log_entry["user_input"],
+            bot_response=log_entry["bot_response"],
+            context=log_entry["context"],
+            kb_version=log_entry["kb_version"],
+            response_time=response_time,
+            tokens_used=tokens_used,
+            context_relevance_score=context_relevance
+        )
+
+    def _calculate_context_relevance(self, query: str, context: str, response: str) -> float:
+        """Calculate relevance score between query and provided context"""
+        query_terms = set(query.lower().split())
+        context_terms = set(context.lower().split())
+        response_terms = set(response.lower().split())
+
+        query_context_overlap = len(query_terms & context_terms)
+        context_response_overlap = len(context_terms & response_terms)
+
+        if not query_terms or not context_terms:
+            return 0.0
+
+        return (query_context_overlap + context_response_overlap) / (len(query_terms) + len(context_terms))
+
+    def get_analysis_data(self) -> Dict:
+        """Get aggregated analysis data"""
+        df = pd.DataFrame([asdict(a) for a in self.analyses])
+
+        if df.empty:
+            return {
+                "total_interactions": 0,
+                "avg_response_time": 0,
+                "avg_relevance": 0,
+                "total_tokens": 0
+            }
+
+        return {
+            "total_interactions": len(df),
+            "avg_response_time": float(df['response_time'].mean()),
+            "avg_relevance": float(df['context_relevance_score'].mean()),
+            "total_tokens": int(df['tokens_used'].sum())
+        }
+
+def setup_chat_analysis():
+    """Initialize and configure chat analysis system"""
+    analyzer = ChatAnalyzer()
+
+    def enhanced_log_interaction(user_input: str, bot_response: str, context: str):
+        log_entry = {
+            "timestamp": datetime.now().isoformat(),
+            "user_input": user_input,
+            "bot_response": bot_response,
+            "context": context,
+            "kb_version": datetime.now().strftime("%Y%m%d-%H%M%S")
+        }
+
+        analysis = analyzer.analyze_interaction(log_entry)
+        analyzer.analyses.append(analysis)
+
+        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as f:
+            json.dump(log_entry, f, ensure_ascii=False)
+            f.write("\n")
+
+    return analyzer, enhanced_log_interaction
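For orientation, a short offline usage sketch of the module above, assuming interactions have already been appended to chat_history/chat_logs.json by enhanced_log_interaction (the path and field names follow the code as written):

# Offline usage sketch for utils/chat_analysis.py (illustrative, not part of the commit).
from utils import ChatAnalyzer

analyzer = ChatAnalyzer()

# load_logs expects one JSON object per line, as written by enhanced_log_interaction
for entry in analyzer.load_logs("chat_history/chat_logs.json"):
    analyzer.analyses.append(analyzer.analyze_interaction(entry))

# Aggregated metrics: total_interactions, avg_response_time, avg_relevance, total_tokens
print(analyzer.get_analysis_data())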