File size: 4,025 Bytes
8c4af83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
from datetime import datetime
import json
from typing import List, Dict
from dataclasses import dataclass, asdict

@dataclass
class ChatAnalysis:
    """Class for storing analysis results of a single chat interaction"""
    timestamp: str
    user_input: str
    bot_response: str
    context: str
    kb_version: str
    response_time: float
    tokens_used: int
    context_relevance_score: float

class ChatAnalyzer:
    """Class for analyzing chat interactions"""
    def __init__(self):
        self.analyses: List[ChatAnalysis] = []
        
    def load_logs(self, log_file_path: str) -> List[Dict]:
        """Load and parse chat logs from JSON file"""
        logs = []
        with open(log_file_path, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    logs.append(json.loads(line.strip()))
                except json.JSONDecodeError:
                    continue
        return logs

    def analyze_interaction(self, log_entry: Dict) -> ChatAnalysis:
        """Analyze a single chat interaction"""
        timestamp = datetime.fromisoformat(log_entry["timestamp"])
        response_time = len(log_entry["bot_response"]) * 0.01
        tokens_used = len(log_entry["bot_response"].split()) + len(log_entry["user_input"].split())
        
        context_relevance = self._calculate_context_relevance(
            log_entry["user_input"],
            log_entry["context"],
            log_entry["bot_response"]
        )
        
        return ChatAnalysis(
            timestamp=timestamp.isoformat(),
            user_input=log_entry["user_input"],
            bot_response=log_entry["bot_response"],
            context=log_entry["context"],
            kb_version=log_entry["kb_version"],
            response_time=response_time,
            tokens_used=tokens_used,
            context_relevance_score=context_relevance
        )

    def _calculate_context_relevance(self, query: str, context: str, response: str) -> float:
        """Calculate relevance score between query and provided context"""
        query_terms = set(query.lower().split())
        context_terms = set(context.lower().split())
        response_terms = set(response.lower().split())
        
        query_context_overlap = len(query_terms & context_terms)
        context_response_overlap = len(context_terms & response_terms)
        
        if not query_terms or not context_terms:
            return 0.0
            
        return (query_context_overlap + context_response_overlap) / (len(query_terms) + len(context_terms))

    def get_analysis_data(self) -> Dict:
        """Get aggregated analysis data"""
        df = pd.DataFrame([asdict(a) for a in self.analyses])
        
        if df.empty:
            return {
                "total_interactions": 0,
                "avg_response_time": 0,
                "avg_relevance": 0,
                "total_tokens": 0
            }
        
        return {
            "total_interactions": len(df),
            "avg_response_time": float(df['response_time'].mean()),
            "avg_relevance": float(df['context_relevance_score'].mean()),
            "total_tokens": int(df['tokens_used'].sum())
        }

def setup_chat_analysis():
    """Initialize and configure chat analysis system"""
    analyzer = ChatAnalyzer()
    
    def enhanced_log_interaction(user_input: str, bot_response: str, context: str):
        log_entry = {
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "bot_response": bot_response,
            "context": context,
            "kb_version": datetime.now().strftime("%Y%m%d-%H%M%S")
        }
        
        analysis = analyzer.analyze_interaction(log_entry)
        analyzer.analyses.append(analysis)
        
        with open("chat_history/chat_logs.json", "a", encoding="utf-8") as f:
            json.dump(log_entry, f, ensure_ascii=False)
            f.write("\n")
    
    return analyzer, enhanced_log_interaction