SamanthaStorm committed on
Commit
37c4d94
·
verified ·
1 Parent(s): 8538561

Create analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +314 -153
analyzer.py CHANGED
@@ -1,165 +1,326 @@
1
- import json
2
- from datetime import datetime
3
  import pandas as pd
4
  import numpy as np
 
 
 
5
  from collections import Counter
6
- from dataclasses import asdict
7
- from typing import Dict, List
8
- from models import MessageAnalysis, RiskTrend
9
- from utils import logger
10
-
11
- class TetherProAnalyzer:
12
- """Comprehensive temporal analysis for Tether Pro"""
13
 
14
- def __init__(self):
15
- self.conversation_history: List[MessageAnalysis] = []
 
16
 
17
- def analyze_conversation_history(self, messages_json: str) -> Dict:
18
- """Parse JSON and run full temporal analysis"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  try:
20
- raw = json.loads(messages_json)
21
- self.conversation_history = []
22
- for i, msg in enumerate(raw):
23
- ma = MessageAnalysis(
24
- timestamp=msg.get('timestamp', datetime.now().isoformat()),
25
- message_id=msg.get('id', f"msg_{i}"),
26
- text=msg.get('text', msg.get('message', '')),
27
- sender=msg.get('sender', 'unknown'),
28
- abuse_score=float(msg.get('abuse_score', 0)),
29
- darvo_score=float(msg.get('darvo_score', 0)),
30
- boundary_health=msg.get('boundary_health', 'unknown'),
31
- detected_patterns=msg.get('patterns', msg.get('detected_patterns', [])),
32
- emotional_tone=msg.get('emotional_tone', 'neutral'),
33
- risk_level=msg.get('risk_level', 'low')
34
- )
35
- self.conversation_history.append(ma)
36
- return self._perform_temporal_analysis()
37
- except Exception as e:
38
- logger.error(f"Error in analyze_conversation_history: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  return {
40
- 'error': f"Analysis failed: {e}",
41
- 'total_messages': 0,
42
- 'temporal_analysis': {},
43
- 'recommendations': []
 
 
 
 
 
 
44
  }
45
-
46
- def _perform_temporal_analysis(self) -> Dict:
47
- """Perform comprehensive temporal analysis"""
48
- n = len(self.conversation_history)
49
- if n < 3:
50
  return {
51
- 'total_messages': n,
52
- 'analysis_status': 'insufficient_data',
53
- 'message': 'Need at least 3 messages for temporal analysis',
54
- 'basic_stats': self._get_basic_stats(),
55
- 'recommendations': ['Upload more conversation history for detailed analysis']
 
 
 
 
 
56
  }
57
-
58
- df = self._to_dataframe()
59
- escalation = self._detect_escalation_trends(df)
60
- cycles = self._detect_cycles(df)
61
- combos = self._analyze_pattern_combinations(df)
62
- risk = self._calculate_risk_trajectory(df)
63
- triggers = self._analyze_temporal_triggers(df)
64
- recs = self._generate_recommendations(escalation, combos, risk)
65
- viz = self._generate_visualizations(df)
66
- date_range = self._get_date_range()
67
-
68
- return {
69
- 'total_messages': n,
70
- 'analysis_status': 'complete',
71
- 'basic_stats': self._get_basic_stats(),
72
- 'temporal_analysis': {
73
- 'escalation_patterns': escalation,
74
- 'cyclical_patterns': cycles,
75
- 'pattern_combinations': combos,
76
- 'temporal_triggers': triggers
77
- },
78
- 'risk_assessment': risk,
79
- 'professional_recommendations': recs,
80
- 'visualizations': viz,
81
- 'date_range': date_range
82
- }
83
-
84
- def _to_dataframe(self) -> pd.DataFrame:
85
- """Convert conversation history to DataFrame"""
86
- data = []
87
- for msg in self.conversation_history:
88
- try:
89
- ts = datetime.fromisoformat(msg.timestamp.replace('Z', '+00:00'))
90
- except:
91
- ts = datetime.now()
92
- data.append({
93
- 'timestamp': ts,
94
- 'message_id': msg.message_id,
95
- 'sender': msg.sender,
96
- 'abuse_score': msg.abuse_score,
97
- 'darvo_score': msg.darvo_score,
98
- 'patterns': '|'.join(msg.detected_patterns)
99
- })
100
- return pd.DataFrame(data).sort_values('timestamp')
101
-
102
- def _detect_escalation_trends(self, df: pd.DataFrame) -> Dict:
103
- """Detect escalating abuse patterns over time"""
104
- if len(df) < 5:
105
- return {'detected': False, 'reason': 'insufficient_data'}
106
- df['abuse_rolling'] = df['abuse_score'].rolling(3, min_periods=1).mean()
107
- recent = df.tail(10)
108
- if len(recent) < 5:
109
- return {'detected': False, 'reason': 'insufficient_recent_data'}
110
- x = np.arange(len(recent))
111
- y = recent['abuse_rolling'].values
112
- corr = np.corrcoef(x, y)[0,1] if len(x) > 1 else 0
113
- if corr > 0.3:
114
- inc = float(y[-1] - y[0])
115
- severity = 'high' if inc > 20 else 'moderate' if inc > 10 else 'mild'
116
- return {
117
- 'detected': True,
118
- 'severity': severity,
119
- 'increase_amount': round(inc, 1),
120
- 'timeframe': f"Last {len(recent)} messages",
121
- 'confidence': min(abs(corr), 1.0),
122
- 'description': f"Abuse intensity increased by {inc:.1f}% over recent communications"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
- return {'detected': False, 'reason': 'no_escalation_trend'}
125
-
126
- def _detect_cycles(self, df: pd.DataFrame) -> Dict:
127
- """Detect cyclical abuse patterns"""
128
- if len(df) < 15:
129
- return {'detected': False, 'reason': 'insufficient_data_for_cycles'}
130
- df['date'] = df['timestamp'].dt.date
131
- daily = df.groupby('date')['abuse_score'].mean()
132
- if len(daily) < 10:
133
- return {'detected': False, 'reason': 'insufficient_days'}
134
- scores = daily.values
135
- peaks = [
136
- i for i in range(1, len(scores)-1)
137
- if scores[i] > scores[i-1] and scores[i] > scores[i+1] and scores[i] > 60
138
- ]
139
- valleys = [
140
- i for i in range(1, len(scores)-1)
141
- if scores[i] < scores[i-1] and scores[i] < scores[i+1] and scores[i] < 40
142
- ]
143
- if len(peaks) >= 2 and len(valleys) >= 2:
144
- intervals = [peaks[i+1] - peaks[i] for i in range(len(peaks)-1)]
145
- avg = float(np.mean(intervals))
146
- return {
147
- 'detected': True,
148
- 'cycle_count': min(len(peaks), len(valleys)),
149
- 'avg_cycle_length_days': round(avg, 1),
150
- 'pattern_type': 'tension_escalation_reconciliation',
151
- 'confidence': min(len(peaks) / 3.0, 1.0),
152
- 'description': f"Detected {min(len(peaks), len(valleys))} abuse cycles with average length of {avg:.1f} days"
153
  }
154
- return {'detected': False, 'reason': 'no_cyclical_pattern'}
155
-
156
- def _analyze_pattern_combinations(self, df: pd.DataFrame) -> List[Dict]:
157
- """Analyze dangerous pattern combinations"""
158
- allp = []
159
- for s in df['patterns']:
160
- if s:
161
- allp.extend(s.split('|'))
162
- counts = Counter(allp)
163
- combos = [
164
- {'name': 'Control + Manipulation Complex', 'patterns': ['control', 'gaslighting', 'darvo'], 'severity': 'critical'},
165
- {'name': 'Stalking + Threat Pattern', 'patterns': ['stalking language', 'veiled threats
 
 
 
1
  import pandas as pd
2
  import numpy as np
3
+ import logging
4
+ from datetime import datetime
5
+ import traceback
6
  from collections import Counter
 
 
 
 
 
 
 
7
 
8
+ # Set up logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
 
12
class MessageAnalyzer:
    """Analyzes messages and chat histories for abuse patterns.

    Wraps a model manager (sentiment, boundary-health, abuse-pattern,
    DARVO and emotion models) and combines its predictions with
    heuristics from ``utils`` into per-message and per-conversation
    risk assessments.
    """

    def __init__(self, model_manager):
        """Initialize analyzer with model manager.

        Args:
            model_manager: Object exposing ``predict_sentiment``,
                ``predict_boundary_health``, ``predict_abuse_patterns``,
                ``predict_darvo``, ``get_emotion_profile`` and
                ``get_pattern_weight``.
        """
        self.model_manager = model_manager
        # Per-label score thresholds used by predict_abuse_patterns; a
        # pattern is reported when its model score exceeds its threshold.
        self.thresholds = {
            "recovery phase": 0.278,
            "control": 0.287,
            "gaslighting": 0.144,
            "guilt tripping": 0.220,
            "dismissiveness": 0.142,
            "blame shifting": 0.183,
            "projection": 0.253,
            "insults": 0.247,
            "contradictory statements": 0.200,
            "obscure language": 0.455,
            "nonabusive": 0.281,
            "veiled threats": 0.310,
            "stalking language": 0.339,
            "false concern": 0.334,
            "false equivalence": 0.317,
            "future faking": 0.385
        }

    def analyze_message(self, text):
        """Analyze a single message for abuse patterns.

        Args:
            text: Raw message text.

        Returns:
            dict with keys ``abuse_score``, ``detected_patterns``,
            ``matched_scores``, ``sentiment``, ``sentiment_confidence``,
            ``stage``, ``darvo_score``, ``emotional_tone``,
            ``boundary_assessment`` and ``risk_level``. On any internal
            failure a neutral "error" result is returned instead of raising.
        """
        # Imported lazily to avoid a hard import-time dependency on utils.
        from utils import (
            detect_explicit_abuse, detect_enhanced_threats, get_emotional_tone_tag,
            compute_abuse_score, get_boundary_assessment, calculate_enhanced_risk_level
        )

        logger.debug(f"Analyzing message: {text[:50]}...")

        try:
            # Blank/whitespace-only input: nothing to analyze.
            if not text.strip():
                logger.debug("Empty text, returning zeros")
                return {
                    'abuse_score': 0.0,
                    'detected_patterns': [],
                    'matched_scores': [],
                    'sentiment': "neutral",
                    'sentiment_confidence': 0.5,
                    'stage': 1,
                    'darvo_score': 0.0,
                    'emotional_tone': "neutral",
                    'boundary_assessment': {'assessment': 'neutral', 'confidence': 0.5},
                    'risk_level': "Low"
                }

            # Check for explicit abuse (keyword/heuristic based).
            explicit_abuse = detect_explicit_abuse(text)
            logger.debug(f"Explicit abuse detected: {explicit_abuse}")

            # Get sentiment from the model manager.
            sentiment, sentiment_confidence = self.model_manager.predict_sentiment(text)
            logger.debug(f"Sentiment: {sentiment} (confidence: {sentiment_confidence:.3f})")

            # Get boundary health (positive value == healthy boundary).
            boundary_health = self.model_manager.predict_boundary_health(text)
            boundary_assessment = get_boundary_assessment(text, boundary_health)
            logger.debug(f"Boundary health: {boundary_assessment['assessment']}")

            # Early supportive-message check: short messages about mundane
            # topics (broken devices, apologies) are likely innocent.
            innocent_indicators = [
                'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
                'device', 'battery', 'charger', 'wifi', 'internet', 'computer',
                'sorry', 'apologize', 'my fault', 'mistake'
            ]

            # Enhanced early return check - now includes boundary health.
            if (any(indicator in text.lower() for indicator in innocent_indicators) and
                    len(text.split()) < 20 and
                    not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
                    boundary_health > 0):  # Healthy boundary

                # If sentiment is strongly supportive AND boundary health is
                # good, skip the expensive pattern analysis entirely.
                if sentiment == "supportive" and sentiment_confidence > 0.8:
                    logger.debug("Early return: Message appears to be innocent/supportive with healthy boundaries")
                    return {
                        'abuse_score': 0.0,
                        'detected_patterns': [],
                        'matched_scores': [],
                        'sentiment': sentiment,
                        'sentiment_confidence': sentiment_confidence,
                        'stage': 1,
                        'darvo_score': 0.0,
                        'emotional_tone': "neutral",
                        'boundary_assessment': boundary_assessment,
                        'risk_level': "Low"
                    }

            # Get abuse patterns above their per-label thresholds.
            threshold_labels, matched_scores = self.model_manager.predict_abuse_patterns(text, self.thresholds)
            logger.debug(f"Detected patterns: {threshold_labels}")

            # Check for enhanced threats missed by the thresholded model.
            enhanced_patterns = detect_enhanced_threats(text, threshold_labels)
            for pattern in enhanced_patterns:
                if pattern not in threshold_labels:
                    threshold_labels.append(pattern)
                    # Add to matched_scores with high confidence.
                    weight = self.model_manager.get_pattern_weight(pattern)
                    matched_scores.append((pattern, 0.85, weight))

            # Get DARVO (deny/attack/reverse-victim-offender) score.
            darvo_score = self.model_manager.predict_darvo(text)
            logger.debug(f"DARVO score: {darvo_score:.3f}")

            # Get emotion profile for tone tagging.
            emotions = self.model_manager.get_emotion_profile(text)
            logger.debug(f"Emotions: {emotions}")

            # Combine matched pattern scores and sentiment into one score.
            abuse_score = compute_abuse_score(matched_scores, sentiment)
            logger.debug(f"Abuse score: {abuse_score:.1f}")

            # Apply explicit abuse override: floor the score and force the
            # "insults" label so explicit slurs are never under-reported.
            if explicit_abuse:
                abuse_score = max(abuse_score, 70.0)
                if "insults" not in threshold_labels:
                    threshold_labels.append("insults")
                    matched_scores.append(("insults", 0.9, 1.4))

            # Apply boundary health modifier to abuse score.
            if boundary_health > 0 and not explicit_abuse:
                # Healthy boundaries - cap abuse score lower.
                abuse_score = min(abuse_score, 35.0)
                logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")

            # Apply sentiment-based score capping.
            if sentiment == "supportive" and not explicit_abuse:
                # For supportive messages, cap the abuse score much lower.
                abuse_score = min(abuse_score, 30.0)
                logger.debug(f"Capped abuse score to {abuse_score} due to supportive sentiment")

            # Get emotional tone tag from combined signals.
            emotional_tone = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score, emotions)
            logger.debug(f"Emotional tone: {emotional_tone}")

            # Stage 2 == confirmed abusive content, stage 1 == baseline.
            stage = 2 if explicit_abuse or abuse_score > 70 else 1

            # Calculate risk level from score, patterns and DARVO.
            risk_level = calculate_enhanced_risk_level(
                abuse_score,
                threshold_labels,
                "Low" if abuse_score < 50 else "Moderate" if abuse_score < 70 else "High",
                darvo_score
            )

            return {
                'abuse_score': abuse_score,
                'detected_patterns': threshold_labels,
                'matched_scores': matched_scores,
                'sentiment': sentiment,
                'sentiment_confidence': sentiment_confidence,
                'stage': stage,
                'darvo_score': darvo_score,
                'emotional_tone': emotional_tone,
                'boundary_assessment': boundary_assessment,
                'risk_level': risk_level
            }

        except Exception as e:
            # Never crash a batch run on a single bad message; return a
            # sentinel "error" record the caller can detect.
            logger.error(f"Error in analyze_message: {e}")
            logger.error(traceback.format_exc())
            return {
                'abuse_score': 0.0,
                'detected_patterns': [],
                'matched_scores': [],
                'sentiment': "error",
                'sentiment_confidence': 0.0,
                'stage': 1,
                'darvo_score': 0.0,
                'emotional_tone': "error",
                'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
                'risk_level': "Unknown"
            }

    def analyze_chat_history(self, df):
        """Analyze entire chat history.

        Args:
            df: pandas DataFrame with at least ``message``, ``sender``
                and ``timestamp`` columns.

        Returns:
            ``(results_df, summary)`` where ``results_df`` is ``df`` plus
            per-message analysis columns and ``summary`` aggregates
            sender stats, escalation data, overall risk, a safety plan
            and recommendations. On failure returns the original ``df``
            with an "Unknown"-risk summary instead of raising.
        """
        # Imported lazily to avoid a hard import-time dependency on utils.
        from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations

        logger.info(f"Analyzing chat history with {len(df)} messages")

        try:
            # Create results dataframe (leave the caller's df untouched).
            results_df = df.copy()

            # Add analysis columns with neutral defaults.
            results_df['abuse_score'] = 0.0
            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
            results_df['sentiment'] = "neutral"
            results_df['darvo_score'] = 0.0
            results_df['emotional_tone'] = "neutral"
            results_df['boundary_health'] = "unknown"
            results_df['risk_level'] = "Low"

            # Analyze each message and write results back by label.
            for i, row in results_df.iterrows():
                analysis = self.analyze_message(row['message'])

                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
                results_df.at[i, 'sentiment'] = analysis['sentiment']
                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
                results_df.at[i, 'risk_level'] = analysis['risk_level']

            # Calculate per-sender statistics.
            sender_stats = {}
            for sender in results_df['sender'].unique():
                sender_df = results_df[results_df['sender'] == sender]

                # Calculate key metrics.
                avg_abuse = sender_df['abuse_score'].mean()
                max_abuse = sender_df['abuse_score'].max()

                # Get most common patterns for this sender.
                all_patterns = []
                for patterns in sender_df['detected_patterns']:
                    if patterns:
                        all_patterns.extend(patterns)

                pattern_counts = Counter(all_patterns)
                most_common = pattern_counts.most_common(3)

                # Calculate percentage of abusive messages (score >= 50).
                # BUGFIX: the original had a dangling `abusive_pct =`
                # statement (syntax error) followed by a duplicated
                # recalculation; computed exactly once here.
                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0

                # Store stats.
                sender_stats[sender] = {
                    'message_count': len(sender_df),
                    'avg_abuse_score': avg_abuse,
                    'max_abuse_score': max_abuse,
                    'abusive_message_count': abusive_count,
                    'abusive_message_pct': abusive_pct,
                    'common_patterns': most_common
                }

            # Detect escalation patterns across the whole conversation.
            escalation_data = detect_escalation_patterns(results_df)

            # Overall risk is the worst per-message risk present.
            if results_df['risk_level'].isin(['Critical']).any():
                overall_risk = "Critical"
            elif results_df['risk_level'].isin(['High']).any():
                overall_risk = "High"
            elif results_df['risk_level'].isin(['Moderate']).any():
                overall_risk = "Moderate"
            else:
                overall_risk = "Low"

            # Generate safety plan from all detected patterns.
            all_patterns = []
            for patterns in results_df['detected_patterns']:
                if patterns:
                    all_patterns.extend(patterns)

            safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)

            # Generate professional recommendations.
            recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)

            # Identify primary abuser (if any): the sender with the highest
            # abusive-message percentage among senders with >= 5 messages.
            primary_abuser = None
            max_abusive_pct = 0

            for sender, stats in sender_stats.items():
                if stats['message_count'] >= 5 and stats['abusive_message_pct'] > max_abusive_pct:
                    max_abusive_pct = stats['abusive_message_pct']
                    primary_abuser = sender

            # Only identify a primary abuser with significant abusive content.
            if max_abusive_pct < 20:
                primary_abuser = None

            # Prepare summary.
            summary = {
                'message_count': len(results_df),
                'date_range': {
                    'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
                    'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
                },
                'overall_risk_level': overall_risk,
                'sender_stats': sender_stats,
                'primary_abuser': primary_abuser,
                'escalation_data': escalation_data,
                'safety_plan': safety_plan,
                'recommendations': recommendations
            }

            return results_df, summary

        except Exception as e:
            # Degrade gracefully: hand back the unmodified input with an
            # "Unknown" summary rather than crashing the caller.
            logger.error(f"Error in analyze_chat_history: {e}")
            logger.error(traceback.format_exc())
            return df, {
                'message_count': len(df),
                'date_range': {
                    'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
                    'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
                },
                'overall_risk_level': "Unknown",
                'sender_stats': {},
                'primary_abuser': None,
                'escalation_data': {},
                'safety_plan': "Error generating safety plan.",
                'recommendations': []
            }