SamanthaStorm committed on
Commit ef93a8b · verified · 1 Parent(s): 67ca8dc

Create analyzer.py

Files changed (1): analyzer.py +530 -313
analyzer.py CHANGED
@@ -1,9 +1,9 @@
 import pandas as pd
 import numpy as np
 import logging
-from datetime import datetime
 import traceback
 from collections import Counter

 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -31,8 +31,29 @@ class MessageAnalyzer:
             "false equivalence": 0.317,
             "future faking": 0.385
         }
     def analyze_message(self, text):
-        """Analyze a single message for abuse patterns"""
         from utils import (
             detect_explicit_abuse, detect_enhanced_threats, get_emotional_tone_tag,
             compute_abuse_score, get_boundary_assessment, calculate_enhanced_risk_level
@@ -42,368 +63,409 @@ class MessageAnalyzer:

         try:
             if not text.strip():
-                logger.debug("Empty text, returning zeros")
-                return {
-                    'abuse_score': 0.0,
-                    'detected_patterns': [],
-                    'matched_scores': [],
-                    'sentiment': "neutral",
-                    'sentiment_confidence': 0.5,
-                    'stage': 1,
-                    'darvo_score': 0.0,
-                    'emotional_tone': "neutral",
-                    'boundary_assessment': {'assessment': 'neutral', 'confidence': 0.5},
-                    'risk_level': "Low"
-                }
-
-            # Check for explicit abuse
-            explicit_abuse = detect_explicit_abuse(text)
-            logger.debug(f"Explicit abuse detected: {explicit_abuse}")
-
-            # Get sentiment
-            sentiment, sentiment_confidence = self.model_manager.predict_sentiment(text)
-            logger.debug(f"Sentiment: {sentiment} (confidence: {sentiment_confidence:.3f})")
-
-            # Get boundary health
-            boundary_health = self.model_manager.predict_boundary_health(text)
-            boundary_assessment = get_boundary_assessment(text, boundary_health)
-            logger.debug(f"Boundary health: {boundary_assessment['assessment']}")
-
-            # Early supportive message check
-            innocent_indicators = [
-                'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
-                'device', 'battery', 'charger', 'wifi', 'internet', 'computer',
-                'sorry', 'apologize', 'my fault', 'mistake'
-            ]
-
-            # Enhanced early return check - now includes boundary health
-            if (any(indicator in text.lower() for indicator in innocent_indicators) and
-                len(text.split()) < 20 and
-                not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
-                boundary_health > 0):  # Healthy boundary
-
-                # If sentiment is strongly supportive AND boundary health is good, return early
-                if sentiment == "supportive" and sentiment_confidence > 0.8:
-                    logger.debug("Early return: Message appears to be innocent/supportive with healthy boundaries")
-                    return {
-                        'abuse_score': 0.0,
-                        'detected_patterns': [],
-                        'matched_scores': [],
-                        'sentiment': sentiment,
-                        'sentiment_confidence': sentiment_confidence,
-                        'stage': 1,
-                        'darvo_score': 0.0,
-                        'emotional_tone': "neutral",
-                        'boundary_assessment': boundary_assessment,
-                        'risk_level': "Low"
-                    }
-
-            # Get abuse patterns
-            threshold_labels, matched_scores = self.model_manager.predict_abuse_patterns(text, self.thresholds)
-            logger.debug(f"Detected patterns: {threshold_labels}")

-            # Check for enhanced threats
-            enhanced_patterns = detect_enhanced_threats(text, threshold_labels)
-            for pattern in enhanced_patterns:
-                if pattern not in threshold_labels:
-                    threshold_labels.append(pattern)
-                    # Add to matched_scores with high confidence
-                    weight = self.model_manager.get_pattern_weight(pattern)
-                    matched_scores.append((pattern, 0.85, weight))

-            # Get DARVO score
-            darvo_score = self.model_manager.predict_darvo(text)
-            logger.debug(f"DARVO score: {darvo_score:.3f}")

-            # Get emotions
-            emotions = self.model_manager.get_emotion_profile(text)
-            logger.debug(f"Emotions: {emotions}")
-
-            # Calculate abuse score
-            abuse_score = compute_abuse_score(matched_scores, sentiment)
-            logger.debug(f"Abuse score: {abuse_score:.1f}")
-
-            # Apply explicit abuse override
-            if explicit_abuse:
-                abuse_score = max(abuse_score, 70.0)
-                if "insults" not in threshold_labels:
-                    threshold_labels.append("insults")
-                    matched_scores.append(("insults", 0.9, 1.4))
-
-            # Apply boundary health modifier to abuse score
-            if boundary_health > 0 and not explicit_abuse:
-                # Healthy boundaries - cap abuse score lower
-                abuse_score = min(abuse_score, 35.0)
-                logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")
-
-            # Apply sentiment-based score capping
-            if sentiment == "supportive" and not explicit_abuse:
-                # For supportive messages, cap the abuse score much lower
-                abuse_score = min(abuse_score, 30.0)
-                logger.debug(f"Capped abuse score to {abuse_score} due to supportive sentiment")

-            # Get emotional tone
-            emotional_tone = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score, emotions)
-            logger.debug(f"Emotional tone: {emotional_tone}")

-            # Set stage
-            stage = 2 if explicit_abuse or abuse_score > 70 else 1

-            # Calculate risk level
-            risk_level = calculate_enhanced_risk_level(
-                abuse_score,
-                threshold_labels,
-                "Low" if abuse_score < 50 else "Moderate" if abuse_score < 70 else "High",
-                darvo_score
             )

             return {
-                'abuse_score': abuse_score,
-                'detected_patterns': threshold_labels,
-                'matched_scores': matched_scores,
-                'sentiment': sentiment,
-                'sentiment_confidence': sentiment_confidence,
-                'stage': stage,
-                'darvo_score': darvo_score,
-                'emotional_tone': emotional_tone,
-                'boundary_assessment': boundary_assessment,
-                'risk_level': risk_level
             }

         except Exception as e:
             logger.error(f"Error in analyze_message: {e}")
             logger.error(traceback.format_exc())
             return {
                 'abuse_score': 0.0,
                 'detected_patterns': [],
                 'matched_scores': [],
-                'sentiment': "error",
-                'sentiment_confidence': 0.0,
                 'stage': 1,
                 'darvo_score': 0.0,
-                'emotional_tone': "error",
-                'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
-                'risk_level': "Unknown"
             }
-    def identify_primary_abuser(self, results_df):
-        """Identify the primary abuser based on comprehensive abuse metrics with pattern severity weighting"""
-        logger.info("Identifying primary abuser...")
-
-        # Define pattern severity weights (higher = more concerning)
-        PATTERN_WEIGHTS = {
-            "recovery phase": 0.7,
-            "control": 1.4,
-            "gaslighting": 1.3,
-            "guilt tripping": 1.2,
-            "dismissiveness": 0.9,
-            "blame shifting": 1.0,
-            "projection": 0.5,
-            "insults": 1.4,
-            "contradictory statements": 1.0,
-            "obscure language": 0.9,
-            "nonabusive": 0.0,  # Zero weight for nonabusive pattern
-            "veiled threats": 1.6,
-            "stalking language": 1.8,
-            "false concern": 1.1,
-            "false equivalence": 1.3,
-            "future faking": 0.8
         }

         sender_abuse_metrics = {}

         for sender in results_df['sender'].unique():
             sender_df = results_df[results_df['sender'] == sender]

-            if len(sender_df) < 3:  # Need minimum messages for reliable assessment
                 continue
-
-            # Calculate comprehensive abuse metrics
-            avg_abuse = sender_df['abuse_score'].mean()
-            max_abuse = sender_df['abuse_score'].max()
-            abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
-            abusive_pct = (abusive_count / len(sender_df)) * 100
-
-            # Calculate pattern-weighted score
-            pattern_counts = Counter()
-            for patterns in sender_df['detected_patterns']:
-                pattern_counts.update(patterns)

-            # Calculate weighted pattern score
-            total_pattern_weight = 0
-            for pattern, count in pattern_counts.items():
-                weight = PATTERN_WEIGHTS.get(pattern, 1.0)  # Default weight of 1.0
-                total_pattern_weight += count * weight
-
-            # Normalize by message count
-            weighted_pattern_score = total_pattern_weight / len(sender_df) if len(sender_df) > 0 else 0
-
-            # DARVO score
-            avg_darvo = sender_df['darvo_score'].mean()
-            high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
-
-            # Risk level distribution
-            high_risk_count = len(sender_df[sender_df['risk_level'].isin(['High', 'Critical'])])
-            high_risk_pct = (high_risk_count / len(sender_df)) * 100
-
-            # Composite abuse score (weighted combination of factors)
-            composite_score = (
-                avg_abuse * 0.25 +
-                abusive_pct * 0.2 +
-                weighted_pattern_score * 15 +  # Heavily weight the pattern score
-                avg_darvo * 100 * 0.15 +
-                high_risk_pct * 0.1
-            )
-
-            # Store detailed pattern information for reporting
-            pattern_details = [
-                {
-                    'pattern': pattern,
-                    'count': count,
-                    'weight': PATTERN_WEIGHTS.get(pattern, 1.0),
-                    'weighted_score': count * PATTERN_WEIGHTS.get(pattern, 1.0)
-                }
-                for pattern, count in pattern_counts.items()
-            ]
-
-            # Sort patterns by weighted score
-            pattern_details.sort(key=lambda x: x['weighted_score'], reverse=True)
-
-            sender_abuse_metrics[sender] = {
-                'message_count': len(sender_df),
-                'avg_abuse_score': avg_abuse,
-                'max_abuse_score': max_abuse,
-                'abusive_message_pct': abusive_pct,
-                'pattern_details': pattern_details,
-                'weighted_pattern_score': weighted_pattern_score,
-                'avg_darvo_score': avg_darvo,
-                'high_risk_pct': high_risk_pct,
-                'composite_score': composite_score
-            }

         if not sender_abuse_metrics:
             return None, sender_abuse_metrics

-        # Find primary abuser (highest composite score with minimum thresholds)
-        primary_abuser = None
-        max_composite = 0
-
-        for sender, metrics in sender_abuse_metrics.items():
-            if (metrics['composite_score'] > max_composite and
-                metrics['message_count'] >= 5):
-                max_composite = metrics['composite_score']
-                primary_abuser = sender

         logger.info(f"Primary abuser identified: {primary_abuser}")
         return primary_abuser, sender_abuse_metrics
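
For scale: a worked example of the removed composite formula above, with hand-picked illustrative inputs (not from real data), showing how the * 15 multiplier lets the pattern-weighted term dominate.

# avg_abuse=40, abusive_pct=30, weighted_pattern_score=2.5, avg_darvo=0.5, high_risk_pct=20
composite = 40 * 0.25 + 30 * 0.2 + 2.5 * 15 + 0.5 * 100 * 0.15 + 20 * 0.1
# = 10.0 + 6.0 + 37.5 + 7.5 + 2.0 = 63.0 -> the pattern term contributes over half
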
     def analyze_chat_history(self, df):
-        """Analyze entire chat history"""
         from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations

         logger.info(f"Analyzing chat history with {len(df)} messages")

         try:
-            # Create results dataframe
             results_df = df.copy()

-            # Add analysis columns
-            results_df['abuse_score'] = 0.0
-            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
-            results_df['sentiment'] = "neutral"
-            results_df['darvo_score'] = 0.0
-            results_df['emotional_tone'] = "neutral"
-            results_df['boundary_health'] = "unknown"
-            results_df['risk_level'] = "Low"

-            # Analyze each message
             for i, row in results_df.iterrows():
                 analysis = self.analyze_message(row['message'])

-                # Update dataframe with analysis results
-                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
-                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
-                results_df.at[i, 'sentiment'] = analysis['sentiment']
-                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
-                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
-                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
-                results_df.at[i, 'risk_level'] = analysis['risk_level']

-            # Calculate sender statistics
-            sender_stats = {}
-            for sender in results_df['sender'].unique():
-                sender_df = results_df[results_df['sender'] == sender]
-
-                # Calculate key metrics
-                avg_abuse = sender_df['abuse_score'].mean()
-                max_abuse = sender_df['abuse_score'].max()
-
-                # Get most common patterns
-                all_patterns = []
-                for patterns in sender_df['detected_patterns']:
-                    if patterns:
-                        all_patterns.extend(patterns)
-
-                pattern_counts = Counter(all_patterns)
-                most_common = pattern_counts.most_common(5)  # Get top 5 patterns
-
-                # Calculate percentage of abusive messages
-                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
-                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
-
-                # Calculate emotional tone distribution
-                tone_counts = Counter(sender_df['emotional_tone'])
-                most_common_tones = tone_counts.most_common(3)  # Get top 3 emotional tones
-
-                # Calculate DARVO score statistics
-                avg_darvo = sender_df['darvo_score'].mean()
-                high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
-                high_darvo_pct = (high_darvo_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
-
-                # Calculate risk level distribution
-                risk_counts = Counter(sender_df['risk_level'])
-
-                # Store stats
-                sender_stats[sender] = {
-                    'message_count': len(sender_df),
-                    'avg_abuse_score': avg_abuse,
-                    'max_abuse_score': max_abuse,
-                    'abusive_message_count': abusive_count,
-                    'abusive_message_pct': abusive_pct,
-                    'common_patterns': most_common,
-                    'emotional_tones': most_common_tones,
-                    'avg_darvo_score': avg_darvo,
-                    'high_darvo_count': high_darvo_count,
-                    'high_darvo_pct': high_darvo_pct,
-                    'risk_levels': risk_counts
-                }

-            # Identify primary abuser
             primary_abuser, sender_abuse_metrics = self.identify_primary_abuser(results_df)

             # Detect escalation patterns
             escalation_data = detect_escalation_patterns(results_df)

             # Determine overall risk level
-            if results_df['risk_level'].isin(['Critical']).any():
-                overall_risk = "Critical"
-            elif results_df['risk_level'].isin(['High']).any():
-                overall_risk = "High"
-            elif results_df['risk_level'].isin(['Moderate']).any():
-                overall_risk = "Moderate"
-            else:
-                overall_risk = "Low"

-            # Generate safety plan
             all_patterns = []
             for patterns in results_df['detected_patterns']:
                 if patterns:
                     all_patterns.extend(patterns)

             safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
-
-            # Generate professional recommendations
             recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)

-            # Prepare summary
             summary = {
                 'message_count': len(results_df),
                 'date_range': {
@@ -416,7 +478,9 @@ class MessageAnalyzer:
                 'primary_abuser': primary_abuser,
                 'escalation_data': escalation_data,
                 'safety_plan': safety_plan,
-                'recommendations': recommendations
             }

             return results_df, summary
@@ -424,17 +488,170 @@ class MessageAnalyzer:
         except Exception as e:
             logger.error(f"Error in analyze_chat_history: {e}")
             logger.error(traceback.format_exc())
-            return df, {
-                'message_count': len(df),
-                'date_range': {
-                    'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
-                    'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
-                },
-                'overall_risk_level': "Unknown",
-                'sender_stats': {},
-                'sender_abuse_metrics': {},
-                'primary_abuser': None,
-                'escalation_data': {},
-                'safety_plan': "Error generating safety plan.",
-                'recommendations': []
             }

 import pandas as pd
 import numpy as np
 import logging
 import traceback
 from collections import Counter
+from typing import Dict, List, Tuple, Any

 # Set up logging
 logging.basicConfig(level=logging.INFO)
 
             "false equivalence": 0.317,
             "future faking": 0.385
         }
+
+        # Fallacy severity weights for enhanced scoring
+        self.fallacy_weights = {
+            'Ad Hominem': 1.2,
+            'Strawman': 1.0,
+            'Whataboutism': 0.8,
+            'Gaslighting': 1.8,
+            'False Dichotomy': 1.0,
+            'Appeal to Emotion': 1.3,
+            'DARVO': 1.9,
+            'Moving Goalposts': 1.1,
+            'Cherry Picking': 0.9,
+            'Appeal to Authority': 0.7,
+            'Slippery Slope': 0.8,
+            'Motte and Bailey': 1.0,
+            'Gish Gallop': 1.1,
+            'Kafkatrapping': 1.4,
+            'Sealioning': 1.0,
+            'No Fallacy': 0.0
+        }
+
     def analyze_message(self, text):
+        """Enhanced message analysis incorporating fallacy detection"""
         from utils import (
             detect_explicit_abuse, detect_enhanced_threats, get_emotional_tone_tag,
             compute_abuse_score, get_boundary_assessment, calculate_enhanced_risk_level

         try:
             if not text.strip():
+                return self._get_empty_analysis()

+            # Get base analysis using your existing models
+            base_analysis = self._get_base_analysis(text)

+            # Add fallacy analysis using FallacyFinder
+            fallacy_type, fallacy_confidence = self.model_manager.predict_fallacy(text)

+            # Enhanced intent analysis incorporating fallacy context
+            enhanced_intent = self._analyze_enhanced_intent(
+                text, fallacy_type, base_analysis['detected_patterns']
+            )

+            # Calculate enhanced abuse score with fallacy impact
+            enhanced_abuse_score = self._calculate_enhanced_abuse_score(
+                base_analysis['abuse_score'],
+                fallacy_type,
+                fallacy_confidence,
+                base_analysis['darvo_score']
+            )

+            # Determine likely abuser indicators
+            abuser_indicators = self._calculate_abuser_indicators(
+                base_analysis, fallacy_type, fallacy_confidence
             )

+            # Enhanced analysis result
             return {
+                **base_analysis,
+                'enhanced_abuse_score': enhanced_abuse_score,
+                'fallacy_detected': fallacy_type,
+                'fallacy_confidence': fallacy_confidence,
+                'enhanced_intent': enhanced_intent,
+                'abuser_indicators': abuser_indicators,
+                'manipulation_score': self._calculate_manipulation_score(
+                    base_analysis['detected_patterns'], fallacy_type
+                )
             }

         except Exception as e:
             logger.error(f"Error in analyze_message: {e}")
             logger.error(traceback.format_exc())
+            return self._get_error_analysis()
+
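
For orientation, a minimal usage sketch of the new entry point. The constructor wiring is an assumption: this diff only shows that the analyzer holds a model_manager and self.thresholds, not how they are injected, and the models.ModelManager import is hypothetical.

# Hypothetical wiring; ModelManager and the constructor signature are assumptions
# inferred from the self.model_manager / self.thresholds usage in this file.
from analyzer import MessageAnalyzer
from models import ModelManager  # assumed helper module, not shown in this commit

analyzer = MessageAnalyzer(ModelManager())
result = analyzer.analyze_message("You're imagining things. That never happened.")

print(result['fallacy_detected'], result['fallacy_confidence'])
print(result['enhanced_abuse_score'], result['abuser_indicators']['risk_level'])
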
+    def _get_base_analysis(self, text):
+        """Get base analysis using existing trained models"""
+        from utils import (
+            detect_explicit_abuse, detect_enhanced_threats, get_emotional_tone_tag,
+            compute_abuse_score, get_boundary_assessment, calculate_enhanced_risk_level
+        )
+
+        # Check for explicit abuse
+        explicit_abuse = detect_explicit_abuse(text)
+
+        # Get predictions from your trained models
+        sentiment, sentiment_confidence = self.model_manager.predict_sentiment(text)
+        boundary_health = self.model_manager.predict_boundary_health(text)
+        boundary_assessment = get_boundary_assessment(text, boundary_health)
+
+        # Early return for clearly supportive messages
+        if self._is_clearly_supportive(text, sentiment, sentiment_confidence, boundary_health):
             return {
                 'abuse_score': 0.0,
                 'detected_patterns': [],
                 'matched_scores': [],
+                'sentiment': sentiment,
+                'sentiment_confidence': sentiment_confidence,
                 'stage': 1,
                 'darvo_score': 0.0,
+                'emotional_tone': "neutral",
+                'boundary_assessment': boundary_assessment,
+                'risk_level': "Low"
             }
+
+        # Get abuse patterns from multilabel model
+        threshold_labels, matched_scores = self.model_manager.predict_abuse_patterns(text, self.thresholds)
+
+        # Enhanced threat detection
+        enhanced_patterns = detect_enhanced_threats(text, threshold_labels)
+        for pattern in enhanced_patterns:
+            if pattern not in threshold_labels:
+                threshold_labels.append(pattern)
+                weight = self.model_manager.get_pattern_weight(pattern)
+                matched_scores.append((pattern, 0.85, weight))
+
+        # Get DARVO score
+        darvo_score = self.model_manager.predict_darvo(text)
+
+        # Get emotions
+        emotions = self.model_manager.get_emotion_profile(text)
+
+        # Calculate abuse score
+        abuse_score = compute_abuse_score(matched_scores, sentiment)
+
+        # Apply explicit abuse override
+        if explicit_abuse:
+            abuse_score = max(abuse_score, 70.0)
+            if "insults" not in threshold_labels:
+                threshold_labels.append("insults")
+                matched_scores.append(("insults", 0.9, 1.4))
+
+        # Apply boundary and sentiment modifiers
+        abuse_score = self._apply_score_modifiers(
+            abuse_score, boundary_health, sentiment, explicit_abuse
+        )
+
+        # Get emotional tone
+        emotional_tone = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score, emotions)
+
+        # Set stage and risk level
+        stage = 2 if explicit_abuse or abuse_score > 70 else 1
+        risk_level = calculate_enhanced_risk_level(
+            abuse_score, threshold_labels,
+            "Low" if abuse_score < 50 else "Moderate" if abuse_score < 70 else "High",
+            darvo_score
+        )
+
+        return {
+            'abuse_score': abuse_score,
+            'detected_patterns': threshold_labels,
+            'matched_scores': matched_scores,
+            'sentiment': sentiment,
+            'sentiment_confidence': sentiment_confidence,
+            'stage': stage,
+            'darvo_score': darvo_score,
+            'emotional_tone': emotional_tone,
+            'boundary_assessment': boundary_assessment,
+            'risk_level': risk_level
         }
+
+    def _is_clearly_supportive(self, text, sentiment, sentiment_confidence, boundary_health):
+        """Check if message is clearly supportive and non-abusive"""
+        innocent_indicators = [
+            'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
+            'device', 'battery', 'charger', 'wifi', 'internet', 'computer',
+            'sorry', 'apologize', 'my fault', 'mistake'
+        ]
+
+        threat_indicators = ['kill', 'hurt', 'destroy', 'hate']
+
+        return (
+            any(indicator in text.lower() for indicator in innocent_indicators) and
+            len(text.split()) < 20 and
+            not any(threat in text.lower() for threat in threat_indicators) and
+            boundary_health > 0 and
+            sentiment == "supportive" and
+            sentiment_confidence > 0.8
+        )
+
+    def _apply_score_modifiers(self, abuse_score, boundary_health, sentiment, explicit_abuse):
+        """Apply boundary health and sentiment modifiers to abuse score"""
+        if boundary_health > 0 and not explicit_abuse:
+            abuse_score = min(abuse_score, 35.0)
+
+        if sentiment == "supportive" and not explicit_abuse:
+            abuse_score = min(abuse_score, 30.0)
+
+        return abuse_score
+
+    def _analyze_enhanced_intent(self, text, fallacy_type, abuse_patterns):
+        """Enhanced intent analysis using fallacy and pattern context"""
+        # Get base intent from trained model
+        base_intent, base_confidence = self.model_manager.predict_intent(text)
+
+        # Modify intent based on fallacy detection
+        intent_modifier = 0
+        likely_intent = base_intent
+
+        # High-concern fallacies strongly suggest manipulation
+        if fallacy_type in ['Gaslighting', 'DARVO', 'Kafkatrapping']:
+            intent_modifier += 0.3
+            likely_intent = "manipulative"
+        elif fallacy_type in ['Appeal to Emotion', 'Ad Hominem']:
+            intent_modifier += 0.2
+            if base_intent == "neutral":
+                likely_intent = "manipulative"
+
+        # Abuse patterns also modify intent
+        concerning_patterns = ['control', 'gaslighting', 'guilt tripping', 'blame shifting']
+        pattern_concern_count = sum(1 for pattern in abuse_patterns if pattern in concerning_patterns)
+
+        if pattern_concern_count >= 2:
+            intent_modifier += 0.2
+            if likely_intent == "neutral":
+                likely_intent = "controlling"
+
+        enhanced_confidence = min(base_confidence + intent_modifier, 1.0)
+
+        return {
+            'primary_intent': likely_intent,
+            'model_predicted': base_intent,
+            'model_confidence': base_confidence,
+            'enhanced_confidence': enhanced_confidence,
+            'fallacy_influence': fallacy_type != 'No Fallacy',
+            'pattern_influence': pattern_concern_count > 0
+        }
+
+    def _calculate_enhanced_abuse_score(self, base_score, fallacy_type, fallacy_confidence, darvo_score):
+        """Calculate enhanced abuse score incorporating fallacy analysis"""
+        enhanced_score = base_score
+
+        # Add fallacy impact based on severity
+        if fallacy_type != 'No Fallacy':
+            fallacy_weight = self.fallacy_weights.get(fallacy_type, 1.0)
+            fallacy_impact = fallacy_weight * fallacy_confidence * 12
+            enhanced_score += fallacy_impact
+
+        # Special boost for DARVO combination
+        if fallacy_type == 'DARVO' and darvo_score > 0.7:
+            enhanced_score += 18
+
+        # Gaslighting fallacy is particularly concerning
+        if fallacy_type == 'Gaslighting':
+            enhanced_score += 15
+
+        return min(enhanced_score, 100.0)
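
To make the magnitudes concrete: a worked example with hand-picked inputs (base score 40, fallacy 'DARVO' at 0.9 confidence, DARVO model score 0.8), mirroring the arithmetic above.

enhanced = 40 + 1.9 * 0.9 * 12   # base + weight * confidence * 12 = 40 + 20.52 = 60.52
enhanced += 18                   # darvo_score 0.8 > 0.7 triggers the boost -> 78.52
enhanced = min(enhanced, 100.0)  # capped at 100
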
+
+    def _calculate_abuser_indicators(self, base_analysis, fallacy_type, fallacy_confidence):
+        """Calculate indicators that suggest this sender might be an abuser"""
+        indicators = {
+            'high_abuse_score': base_analysis['abuse_score'] > 60,
+            'serious_fallacy': fallacy_type in ['Gaslighting', 'DARVO', 'Kafkatrapping', 'Appeal to Emotion'],
+            'high_darvo': base_analysis['darvo_score'] > 0.65,
+            'multiple_abuse_patterns': len(base_analysis['detected_patterns']) > 2,
+            'threatening_language': any(pattern in base_analysis['detected_patterns']
+                                        for pattern in ['veiled threats', 'stalking language', 'insults']),
+            'reality_distortion': fallacy_type == 'Gaslighting' or 'gaslighting' in base_analysis['detected_patterns'],
+            'victim_blaming': fallacy_type == 'DARVO' or base_analysis['darvo_score'] > 0.7
+        }
+
+        indicator_count = sum(indicators.values())
+        risk_level = 'high' if indicator_count >= 4 else 'moderate' if indicator_count >= 2 else 'low'
+
+        return {
+            'indicators': indicators,
+            'total_count': indicator_count,
+            'risk_level': risk_level
+        }
+
+    def _calculate_manipulation_score(self, abuse_patterns, fallacy_type):
+        """Calculate overall manipulation score"""
+        manipulation_patterns = [
+            'gaslighting', 'blame shifting', 'guilt tripping', 'false concern',
+            'future faking', 'control'
+        ]
+
+        pattern_score = sum(1 for pattern in abuse_patterns if pattern in manipulation_patterns)
+        fallacy_score = 2 if fallacy_type in ['Gaslighting', 'DARVO', 'Kafkatrapping'] else 1 if fallacy_type != 'No Fallacy' else 0
+
+        return min(pattern_score + fallacy_score, 10)
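
A tiny standalone mirror of that arithmetic, with hand-picked inputs: two manipulation patterns plus a serious fallacy lands at 4 on the 0-10 scale.

patterns = ['gaslighting', 'control']         # both appear in manipulation_patterns
fallacy_bonus = 2                             # 'Gaslighting' is in the serious list
print(min(len(patterns) + fallacy_bonus, 10)) # -> 4
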
+
+    def identify_primary_abuser(self, results_df):
+        """Enhanced primary abuser identification"""
+        logger.info("Identifying primary abuser with enhanced analysis...")

         sender_abuse_metrics = {}

         for sender in results_df['sender'].unique():
             sender_df = results_df[results_df['sender'] == sender]

+            if len(sender_df) < 3:
                 continue

+            # Calculate comprehensive metrics
+            metrics = self._calculate_sender_metrics(sender_df)
+            sender_abuse_metrics[sender] = metrics

         if not sender_abuse_metrics:
             return None, sender_abuse_metrics

+        # Find primary abuser based on composite score
+        primary_abuser = max(
+            sender_abuse_metrics.keys(),
+            key=lambda x: sender_abuse_metrics[x]['composite_score']
+        )

         logger.info(f"Primary abuser identified: {primary_abuser}")
         return primary_abuser, sender_abuse_metrics
+
+    def _calculate_sender_metrics(self, sender_df):
+        """Calculate comprehensive metrics for a sender"""
+        # Basic abuse metrics
+        avg_abuse = sender_df['abuse_score'].mean()
+        max_abuse = sender_df['abuse_score'].max()
+        enhanced_avg = sender_df.get('enhanced_abuse_score', sender_df['abuse_score']).mean()
+
+        abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
+        abusive_pct = (abusive_count / len(sender_df)) * 100
+
+        # Fallacy metrics
+        fallacy_counts = Counter()
+        serious_fallacy_count = 0
+
+        for _, row in sender_df.iterrows():
+            fallacy = row.get('fallacy_detected', 'No Fallacy')
+            if fallacy != 'No Fallacy':
+                fallacy_counts[fallacy] += 1
+                if fallacy in ['Gaslighting', 'DARVO', 'Kafkatrapping', 'Appeal to Emotion']:
+                    serious_fallacy_count += 1
+
+        # Abuser indicators
+        total_abuser_indicators = 0
+        high_risk_messages = 0
+
+        for _, row in sender_df.iterrows():
+            indicators = row.get('abuser_indicators', {})
+            if isinstance(indicators, dict):
+                total_abuser_indicators += indicators.get('total_count', 0)
+                if indicators.get('risk_level') == 'high':
+                    high_risk_messages += 1
+
+        # DARVO and manipulation metrics
+        avg_darvo = sender_df['darvo_score'].mean()
+        high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
+        avg_manipulation = sender_df.get('manipulation_score', pd.Series([0] * len(sender_df))).mean()
+
+        # Calculate composite score with enhanced weighting
+        composite_score = (
+            enhanced_avg * 0.25 +
+            abusive_pct * 0.15 +
+            (serious_fallacy_count / len(sender_df)) * 30 +
+            (total_abuser_indicators / len(sender_df)) * 20 +
+            avg_darvo * 100 * 0.1
+        )
+
+        return {
+            'message_count': len(sender_df),
+            'avg_abuse_score': avg_abuse,
+            'enhanced_avg_abuse_score': enhanced_avg,
+            'max_abuse_score': max_abuse,
+            'abusive_message_pct': abusive_pct,
+            'fallacy_breakdown': dict(fallacy_counts),
+            'serious_fallacy_count': serious_fallacy_count,
+            'serious_fallacy_rate': serious_fallacy_count / len(sender_df),
+            'avg_darvo_score': avg_darvo,
+            'high_darvo_count': high_darvo_count,
+            'total_abuser_indicators': total_abuser_indicators,
+            'high_risk_messages': high_risk_messages,
+            'avg_manipulation_score': avg_manipulation,
+            'composite_score': composite_score,
+            'likely_abuser': composite_score > 50,
+            'confidence': 'high' if len(sender_df) >= 20 else 'medium' if len(sender_df) >= 10 else 'low'
+        }
+
410
  def analyze_chat_history(self, df):
411
+ """Enhanced chat history analysis"""
412
  from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations
413
 
414
  logger.info(f"Analyzing chat history with {len(df)} messages")
415
 
416
  try:
 
417
  results_df = df.copy()
418
 
419
+ # Initialize new columns for enhanced analysis
420
+ enhanced_columns = {
421
+ 'abuse_score': 0.0,
422
+ 'enhanced_abuse_score': 0.0,
423
+ 'detected_patterns': [[] for _ in range(len(results_df))],
424
+ 'sentiment': "neutral",
425
+ 'darvo_score': 0.0,
426
+ 'emotional_tone': "neutral",
427
+ 'boundary_health': "unknown",
428
+ 'risk_level': "Low",
429
+ 'fallacy_detected': "No Fallacy",
430
+ 'fallacy_confidence': 0.0,
431
+ 'enhanced_intent': {},
432
+ 'abuser_indicators': {},
433
+ 'manipulation_score': 0
434
+ }
435
+
436
+ for col, default_val in enhanced_columns.items():
437
+ results_df[col] = default_val
438
 
439
+ # Analyze each message with enhanced analysis
440
  for i, row in results_df.iterrows():
441
  analysis = self.analyze_message(row['message'])
442
 
443
+ for col in enhanced_columns.keys():
444
+ if col in analysis:
445
+ results_df.at[i, col] = analysis[col]
 
 
 
 
 
446
 
447
+ # Calculate enhanced sender statistics
448
+ sender_stats = self._calculate_enhanced_sender_stats(results_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
449
 
450
+ # Identify primary abuser with enhanced method
451
  primary_abuser, sender_abuse_metrics = self.identify_primary_abuser(results_df)
452
 
453
  # Detect escalation patterns
454
  escalation_data = detect_escalation_patterns(results_df)
455
 
456
  # Determine overall risk level
457
+ overall_risk = self._determine_overall_risk(results_df, sender_abuse_metrics)
 
 
 
 
 
 
 
458
 
459
+ # Generate enhanced safety plan and recommendations
460
  all_patterns = []
461
  for patterns in results_df['detected_patterns']:
462
  if patterns:
463
  all_patterns.extend(patterns)
464
 
465
  safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
 
 
466
  recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
467
 
468
+ # Enhanced summary with fallacy analysis
469
  summary = {
470
  'message_count': len(results_df),
471
  'date_range': {
 
478
  'primary_abuser': primary_abuser,
479
  'escalation_data': escalation_data,
480
  'safety_plan': safety_plan,
481
+ 'recommendations': recommendations,
482
+ 'fallacy_summary': self._create_fallacy_summary(results_df),
483
+ 'enhanced_insights': self._create_enhanced_insights(results_df, primary_abuser)
484
  }
485
 
486
  return results_df, summary
 
488
  except Exception as e:
489
  logger.error(f"Error in analyze_chat_history: {e}")
490
  logger.error(traceback.format_exc())
491
+ return df, self._get_error_summary(df)
+
+    def _calculate_enhanced_sender_stats(self, results_df):
+        """Calculate enhanced sender statistics including fallacy analysis"""
+        sender_stats = {}
+
+        for sender in results_df['sender'].unique():
+            sender_df = results_df[results_df['sender'] == sender]
+
+            # Basic stats
+            avg_abuse = sender_df['abuse_score'].mean()
+            enhanced_avg = sender_df['enhanced_abuse_score'].mean()
+            abusive_count = len(sender_df[sender_df['enhanced_abuse_score'] >= 50])
+            abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+
+            # Pattern analysis
+            all_patterns = []
+            for patterns in sender_df['detected_patterns']:
+                if patterns:
+                    all_patterns.extend(patterns)
+            pattern_counts = Counter(all_patterns)
+
+            # Fallacy analysis
+            fallacy_counts = Counter(sender_df['fallacy_detected'])
+            fallacy_counts.pop('No Fallacy', None)  # Remove 'No Fallacy' entries
+
+            # Intent analysis
+            intent_counts = Counter()
+            for intent_data in sender_df['enhanced_intent']:
+                if isinstance(intent_data, dict):
+                    intent_counts[intent_data.get('primary_intent', 'unknown')] += 1
+
+            sender_stats[sender] = {
+                'message_count': len(sender_df),
+                'avg_abuse_score': avg_abuse,
+                'enhanced_avg_abuse_score': enhanced_avg,
+                'abusive_message_count': abusive_count,
+                'abusive_message_pct': abusive_pct,
+                'common_patterns': pattern_counts.most_common(5),
+                'fallacy_usage': dict(fallacy_counts),
+                'intent_distribution': dict(intent_counts),
+                'avg_darvo_score': sender_df['darvo_score'].mean(),
+                'avg_manipulation_score': sender_df['manipulation_score'].mean()
             }
+
+        return sender_stats
+
+    def _determine_overall_risk(self, results_df, sender_abuse_metrics):
+        """Determine overall risk level with enhanced criteria"""
+        if any(metrics.get('likely_abuser', False) for metrics in sender_abuse_metrics.values()):
+            return "Critical"
+        elif results_df['enhanced_abuse_score'].max() > 80:
+            return "High"
+        elif results_df['enhanced_abuse_score'].mean() > 40:
+            return "Moderate"
+        else:
+            return "Low"
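
A small self-contained check of the risk ladder's branch order, on toy data; it mirrors the method's logic rather than calling it, since constructing a full analyzer needs the model stack.

import pandas as pd

toy = pd.DataFrame({'enhanced_abuse_score': [10.0, 85.0, 20.0]})
metrics = {'A': {'likely_abuser': False}}  # no sender flagged, but max > 80
risk = ("Critical" if any(m.get('likely_abuser', False) for m in metrics.values())
        else "High" if toy['enhanced_abuse_score'].max() > 80
        else "Moderate" if toy['enhanced_abuse_score'].mean() > 40
        else "Low")
print(risk)  # High
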
+
+    def _create_fallacy_summary(self, results_df):
+        """Create summary of fallacy usage"""
+        fallacy_counts = Counter(results_df['fallacy_detected'])
+        fallacy_counts.pop('No Fallacy', None)
+
+        total_messages = len(results_df)
+        messages_with_fallacies = total_messages - results_df[results_df['fallacy_detected'] == 'No Fallacy'].shape[0]
+
+        return {
+            'total_fallacies_detected': sum(fallacy_counts.values()),
+            'messages_with_fallacies': messages_with_fallacies,
+            'fallacy_rate': round((messages_with_fallacies / total_messages) * 100, 1) if total_messages > 0 else 0,
+            'most_common_fallacies': dict(fallacy_counts.most_common(5)),
+            'serious_fallacies': {
+                fallacy: count for fallacy, count in fallacy_counts.items()
+                if fallacy in ['Gaslighting', 'DARVO', 'Kafkatrapping', 'Appeal to Emotion']
+            }
+        }
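
For a sense of the output shape: with a hypothetical tally of 10 messages, 4 of which carry a fallacy (2 Gaslighting, 1 DARVO, 1 Strawman), the returned dict would be:

# Illustrative result for the hypothetical tally described above
{
    'total_fallacies_detected': 4,
    'messages_with_fallacies': 4,
    'fallacy_rate': 40.0,
    'most_common_fallacies': {'Gaslighting': 2, 'DARVO': 1, 'Strawman': 1},
    'serious_fallacies': {'Gaslighting': 2, 'DARVO': 1}
}
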
+
+    def _create_enhanced_insights(self, results_df, primary_abuser):
+        """Create enhanced insights from the analysis"""
+        insights = {
+            'key_findings': [],
+            'behavioral_patterns': {},
+            'risk_indicators': []
+        }
+
+        # Key findings
+        if primary_abuser:
+            abuser_fallacies = results_df[results_df['sender'] == primary_abuser]['fallacy_detected']
+            serious_fallacies = [f for f in abuser_fallacies if f in ['Gaslighting', 'DARVO', 'Kafkatrapping']]
+
+            if serious_fallacies:
+                insights['key_findings'].append(f"Primary abuser uses serious psychological manipulation tactics: {', '.join(set(serious_fallacies))}")
+
+        # High manipulation scores
+        high_manipulation = results_df[results_df['manipulation_score'] >= 5]
+        if len(high_manipulation) > 0:
+            insights['key_findings'].append(f"{len(high_manipulation)} messages show high manipulation indicators")
+
+        # Risk indicators
+        critical_patterns = ['stalking language', 'veiled threats', 'insults']
+        for pattern in critical_patterns:
+            pattern_count = sum(1 for patterns in results_df['detected_patterns'] if pattern in patterns)
+            if pattern_count > 0:
+                insights['risk_indicators'].append(f"{pattern_count} instances of {pattern}")
+
+        return insights
+
+    def _get_empty_analysis(self):
+        """Return empty analysis for blank messages"""
+        return {
+            'abuse_score': 0.0,
+            'enhanced_abuse_score': 0.0,
+            'detected_patterns': [],
+            'matched_scores': [],
+            'sentiment': "neutral",
+            'sentiment_confidence': 0.5,
+            'stage': 1,
+            'darvo_score': 0.0,
+            'emotional_tone': "neutral",
+            'boundary_assessment': {'assessment': 'neutral', 'confidence': 0.5},
+            'risk_level': "Low",
+            'fallacy_detected': "No Fallacy",
+            'fallacy_confidence': 0.0,
+            'enhanced_intent': {'primary_intent': 'neutral'},
+            'abuser_indicators': {'total_count': 0, 'risk_level': 'low'},
+            'manipulation_score': 0
+        }
+
+    def _get_error_analysis(self):
+        """Return error analysis"""
+        return {
+            'abuse_score': 0.0,
+            'enhanced_abuse_score': 0.0,
+            'detected_patterns': [],
+            'matched_scores': [],
+            'sentiment': "error",
+            'sentiment_confidence': 0.0,
+            'stage': 1,
+            'darvo_score': 0.0,
+            'emotional_tone': "error",
+            'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
+            'risk_level': "Unknown",
+            'fallacy_detected': "No Fallacy",
+            'fallacy_confidence': 0.0,
+            'enhanced_intent': {'primary_intent': 'error'},
+            'abuser_indicators': {'total_count': 0, 'risk_level': 'unknown'},
+            'manipulation_score': 0
+        }
+
+    def _get_error_summary(self, df):
+        """Return error summary"""
+        return {
+            'message_count': len(df),
+            'date_range': {
+                'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
+                'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
+            },
+            'overall_risk_level': "Unknown",
+            'sender_stats': {},
+            'sender_abuse_metrics': {},
+            'primary_abuser': None,
+            'escalation_data': {},
+            'safety_plan': "Error generating safety plan.",
+            'recommendations': [],
+            'fallacy_summary': {},
+            'enhanced_insights': {}
+        }