SamanthaStorm committed on
Commit 2765991 · verified · 1 Parent(s): d810e35

Update analyzer.py

Files changed (1):
  analyzer.py +205 -250
analyzer.py CHANGED
@@ -186,13 +186,12 @@ class MessageAnalyzer:
             'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
             'risk_level': "Unknown"
         }
-

    def identify_primary_abuser(self, results_df):
        """Identify the primary abuser based on comprehensive abuse metrics with pattern severity weighting"""
        logger.info("Identifying primary abuser...")
-
-        # Define pattern severity weights (higher = more concerning)
+
+        # Define pattern severity weights (higher = more concerning)
        PATTERN_WEIGHTS = {
            "recovery phase": 0.7,
            "control": 1.4,
@@ -211,44 +210,44 @@
            "false equivalence": 1.3,
            "future faking": 0.8
        }
-
+
        sender_abuse_metrics = {}
-
+
        for sender in results_df['sender'].unique():
            sender_df = results_df[results_df['sender'] == sender]
-
+
            if len(sender_df) < 3:  # Need minimum messages for reliable assessment
                continue
-
+
            # Calculate comprehensive abuse metrics
            avg_abuse = sender_df['abuse_score'].mean()
            max_abuse = sender_df['abuse_score'].max()
            abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
            abusive_pct = (abusive_count / len(sender_df)) * 100
-
+
            # Calculate pattern-weighted score
            pattern_counts = Counter()
            for patterns in sender_df['detected_patterns']:
                pattern_counts.update(patterns)
-
-            # Calculate weighted pattern score
+
+            # Calculate weighted pattern score
            total_pattern_weight = 0
            for pattern, count in pattern_counts.items():
                weight = PATTERN_WEIGHTS.get(pattern, 1.0)  # Default weight of 1.0
                total_pattern_weight += count * weight
-
-            # Normalize by message count
+
+            # Normalize by message count
            weighted_pattern_score = total_pattern_weight / len(sender_df) if len(sender_df) > 0 else 0
-
-            # DARVO score
+
+            # DARVO score
            avg_darvo = sender_df['darvo_score'].mean()
            high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
-
-            # Risk level distribution
+
+            # Risk level distribution
            high_risk_count = len(sender_df[sender_df['risk_level'].isin(['High', 'Critical'])])
            high_risk_pct = (high_risk_count / len(sender_df)) * 100
-
-            # Composite abuse score (weighted combination of factors)
+
+            # Composite abuse score (weighted combination of factors)
            composite_score = (
                avg_abuse * 0.25 +
                abusive_pct * 0.2 +
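Note (editorial sketch, not part of this commit): the weighted pattern score above multiplies each pattern's frequency by its severity weight and then normalizes by message count, so a sender is scored on how concerning their messages are on average rather than on how many they sent. A self-contained example with hypothetical messages and weights:

from collections import Counter

# Hypothetical per-message pattern lists for one sender
detected_patterns = [["control"], [], ["control", "gaslighting"], ["recovery phase"]]
weights = {"recovery phase": 0.7, "control": 1.4, "gaslighting": 1.3}

pattern_counts = Counter()
for patterns in detected_patterns:
    pattern_counts.update(patterns)

total_pattern_weight = sum(
    count * weights.get(pattern, 1.0) for pattern, count in pattern_counts.items()
)
# Normalizing by message count keeps prolific senders from dominating on volume alone
weighted_pattern_score = total_pattern_weight / len(detected_patterns) if detected_patterns else 0
print(round(weighted_pattern_score, 2))  # (2*1.4 + 1*1.3 + 1*0.7) / 4 = 1.2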
@@ -256,8 +255,8 @@
                avg_darvo * 100 * 0.15 +
                high_risk_pct * 0.1
            )
-
-            # Store detailed pattern information for reporting
+
+            # Store detailed pattern information for reporting
            pattern_details = [
                {
                    'pattern': pattern,
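Note (editorial sketch, not part of this commit): the composite score blends the per-sender metrics on a roughly 0-100 scale; avg_darvo is rescaled by 100 because DARVO scores live on a 0-1 scale. The four weights visible in this diff (0.25, 0.2, 0.15, 0.1) sum to 0.7, so at least one more weighted term, presumably the weighted pattern score, sits on a line outside these hunks. Toy arithmetic for the visible terms only:

# Hypothetical per-sender inputs
avg_abuse = 40.0      # mean abuse score, 0-100 scale
abusive_pct = 25.0    # percent of messages scoring >= 50
avg_darvo = 0.30      # mean DARVO score, 0-1 scale
high_risk_pct = 10.0  # percent of High/Critical messages

partial_composite = (
    avg_abuse * 0.25 +        # 10.0
    abusive_pct * 0.2 +       # 5.0
    avg_darvo * 100 * 0.15 +  # 4.5
    high_risk_pct * 0.1       # 1.0
)
print(round(partial_composite, 2))  # 20.5, before the pattern term elided from this hunk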
@@ -267,10 +266,10 @@
                }
                for pattern, count in pattern_counts.items()
            ]
-
+
            # Sort patterns by weighted score
            pattern_details.sort(key=lambda x: x['weighted_score'], reverse=True)
-
+
            sender_abuse_metrics[sender] = {
                'message_count': len(sender_df),
                'avg_abuse_score': avg_abuse,
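Note (editorial sketch, not part of this commit): each pattern_details row carries at least the pattern name and the weighted_score used as the sort key; the fields elided between 'pattern' and the closing brace in the hunk above are not reproduced here. Sorting descending by weighted_score puts the most concerning patterns first for reporting:

from collections import Counter

pattern_counts = Counter({"control": 3, "recovery phase": 2})
weights = {"control": 1.4, "recovery phase": 0.7}

pattern_details = [
    {
        "pattern": pattern,
        "count": count,
        "weighted_score": count * weights.get(pattern, 1.0),
    }
    for pattern, count in pattern_counts.items()
]
# Most concerning patterns first
pattern_details.sort(key=lambda x: x["weighted_score"], reverse=True)
print([d["pattern"] for d in pattern_details])  # ['control', 'recovery phase'], 4.2 beats 1.4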
@@ -282,234 +281,231 @@
                'high_risk_pct': high_risk_pct,
                'composite_score': composite_score
            }
-
+
        if not sender_abuse_metrics:
            return None, sender_abuse_metrics
-
+
        # Find primary abuser (highest composite score with minimum thresholds)
        primary_abuser = None
        max_composite = 0
-
+
        for sender, metrics in sender_abuse_metrics.items():
            if (metrics['composite_score'] > max_composite and
                metrics['message_count'] >= 5):
                max_composite = metrics['composite_score']
                primary_abuser = sender
-
+
        logger.info(f"Primary abuser identified: {primary_abuser}")
        return primary_abuser, sender_abuse_metrics
-

    def analyze_chat_history(self, df):
-        """Analyze entire chat history with focus on primary abuser"""
-        from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations
-
-        logger.info(f"Analyzing chat history with {len(df)} messages")
-
-        try:
-            # Create results dataframe
-            results_df = df.copy()
-
-            # Add analysis columns
-            results_df['abuse_score'] = 0.0
-            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
-            results_df['sentiment'] = "neutral"
-            results_df['darvo_score'] = 0.0
-            results_df['emotional_tone'] = "neutral"
-            results_df['boundary_health'] = "unknown"
-            results_df['risk_level'] = "Low"
-
-            # Analyze each message
-            for i, row in results_df.iterrows():
-                analysis = self.analyze_message(row['message'])
-
-                # Update dataframe with analysis results
-                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
-                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
-                results_df.at[i, 'sentiment'] = analysis['sentiment']
-                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
-                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
-                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
-                results_df.at[i, 'risk_level'] = analysis['risk_level']
-
-            # Identify primary abuser FIRST
-            primary_abuser, sender_abuse_metrics = self.identify_primary_abuser(results_df)
-
-            # Calculate traditional sender statistics for backward compatibility
-            sender_stats = {}
-            for sender in results_df['sender'].unique():
-                sender_df = results_df[results_df['sender'] == sender]
-
-                # Calculate key metrics
-                avg_abuse = sender_df['abuse_score'].mean()
-                max_abuse = sender_df['abuse_score'].max()
-
-                # Get most common patterns
-                all_patterns = []
-                for patterns in sender_df['detected_patterns']:
-                    if patterns:
-                        all_patterns.extend(patterns)
-
-                pattern_counts = Counter(all_patterns)
-                most_common = pattern_counts.most_common(5)  # Get top 5 patterns
-
-                # Calculate percentage of abusive messages
-                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
-                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
-
-                # Calculate emotional tone distribution
-                tone_counts = Counter(sender_df['emotional_tone'])
-                most_common_tones = tone_counts.most_common(3)  # Get top 3 emotional tones
+        """Analyze entire chat history with focus on primary abuser"""
+        from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations
+
+        logger.info(f"Analyzing chat history with {len(df)} messages")
+
+        try:
+            # Create results dataframe
+            results_df = df.copy()
+
+            # Add analysis columns
+            results_df['abuse_score'] = 0.0
+            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
+            results_df['sentiment'] = "neutral"
+            results_df['darvo_score'] = 0.0
+            results_df['emotional_tone'] = "neutral"
+            results_df['boundary_health'] = "unknown"
+            results_df['risk_level'] = "Low"
+
+            # Analyze each message
+            for i, row in results_df.iterrows():
+                analysis = self.analyze_message(row['message'])
+
+                # Update dataframe with analysis results
+                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
+                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
+                results_df.at[i, 'sentiment'] = analysis['sentiment']
+                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
+                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
+                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
+                results_df.at[i, 'risk_level'] = analysis['risk_level']
+
+            # Identify primary abuser FIRST
+            primary_abuser, sender_abuse_metrics = self.identify_primary_abuser(results_df)
+
+            # Calculate traditional sender statistics for backward compatibility
+            sender_stats = {}
+            for sender in results_df['sender'].unique():
+                sender_df = results_df[results_df['sender'] == sender]
+
+                # Calculate key metrics
+                avg_abuse = sender_df['abuse_score'].mean()
+                max_abuse = sender_df['abuse_score'].max()
+
+                # Get most common patterns
+                all_patterns = []
+                for patterns in sender_df['detected_patterns']:
+                    if patterns:
+                        all_patterns.extend(patterns)
+
+                pattern_counts = Counter(all_patterns)
+                most_common = pattern_counts.most_common(5)  # Get top 5 patterns
+
+                # Calculate percentage of abusive messages
+                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
+                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+
+                # Calculate emotional tone distribution
+                tone_counts = Counter(sender_df['emotional_tone'])
+                most_common_tones = tone_counts.most_common(3)  # Get top 3 emotional tones
+
+                # Calculate DARVO score statistics
+                avg_darvo = sender_df['darvo_score'].mean()
+                high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
+                high_darvo_pct = (high_darvo_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+
+                # Calculate risk level distribution
+                risk_counts = Counter(sender_df['risk_level'])
+
+                # Store stats
+                sender_stats[sender] = {
+                    'message_count': len(sender_df),
+                    'avg_abuse_score': avg_abuse,
+                    'max_abuse_score': max_abuse,
+                    'abusive_message_count': abusive_count,
+                    'abusive_message_pct': abusive_pct,
+                    'common_patterns': most_common,
+                    'emotional_tones': most_common_tones,
+                    'avg_darvo_score': avg_darvo,
+                    'high_darvo_count': high_darvo_count,
+                    'high_darvo_pct': high_darvo_pct,
+                    'risk_levels': risk_counts
+                }

-                # Calculate DARVO score statistics
-                avg_darvo = sender_df['darvo_score'].mean()
-                high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
-                high_darvo_pct = (high_darvo_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+            # If no primary abuser identified, provide basic analysis
+            if not primary_abuser:
+                logger.info("No primary abuser identified - providing general analysis")
+
+                # Detect escalation patterns
+                escalation_data = detect_escalation_patterns(results_df)
+
+                # Determine overall risk level
+                if results_df['risk_level'].isin(['Critical']).any():
+                    overall_risk = "Critical"
+                elif results_df['risk_level'].isin(['High']).any():
+                    overall_risk = "High"
+                elif results_df['risk_level'].isin(['Moderate']).any():
+                    overall_risk = "Moderate"
+                else:
+                    overall_risk = "Low"
+
+                # Generate safety plan
+                all_patterns = []
+                for patterns in results_df['detected_patterns']:
+                    if patterns:
+                        all_patterns.extend(patterns)
+
+                safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
+
+                # Generate recommendations
+                recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
+
+                # Prepare summary
+                summary = {
+                    'message_count': len(results_df),
+                    'date_range': {
+                        'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
+                        'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
+                    },
+                    'overall_risk_level': overall_risk,
+                    'sender_stats': sender_stats,
+                    'sender_abuse_metrics': sender_abuse_metrics,
+                    'primary_abuser': None,
+                    'primary_abuser_analysis': None,
+                    'escalation_data': escalation_data,
+                    'safety_plan': safety_plan,
+                    'recommendations': recommendations,
+                    'analysis_focus': 'general'
+                }
+
+                return results_df, summary

-                # Calculate risk level distribution
-                risk_counts = Counter(sender_df['risk_level'])
+            # Focus analysis on primary abuser
+            logger.info(f"Focusing analysis on primary abuser: {primary_abuser}")
+            abuser_df = results_df[results_df['sender'] == primary_abuser]
+            victim_df = results_df[results_df['sender'] != primary_abuser]

-                # Store stats
-                sender_stats[sender] = {
-                    'message_count': len(sender_df),
-                    'avg_abuse_score': avg_abuse,
-                    'max_abuse_score': max_abuse,
-                    'abusive_message_count': abusive_count,
-                    'abusive_message_pct': abusive_pct,
-                    'common_patterns': most_common,
-                    'emotional_tones': most_common_tones,
-                    'avg_darvo_score': avg_darvo,
-                    'high_darvo_count': high_darvo_count,
-                    'high_darvo_pct': high_darvo_pct,
-                    'risk_levels': risk_counts
-                }
-
-            # If no primary abuser identified, provide basic analysis
-            if not primary_abuser:
-                logger.info("No primary abuser identified - providing general analysis")
+            # Generate comprehensive primary abuser analysis
+            primary_abuser_analysis = self._analyze_primary_abuser(
+                abuser_df, victim_df, results_df, primary_abuser
+            ) if hasattr(self, '_analyze_primary_abuser') else None

-                # Detect escalation patterns
-                escalation_data = detect_escalation_patterns(results_df)
+            # Detect escalation patterns (focus on abuser's messages)
+            escalation_data = detect_escalation_patterns(abuser_df)

-                # Determine overall risk level
-                if results_df['risk_level'].isin(['Critical']).any():
+            # Determine overall risk level based on primary abuser
+            abuser_risk_levels = abuser_df['risk_level'].value_counts()
+            if 'Critical' in abuser_risk_levels and abuser_risk_levels['Critical'] > 0:
                overall_risk = "Critical"
-            elif results_df['risk_level'].isin(['High']).any():
+            elif 'High' in abuser_risk_levels and abuser_risk_levels['High'] > 0:
                overall_risk = "High"
-            elif results_df['risk_level'].isin(['Moderate']).any():
+            elif 'Moderate' in abuser_risk_levels and abuser_risk_levels['Moderate'] > 0:
                overall_risk = "Moderate"
            else:
                overall_risk = "Low"

-            # Generate safety plan
-            all_patterns = []
-            for patterns in results_df['detected_patterns']:
+            # Generate safety plan based on abuser's patterns
+            abuser_patterns = []
+            for patterns in abuser_df['detected_patterns']:
                if patterns:
-                    all_patterns.extend(patterns)
+                    abuser_patterns.extend(patterns)

-            safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
+            safety_plan = generate_safety_plan(overall_risk, abuser_patterns, escalation_data)

-            # Generate recommendations
-            recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
+            # Generate recommendations focused on the abuser's behavior
+            recommendations = generate_professional_recommendations(abuser_df, escalation_data, overall_risk)

-            # Prepare summary
+            # Prepare focused summary
            summary = {
                'message_count': len(results_df),
+                'abuser_message_count': len(abuser_df),
+                'victim_message_count': len(victim_df),
                'date_range': {
                    'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
                    'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
                },
                'overall_risk_level': overall_risk,
-                'sender_stats': sender_stats,
-                'sender_abuse_metrics': sender_abuse_metrics,
-                'primary_abuser': None,
-                'primary_abuser_analysis': None,
+                'sender_stats': sender_stats,  # Include traditional sender stats for backward compatibility
+                'sender_abuse_metrics': sender_abuse_metrics,  # Include detailed abuse metrics
+                'primary_abuser': primary_abuser,
+                'primary_abuser_analysis': primary_abuser_analysis,
                'escalation_data': escalation_data,
                'safety_plan': safety_plan,
                'recommendations': recommendations,
-                'analysis_focus': 'general'
+                'analysis_focus': 'primary_abuser'  # Flag to indicate focused analysis
            }

            return results_df, summary
-
-            # Focus analysis on primary abuser
-            logger.info(f"Focusing analysis on primary abuser: {primary_abuser}")
-            abuser_df = results_df[results_df['sender'] == primary_abuser]
-            victim_df = results_df[results_df['sender'] != primary_abuser]
-
-            # Generate comprehensive primary abuser analysis
-            primary_abuser_analysis = self._analyze_primary_abuser(
-                abuser_df, victim_df, results_df, primary_abuser
-            ) if hasattr(self, '_analyze_primary_abuser') else None
-
-            # Detect escalation patterns (focus on abuser's messages)
-            escalation_data = detect_escalation_patterns(abuser_df)
-
-            # Determine overall risk level based on primary abuser
-            abuser_risk_levels = abuser_df['risk_level'].value_counts()
-            if 'Critical' in abuser_risk_levels and abuser_risk_levels['Critical'] > 0:
-                overall_risk = "Critical"
-            elif 'High' in abuser_risk_levels and abuser_risk_levels['High'] > 0:
-                overall_risk = "High"
-            elif 'Moderate' in abuser_risk_levels and abuser_risk_levels['Moderate'] > 0:
-                overall_risk = "Moderate"
-            else:
-                overall_risk = "Low"
-
-            # Generate safety plan based on abuser's patterns
-            abuser_patterns = []
-            for patterns in abuser_df['detected_patterns']:
-                if patterns:
-                    abuser_patterns.extend(patterns)
-
-            safety_plan = generate_safety_plan(overall_risk, abuser_patterns, escalation_data)
-
-            # Generate recommendations focused on the abuser's behavior
-            recommendations = generate_professional_recommendations(abuser_df, escalation_data, overall_risk)
-
-            # Prepare focused summary
-            summary = {
-                'message_count': len(results_df),
-                'abuser_message_count': len(abuser_df),
-                'victim_message_count': len(victim_df),
-                'date_range': {
-                    'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
-                    'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
-                },
-                'overall_risk_level': overall_risk,
-                'sender_stats': sender_stats,  # Include traditional sender stats for backward compatibility
-                'sender_abuse_metrics': sender_abuse_metrics,  # Include detailed abuse metrics
-                'primary_abuser': primary_abuser,
-                'primary_abuser_analysis': primary_abuser_analysis,
-                'escalation_data': escalation_data,
-                'safety_plan': safety_plan,
-                'recommendations': recommendations,
-                'analysis_focus': 'primary_abuser'  # Flag to indicate focused analysis
-            }
-
-            return results_df, summary
-
-        except Exception as e:
-            logger.error(f"Error in analyze_chat_history: {e}")
-            logger.error(traceback.format_exc())
-            return df, {
-                'message_count': len(df),
-                'date_range': {
-                    'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
-                    'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
-                },
-                'overall_risk_level': "Unknown",
-                'sender_stats': {},
-                'sender_abuse_metrics': {},
-                'primary_abuser': None,
-                'primary_abuser_analysis': None,
-                'escalation_data': {},
-                'safety_plan': "Error generating safety plan.",
-                'recommendations': [],
-                'analysis_focus': 'error'
-            }
-
-
+
+        except Exception as e:
+            logger.error(f"Error in analyze_chat_history: {e}")
+            logger.error(traceback.format_exc())
+            return df, {
+                'message_count': len(df),
+                'date_range': {
+                    'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
+                    'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
+                },
+                'overall_risk_level': "Unknown",
+                'sender_stats': {},
+                'sender_abuse_metrics': {},
+                'primary_abuser': None,
+                'primary_abuser_analysis': None,
+                'escalation_data': {},
+                'safety_plan': "Error generating safety plan.",
+                'recommendations': [],
+                'analysis_focus': 'error'
+            }

    def _analyze_primary_abuser(self, abuser_df, victim_df, full_df, primary_abuser):
        """Generate comprehensive analysis of the primary abuser"""
@@ -692,45 +688,4 @@ class MessageAnalyzer:
                'common_patterns': most_common,
                'emotional_tones': most_common_tones,
                'avg_darvo_score': avg_darvo,
-                'high_darvo_count': high_darvo_count,
-                'high_darvo_pct': high_darvo_pct,
-                'risk_levels': risk_counts
-            }
-
-        escalation_data = detect_escalation_patterns(results_df)
-
-        if results_df['risk_level'].isin(['Critical']).any():
-            overall_risk = "Critical"
-        elif results_df['risk_level'].isin(['High']).any():
-            overall_risk = "High"
-        elif results_df['risk_level'].isin(['Moderate']).any():
-            overall_risk = "Moderate"
-        else:
-            overall_risk = "Low"
-
-        all_patterns = []
-        for patterns in results_df['detected_patterns']:
-            if patterns:
-                all_patterns.extend(patterns)
-
-        safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
-        recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
-
-        summary = {
-            'message_count': len(results_df),
-            'date_range': {
-                'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
-                'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
-            },
-            'overall_risk_level': overall_risk,
-            'sender_stats': sender_stats,
-            'primary_abuser': None,
-            'primary_abuser_analysis': None,
-            'sender_abuse_metrics': sender_abuse_metrics,
-            'escalation_data': escalation_data,
-            'safety_plan': safety_plan,
-            'recommendations': recommendations,
-            'analysis_focus': 'general'
-        }
-
-        return results_df, summary
+                'high_
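Note (editorial sketch, not part of this commit): both sides of this diff treat a DARVO score of 0.65 or above as "high DARVO". The per-sender statistics reduce to a mean plus a thresholded count and percentage, as in:

import pandas as pd

# Hypothetical DARVO scores (0-1 scale) for one sender's messages
sender_df = pd.DataFrame({"darvo_score": [0.2, 0.7, 0.9, 0.4]})

avg_darvo = sender_df["darvo_score"].mean()
high_darvo_count = len(sender_df[sender_df["darvo_score"] >= 0.65])
high_darvo_pct = (high_darvo_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
print(round(avg_darvo, 2), high_darvo_count, high_darvo_pct)  # 0.55 2 50.0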
 