Update analyzer.py

analyzer.py (CHANGED: +205, -250)
@@ -186,13 +186,12 @@ class MessageAnalyzer:
            'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
            'risk_level': "Unknown"
        }
-

    def identify_primary_abuser(self, results_df):
        """Identify the primary abuser based on comprehensive abuse metrics with pattern severity weighting"""
        logger.info("Identifying primary abuser...")
-
-
+
+        # Define pattern severity weights (higher = more concerning)
        PATTERN_WEIGHTS = {
            "recovery phase": 0.7,
            "control": 1.4,
@@ -211,44 +210,44 @@ class MessageAnalyzer:
            "false equivalence": 1.3,
            "future faking": 0.8
        }
-
+
        sender_abuse_metrics = {}
-
+
        for sender in results_df['sender'].unique():
            sender_df = results_df[results_df['sender'] == sender]
-
+
            if len(sender_df) < 3:  # Need minimum messages for reliable assessment
                continue
-
+
            # Calculate comprehensive abuse metrics
            avg_abuse = sender_df['abuse_score'].mean()
            max_abuse = sender_df['abuse_score'].max()
            abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
            abusive_pct = (abusive_count / len(sender_df)) * 100
-
+
            # Calculate pattern-weighted score
            pattern_counts = Counter()
            for patterns in sender_df['detected_patterns']:
                pattern_counts.update(patterns)
-
-
+
+            # Calculate weighted pattern score
            total_pattern_weight = 0
            for pattern, count in pattern_counts.items():
                weight = PATTERN_WEIGHTS.get(pattern, 1.0)  # Default weight of 1.0
                total_pattern_weight += count * weight
-
-
+
+            # Normalize by message count
            weighted_pattern_score = total_pattern_weight / len(sender_df) if len(sender_df) > 0 else 0
-
-
+
+            # DARVO score
            avg_darvo = sender_df['darvo_score'].mean()
            high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
-
-
+
+            # Risk level distribution
            high_risk_count = len(sender_df[sender_df['risk_level'].isin(['High', 'Critical'])])
            high_risk_pct = (high_risk_count / len(sender_df)) * 100
-
-
+
+            # Composite abuse score (weighted combination of factors)
            composite_score = (
                avg_abuse * 0.25 +
                abusive_pct * 0.2 +
@@ -256,8 +255,8 @@ class MessageAnalyzer:
                avg_darvo * 100 * 0.15 +
                high_risk_pct * 0.1
            )
-
-
+
+            # Store detailed pattern information for reporting
            pattern_details = [
                {
                    'pattern': pattern,
@@ -267,10 +266,10 @@ class MessageAnalyzer:
                }
                for pattern, count in pattern_counts.items()
            ]
-
+
            # Sort patterns by weighted score
            pattern_details.sort(key=lambda x: x['weighted_score'], reverse=True)
-
+
            sender_abuse_metrics[sender] = {
                'message_count': len(sender_df),
                'avg_abuse_score': avg_abuse,
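For a sense of how the new weighting behaves, the following sketch (not part of the commit) reruns the per-sender arithmetic from identify_primary_abuser on a toy DataFrame. It uses only the four PATTERN_WEIGHTS entries visible in the hunks above, and the 0.3 weight on the pattern term is an assumption, since that line of the composite formula falls in a collapsed part of the diff.

# Standalone sketch of the composite scoring in identify_primary_abuser.
# Assumptions: only the four pattern weights shown in the diff are used, and
# the collapsed term of the formula is taken to be weighted_pattern_score * 0.3.
from collections import Counter

import pandas as pd

PATTERN_WEIGHTS = {
    "recovery phase": 0.7,
    "control": 1.4,
    "false equivalence": 1.3,
    "future faking": 0.8,
}

toy = pd.DataFrame({
    "sender": ["A"] * 5 + ["B"] * 5,
    "abuse_score": [80, 65, 70, 55, 60, 10, 5, 15, 0, 20],
    "darvo_score": [0.8, 0.7, 0.5, 0.9, 0.6, 0.1, 0.0, 0.2, 0.1, 0.0],
    "risk_level": ["High", "Critical", "High", "Moderate", "High",
                   "Low", "Low", "Low", "Low", "Moderate"],
    "detected_patterns": [["control"], ["control", "false equivalence"], [],
                          ["future faking"], ["control"],
                          [], [], ["recovery phase"], [], []],
})

for sender, sender_df in toy.groupby("sender"):
    n = len(sender_df)
    avg_abuse = sender_df["abuse_score"].mean()
    abusive_pct = (sender_df["abuse_score"] >= 50).mean() * 100
    avg_darvo = sender_df["darvo_score"].mean()
    high_risk_pct = sender_df["risk_level"].isin(["High", "Critical"]).mean() * 100

    # Count detected patterns and weight them by severity
    pattern_counts = Counter()
    for patterns in sender_df["detected_patterns"]:
        pattern_counts.update(patterns)
    weighted_pattern_score = sum(PATTERN_WEIGHTS.get(p, 1.0) * c
                                 for p, c in pattern_counts.items()) / n

    composite = (avg_abuse * 0.25 +
                 abusive_pct * 0.2 +
                 weighted_pattern_score * 0.3 +   # assumed weight for the collapsed term
                 avg_darvo * 100 * 0.15 +
                 high_risk_pct * 0.1)
    print(f"{sender}: weighted_pattern_score={weighted_pattern_score:.2f}, composite={composite:.1f}")

On this toy data, sender A scores far above sender B, which is the gap the composite threshold and the message_count >= 5 guard then act on.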
@@ -282,234 +281,231 @@ class MessageAnalyzer:
                'high_risk_pct': high_risk_pct,
                'composite_score': composite_score
            }
-
+
        if not sender_abuse_metrics:
            return None, sender_abuse_metrics
-
+
        # Find primary abuser (highest composite score with minimum thresholds)
        primary_abuser = None
        max_composite = 0
-
+
        for sender, metrics in sender_abuse_metrics.items():
            if (metrics['composite_score'] > max_composite and
                metrics['message_count'] >= 5):
                max_composite = metrics['composite_score']
                primary_abuser = sender
-
+
        logger.info(f"Primary abuser identified: {primary_abuser}")
        return primary_abuser, sender_abuse_metrics
-

    def analyze_chat_history(self, df):
-       [previous method body removed; the deleted lines were not captured in this page extract]
+        """Analyze entire chat history with focus on primary abuser"""
+        from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations
+
+        logger.info(f"Analyzing chat history with {len(df)} messages")
+
+        try:
+            # Create results dataframe
+            results_df = df.copy()
+
+            # Add analysis columns
+            results_df['abuse_score'] = 0.0
+            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
+            results_df['sentiment'] = "neutral"
+            results_df['darvo_score'] = 0.0
+            results_df['emotional_tone'] = "neutral"
+            results_df['boundary_health'] = "unknown"
+            results_df['risk_level'] = "Low"
+
+            # Analyze each message
+            for i, row in results_df.iterrows():
+                analysis = self.analyze_message(row['message'])
+
+                # Update dataframe with analysis results
+                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
+                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
+                results_df.at[i, 'sentiment'] = analysis['sentiment']
+                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
+                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
+                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
+                results_df.at[i, 'risk_level'] = analysis['risk_level']
+
+            # Identify primary abuser FIRST
+            primary_abuser, sender_abuse_metrics = self.identify_primary_abuser(results_df)
+
+            # Calculate traditional sender statistics for backward compatibility
+            sender_stats = {}
+            for sender in results_df['sender'].unique():
+                sender_df = results_df[results_df['sender'] == sender]
+
+                # Calculate key metrics
+                avg_abuse = sender_df['abuse_score'].mean()
+                max_abuse = sender_df['abuse_score'].max()
+
+                # Get most common patterns
+                all_patterns = []
+                for patterns in sender_df['detected_patterns']:
+                    if patterns:
+                        all_patterns.extend(patterns)
+
+                pattern_counts = Counter(all_patterns)
+                most_common = pattern_counts.most_common(5)  # Get top 5 patterns
+
+                # Calculate percentage of abusive messages
+                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
+                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+
+                # Calculate emotional tone distribution
+                tone_counts = Counter(sender_df['emotional_tone'])
+                most_common_tones = tone_counts.most_common(3)  # Get top 3 emotional tones
+
+                # Calculate DARVO score statistics
+                avg_darvo = sender_df['darvo_score'].mean()
+                high_darvo_count = len(sender_df[sender_df['darvo_score'] >= 0.65])
+                high_darvo_pct = (high_darvo_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+
+                # Calculate risk level distribution
+                risk_counts = Counter(sender_df['risk_level'])
+
+                # Store stats
+                sender_stats[sender] = {
+                    'message_count': len(sender_df),
+                    'avg_abuse_score': avg_abuse,
+                    'max_abuse_score': max_abuse,
+                    'abusive_message_count': abusive_count,
+                    'abusive_message_pct': abusive_pct,
+                    'common_patterns': most_common,
+                    'emotional_tones': most_common_tones,
+                    'avg_darvo_score': avg_darvo,
+                    'high_darvo_count': high_darvo_count,
+                    'high_darvo_pct': high_darvo_pct,
+                    'risk_levels': risk_counts
+                }

+            # If no primary abuser identified, provide basic analysis
+            if not primary_abuser:
+                logger.info("No primary abuser identified - providing general analysis")
+
+                # Detect escalation patterns
+                escalation_data = detect_escalation_patterns(results_df)
+
+                # Determine overall risk level
+                if results_df['risk_level'].isin(['Critical']).any():
+                    overall_risk = "Critical"
+                elif results_df['risk_level'].isin(['High']).any():
+                    overall_risk = "High"
+                elif results_df['risk_level'].isin(['Moderate']).any():
+                    overall_risk = "Moderate"
+                else:
+                    overall_risk = "Low"
+
+                # Generate safety plan
+                all_patterns = []
+                for patterns in results_df['detected_patterns']:
+                    if patterns:
+                        all_patterns.extend(patterns)
+
+                safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
+
+                # Generate recommendations
+                recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
+
+                # Prepare summary
+                summary = {
+                    'message_count': len(results_df),
+                    'date_range': {
+                        'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
+                        'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
+                    },
+                    'overall_risk_level': overall_risk,
+                    'sender_stats': sender_stats,
+                    'sender_abuse_metrics': sender_abuse_metrics,
+                    'primary_abuser': None,
+                    'primary_abuser_analysis': None,
+                    'escalation_data': escalation_data,
+                    'safety_plan': safety_plan,
+                    'recommendations': recommendations,
+                    'analysis_focus': 'general'
+                }
+
+                return results_df, summary

+            # Focus analysis on primary abuser
+            logger.info(f"Focusing analysis on primary abuser: {primary_abuser}")
+            abuser_df = results_df[results_df['sender'] == primary_abuser]
+            victim_df = results_df[results_df['sender'] != primary_abuser]

+            # Generate comprehensive primary abuser analysis
+            primary_abuser_analysis = self._analyze_primary_abuser(
+                abuser_df, victim_df, results_df, primary_abuser
+            ) if hasattr(self, '_analyze_primary_abuser') else None

+            # Detect escalation patterns (focus on abuser's messages)
+            escalation_data = detect_escalation_patterns(abuser_df)

+            # Determine overall risk level based on primary abuser
+            abuser_risk_levels = abuser_df['risk_level'].value_counts()
+            if 'Critical' in abuser_risk_levels and abuser_risk_levels['Critical'] > 0:
                overall_risk = "Critical"
+            elif 'High' in abuser_risk_levels and abuser_risk_levels['High'] > 0:
                overall_risk = "High"
+            elif 'Moderate' in abuser_risk_levels and abuser_risk_levels['Moderate'] > 0:
                overall_risk = "Moderate"
            else:
                overall_risk = "Low"

+            # Generate safety plan based on abuser's patterns
+            abuser_patterns = []
+            for patterns in abuser_df['detected_patterns']:
                if patterns:
+                    abuser_patterns.extend(patterns)

+            safety_plan = generate_safety_plan(overall_risk, abuser_patterns, escalation_data)

+            # Generate recommendations focused on the abuser's behavior
+            recommendations = generate_professional_recommendations(abuser_df, escalation_data, overall_risk)

+            # Prepare focused summary
            summary = {
                'message_count': len(results_df),
+                'abuser_message_count': len(abuser_df),
+                'victim_message_count': len(victim_df),
                'date_range': {
                    'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
                    'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
                },
                'overall_risk_level': overall_risk,
+                'sender_stats': sender_stats,  # Include traditional sender stats for backward compatibility
+                'sender_abuse_metrics': sender_abuse_metrics,  # Include detailed abuse metrics
+                'primary_abuser': primary_abuser,
+                'primary_abuser_analysis': primary_abuser_analysis,
                'escalation_data': escalation_data,
                'safety_plan': safety_plan,
                'recommendations': recommendations,
+                'analysis_focus': 'primary_abuser'  # Flag to indicate focused analysis
            }

            return results_df, summary
+
+        except Exception as e:
+            logger.error(f"Error in analyze_chat_history: {e}")
+            logger.error(traceback.format_exc())
+            return df, {
+                'message_count': len(df),
+                'date_range': {
+                    'start': df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown',
+                    'end': df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
+                },
+                'overall_risk_level': "Unknown",
+                'sender_stats': {},
+                'sender_abuse_metrics': {},
+                'primary_abuser': None,
+                'primary_abuser_analysis': None,
+                'escalation_data': {},
+                'safety_plan': "Error generating safety plan.",
+                'recommendations': [],
+                'analysis_focus': 'error'
+            }

    def _analyze_primary_abuser(self, abuser_df, victim_df, full_df, primary_abuser):
        """Generate comprehensive analysis of the primary abuser"""
@@ -692,45 +688,4 @@ class MessageAnalyzer:
            'common_patterns': most_common,
            'emotional_tones': most_common_tones,
            'avg_darvo_score': avg_darvo,
-            '
-            'high_darvo_pct': high_darvo_pct,
-            'risk_levels': risk_counts
-        }
-
-        escalation_data = detect_escalation_patterns(results_df)
-
-        if results_df['risk_level'].isin(['Critical']).any():
-            overall_risk = "Critical"
-        elif results_df['risk_level'].isin(['High']).any():
-            overall_risk = "High"
-        elif results_df['risk_level'].isin(['Moderate']).any():
-            overall_risk = "Moderate"
-        else:
-            overall_risk = "Low"
-
-        all_patterns = []
-        for patterns in results_df['detected_patterns']:
-            if patterns:
-                all_patterns.extend(patterns)
-
-        safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
-        recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
-
-        summary = {
-            'message_count': len(results_df),
-            'date_range': {
-                'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
-                'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
-            },
-            'overall_risk_level': overall_risk,
-            'sender_stats': sender_stats,
-            'primary_abuser': None,
-            'primary_abuser_analysis': None,
-            'sender_abuse_metrics': sender_abuse_metrics,
-            'escalation_data': escalation_data,
-            'safety_plan': safety_plan,
-            'recommendations': recommendations,
-            'analysis_focus': 'general'
-        }
-
-        return results_df, summary
+            'high_
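Finally, a hypothetical driver (not part of the commit) showing how the updated analyze_chat_history would presumably be called from the Space. The import path, the no-argument constructor, and the column layout ('timestamp', 'sender', 'message') are assumptions read off the identifiers visible in the diff.

# Hypothetical usage of the updated analyzer; names and constructor are assumed.
import pandas as pd

from analyzer import MessageAnalyzer  # assumed import path within this Space

df = pd.DataFrame({
    "timestamp": pd.to_datetime(["2024-01-01 09:00", "2024-01-01 09:05", "2024-01-02 21:30"]),
    "sender": ["Alex", "Sam", "Alex"],
    "message": ["Where were you?", "At work, like I said.", "You always twist everything I say."],
})

analyzer = MessageAnalyzer()  # assumes a no-argument constructor
results_df, summary = analyzer.analyze_chat_history(df)

# Keys below are the ones assembled in the summary dicts shown in the diff
print(summary["analysis_focus"])       # 'general', 'primary_abuser', or 'error'
print(summary["overall_risk_level"])
print(summary.get("primary_abuser"))   # None unless a sender clears the thresholds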