Spaces:

SamanthaStorm
/

TetherPro

Runtime error

App Files Files Community

SamanthaStorm committed on Jul 12

Commit

37c4d94

verified ·

1 Parent(s): 8538561

Create analyzer.py

Browse files

Files changed (1) hide show

analyzer.py +314 -153

analyzer.py CHANGED Viewed

@@ -1,165 +1,326 @@
-import json
-from datetime import datetime
 import pandas as pd
 import numpy as np
 from collections import Counter
-from dataclasses import asdict
-from typing import Dict, List
-from models import MessageAnalysis, RiskTrend
-from utils import logger
-class TetherProAnalyzer:
-    """Comprehensive temporal analysis for Tether Pro"""
-    def __init__(self):
-        self.conversation_history: List[MessageAnalysis] = []
-    def analyze_conversation_history(self, messages_json: str) -> Dict:
-        """Parse JSON and run full temporal analysis"""
         try:
-            raw = json.loads(messages_json)
-            self.conversation_history = []
-            for i, msg in enumerate(raw):
-                ma = MessageAnalysis(
-                    timestamp=msg.get('timestamp', datetime.now().isoformat()),
-                    message_id=msg.get('id', f"msg_{i}"),
-                    text=msg.get('text', msg.get('message', '')),
-                    sender=msg.get('sender', 'unknown'),
-                    abuse_score=float(msg.get('abuse_score', 0)),
-                    darvo_score=float(msg.get('darvo_score', 0)),
-                    boundary_health=msg.get('boundary_health', 'unknown'),
-                    detected_patterns=msg.get('patterns', msg.get('detected_patterns', [])),
-                    emotional_tone=msg.get('emotional_tone', 'neutral'),
-                    risk_level=msg.get('risk_level', 'low')
-                )
-                self.conversation_history.append(ma)
-            return self._perform_temporal_analysis()
-        except Exception as e:
-            logger.error(f"Error in analyze_conversation_history: {e}")
             return {
-                'error': f"Analysis failed: {e}",
-                'total_messages': 0,
-                'temporal_analysis': {},
-                'recommendations': []
             }
-    def _perform_temporal_analysis(self) -> Dict:
-        """Perform comprehensive temporal analysis"""
-        n = len(self.conversation_history)
-        if n < 3:
             return {
-                'total_messages': n,
-                'analysis_status': 'insufficient_data',
-                'message': 'Need at least 3 messages for temporal analysis',
-                'basic_stats': self._get_basic_stats(),
-                'recommendations': ['Upload more conversation history for detailed analysis']
             }
-        df = self._to_dataframe()
-        escalation = self._detect_escalation_trends(df)
-        cycles = self._detect_cycles(df)
-        combos = self._analyze_pattern_combinations(df)
-        risk = self._calculate_risk_trajectory(df)
-        triggers = self._analyze_temporal_triggers(df)
-        recs = self._generate_recommendations(escalation, combos, risk)
-        viz = self._generate_visualizations(df)
-        date_range = self._get_date_range()
-        return {
-            'total_messages': n,
-            'analysis_status': 'complete',
-            'basic_stats': self._get_basic_stats(),
-            'temporal_analysis': {
-                'escalation_patterns': escalation,
-                'cyclical_patterns': cycles,
-                'pattern_combinations': combos,
-                'temporal_triggers': triggers
-            },
-            'risk_assessment': risk,
-            'professional_recommendations': recs,
-            'visualizations': viz,
-            'date_range': date_range
-        }
-    def _to_dataframe(self) -> pd.DataFrame:
-        """Convert conversation history to DataFrame"""
-        data = []
-        for msg in self.conversation_history:
-            try:
-                ts = datetime.fromisoformat(msg.timestamp.replace('Z', '+00:00'))
-            except:
-                ts = datetime.now()
-            data.append({
-                'timestamp': ts,
-                'message_id': msg.message_id,
-                'sender': msg.sender,
-                'abuse_score': msg.abuse_score,
-                'darvo_score': msg.darvo_score,
-                'patterns': '|'.join(msg.detected_patterns)
-            })
-        return pd.DataFrame(data).sort_values('timestamp')
-    def _detect_escalation_trends(self, df: pd.DataFrame) -> Dict:
-        """Detect escalating abuse patterns over time"""
-        if len(df) < 5:
-            return {'detected': False, 'reason': 'insufficient_data'}
-        df['abuse_rolling'] = df['abuse_score'].rolling(3, min_periods=1).mean()
-        recent = df.tail(10)
-        if len(recent) < 5:
-            return {'detected': False, 'reason': 'insufficient_recent_data'}
-        x = np.arange(len(recent))
-        y = recent['abuse_rolling'].values
-        corr = np.corrcoef(x, y)[0,1] if len(x) > 1 else 0
-        if corr > 0.3:
-            inc = float(y[-1] - y[0])
-            severity = 'high' if inc > 20 else 'moderate' if inc > 10 else 'mild'
-            return {
-                'detected': True,
-                'severity': severity,
-                'increase_amount': round(inc, 1),
-                'timeframe': f"Last {len(recent)} messages",
-                'confidence': min(abs(corr), 1.0),
-                'description': f"Abuse intensity increased by {inc:.1f}% over recent communications"
             }
-        return {'detected': False, 'reason': 'no_escalation_trend'}
-    def _detect_cycles(self, df: pd.DataFrame) -> Dict:
-        """Detect cyclical abuse patterns"""
-        if len(df) < 15:
-            return {'detected': False, 'reason': 'insufficient_data_for_cycles'}
-        df['date'] = df['timestamp'].dt.date
-        daily = df.groupby('date')['abuse_score'].mean()
-        if len(daily) < 10:
-            return {'detected': False, 'reason': 'insufficient_days'}
-        scores = daily.values
-        peaks = [
-            i for i in range(1, len(scores)-1)
-            if scores[i] > scores[i-1] and scores[i] > scores[i+1] and scores[i] > 60
-        ]
-        valleys = [
-            i for i in range(1, len(scores)-1)
-            if scores[i] < scores[i-1] and scores[i] < scores[i+1] and scores[i] < 40
-        ]
-        if len(peaks) >= 2 and len(valleys) >= 2:
-            intervals = [peaks[i+1] - peaks[i] for i in range(len(peaks)-1)]
-            avg = float(np.mean(intervals))
-            return {
-                'detected': True,
-                'cycle_count': min(len(peaks), len(valleys)),
-                'avg_cycle_length_days': round(avg, 1),
-                'pattern_type': 'tension_escalation_reconciliation',
-                'confidence': min(len(peaks) / 3.0, 1.0),
-                'description': f"Detected {min(len(peaks), len(valleys))} abuse cycles with average length of {avg:.1f} days"
             }
-        return {'detected': False, 'reason': 'no_cyclical_pattern'}
-    def _analyze_pattern_combinations(self, df: pd.DataFrame) -> List[Dict]:
-        """Analyze dangerous pattern combinations"""
-        allp = []
-        for s in df['patterns']:
-            if s:
-                allp.extend(s.split('|'))
-        counts = Counter(allp)
-        combos = [
-            {'name': 'Control + Manipulation Complex', 'patterns': ['control', 'gaslighting', 'darvo'], 'severity': 'critical'},
-            {'name': 'Stalking + Threat Pattern', 'patterns': ['stalking language', 'veiled threats

 import pandas as pd
 import numpy as np
+import logging
+from datetime import datetime
+import traceback
 from collections import Counter
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class MessageAnalyzer:
+    def __init__(self, model_manager):
+        """Initialize analyzer with model manager.
+
+        Args:
+            model_manager: Object that supplies the prediction calls used by
+                ``analyze_message`` (``predict_sentiment``,
+                ``predict_boundary_health``, ``predict_abuse_patterns``,
+                ``get_pattern_weight``, ``predict_darvo`` and
+                ``get_emotion_profile``).
+        """
+        self.model_manager = model_manager
+        # Per-label cutoffs, passed unchanged to
+        # model_manager.predict_abuse_patterns() by analyze_message().
+        # NOTE(review): presumably the minimum score a label needs in order
+        # to count as detected -- confirm against the model manager.
+        self.thresholds = {
+            "recovery phase": 0.278,
+            "control": 0.287,
+            "gaslighting": 0.144,
+            "guilt tripping": 0.220,
+            "dismissiveness": 0.142,
+            "blame shifting": 0.183,
+            "projection": 0.253,
+            "insults": 0.247,
+            "contradictory statements": 0.200,
+            "obscure language": 0.455,
+            "nonabusive": 0.281,
+            "veiled threats": 0.310,
+            "stalking language": 0.339,
+            "false concern": 0.334,
+            "false equivalence": 0.317,
+            "future faking": 0.385
+        }
+    def analyze_message(self, text):
+        """Analyze a single message for abuse patterns.
+
+        Args:
+            text: Message text. ``None`` and NaN (what pandas yields for an
+                empty cell) are treated as an empty message; any other
+                non-string value is converted with ``str()``.
+
+        Returns:
+            dict with the keys ``abuse_score``, ``detected_patterns``,
+            ``matched_scores``, ``sentiment``, ``sentiment_confidence``,
+            ``stage``, ``darvo_score``, ``emotional_tone``,
+            ``boundary_assessment`` and ``risk_level``. If anything inside
+            the analysis raises, the error is logged and a zeroed result with
+            ``sentiment`` and ``emotional_tone`` set to ``"error"`` and
+            ``risk_level`` set to ``"Unknown"`` is returned instead.
+        """
+        from utils import (
+            detect_explicit_abuse, detect_enhanced_threats, get_emotional_tone_tag,
+            compute_abuse_score, get_boundary_assessment, calculate_enhanced_risk_level
+        )
+        # Normalise the input before it is sliced for the debug log below:
+        # that log line sits outside the try block, so a None/NaN message
+        # would otherwise raise straight out of this method.
+        if text is None or (isinstance(text, float) and text != text):
+            text = ""
+        elif not isinstance(text, str):
+            text = str(text)
+        logger.debug(f"Analyzing message: {text[:50]}...")
         try:
+            if not text.strip():
+                logger.debug("Empty text, returning zeros")
+                return {
+                    'abuse_score': 0.0,
+                    'detected_patterns': [],
+                    'matched_scores': [],
+                    'sentiment': "neutral",
+                    'sentiment_confidence': 0.5,
+                    'stage': 1,
+                    'darvo_score': 0.0,
+                    'emotional_tone': "neutral",
+                    'boundary_assessment': {'assessment': 'neutral', 'confidence': 0.5},
+                    'risk_level': "Low"
+                }
+            # Check for explicit abuse
+            explicit_abuse = detect_explicit_abuse(text)
+            logger.debug(f"Explicit abuse detected: {explicit_abuse}")
+            # Get sentiment
+            sentiment, sentiment_confidence = self.model_manager.predict_sentiment(text)
+            logger.debug(f"Sentiment: {sentiment} (confidence: {sentiment_confidence:.3f})")
+            # Get boundary health
+            boundary_health = self.model_manager.predict_boundary_health(text)
+            boundary_assessment = get_boundary_assessment(text, boundary_health)
+            logger.debug(f"Boundary health: {boundary_assessment['assessment']}")
+            # Early supportive message check
+            innocent_indicators = [
+                'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
+                'device', 'battery', 'charger', 'wifi', 'internet', 'computer',
+                'sorry', 'apologize', 'my fault', 'mistake'
+            ]
+            # Enhanced early return check - now includes boundary health
+            if (any(indicator in text.lower() for indicator in innocent_indicators) and
+                len(text.split()) < 20 and
+                not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
+                boundary_health > 0):  # Healthy boundary
+                # If sentiment is strongly supportive AND boundary health is good, return early
+                if sentiment == "supportive" and sentiment_confidence > 0.8:
+                    logger.debug("Early return: Message appears to be innocent/supportive with healthy boundaries")
+                    return {
+                        'abuse_score': 0.0,
+                        'detected_patterns': [],
+                        'matched_scores': [],
+                        'sentiment': sentiment,
+                        'sentiment_confidence': sentiment_confidence,
+                        'stage': 1,
+                        'darvo_score': 0.0,
+                        'emotional_tone': "neutral",
+                        'boundary_assessment': boundary_assessment,
+                        'risk_level': "Low"
+                    }
+            # Get abuse patterns
+            threshold_labels, matched_scores = self.model_manager.predict_abuse_patterns(text, self.thresholds)
+            logger.debug(f"Detected patterns: {threshold_labels}")
+            # Check for enhanced threats
+            enhanced_patterns = detect_enhanced_threats(text, threshold_labels)
+            for pattern in enhanced_patterns:
+                if pattern not in threshold_labels:
+                    threshold_labels.append(pattern)
+                    # Add to matched_scores with high confidence
+                    weight = self.model_manager.get_pattern_weight(pattern)
+                    matched_scores.append((pattern, 0.85, weight))
+            # Get DARVO score
+            darvo_score = self.model_manager.predict_darvo(text)
+            logger.debug(f"DARVO score: {darvo_score:.3f}")
+            # Get emotions
+            emotions = self.model_manager.get_emotion_profile(text)
+            logger.debug(f"Emotions: {emotions}")
+            # Calculate abuse score
+            abuse_score = compute_abuse_score(matched_scores, sentiment)
+            logger.debug(f"Abuse score: {abuse_score:.1f}")
+            # Apply explicit abuse override
+            if explicit_abuse:
+                abuse_score = max(abuse_score, 70.0)
+                if "insults" not in threshold_labels:
+                    threshold_labels.append("insults")
+                    matched_scores.append(("insults", 0.9, 1.4))
+            # Apply boundary health modifier to abuse score
+            if boundary_health > 0 and not explicit_abuse:
+                # Healthy boundaries - cap abuse score lower
+                abuse_score = min(abuse_score, 35.0)
+                logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")
+            # Apply sentiment-based score capping
+            if sentiment == "supportive" and not explicit_abuse:
+                # For supportive messages, cap the abuse score much lower
+                abuse_score = min(abuse_score, 30.0)
+                logger.debug(f"Capped abuse score to {abuse_score} due to supportive sentiment")
+            # Get emotional tone
+            emotional_tone = get_emotional_tone_tag(text, sentiment, threshold_labels, abuse_score, emotions)
+            logger.debug(f"Emotional tone: {emotional_tone}")
+            # Set stage
+            stage = 2 if explicit_abuse or abuse_score > 70 else 1
+            # Calculate risk level
+            risk_level = calculate_enhanced_risk_level(
+                abuse_score,
+                threshold_labels,
+                "Low" if abuse_score < 50 else "Moderate" if abuse_score < 70 else "High",
+                darvo_score
+            )
             return {
+                'abuse_score': abuse_score,
+                'detected_patterns': threshold_labels,
+                'matched_scores': matched_scores,
+                'sentiment': sentiment,
+                'sentiment_confidence': sentiment_confidence,
+                'stage': stage,
+                'darvo_score': darvo_score,
+                'emotional_tone': emotional_tone,
+                'boundary_assessment': boundary_assessment,
+                'risk_level': risk_level
             }
+        except Exception as e:
+            logger.error(f"Error in analyze_message: {e}")
+            logger.error(traceback.format_exc())
             return {
+                'abuse_score': 0.0,
+                'detected_patterns': [],
+                'matched_scores': [],
+                'sentiment': "error",
+                'sentiment_confidence': 0.0,
+                'stage': 1,
+                'darvo_score': 0.0,
+                'emotional_tone': "error",
+                'boundary_assessment': {'assessment': 'error', 'confidence': 0.0},
+                'risk_level': "Unknown"
             }
+    def analyze_chat_history(self, df):
+        """Analyze entire chat history.
+
+        Runs ``analyze_message`` on every row, then aggregates per-sender
+        statistics, an overall risk level, a safety plan and professional
+        recommendations.
+
+        Args:
+            df: DataFrame with ``message``, ``sender`` and ``timestamp``
+                columns. ``timestamp`` values must support ``strftime``
+                (datetime-like) for the date range in the summary.
+
+        Returns:
+            Tuple ``(results_df, summary)``. ``results_df`` is a copy of
+            ``df`` with the columns ``abuse_score``, ``detected_patterns``,
+            ``sentiment``, ``darvo_score``, ``emotional_tone``,
+            ``boundary_health`` and ``risk_level`` added. ``summary`` is a
+            dict with ``message_count``, ``date_range``,
+            ``overall_risk_level``, ``sender_stats``, ``primary_abuser``,
+            ``escalation_data``, ``safety_plan`` and ``recommendations``.
+            If anything raises, the error is logged and the untouched ``df``
+            is returned with a fallback summary whose
+            ``overall_risk_level`` is ``"Unknown"``.
+        """
+        from utils import detect_escalation_patterns, generate_safety_plan, generate_professional_recommendations
+        logger.info(f"Analyzing chat history with {len(df)} messages")
+        try:
+            # Create results dataframe
+            results_df = df.copy()
+            # Add analysis columns
+            results_df['abuse_score'] = 0.0
+            results_df['detected_patterns'] = [[] for _ in range(len(results_df))]
+            results_df['sentiment'] = "neutral"
+            results_df['darvo_score'] = 0.0
+            results_df['emotional_tone'] = "neutral"
+            results_df['boundary_health'] = "unknown"
+            results_df['risk_level'] = "Low"
+            # Analyze each message
+            for i, row in results_df.iterrows():
+                analysis = self.analyze_message(row['message'])
+                # Update dataframe with analysis results
+                results_df.at[i, 'abuse_score'] = analysis['abuse_score']
+                results_df.at[i, 'detected_patterns'] = analysis['detected_patterns']
+                results_df.at[i, 'sentiment'] = analysis['sentiment']
+                results_df.at[i, 'darvo_score'] = analysis['darvo_score']
+                results_df.at[i, 'emotional_tone'] = analysis['emotional_tone']
+                results_df.at[i, 'boundary_health'] = analysis['boundary_assessment']['assessment']
+                results_df.at[i, 'risk_level'] = analysis['risk_level']
+            # Calculate sender statistics
+            sender_stats = {}
+            for sender in results_df['sender'].unique():
+                sender_df = results_df[results_df['sender'] == sender]
+                # Calculate key metrics
+                avg_abuse = sender_df['abuse_score'].mean()
+                max_abuse = sender_df['abuse_score'].max()
+                # Get most common patterns
+                all_patterns = []
+                for patterns in sender_df['detected_patterns']:
+                    if patterns:
+                        all_patterns.extend(patterns)
+                pattern_counts = Counter(all_patterns)
+                most_common = pattern_counts.most_common(3)
+                # Calculate percentage of abusive messages
+                abusive_count = len(sender_df[sender_df['abuse_score'] >= 50])
+                abusive_pct = (abusive_count / len(sender_df)) * 100 if len(sender_df) > 0 else 0
+                # Store stats
+                sender_stats[sender] = {
+                    'message_count': len(sender_df),
+                    'avg_abuse_score': avg_abuse,
+                    'max_abuse_score': max_abuse,
+                    'abusive_message_count': abusive_count,
+                    'abusive_message_pct': abusive_pct,
+                    'common_patterns': most_common
+                }
+            # Detect escalation patterns
+            escalation_data = detect_escalation_patterns(results_df)
+            # Determine overall risk level
+            if results_df['risk_level'].isin(['Critical']).any():
+                overall_risk = "Critical"
+            elif results_df['risk_level'].isin(['High']).any():
+                overall_risk = "High"
+            elif results_df['risk_level'].isin(['Moderate']).any():
+                overall_risk = "Moderate"
+            else:
+                overall_risk = "Low"
+            # Generate safety plan
+            all_patterns = []
+            for patterns in results_df['detected_patterns']:
+                if patterns:
+                    all_patterns.extend(patterns)
+            safety_plan = generate_safety_plan(overall_risk, all_patterns, escalation_data)
+            # Generate professional recommendations
+            recommendations = generate_professional_recommendations(results_df, escalation_data, overall_risk)
+            # Identify primary abuser (if any)
+            primary_abuser = None
+            max_abusive_pct = 0
+            for sender, stats in sender_stats.items():
+                if stats['message_count'] >= 5 and stats['abusive_message_pct'] > max_abusive_pct:
+                    max_abusive_pct = stats['abusive_message_pct']
+                    primary_abuser = sender
+            # Only identify primary abuser if they have significant abusive content
+            if max_abusive_pct < 20:
+                primary_abuser = None
+            # Prepare summary
+            summary = {
+                'message_count': len(results_df),
+                'date_range': {
+                    'start': results_df['timestamp'].min().strftime('%Y-%m-%d'),
+                    'end': results_df['timestamp'].max().strftime('%Y-%m-%d')
+                },
+                'overall_risk_level': overall_risk,
+                'sender_stats': sender_stats,
+                'primary_abuser': primary_abuser,
+                'escalation_data': escalation_data,
+                'safety_plan': safety_plan,
+                'recommendations': recommendations
             }
+            return results_df, summary
+        except Exception as e:
+            logger.error(f"Error in analyze_chat_history: {e}")
+            logger.error(traceback.format_exc())
+            # The failure may have come from the timestamp column itself
+            # (missing, or not datetime-like), so the fallback summary must
+            # not raise a second time from inside this handler.
+            try:
+                fallback_start = df['timestamp'].min().strftime('%Y-%m-%d') if not df.empty else 'unknown'
+                fallback_end = df['timestamp'].max().strftime('%Y-%m-%d') if not df.empty else 'unknown'
+            except Exception:
+                fallback_start = fallback_end = 'unknown'
+            return df, {
+                'message_count': len(df),
+                'date_range': {
+                    'start': fallback_start,
+                    'end': fallback_end
+                },
+                'overall_risk_level': "Unknown",
+                'sender_stats': {},
+                'primary_abuser': None,
+                'escalation_data': {},
+                'safety_plan': "Error generating safety plan.",
+                'recommendations': []
             }