SamanthaStorm committed on
Commit 483940b · verified · 1 Parent(s): 9a02869

Create models.py

Files changed (1)
  1. models.py +252 -22
models.py CHANGED
@@ -1,24 +1,254 @@
- from dataclasses import dataclass
- from typing import List
- from enum import Enum
-
- @dataclass
- class MessageAnalysis:
-     timestamp: str
-     message_id: str
-     text: str
-     sender: str
-     abuse_score: float
-     darvo_score: float
-     boundary_health: str
-     detected_patterns: List[str]
-     emotional_tone: str
-     risk_level: str
-
- class RiskTrend(Enum):
-     ESCALATING = "escalating"
-     IMPROVING = "improving"
-     STABLE_HIGH = "stable_high"
-     STABLE_MODERATE = "stable_moderate"
-     CYCLICAL = "cyclical"
-     UNKNOWN = "unknown"
+ import torch
+ import logging
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from torch.nn.functional import softmax
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ class ModelManager:
+     def __init__(self, device=None):
+         """Initialize model manager with device detection"""
+         self.device = device if device else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         logger.info(f"Using device: {self.device}")
+
+         # Initialize model containers
+         self.models = {}
+         self.tokenizers = {}
+         self.emotion_pipeline = None  # set in load_models(); None keeps get_emotion_profile safe before loading
+
+     def load_models(self):
+         """Load all required models"""
+         # Core abuse pattern detection model
+         self._load_model(
+             "abuse_patterns",
+             "SamanthaStorm/tether-multilabel-v6",
+             is_multilabel=True
+         )
+
+         # Sentiment model
+         self._load_model(
+             "sentiment",
+             "SamanthaStorm/tether-sentiment-v3",
+             is_multilabel=False
+         )
+
+         # DARVO model
+         self._load_model(
+             "darvo",
+             "SamanthaStorm/tether-darvo-regressor-v1",
+             is_multilabel=False,
+             is_regression=True
+         )
+
+         # Boundary health model
+         self._load_model(
+             "boundary",
+             "SamanthaStorm/healthy-boundary-predictor",
+             is_multilabel=False
+         )
+
+         # Intent analyzer model
+         self._load_model(
+             "intent",
+             "SamanthaStorm/intentanalyzer",
+             is_multilabel=False
+         )
+
+         # Emotion model
+         try:
+             from transformers import pipeline
+             self.emotion_pipeline = pipeline(
+                 "text-classification",
+                 model="j-hartmann/emotion-english-distilroberta-base",
+                 top_k=None,  # return scores for all emotion labels
+                 truncation=True,
+                 device=0 if torch.cuda.is_available() else -1
+             )
+             logger.info("Emotion pipeline loaded successfully")
+         except Exception as e:
+             logger.error(f"Error loading emotion pipeline: {e}")
+             self.emotion_pipeline = None
+
+         logger.info("All models loaded successfully")
+
+     def _load_model(self, name, model_path, is_multilabel=False, is_regression=False):
+         """Helper to load a model and its tokenizer"""
+         try:
+             logger.info(f"Loading {name} model from {model_path}")
+             self.models[name] = AutoModelForSequenceClassification.from_pretrained(model_path).to(self.device)
+             self.tokenizers[name] = AutoTokenizer.from_pretrained(model_path, use_fast=False)
+
+             # Store model metadata
+             self.models[name].is_multilabel = is_multilabel
+             self.models[name].is_regression = is_regression
+
+             logger.info(f"{name} model loaded successfully")
+         except Exception as e:
+             logger.error(f"Error loading {name} model: {e}")
+             raise
+
+     def predict_abuse_patterns(self, text, thresholds):
+         """Predict abuse patterns with per-label thresholds"""
+         if not text.strip():
+             return [], []
+
+         inputs = self._prepare_inputs("abuse_patterns", text)
+
+         with torch.no_grad():
+             outputs = self.models["abuse_patterns"](**inputs)
+
+         # Get sigmoid scores for multi-label classification
+         raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).cpu().numpy()
+
+         # Get labels
+         labels = self.get_abuse_pattern_labels()
+
+         # Apply thresholds and return
+         predictions = list(zip(labels, raw_scores))
+         matched_scores = []
+         threshold_labels = []
+
+         for label, score in predictions:
+             if score > thresholds.get(label, 0.25):
+                 threshold_labels.append(label)
+                 weight = self.get_pattern_weight(label)
+                 matched_scores.append((label, float(score), weight))
+
+         return threshold_labels, matched_scores
+
+     def predict_sentiment(self, text):
+         """Predict sentiment (supportive vs. undermining)"""
+         if not text.strip():
+             return "neutral", 0.5
+
+         inputs = self._prepare_inputs("sentiment", text)
+
+         with torch.no_grad():
+             outputs = self.models["sentiment"](**inputs)
+             logits = outputs.logits[0]
+             probs = softmax(logits, dim=-1).cpu().numpy()
+
+         # Get sentiment labels
+         labels = ["supportive", "undermining"]
+         sentiment = labels[int(probs.argmax())]
+         confidence = float(probs.max())
+
+         return sentiment, confidence
+
+     def predict_darvo(self, text):
+         """Predict DARVO score in [0, 1]"""
+         if not text.strip():
+             return 0.0
+
+         inputs = self._prepare_inputs("darvo", text)
+
+         with torch.no_grad():
+             logits = self.models["darvo"](**inputs).logits
+             # Sigmoid squashes the regression output into [0, 1]
+             score = float(torch.sigmoid(logits.cpu()).item())
+
+         return score
+
+     def predict_boundary_health(self, text):
+         """Predict boundary health (1 = healthy, 0 = unhealthy)"""
+         if not text.strip():
+             return 0
+
+         inputs = self._prepare_inputs("boundary", text)
+
+         with torch.no_grad():
+             outputs = self.models["boundary"](**inputs)
+             predictions = softmax(outputs.logits, dim=-1)
+             predicted_class = torch.argmax(predictions, dim=-1).item()
+
+         return predicted_class
+
+     def predict_intent(self, text):
+         """Predict intent"""
+         if not text.strip():
+             return "neutral", 0.5
+
+         inputs = self._prepare_inputs("intent", text)
+
+         with torch.no_grad():
+             outputs = self.models["intent"](**inputs)
+             probs = softmax(outputs.logits, dim=-1).cpu().numpy()[0]
+
+         # Intent labels (adjust based on actual model outputs)
+         labels = ["neutral", "manipulative", "supportive", "controlling"]
+         intent = labels[int(probs.argmax())]
+         confidence = float(probs.max())
+
+         return intent, confidence
+
+     def get_emotion_profile(self, text):
+         """Get emotion profile from text"""
+         default_profile = {
+             "sadness": 0.0,
+             "joy": 0.0,
+             "neutral": 0.0,
+             "disgust": 0.0,
+             "anger": 0.0,
+             "fear": 0.0
+         }
+         if not text.strip() or not self.emotion_pipeline:
+             return default_profile
+
+         try:
+             emotions = self.emotion_pipeline(text)
+             # With top_k=None the pipeline may return [[{label, score}, ...]] for a single string
+             if isinstance(emotions, list) and emotions and isinstance(emotions[0], list):
+                 emotions = emotions[0]
+             if isinstance(emotions, list) and emotions and isinstance(emotions[0], dict):
+                 return {e['label'].lower(): round(e['score'], 3) for e in emotions}
+             return default_profile
+         except Exception as e:
+             logger.error(f"Error in get_emotion_profile: {e}")
+             return default_profile
+
+     def _prepare_inputs(self, model_name, text):
+         """Prepare inputs for the model"""
+         inputs = self.tokenizers[model_name](
+             text,
+             return_tensors="pt",
+             truncation=True,
+             padding=True
+         )
+         return {k: v.to(self.device) for k, v in inputs.items()}
+
+     def get_abuse_pattern_labels(self):
+         """Get abuse pattern labels"""
+         return [
+             "recovery phase", "control", "gaslighting", "guilt tripping", "dismissiveness",
+             "blame shifting", "nonabusive", "projection", "insults",
+             "contradictory statements", "obscure language",
+             "veiled threats", "stalking language", "false concern",
+             "false equivalence", "future faking"
+         ]
+
+     def get_pattern_weight(self, label):
+         """Get pattern weight for scoring"""
+         weights = {
+             "recovery phase": 0.7,
+             "control": 1.4,
+             "gaslighting": 1.3,
+             "guilt tripping": 1.2,
+             "dismissiveness": 0.9,
+             "blame shifting": 1.0,
+             "projection": 0.5,
+             "insults": 1.4,
+             "contradictory statements": 1.0,
+             "obscure language": 0.9,
+             "nonabusive": 0.0,
+             "veiled threats": 1.6,
+             "stalking language": 1.8,
+             "false concern": 1.1,
+             "false equivalence": 1.3,
+             "future faking": 0.8
+         }
+         return weights.get(label, 1.0)