Spaces:

SamanthaStorm
/

TetherPro

Runtime error

App Files Files Community

SamanthaStorm committed on Jul 12

Commit

b63e8f7

verified ·

1 Parent(s): c3eb637

Create models.py

Browse files

Files changed (1) hide show

models.py +290 -135

models.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import torch
 import logging
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 from torch.nn.functional import sigmoid, softmax
 # Set up logging
@@ -18,169 +20,314 @@ class ModelManager:
         self.tokenizers = {}
     def load_models(self):
-        """Load all required models"""
-        # Core abuse pattern detection model
-        self._load_model(
-            "abuse_patterns",
-            "SamanthaStorm/tether-multilabel-v6",
-            is_multilabel=True
-        )
-        # Sentiment model
-        self._load_model(
-            "sentiment",
-            "SamanthaStorm/tether-sentiment-v3",
-            is_multilabel=False
-        )
-        # DARVO model
-        self._load_model(
-            "darvo",
-            "SamanthaStorm/tether-darvo-regressor-v1",
-            is_multilabel=False,
-            is_regression=True
-        )
-        # Boundary health model
-        self._load_model(
-            "boundary",
-            "SamanthaStorm/healthy-boundary-predictor",
-            is_multilabel=False
-        )
-        # Intent analyzer model
-        self._load_model(
-            "intent",
-            "SamanthaStorm/intentanalyzer",
-            is_multilabel=False
-        )
-        # Emotion model
-        try:
-            from transformers import pipeline
-            self.emotion_pipeline = pipeline(
-                "text-classification",
-                model="j-hartmann/emotion-english-distilroberta-base",
-                return_all_scores=True,
-                top_k=None,
-                truncation=True,
-                device=0 if torch.cuda.is_available() else -1
-            )
-            logger.info("Emotion pipeline loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading emotion pipeline: {e}")
-            self.emotion_pipeline = None
-        logger.info("All models loaded successfully")
-    def _load_model(self, name, model_path, is_multilabel=False, is_regression=False):
-        """Helper to load a model and its tokenizer"""
-        try:
-            logger.info(f"Loading {name} model from {model_path}")
-            self.models[name] = AutoModelForSequenceClassification.from_pretrained(model_path).to(self.device)
-            self.tokenizers[name] = AutoTokenizer.from_pretrained(model_path, use_fast=False)
-            # Store model metadata
-            self.models[name].is_multilabel = is_multilabel
-            self.models[name].is_regression = is_regression
-            logger.info(f"{name} model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading {name} model: {e}")
-            raise
     def predict_abuse_patterns(self, text, thresholds):
         """Predict abuse patterns with thresholds"""
         if not text.strip():
             return [], []
-        inputs = self._prepare_inputs("abuse_patterns", text)
-        with torch.no_grad():
-            outputs = self.models["abuse_patterns"](**inputs)
-        # Get sigmoid scores for multi-label classification
-        raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).cpu().numpy()
-        # Get labels
-        labels = self.get_abuse_pattern_labels()
-        # Apply thresholds and return
-        predictions = list(zip(labels, raw_scores))
-        matched_scores = []
-        threshold_labels = []
-        for label, score in predictions:
-            if score > thresholds.get(label, 0.25):
-                threshold_labels.append(label)
-                weight = self.get_pattern_weight(label)
-                matched_scores.append((label, float(score), weight))
-        return threshold_labels, matched_scores
     def predict_sentiment(self, text):
         """Predict sentiment (supportive vs undermining)"""
         if not text.strip():
             return "neutral", 0.5
-        inputs = self._prepare_inputs("sentiment", text)
-        with torch.no_grad():
-            outputs = self.models["sentiment"](**inputs)
-            logits = outputs.logits[0]
-            probs = softmax(logits, dim=-1).cpu().numpy()
-        # Get sentiment labels
-        labels = ["supportive", "undermining"]
-        sentiment = labels[int(probs.argmax())]
-        confidence = float(probs.max())
-        return sentiment, confidence
     def predict_darvo(self, text):
         """Predict DARVO score"""
         if not text.strip():
             return 0.0
-        inputs = self._prepare_inputs("darvo", text)
-        with torch.no_grad():
-            logits = self.models["darvo"](**inputs).logits
-            score = float(sigmoid(logits.cpu()).item())
-        return score
     def predict_boundary_health(self, text):
         """Predict boundary health (1 for healthy, 0 for unhealthy)"""
         if not text.strip():
             return 0
-        inputs = self._prepare_inputs("boundary", text)
-        with torch.no_grad():
-            outputs = self.models["boundary"](**inputs)
-            predictions = softmax(outputs.logits, dim=-1)
-            predicted_class = torch.argmax(predictions, dim=-1).item()
-        return predicted_class
     def predict_intent(self, text):
         """Predict intent"""
         if not text.strip():
             return "neutral", 0.5
-        inputs = self._prepare_inputs("intent", text)
-        with torch.no_grad():
-            outputs = self.models["intent"](**inputs)
-            probs = softmax(outputs.logits, dim=-1).cpu().numpy()[0]
-        # Get intent labels (adjust based on actual model outputs)
-        labels = ["neutral", "manipulative", "supportive", "controlling"]
-        intent = labels[int(probs.argmax())]
-        confidence = float(probs.max())
-        return intent, confidence
     def get_emotion_profile(self, text):
         """Get emotion profile from text"""
@@ -213,13 +360,21 @@ class ModelManager:
     def _prepare_inputs(self, model_name, text):
         """Prepare inputs for the model"""
-        inputs = self.tokenizers[model_name](
-            text,
-            return_tensors="pt",
-            truncation=True,
-            padding=True
-        )
-        return {k: v.to(self.device) for k, v in inputs.items()}
     def get_abuse_pattern_labels(self):
         """Get abuse pattern labels"""

 import torch
 import logging
+import os
+import time
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 from torch.nn.functional import sigmoid, softmax
 # Set up logging
         self.tokenizers = {}
     def load_models(self):
         """Load every classifier and then the emotion pipeline.

         Each classifier goes through ``_load_model_with_retry``. When that
         reports failure, a dummy stand-in is registered under the same name
         via ``_create_dummy_model`` so later lookups still find an entry.
         """
         # One row per classifier:
         # (name, primary repo, fallback repo or None, is_multilabel, is_regression)
         specs = (
             (
                 "abuse_patterns",
                 "SamanthaStorm/tether-multilabel-v6",
                 "SamanthaStorm/tether-multilabel-v5",  # older version as fallback
                 True,
                 False,
             ),
             (
                 "sentiment",
                 "SamanthaStorm/tether-sentiment-v3",
                 "SamanthaStorm/tether-sentiment-v2",
                 False,
                 False,
             ),
             # The remaining models have no fallback repo; a dummy is used on failure.
             ("darvo", "SamanthaStorm/tether-darvo-regressor-v1", None, False, True),
             ("boundary", "SamanthaStorm/healthy-boundary-predictor", None, False, False),
             ("intent", "SamanthaStorm/intentanalyzer", None, False, False),
         )

         for name, primary, fallback, multilabel, regression in specs:
             loaded = self._load_model_with_retry(
                 name,
                 primary,
                 fallback,
                 is_multilabel=multilabel,
                 is_regression=regression,
             )
             if loaded:
                 continue
             logger.warning(f"Creating dummy model for {name}")
             self._create_dummy_model(name, multilabel)

         # The emotion pipeline has its own retry handling.
         self._load_emotion_pipeline()
         logger.info("Model loading completed")
+    def _load_model_with_retry(self, name, primary_path, fallback_path=None, is_multilabel=False, is_regression=False, max_retries=3):
+        """Load a model with retry logic and fallback option"""
+        for attempt in range(max_retries):
+            try:
+                logger.info(f"Loading {name} model from {primary_path} (attempt {attempt+1}/{max_retries})")
+                # Try to load from primary path
+                self.models[name] = AutoModelForSequenceClassification.from_pretrained(
+                    primary_path,
+                    local_files_only=False,
+                    trust_remote_code=False
+                ).to(self.device)
+                self.tokenizers[name] = AutoTokenizer.from_pretrained(
+                    primary_path,
+                    use_fast=False,
+                    local_files_only=False,
+                    trust_remote_code=False
+                )
+                # Store model metadata
+                self.models[name].is_multilabel = is_multilabel
+                self.models[name].is_regression = is_regression
+                logger.info(f"{name} model loaded successfully")
+                return True
+            except Exception as e:
+                logger.error(f"Error loading {name} model (attempt {attempt+1}): {e}")
+                time.sleep(2)  # Wait before retry
+        # If primary path failed, try fallback if available
+        if fallback_path:
+            try:
+                logger.info(f"Trying fallback path for {name}: {fallback_path}")
+                self.models[name] = AutoModelForSequenceClassification.from_pretrained(
+                    fallback_path,
+                    local_files_only=False,
+                    trust_remote_code=False
+                ).to(self.device)
+                self.tokenizers[name] = AutoTokenizer.from_pretrained(
+                    fallback_path,
+                    use_fast=False,
+                    local_files_only=False,
+                    trust_remote_code=False
+                )
+                # Store model metadata
+                self.models[name].is_multilabel = is_multilabel
+                self.models[name].is_regression = is_regression
+                logger.info(f"{name} model loaded from fallback path")
+                return True
+            except Exception as e:
+                logger.error(f"Error loading {name} model from fallback path: {e}")
+        return False
+    def _load_emotion_pipeline(self, max_retries=3):
+        """Load emotion pipeline with retry logic"""
+        for attempt in range(max_retries):
+            try:
+                from transformers import pipeline
+                logger.info(f"Loading emotion pipeline (attempt {attempt+1}/{max_retries})")
+                self.emotion_pipeline = pipeline(
+                    "text-classification",
+                    model="j-hartmann/emotion-english-distilroberta-base",
+                    return_all_scores=True,
+                    top_k=None,
+                    truncation=True,
+                    device=0 if torch.cuda.is_available() else -1
+                )
+                logger.info("Emotion pipeline loaded successfully")
+                return True
+            except Exception as e:
+                logger.error(f"Error loading emotion pipeline (attempt {attempt+1}): {e}")
+                time.sleep(2)  # Wait before retry
+        logger.warning("Failed to load emotion pipeline, using dummy")
+        self.emotion_pipeline = None
+        return False
+    def _create_dummy_model(self, name, is_multilabel=False):
+        """Create a dummy model that returns neutral predictions"""
+        class DummyModel:
+            def __init__(self, is_multilabel=False):
+                self.is_multilabel = is_multilabel
+                self.is_regression = False
+            def __call__(self, **kwargs):
+                class DummyOutput:
+                    def __init__(self, is_multilabel):
+                        if is_multilabel:
+                            # For multilabel, create logits for each class (16 classes)
+                            self.logits = torch.zeros((1, 16))
+                        else:
+                            # For classification, create logits for 2 classes
+                            self.logits = torch.zeros((1, 2))
+                            # Slightly bias toward first class
+                            self.logits[0, 0] = 0.2
+                return DummyOutput(self.is_multilabel)
+            def eval(self):
+                return self
+            def to(self, device):
+                return self
+        # Create dummy model and tokenizer
+        self.models[name] = DummyModel(is_multilabel)
+        class DummyTokenizer:
+            def __call__(self, text, **kwargs):
+                return {
+                    "input_ids": torch.ones((1, 10), dtype=torch.long),
+                    "attention_mask": torch.ones((1, 10), dtype=torch.long)
+                }
+        self.tokenizers[name] = DummyTokenizer()
+        logger.warning(f"Created dummy model for {name}")
     def predict_abuse_patterns(self, text, thresholds):
         """Score ``text`` per abuse-pattern label and keep labels above threshold.

         Args:
             text: Input string; blank or whitespace-only text yields ``([], [])``.
             thresholds: Mapping of label to cutoff; labels missing from it
                 use 0.25.

         Returns:
             ``(labels, details)`` where ``details`` holds
             ``(label, score, weight)`` tuples. Any exception is logged and
             ``([], [])`` is returned.
         """
         if not text.strip():
             return [], []
         try:
             encoded = self._prepare_inputs("abuse_patterns", text)
             model = self.models["abuse_patterns"]
             with torch.no_grad():
                 result = model(**encoded)
             if model.is_multilabel:
                 # Independent per-label probabilities.
                 activated = torch.sigmoid(result.logits.squeeze(0))
             else:
                 # Single-label head: normalise across classes instead.
                 activated = torch.softmax(result.logits.squeeze(0), dim=0)
             raw_scores = activated.cpu().numpy()

             hits = []
             for label, score in zip(self.get_abuse_pattern_labels(), raw_scores):
                 if score > thresholds.get(label, 0.25):
                     hits.append((label, float(score), self.get_pattern_weight(label)))
             return [entry[0] for entry in hits], hits
         except Exception as e:
             logger.error(f"Error in predict_abuse_patterns: {e}")
             return [], []
     def predict_sentiment(self, text):
         """Classify ``text`` as "supportive" or "undermining".

         Returns:
             ``(label, confidence)``. Blank text, or any exception (which is
             logged), yields ``("neutral", 0.5)``.
         """
         if not text.strip():
             return "neutral", 0.5
         try:
             batch = self._prepare_inputs("sentiment", text)
             with torch.no_grad():
                 first_row = self.models["sentiment"](**batch).logits[0]
                 distribution = softmax(first_row, dim=-1).cpu().numpy()
             # Class index order used to name the winning class.
             names = ("supportive", "undermining")
             winner = names[int(distribution.argmax())]
             return winner, float(distribution.max())
         except Exception as e:
             logger.error(f"Error in predict_sentiment: {e}")
             return "neutral", 0.5
     def predict_darvo(self, text):
         """Return a DARVO score for ``text`` as a float.

         A model flagged ``is_regression`` has its output passed through a
         sigmoid; otherwise the softmax probability at index 1 is used.
         Blank text, or any exception (which is logged), yields 0.0.
         """
         if not text.strip():
             return 0.0
         try:
             encoded = self._prepare_inputs("darvo", text)
             with torch.no_grad():
                 raw = self.models["darvo"](**encoded).logits
                 if not self.models["darvo"].is_regression:
                     # Classification head: index 1 is assumed to be the DARVO class.
                     class_probs = softmax(raw, dim=-1).cpu().numpy()[0]
                     return float(class_probs[1])
                 return float(sigmoid(raw.cpu()).item())
         except Exception as e:
             logger.error(f"Error in predict_darvo: {e}")
             return 0.0
     def predict_boundary_health(self, text):
         """Return the predicted boundary class index (1 healthy, 0 unhealthy).

         Blank text, or any exception (which is logged), yields 0.
         """
         if not text.strip():
             return 0
         try:
             encoded = self._prepare_inputs("boundary", text)
             with torch.no_grad():
                 class_probs = softmax(self.models["boundary"](**encoded).logits, dim=-1)
                 return torch.argmax(class_probs, dim=-1).item()
         except Exception as e:
             logger.error(f"Error in predict_boundary_health: {e}")
             return 0
     def predict_intent(self, text):
         """Classify the intent of ``text``.

         Returns:
             ``(intent, confidence)``. Blank text, or any exception (which is
             logged), yields ``("neutral", 0.5)``.
         """
         if not text.strip():
             return "neutral", 0.5
         try:
             encoded = self._prepare_inputs("intent", text)
             with torch.no_grad():
                 logits = self.models["intent"](**encoded).logits
                 distribution = softmax(logits, dim=-1).cpu().numpy()[0]
             # Intent names by class index (adjust based on actual model outputs).
             names = ("neutral", "manipulative", "supportive", "controlling")
             chosen = names[int(distribution.argmax())]
             return chosen, float(distribution.max())
         except Exception as e:
             logger.error(f"Error in predict_intent: {e}")
             return "neutral", 0.5
     def get_emotion_profile(self, text):
         """Get emotion profile from text"""
     def _prepare_inputs(self, model_name, text):
         """Prepare inputs for the model"""
+        try:
+            inputs = self.tokenizers[model_name](
+                text,
+                return_tensors="pt",
+                truncation=True,
+                padding=True
+            )
+            return {k: v.to(self.device) for k, v in inputs.items()}
+        except Exception as e:
+            logger.error(f"Error preparing inputs for {model_name}: {e}")
+            # Return dummy inputs
+            return {
+                "input_ids": torch.ones((1, 10), dtype=torch.long).to(self.device),
+                "attention_mask": torch.ones((1, 10), dtype=torch.long).to(self.device)
+            }
     def get_abuse_pattern_labels(self):
         """Get abuse pattern labels"""