Spaces:

SamanthaStorm
/

TetherPro

Runtime error

App Files Files Community

SamanthaStorm committed on Jul 17

Commit

f7d2bee

verified ·

1 Parent(s): ba85ad6

Create models.py

Browse files

Files changed (1) hide show

models.py +18 -337

models.py CHANGED Viewed

@@ -116,7 +116,7 @@ class ModelManager:
                     try:
                         logger.info("Loading custom intent model with MultiLabelIntentClassifier")
-                        # Create the custom model architecture (referencing the global class)
                         intent_model = MultiLabelIntentClassifier("distilbert-base-uncased", 6)
                         # Download the model file from HuggingFace
@@ -174,43 +174,27 @@ class ModelManager:
             try:
                 logger.info(f"Trying fallback path for {name}: {fallback_path}")
-                # Special handling for intent model fallback too
                 if name == "intent":
                     try:
-                        try:
-                            from transformers import MultiLabelIntentClassifier
-                        except ImportError:
-                            self.models[name] = AutoModelForSequenceClassification.from_pretrained(
-                                fallback_path,
-                                local_files_only=False,
-                                trust_remote_code=True
-                            ).to(self.device)
-                        else:
-                            self.models[name] = MultiLabelIntentClassifier.from_pretrained(
-                                fallback_path,
-                                local_files_only=False,
-                                trust_remote_code=True
-                            ).to(self.device)
-                        self.tokenizers[name] = AutoTokenizer.from_pretrained(
-                            fallback_path,
-                            use_fast=False,
-                            local_files_only=False,
-                            trust_remote_code=True
                         )
-                    except Exception:
-                        self.models[name] = AutoModelForSequenceClassification.from_pretrained(
-                            fallback_path,
-                            local_files_only=False,
-                            trust_remote_code=True
-                        ).to(self.device)
-                        self.tokenizers[name] = AutoTokenizer.from_pretrained(
-                            fallback_path,
-                            use_fast=False,
-                            local_files_only=False,
-                            trust_remote_code=True
-                        )
                 else:
                     self.models[name] = AutoModelForSequenceClassification.from_pretrained(
                         fallback_path,
@@ -236,306 +220,3 @@ class ModelManager:
                 logger.error(f"Error loading {name} model from fallback path: {e}")
         return False
-    def _load_emotion_pipeline(self, max_retries=3):
-        """Load emotion pipeline with retry logic"""
-        for attempt in range(max_retries):
-            try:
-                logger.info(f"Loading emotion pipeline (attempt {attempt+1}/{max_retries})")
-                self.emotion_pipeline = pipeline(
-                    "text-classification",
-                    model="j-hartmann/emotion-english-distilroberta-base",
-                    return_all_scores=True,
-                    top_k=None,
-                    truncation=True,
-                    device=0 if torch.cuda.is_available() else -1
-                )
-                logger.info("Emotion pipeline loaded successfully")
-                return True
-            except Exception as e:
-                logger.error(f"Error loading emotion pipeline (attempt {attempt+1}): {e}")
-                time.sleep(2)  # Wait before retry
-        logger.error("Failed to load emotion pipeline after all retries")
-        return False
-    def predict_fallacy(self, text):
-        """Predict logical fallacy using FallacyFinder model"""
-        if not text.strip():
-            return "No Fallacy", 0.0
-        try:
-            inputs = self._prepare_inputs("fallacy", text)
-            with torch.no_grad():
-                outputs = self.models["fallacy"](**inputs)
-                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-                predicted_class_id = predictions.argmax().item()
-                confidence = predictions.max().item()
-            # Get the label from model config or use fallback labels
-            if hasattr(self.models["fallacy"], 'config') and hasattr(self.models["fallacy"].config, 'id2label'):
-                predicted_label = self.models["fallacy"].config.id2label[predicted_class_id]
-            else:
-                # Fallback labels in case config is missing
-                fallacy_labels = [
-                    "Ad Hominem", "Strawman", "Whataboutism", "Gaslighting",
-                    "False Dichotomy", "Appeal to Emotion", "DARVO", "Moving Goalposts",
-                    "Cherry Picking", "Appeal to Authority", "Slippery Slope",
-                    "Motte and Bailey", "Gish Gallop", "Kafkatrapping", "Sealioning", "No Fallacy"
-                ]
-                predicted_label = fallacy_labels[predicted_class_id] if predicted_class_id < len(fallacy_labels) else "No Fallacy"
-            return predicted_label, float(confidence)
-        except Exception as e:
-            logger.error(f"Error in predict_fallacy: {e}")
-            return "No Fallacy", 0.0
-    def predict_abuse_patterns(self, text, thresholds):
-        """Predict abuse patterns with thresholds"""
-        if not text.strip():
-            return [], []
-        try:
-            inputs = self._prepare_inputs("abuse_patterns", text)
-            with torch.no_grad():
-                outputs = self.models["abuse_patterns"](**inputs)
-            # Get sigmoid scores for multi-label classification
-            if self.models["abuse_patterns"].is_multilabel:
-                raw_scores = torch.sigmoid(outputs.logits.squeeze(0)).cpu().numpy()
-            else:
-                # Fallback for non-multilabel model
-                raw_scores = torch.softmax(outputs.logits.squeeze(0), dim=0).cpu().numpy()
-            # Get labels
-            labels = self.get_abuse_pattern_labels()
-            # Apply thresholds and return
-            predictions = list(zip(labels, raw_scores))
-            matched_scores = []
-            threshold_labels = []
-            for label, score in predictions:
-                if score > thresholds.get(label, 0.25):
-                    threshold_labels.append(label)
-                    weight = self.get_pattern_weight(label)
-                    matched_scores.append((label, float(score), weight))
-            return threshold_labels, matched_scores
-        except Exception as e:
-            logger.error(f"Error in predict_abuse_patterns: {e}")
-            return [], []
-    def predict_sentiment(self, text):
-        """Predict sentiment (supportive vs undermining)"""
-        if not text.strip():
-            return "neutral", 0.5
-        try:
-            inputs = self._prepare_inputs("sentiment", text)
-            with torch.no_grad():
-                outputs = self.models["sentiment"](**inputs)
-                logits = outputs.logits[0]
-                probs = softmax(logits, dim=-1).cpu().numpy()
-            # Get sentiment labels
-            labels = ["supportive", "undermining"]
-            sentiment = labels[int(probs.argmax())]
-            confidence = float(probs.max())
-            return sentiment, confidence
-        except Exception as e:
-            logger.error(f"Error in predict_sentiment: {e}")
-            return "neutral", 0.5
-    def predict_darvo(self, text):
-        """Predict DARVO score"""
-        if not text.strip():
-            return 0.0
-        try:
-            inputs = self._prepare_inputs("darvo", text)
-            with torch.no_grad():
-                logits = self.models["darvo"](**inputs).logits
-                if self.models["darvo"].is_regression:
-                    score = float(sigmoid(logits.cpu()).item())
-                else:
-                    # Fallback for classification model
-                    probs = softmax(logits, dim=-1).cpu().numpy()[0]
-                    score = float(probs[1])  # Assume second class is DARVO
-            return score
-        except Exception as e:
-            logger.error(f"Error in predict_darvo: {e}")
-            return 0.0
-    def predict_boundary_health(self, text):
-        """Predict boundary health (1 for healthy, 0 for unhealthy)"""
-        if not text.strip():
-            return 0
-        try:
-            inputs = self._prepare_inputs("boundary", text)
-            with torch.no_grad():
-                outputs = self.models["boundary"](**inputs)
-                predictions = softmax(outputs.logits, dim=-1)
-                predicted_class = torch.argmax(predictions, dim=-1).item()
-            return predicted_class
-        except Exception as e:
-            logger.error(f"Error in predict_boundary_health: {e}")
-            return 0
-    def predict_intent(self, text):
-        """Predict intent using custom multilabel classification model"""
-        if not text.strip():
-            return "neutral", 0.5
-        # Check if intent model is available
-        if "intent" not in self.models:
-            logger.warning("Intent model not available, returning neutral intent")
-            return "neutral", 0.5
-        try:
-            self.models["intent"].eval()
-            inputs = self.tokenizers["intent"](text, return_tensors="pt", truncation=True, padding=True, max_length=128)
-            inputs = {k: v.to(self.device) for k, v in inputs.items()}
-            with torch.no_grad():
-                outputs = self.models["intent"](inputs['input_ids'], inputs['attention_mask'])
-                probabilities = torch.sigmoid(outputs).cpu().numpy()[0]
-            # Intent categories (same as your working app)
-            intent_categories = ['trolling', 'dismissive', 'manipulative', 'emotionally_reactive', 'constructive', 'unclear']
-            intent_thresholds = {
-                'trolling': 0.70,
-                'manipulative': 0.65,
-                'dismissive': 0.60,
-                'constructive': 0.60,
-                'emotionally_reactive': 0.55,
-                'unclear': 0.50
-            }
-            # Get predictions above threshold
-            detected_intents = {}
-            for i, category in enumerate(intent_categories):
-                prob = probabilities[i]
-                threshold = intent_thresholds[category]
-                if prob > threshold:
-                    detected_intents[category] = prob
-            # If no intents above threshold, use the highest one if it's reasonable
-            if not detected_intents:
-                max_idx = probabilities.argmax()
-                max_category = intent_categories[max_idx]
-                max_prob = probabilities[max_idx]
-                if max_prob > 0.3:  # Minimum confidence
-                    detected_intents[max_category] = max_prob
-            # Return primary intent for compatibility with existing analyzer
-            if detected_intents:
-                primary_intent = max(detected_intents.items(), key=lambda x: x[1])
-                return primary_intent[0], primary_intent[1]
-            else:
-                return "neutral", 0.5
-        except Exception as e:
-            logger.error(f"Error in predict_intent: {e}")
-            return "neutral", 0.5
-    def get_emotion_profile(self, text):
-        """Get emotion profile from text"""
-        if not text.strip() or not self.emotion_pipeline:
-            return {
-                "sadness": 0.0,
-                "joy": 0.0,
-                "neutral": 0.0,
-                "disgust": 0.0,
-                "anger": 0.0,
-                "fear": 0.0
-            }
-        try:
-            emotions = self.emotion_pipeline(text)
-            if isinstance(emotions, list) and isinstance(emotions[0], list):
-                emotion_scores = emotions[0]
-                return {e['label'].lower(): round(e['score'], 3) for e in emotion_scores}
-            return {}
-        except Exception as e:
-            logger.error(f"Error in get_emotion_profile: {e}")
-            return {
-                "sadness": 0.0,
-                "joy": 0.0,
-                "neutral": 0.0,
-                "disgust": 0.0,
-                "anger": 0.0,
-                "fear": 0.0
-            }
-    def _prepare_inputs(self, model_name, text):
-        """Prepare inputs for the model"""
-        try:
-            # Set max_length for fallacy model to match training
-            max_length = 512 if model_name == "fallacy" else None
-            inputs = self.tokenizers[model_name](
-                text,
-                return_tensors="pt",
-                truncation=True,
-                padding=True,
-                max_length=max_length
-            )
-            return {k: v.to(self.device) for k, v in inputs.items()}
-        except Exception as e:
-            logger.error(f"Error preparing inputs for {model_name}: {e}")
-            # Return dummy inputs
-            return {
-                "input_ids": torch.ones((1, 10), dtype=torch.long).to(self.device),
-                "attention_mask": torch.ones((1, 10), dtype=torch.long).to(self.device)
-            }
-    def get_abuse_pattern_labels(self):
-        """Get abuse pattern labels"""
-        return [
-            "recovery phase", "control", "gaslighting", "guilt tripping", "dismissiveness",
-            "blame shifting", "nonabusive", "projection", "insults",
-            "contradictory statements", "obscure language",
-            "veiled threats", "stalking language", "false concern",
-            "false equivalence", "future faking"
-        ]
-    def get_pattern_weight(self, label):
-        """Get pattern weight for scoring"""
-        weights = {
-            "recovery phase": 0.7,
-            "control": 1.4,
-            "gaslighting": 1.3,
-            "guilt tripping": 1.2,
-            "dismissiveness": 0.9,
-            "blame shifting": 1.0,
-            "projection": 0.5,
-            "insults": 1.4,
-            "contradictory statements": 1.0,
-            "obscure language": 0.9,
-            "nonabusive": 0.0,
-            "veiled threats": 1.6,
-            "stalking language": 1.8,
-            "false concern": 1.1,
-            "false equivalence": 1.3,
-            "future faking": 0.8
-        }
-        return weights.get(label, 1.0)

                     try:
                         logger.info("Loading custom intent model with MultiLabelIntentClassifier")
+                        # Create the custom model architecture using the class defined at module level
                         intent_model = MultiLabelIntentClassifier("distilbert-base-uncased", 6)
                         # Download the model file from HuggingFace
             try:
                 logger.info(f"Trying fallback path for {name}: {fallback_path}")
+                # Special handling for intent model fallback
                 if name == "intent":
+                    # Use the locally defined MultiLabelIntentClassifier class
+                    custom_model = MultiLabelIntentClassifier("distilbert-base-uncased", 6)
                     try:
+                        model_path = hf_hub_download(
+                            repo_id=fallback_path,
+                            filename="pytorch_model.bin"
                         )
+                        state_dict = torch.load(model_path, map_location='cpu')
+                        custom_model.load_state_dict(state_dict)
+                        self.models[name] = custom_model.to(self.device)
+                        # Use distilbert tokenizer for intent model
+                        self.tokenizers[name] = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+                    except Exception as fallback_error:
+                        logger.error(f"Failed to load intent model from fallback: {fallback_error}")
+                        raise fallback_error
                 else:
                     self.models[name] = AutoModelForSequenceClassification.from_pretrained(
                         fallback_path,
                 logger.error(f"Error loading {name} model from fallback path: {e}")
         return False