Vishal-Padia
/

SentimentSound

Model card Files Files and versions Community

Vishal-Padia committed on Dec 2, 2024

Commit

3d9d3c4

verified ·

1 Parent(s): c8fe9e1

Upload speech emotion recognition model

Browse files

Files changed (1) hide show

emotion_predictor.py +157 -0

emotion_predictor.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import os
+import torch
+import librosa
+import numpy as np
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from main import Config, HybridEmotionRecognitionModel, extract_advanced_features
+class EmotionPredictor:
+    def __init__(self, model_path="best_emotion_model.pth"):
+        """
+        Initialize the emotion predictor
+        Args:
+            model_path (str): Path to the saved model weights
+        """
+        # Prepare feature extraction specifics
+        self.features = Config.FEATURES
+        # Emotion mapping (same as in original script)
+        self.emotion_map = {
+            "01": "neutral",
+            "02": "calm",
+            "03": "happy",
+            "04": "sad",
+            "05": "angry",
+            "06": "fearful",
+            "07": "disgust",
+            "08": "surprised",
+        }
+        # Load the model
+        # First, prepare a dummy dataset to get the input dimension and number of classes
+        dummy_features, dummy_labels = self._prepare_dummy_dataset()
+        # Initialize the model
+        self.model = HybridEmotionRecognitionModel(
+            input_dim=len(dummy_features[0]), num_classes=len(np.unique(dummy_labels))
+        )
+        # Load the saved weights
+        self.model.load_state_dict(torch.load(model_path))
+        self.model.eval()  # Set to evaluation mode
+        # Prepare label encoder
+        self.label_encoder = LabelEncoder()
+        self.label_encoder.fit(dummy_labels)
+        # Prepare scaler
+        self.scaler = StandardScaler()
+        self.scaler.fit(dummy_features)
+    def _prepare_dummy_dataset(self):
+        """
+        Prepare a dummy dataset similar to the original preparation method
+        Returns:
+            tuple: Features and labels
+        """
+        features = []
+        labels = []
+        # Walk through all directories and subdirectories
+        for root, dirs, files in os.walk(Config.DATA_DIR):
+            for filename in files:
+                if filename.endswith(".wav"):
+                    # Full file path
+                    file_path = os.path.join(root, filename)
+                    try:
+                        # Extract emotion from filename
+                        emotion_code = filename.split("-")[2]
+                        emotion = self.emotion_map.get(emotion_code, "unknown")
+                        # Extract features
+                        file_features = extract_advanced_features(file_path)
+                        features.append(file_features)
+                        labels.append(emotion)
+                    except Exception as e:
+                        print(f"Error processing {filename}: {e}")
+                    # Limit to a small number of files for efficiency
+                    if len(features) >= 100:
+                        break
+                if len(features) >= 100:
+                    break
+            if len(features) >= 100:
+                break
+        return np.array(features), np.array(labels)
+    def predict_emotion(self, audio_file_path):
+        """
+        Predict emotion for a given audio file
+        Args:
+            audio_file_path (str): Path to the audio file
+        Returns:
+            str: Predicted emotion
+        """
+        # Extract features
+        try:
+            features = extract_advanced_features(audio_file_path)
+        except Exception as e:
+            print(f"Error extracting features: {e}")
+            return "Unknown"
+        # Standardize features
+        features = self.scaler.transform(features.reshape(1, -1))
+        # Convert to tensor
+        features_tensor = torch.FloatTensor(features)
+        # Predict
+        with torch.no_grad():
+            outputs = self.model(features_tensor)
+            _, predicted = torch.max(outputs, 1)
+            predicted_label_index = predicted.numpy()[0]
+        # Convert numeric label to emotion string
+        return self.label_encoder.classes_[predicted_label_index]
+def main():
+    # Initialize predictor
+    predictor = EmotionPredictor()
+    # Example usage
+    print("Emotion Prediction Script")
+    print("------------------------")
+    # Prompt user to input audio file path
+    while True:
+        audio_path = input("Enter the path to an audio file (or 'q' to quit): ").strip()
+        if audio_path.lower() == "q":
+            break
+        if not os.path.exists(audio_path):
+            print("File does not exist. Please check the path.")
+            continue
+        try:
+            # Predict emotion
+            emotion = predictor.predict_emotion(audio_path)
+            print(f"Predicted Emotion: {emotion}")
+        except Exception as e:
+            print(f"Error predicting emotion: {e}")
+if __name__ == "__main__":
+    main()