# Source: Hugging Face Spaces page; reported Space status: "Runtime error"
import numpy as np | |
import soundfile as sf | |
import librosa | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer | |
import torch | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier | |
# Module-level setup: fetch the Wav2Vec2 tokenizer and CTC model once at
# import time so every call to predict_emotion() reuses the same objects.
# NOTE(review): this looks like a base pretrained checkpoint -- confirm that
# it actually ships tokenizer/vocabulary files; if it does not, the
# tokenizer's from_pretrained() call below will raise at import time.
model_name = "facebook/wav2vec2-large-xlsr-53"

# Both objects are resolved from the same checkpoint identifier.
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def load_audio(file_path):
    """Read an audio file with soundfile and return only its samples.

    Args:
        file_path: Location of the audio file; forwarded unchanged to
            ``sf.read``.

    Returns:
        The sample data returned by ``sf.read``. The sample rate that
        ``sf.read`` returns alongside it is discarded, so callers must
        obtain the rate by other means if they need it.
    """
    samples, _ = sf.read(file_path)
    return samples
def extract_mfcc_features(audio, sample_rate):
    """Summarise a signal as the mean of each of its 40 MFCC coefficients.

    Args:
        audio: Audio samples, passed to librosa as ``y``.
        sample_rate: Sampling rate of ``audio``, passed to librosa as ``sr``.

    Returns:
        The MFCC matrix transposed and averaged over axis 0. For mono input
        this is presumably one value per coefficient (length 40), averaged
        across frames -- confirm against librosa's output shape.
    """
    coefficient_matrix = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Transpose first so that reducing over axis 0 collapses the second
    # axis of librosa's output.
    return coefficient_matrix.T.mean(axis=0)
def predict_emotion(file_path):
    """Run the module-level Wav2Vec2 CTC model on one audio file.

    The file is loaded, reduced to a mono float32 waveform, resampled to
    16 kHz when its native rate differs, and passed through ``tokenizer``
    and ``model``. The most likely token id per frame is decoded back to
    text.

    NOTE(review): ``model`` is a ``Wav2Vec2ForCTC`` instance, so the value
    returned here is the decoded token string of a CTC head, not a label
    from an emotion classifier. Confirm this is the intended output.

    Args:
        file_path: Path of the audio file to analyse. Assumed to be a
            filesystem path (it is opened twice: once for the samples and
            once for the header).

    Returns:
        The string produced by ``tokenizer.decode`` for the first (only)
        item in the batch.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    target_rate = 16000  # rate announced to the tokenizer below

    # soundfile yields float64 by default; cast to float32 up front so the
    # tensors built from this array are single precision.
    audio = np.asarray(load_audio(file_path), dtype=np.float32)

    # Multi-channel files come back as (frames, channels); the tokenizer is
    # given a single waveform, so average the channels down to mono.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    if audio.size == 0:
        raise ValueError(f"No audio samples found in {file_path!r}")

    # load_audio() drops the sample rate, so read it from the file header
    # instead of assuming every input is already at 16 kHz.
    source_rate = sf.info(file_path).samplerate
    if source_rate != target_rate:
        audio = librosa.resample(audio, orig_sr=source_rate, target_sr=target_rate)

    # The model consumes the raw waveform, so no MFCC features are computed
    # on this path.
    encoded_input = tokenizer(
        audio, sampling_rate=target_rate, return_tensors="pt", padding=True
    )

    # Inference only: skip gradient bookkeeping.
    with torch.no_grad():
        logits = model(**encoded_input).logits

    predicted_ids = torch.argmax(logits, dim=-1)
    return tokenizer.decode(predicted_ids[0])
# Script entry point: run a single prediction and print the result.
if __name__ == "__main__":
    # Placeholder path -- replace with your audio file path.
    file_name = "path_to_your_audio_file.wav"

    emotion = predict_emotion(file_name)
    print(f'Predicted Emotion: {emotion}')