b1nay committed on
Commit
dad2433
·
1 Parent(s): ef53c2e
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: speech emotion recognition with a fine-tuned HuBERT classifier.

The user uploads a WAV file; the app runs it through an 8-class
``HubertForSequenceClassification`` model and shows the predicted emotion
label together with the class probabilities.
"""

import streamlit as st
from transformers import HubertForSequenceClassification, HubertConfig, Wav2Vec2FeatureExtractor
import torch
import soundfile as sf

# Hub checkpoint that supplies the architecture, base config and feature extractor.
BASE_CHECKPOINT = "superb/hubert-large-superb-er"

# Device and model configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_name = "model_hubert_finetuned_nopeft.pth"  # Replace with your model path or Hugging Face model hub path
config = HubertConfig.from_pretrained(BASE_CHECKPOINT)
config.id2label = {0: 'neu', 1: 'hap', 2: 'ang', 3: 'sad', 4: 'dis', 5: 'sur', 6: 'fea', 7: 'cal'}
config.label2id = {"neu": 0, "hap": 1, "ang": 2, "sad": 3, "dis": 4, "sur": 5, "fea": 6, "cal": 7}
config.num_labels = 8  # Set it to the number of classes in your SER task

# Streamlit re-executes this script on every user interaction, so the heavy
# objects are cached. ``st.cache_resource`` is missing on older Streamlit
# releases; fall back to an identity decorator (no caching) there.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_model_and_feature_extractor():
    """Build the classifier and its feature extractor.

    Returns:
        A ``(model, feature_extractor)`` tuple. The model has the fine-tuned
        weights from ``model_name`` loaded, lives on ``device`` and is in
        eval mode.
    """
    # The base checkpoint's classification head has a different number of
    # classes than ``config``, hence ignore_mismatched_sizes=True; the head is
    # then overwritten by the fine-tuned state dict below.
    classifier = HubertForSequenceClassification.from_pretrained(
        BASE_CHECKPOINT, config=config, ignore_mismatched_sizes=True
    )
    classifier.to(device)
    # NOTE: torch.load unpickles the file; only point model_name at trusted checkpoints.
    state_dict = torch.load(model_name, map_location=device)
    classifier.load_state_dict(state_dict)
    classifier.eval()

    extractor = Wav2Vec2FeatureExtractor.from_pretrained(BASE_CHECKPOINT)
    return classifier, extractor


model, feature_extractor = _load_model_and_feature_extractor()

st.title("Speech Emotion Recognition Model")

uploaded_file = st.file_uploader("Upload an audio file", type=["wav"])

if uploaded_file is not None:
    # Load audio file
    audio_input, sampling_rate = sf.read(uploaded_file)

    # soundfile returns a (frames, channels) array for multi-channel audio;
    # down-mix to mono so the extractor does not mistake it for a batch.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    expected_rate = feature_extractor.sampling_rate
    if sampling_rate != expected_rate:
        # Running the model on audio at the wrong rate gives meaningless
        # predictions, so report the mismatch instead of guessing.
        st.error(
            f"Expected {expected_rate} Hz audio but the uploaded file is "
            f"{sampling_rate} Hz. Please resample it and upload again."
        )
    else:
        # Preprocess audio input
        inputs = feature_extractor(
            audio_input, sampling_rate=sampling_rate, return_tensors="pt", padding=True
        )
        inputs = {key: value.to(device) for key, value in inputs.items()}

        # Get prediction
        with torch.no_grad():
            logits = model(**inputs).logits
            probabilities = torch.softmax(logits, dim=-1)
            predicted_class = torch.argmax(probabilities, dim=-1).item()

        # Display prediction
        st.write(f"Predicted class: {config.id2label[predicted_class]}")
        st.write(f"Class probabilities: {probabilities}")