File size: 2,208 Bytes
bda0b25
 
adf299f
 
 
bda0b25
adf299f
 
628b599
adf299f
c07f07c
adf299f
 
c07f07c
628b599
bda0b25
 
 
 
 
 
 
 
 
 
628b599
adf299f
bda0b25
 
 
 
 
bc8c795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0176633
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os

import soundfile as sf
import streamlit as st
import torch
from transformers import (
    AutoFeatureExtractor,
    AutoModel,
    AutoModelForAudioClassification,
)

# Hugging Face access token, read from the environment (None when unset).
token = os.getenv("HF_TOKEN")

# Hub repository that holds both the model weights and the feature extractor.
MODEL_ID = "sami606713/emotion_classification"

# Class index -> emotion name for the classifier's output.
ID2LABEL = {
    0: "angry",
    1: "calm",
    2: "disgust",
    3: "fearful",
    4: "happy",
    5: "neutral",
    6: "sad",
    7: "surprised",
}

# Streamlit re-executes this script on every widget interaction. Cache the
# heavyweight model load when the running Streamlit provides cache_resource;
# otherwise fall back to loading on each run, as the script did before.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_model_and_extractor(model_id, auth_token):
    """Load the audio-classification model and its feature extractor.

    Args:
        model_id: Hugging Face Hub repository id.
        auth_token: Hub access token, or None for anonymous access.

    Returns:
        A ``(model, feature_extractor)`` tuple.

    Raises:
        Exception: whatever ``from_pretrained`` raises (network, auth,
            missing files); the caller reports it to the user.
    """
    # NOTE(review): this assumes the checkpoint was saved together with its
    # classification head. The previous code built a brand-new, untrained
    # torch.nn.Linear on every click, so its predictions were arbitrary.
    loaded_model = AutoModelForAudioClassification.from_pretrained(
        model_id, use_auth_token=auth_token
    )
    loaded_extractor = AutoFeatureExtractor.from_pretrained(
        model_id, use_auth_token=auth_token
    )
    return loaded_model, loaded_extractor


def _prepare_waveform(audio, source_rate, target_rate):
    """Return *audio* as a mono 1-D array sampled at *target_rate*.

    Args:
        audio: NumPy array from ``soundfile.read``; 1-D for mono, or 2-D
            ``(frames, channels)`` for multi-channel files.
        source_rate: Sample rate of *audio* in Hz.
        target_rate: Sample rate the feature extractor expects, in Hz.

    Returns:
        A 1-D NumPy array at *target_rate*.

    Raises:
        ValueError: if *audio* contains no samples.
    """
    if audio.shape[0] == 0:
        raise ValueError("Uploaded audio contains no samples")

    # Downmix multi-channel audio by averaging the channels.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    if source_rate != target_rate:
        # Linear-interpolation resampling using torch, which is already a
        # dependency. It has no anti-aliasing filter, so it is a pragmatic
        # fix rather than a studio-quality resampler.
        new_length = max(1, round(audio.shape[0] * target_rate / source_rate))
        wave = torch.from_numpy(audio).float().view(1, 1, -1)
        wave = torch.nn.functional.interpolate(
            wave, size=new_length, mode="linear", align_corners=False
        )
        audio = wave.view(-1).numpy()

    return audio


# Load the model and feature extractor using the token.
try:
    model, feature_extractor = _load_model_and_extractor(MODEL_ID, token)
except Exception as e:
    st.error(f"Error loading model: {e}")
    # Nothing below can work without the model; end this script run instead
    # of failing later with a confusing NameError.
    st.stop()

# Title and description
st.title("Audio Emotion Classification")
st.write("Upload an audio file and the model will classify the emotion.")

# File uploader
uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "ogg"])

if uploaded_file is not None:
    # Display the audio player
    st.audio(uploaded_file)

    # Perform emotion classification
    if st.button("Classifying"):
        try:
            # Rewind first: the player above may have consumed the stream.
            uploaded_file.seek(0)
            audio_input, sample_rate = sf.read(uploaded_file)

            # Use the rate the extractor was configured with (16 kHz if it
            # does not expose one) and convert the audio to that rate rather
            # than merely relabelling it.
            target_rate = getattr(feature_extractor, "sampling_rate", 16000)
            waveform = _prepare_waveform(audio_input, sample_rate, target_rate)

            inputs = feature_extractor(
                waveform, sampling_rate=target_rate, return_tensors="pt"
            )

            # Make prediction with the model's own classification head.
            with torch.no_grad():
                logits = model(**inputs).logits

            # Get predicted class
            predicted_class_idx = logits.argmax(-1).item()
            # Fall back to the raw index if the head has classes that the
            # label table does not name.
            predicted_class = ID2LABEL.get(
                predicted_class_idx, f"class {predicted_class_idx}"
            )

            st.write(f"Predicted Emotion: {predicted_class}")
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a traceback.
            st.error(f"Error during classification: {e}")