# IMPORT THE LIBRARIES
import sys
import warnings

import numpy as np
import joblib

# librosa is a Python library for analyzing audio and music. Here it is used
# to load the audio files and extract features from them.
import librosa

import gradio as gr
from huggingface_hub import from_pretrained_keras

# Silence warnings so they do not clutter the app logs.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Load the pre-trained emotion-recognition model from the Hugging Face Hub.
model = from_pretrained_keras('Mohamed41/MODEL_EMOTION_AR_TEXT_72P')


def feat_ext(data):
    """Extract a 41-dimensional feature vector from a raw audio signal."""
    result = np.array([])

    # Time-domain features: zero-crossing rate (ZCR), a prosodic /
    # low-level acoustic feature (1 value after averaging over frames).
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result = np.hstack((result, zcr))  # stacking horizontally

    # Frequency-domain features: 40 MFCCs (spectral features),
    # again averaged over frames.
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=22050, n_mfcc=40).T, axis=0)
    result = np.hstack((result, mfcc))  # stacking horizontally

    return result


# Scaler and label encoder fitted during training.
scaler = joblib.load('scaler.joblib')
encoder = joblib.load('encoder.joblib')


def get_predict_feat(path):
    """Load an audio file and turn it into the model's input tensor."""
    d, s_rate = librosa.load(path, duration=2.5, offset=0.6)
    res = feat_ext(d)
    result = np.reshape(np.array(res), newshape=(1, 41))
    i_result = scaler.transform(result)
    # The model expects a trailing channel axis: (1, 41, 1).
    final_result = np.expand_dims(i_result, axis=2)
    return final_result


# Reference mapping of class indices to emotion labels.
emotions1 = {1: 'Neutral', 2: 'Calm', 3: 'Happy', 4: 'Sad',
             5: 'Angry', 6: 'Fear', 7: 'Disgust', 8: 'Surprise'}


def prediction(path1):
    """Predict the emotion label for the audio file at path1."""
    res = get_predict_feat(path1)
    predictions = model.predict(res)
    y_pred = encoder.inverse_transform(predictions)
    return y_pred[0][0]


def mainfunc(data):
    # Debug stub used while wiring up the interface: echoes the file path.
    print(data)
    return str(data)


# Gradio UI: upload or record audio, get the predicted emotion back as text.
audio_input = gr.Audio(type="filepath")
iface = gr.Interface(fn=prediction, inputs=audio_input, outputs="text")
iface.launch(inline=False)
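
# A minimal local sanity check (a sketch, not part of the original app):
# the file name "sample.wav" below is hypothetical -- substitute any short
# recording. Run these lines instead of iface.launch() to verify the
# feature-extraction and prediction pipeline end to end.
#
#   feats = get_predict_feat("sample.wav")
#   print(feats.shape)              # expected: (1, 41, 1)
#   print(prediction("sample.wav"))  # one of the emotion labels above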