import os
import sys
import shutil
import pickle
import warnings

import numpy as np
import tensorflow as tf
# librosa is a Python library for analysing audio and music; it is used here
# to extract features from the uploaded audio files.
import librosa

from huggingface_hub import from_pretrained_keras
from fastapi import FastAPI, File, UploadFile

if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Keras model for Arabic speech-emotion recognition, loaded from the Hugging Face Hub.
model = from_pretrained_keras('Mohamed41/MODEL_EMOTION_AR_TEXT_72P')

# Scaler and label encoder fitted during training, loaded from disk.
with open('scaler3.pickle', 'rb') as f:
    scaler3 = pickle.load(f)

with open('encoder3.pickle', 'rb') as f:
    encoder3 = pickle.load(f)


def feat_ext_test(data):
    """Extract a 41-dimensional feature vector from a raw audio signal."""
    result = np.array([])
    # Time-domain feature: zero-crossing rate (prosody / low-level acoustic feature).
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)
    result = np.hstack((result, zcr))  # stacking horizontally
    # Frequency-domain features: 40 MFCC coefficients (spectral features).
    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=22050, n_mfcc=40).T, axis=0)
    result = np.hstack((result, mfcc))  # stacking horizontally
    return result


def get_predict_feat(path):
    """Load an audio file and return a scaled feature tensor ready for the model."""
    d, s_rate = librosa.load(path, duration=2.5, offset=0.6)
    res = feat_ext_test(d)
    result = np.reshape(np.array(res), newshape=(1, 41))
    i_result = scaler3.transform(result)
    final_result = np.expand_dims(i_result, axis=2)
    return final_result


# Mapping of label indices to emotion names (kept for reference).
emotions1 = {1: 'Neutral', 2: 'Calm', 3: 'Happy', 4: 'Sad',
             5: 'Angry', 6: 'Fear', 7: 'Disgust', 8: 'Surprise'}


def prediction(path1):
    """Predict the emotion label for the audio file at `path1`."""
    res = get_predict_feat(path1)
    predictions = model.predict(res)
    y_pred = encoder3.inverse_transform(predictions)
    return y_pred[0][0]


app = FastAPI()


@app.post("/")
async def read_root(file: UploadFile = File(...)):
    # Persist the uploaded audio to a temporary file, then run the prediction on it.
    file_extension = os.path.splitext(file.filename)[1]
    tmp_path = "tmp" + file_extension
    with open(tmp_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)
    x = prediction(tmp_path)
    return {"filename": file.filename,
            "filepath": f"/app/{file.filename}",
            "prediction": x}
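

# ---------------------------------------------------------------------------
# Local usage sketch (an addition, not part of the original app): assuming
# uvicorn is installed and this module is saved as `main.py`, running the file
# directly starts a development server. The endpoint can then be exercised
# with, for example:
#     curl -X POST -F "file=@sample.wav" http://127.0.0.1:8000/
# where `sample.wav` is a hypothetical short audio clip next to the script.
if __name__ == "__main__":
    import uvicorn  # assumption: uvicorn is available in the environment

    uvicorn.run(app, host="0.0.0.0", port=8000)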