"""FastAPI service that predicts an emotion label from an uploaded audio clip.

At import time the module downloads a Keras model from the Hugging Face Hub and
loads a pickled feature scaler and label encoder from the working directory.
"""

import os
import pickle
import shutil
import sys
import tempfile
import warnings

import tensorflow as tf
#from transformers import pipeline
from huggingface_hub import from_pretrained_keras
import pandas as pd
import numpy as np
import joblib

# librosa is a Python library for analyzing audio and music. It is used here
# to extract the features from the uploaded audio files.
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

import keras
from keras.preprocessing import sequence
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Embedding
from keras.layers import LSTM, BatchNormalization, GRU
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from keras.layers import Input, Flatten, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.models import Model
from keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import SGD
from fastapi import FastAPI, Request, UploadFile, File

if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

model = from_pretrained_keras('Mohamed41/MODEL_EMOTION_AR_TEXT_72P')

# NOTE(security): pickle.load executes arbitrary code embedded in the file.
# These artifacts must come from a trusted source shipped with the service.
with open('scaler3.pickle', 'rb') as f:
    scaler3 = pickle.load(f)

with open('encoder3.pickle', 'rb') as f:
    encoder3 = pickle.load(f)

# Width of the feature vector that get_predict_feat reshapes to before scaling.
# With librosa defaults a 2.5 s clip at 22050 Hz yields 108 frames, and
# 108 * (1 ZCR + 1 RMS + 20 MFCC) = 2376 -- presumably the width the scaler
# and model were trained on; TODO confirm against the training code.
EXPECTED_FEATURE_COUNT = 2376


def zcr(data, frame_length, hop_length):
    """Return the per-frame zero-crossing rate of ``data`` with unit axes removed."""
    rate = librosa.feature.zero_crossing_rate(
        y=data, frame_length=frame_length, hop_length=hop_length
    )
    return np.squeeze(rate)


def rmse(data, frame_length=2048, hop_length=512):
    """Return the per-frame RMS energy of ``data`` with unit axes removed."""
    energy = librosa.feature.rms(
        y=data, frame_length=frame_length, hop_length=hop_length
    )
    return np.squeeze(energy)


def mfcc(data, sr, frame_length=2048, hop_length=512, flatten: bool = True):
    """Return the MFCCs of ``data``, transposed so frames come first.

    ``frame_length`` and ``hop_length`` are forwarded to librosa as ``n_fft``
    and ``hop_length`` so the framing agrees with ``zcr`` and ``rmse``; at the
    default values this matches librosa's own defaults.

    When ``flatten`` is true the transposed matrix is raveled to 1-D, otherwise
    it is only squeezed.
    """
    coeffs = librosa.feature.mfcc(
        y=data, sr=sr, n_fft=frame_length, hop_length=hop_length
    )
    return np.ravel(coeffs.T) if flatten else np.squeeze(coeffs.T)


def extract_features(data, sr=22050, frame_length=2048, hop_length=512):
    """Concatenate ZCR, RMS and flattened MFCC features into one 1-D vector."""
    parts = (
        zcr(data, frame_length, hop_length),
        rmse(data, frame_length, hop_length),
        mfcc(data, sr, frame_length, hop_length),
    )
    # The original stacked onto an empty float64 array, which forced a float64
    # result; keep that dtype explicitly so the scaler sees the same input.
    return np.hstack(parts).astype(np.float64, copy=False)


def get_predict_feat(path):
    """Load a clip from ``path`` and return model-ready features.

    Reads 2.5 seconds of audio starting 0.6 seconds in, extracts features,
    scales them with ``scaler3`` and appends a trailing axis.

    Raises:
        ValueError: if the clip does not yield exactly
            ``EXPECTED_FEATURE_COUNT`` features (e.g. the audio is too short).
    """
    d, s_rate = librosa.load(path, duration=2.5, offset=0.6)
    features = np.asarray(extract_features(d, sr=s_rate))
    if features.size != EXPECTED_FEATURE_COUNT:
        raise ValueError(
            f"expected {EXPECTED_FEATURE_COUNT} audio features but got "
            f"{features.size}; the clip is probably too short"
        )
    row = np.reshape(features, newshape=(1, EXPECTED_FEATURE_COUNT))
    scaled = scaler3.transform(row)
    return np.expand_dims(scaled, axis=2)


emotions1 = {1: 'Neutral', 2: 'Calm', 3: 'Happy', 4: 'Sad', 5: 'Angry', 6: 'Fear', 7: 'Disgust', 8: 'Surprise'}


def prediction(path1):
    """Predict the label for the audio file at ``path1``.

    Prints the decoded label (as before) and also returns it, so callers such
    as the HTTP endpoint can include it in their response.
    """
    res = get_predict_feat(path1)
    predictions = model.predict(res)
    y_pred = encoder3.inverse_transform(predictions)
    label = y_pred[0][0]
    print(label)
    # Convert NumPy scalars to plain Python values so they serialise to JSON.
    return label.item() if isinstance(label, np.generic) else label


app = FastAPI()


@app.post("/")
async def read_root(file: UploadFile = File(...)):
    """Accept an uploaded audio file and return the predicted label for it."""
    file_extension = os.path.splitext(file.filename or "")[1]
    # A unique temporary file per request stops concurrent uploads from
    # overwriting each other. It is closed before being read and removed
    # afterwards so nothing accumulates on disk.
    with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as buffer:
        shutil.copyfileobj(file.file, buffer)
        tmp_path = buffer.name
    try:
        x = prediction(tmp_path)
    finally:
        os.remove(tmp_path)
    return {"filename": file.filename, "filepath": f"/app/{file.filename}", "prediction": x}