import os
from datetime import datetime

import cv2
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import streamlit.components.v1 as components
from PIL import Image
from tensorflow.keras.models import load_model

from melspec import plot_colored_polar, plot_melspec

# load models
model = load_model("model3.h5")

# constants
starttime = datetime.now()

CAT6 = ['fear', 'angry', 'neutral', 'happy', 'sad', 'surprise']
CAT7 = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
CAT3 = ["positive", "neutral", "negative"]

COLOR_DICT = {"neutral": "grey",
              "positive": "green",
              "happy": "green",
              "surprise": "orange",
              "fear": "purple",
              "negative": "red",
              "angry": "red",
              "sad": "lightblue",
              "disgust": "brown"}

TEST_CAT = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
TEST_PRED = np.array([.3, .3, .4, .1, .6, .9, .1])

# page settings
st.set_page_config(page_title="SER web-app", page_icon=":speech_balloon:", layout="wide")

# COLOR = "#1f1f2e"
# BACKGROUND_COLOR = "#d1d1e0"


# @st.cache(hash_funcs={tf_agents.utils.object_identity.ObjectIdentityDictionary: load_model})
# def load_model_cache(model):
#     return load_model(model)


# @st.cache
def log_file(txt=None):
    # append a timestamped entry to the usage log
    with open("log.txt", "a") as f:
        datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        f.write(f"{txt} - {datetoday};\n")


# @st.cache
def save_audio(file):
    # returns 1 if the file is too large, 0 on success
    if file.size > 4000000:
        return 1

    # if not os.path.exists("audio"):
    #     os.makedirs("audio")

    folder = "audio"
    datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")

    # clear the folder to avoid storage overload
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))

    try:
        with open("log0.txt", "a") as f:
            f.write(f"{file.name} - {file.size} - {datetoday};\n")
    except Exception:
        pass

    with open(os.path.join(folder, file.name), "wb") as f:
        f.write(file.getbuffer())
    return 0


# @st.cache
def get_melspec(audio):
    # build a 224x224x3 image from the log-amplitude spectrogram
    y, sr = librosa.load(audio, sr=44100)
    X = librosa.stft(y)
    Xdb = librosa.amplitude_to_db(abs(X))
    img = np.stack((Xdb,) * 3, -1)
    img = img.astype(np.uint8)
    grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    grayImage = cv2.resize(grayImage, (224, 224))
    rgbImage = np.repeat(grayImage[..., np.newaxis], 3, -1)
    return (rgbImage, Xdb)


# @st.cache
def get_mfccs(audio, limit):
    # extract 40 MFCCs and pad/truncate the time axis to `limit` frames
    y, sr = librosa.load(audio)
    a = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    if a.shape[1] > limit:
        mfccs = a[:, :limit]
    elif a.shape[1] < limit:
        mfccs = np.zeros((a.shape[0], limit))
        mfccs[:, :a.shape[1]] = a
    else:
        mfccs = a
    return mfccs
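
# A minimal sketch of how the helpers above feed the classifier, matching the
# calls made in main() below ("audio/sample.wav" is a hypothetical path;
# `model` is model3.h5, loaded at the top of this file):
#   feats = get_mfccs("audio/sample.wav", model.input_shape[-1])  # (40, limit)
#   batch = feats.reshape(1, *feats.shape)                        # add batch dimension
#   probs = model.predict(batch)[0]                               # one score per CAT6 label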
@st.cache
def get_title(predictions, categories=CAT6):
    title = f"Detected emotion: {categories[predictions.argmax()]} " \
            f"- {predictions.max() * 100:.2f}%"
    return title


@st.cache
def color_dict(coldict=COLOR_DICT):
    return coldict


@st.cache
def plot_polar(fig, predictions=TEST_PRED, categories=TEST_CAT,
               title="TEST", colors=COLOR_DICT):
    # color_sector = "grey"

    N = len(predictions)
    ind = predictions.argmax()
    COLOR = color_sector = colors[categories[ind]]

    # bar sector highlighting the winning emotion
    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
    radii = np.zeros_like(predictions)
    radii[predictions.argmax()] = predictions.max() * 10
    width = np.pi / 1.8 * predictions
    fig.set_facecolor("#d1d1e0")
    ax = plt.subplot(111, polar=True)
    ax.bar(theta, radii, width=width, bottom=0.0, color=color_sector, alpha=0.25)

    # radar polygon: repeat the first point to close the outline
    angles = [i / float(N) * 2 * np.pi for i in range(N)]
    angles += angles[:1]
    data = list(predictions)
    data += data[:1]
    plt.polar(angles, data, color=COLOR, linewidth=2)
    plt.fill(angles, data, facecolor=COLOR, alpha=0.25)

    ax.spines['polar'].set_color('lightgrey')
    ax.set_theta_offset(np.pi / 3)
    ax.set_theta_direction(-1)
    plt.xticks(angles[:-1], categories)
    ax.set_rlabel_position(0)
    plt.yticks([0, .25, .5, .75, 1], color="grey", size=8)

    plt.suptitle(title, color="darkblue", size=12)
    plt.title(f"BIG {N}\n", color=COLOR)
    plt.ylim(0, 1)
    plt.subplots_adjust(top=0.75)
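
# Example (sketch) of rendering the polar chart with the module's test values:
#   fig = plt.figure(figsize=(5, 5))
#   plot_polar(fig, predictions=TEST_PRED, categories=TEST_CAT,
#              title="TEST", colors=COLOR_DICT)
#   st.write(fig)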
def main():
    side_img = Image.open("images/emotion3.jpg")
    with st.sidebar:
        st.image(side_img, width=300)
    st.sidebar.subheader("Menu")
    website_menu = st.sidebar.selectbox("Menu", ("Emotion Recognition",
                                                 "Project description"))
    st.set_option('deprecation.showfileUploaderEncoding', False)

    if website_menu == "Emotion Recognition":
        st.sidebar.subheader("Model")
        model_type = st.sidebar.selectbox("How would you like to predict?",
                                          ("mfccs", "mel-specs"))
        em3 = em6 = em7 = gender = False
        st.sidebar.subheader("Settings")

        st.markdown("## Upload the file")
        with st.container():
            col1, col2, col3 = st.columns(3)
            # audio_file = None
            # path = None
            with col1:
                audio_file = st.file_uploader("Upload audio file", type=['wav', 'mp3', 'ogg'])
                if audio_file is not None:
                    if not os.path.exists("audio"):
                        os.makedirs("audio")
                    path = os.path.join("audio", audio_file.name)
                    if_save_audio = save_audio(audio_file)
                    if if_save_audio == 1:
                        st.warning("File size is too large. Try another file.")
                    elif if_save_audio == 0:
                        # extract features and display the audio player
                        st.audio(audio_file, format='audio/wav', start_time=0)
                        try:
                            wav, sr = librosa.load(path, sr=44100)
                            Xdb = get_melspec(path)[1]
                            mfccs = librosa.feature.mfcc(y=wav, sr=sr)
                            # # display audio
                            # st.audio(audio_file, format='audio/wav', start_time=0)
                        except Exception as e:
                            audio_file = None
                            st.error(f"Error {e} - wrong file format. Try another .wav file.")
                    else:
                        st.error("Unknown error")
                else:
                    if st.button("Try test file"):
                        wav, sr = librosa.load("test.wav", sr=44100)
                        Xdb = get_melspec("test.wav")[1]
                        mfccs = librosa.feature.mfcc(y=wav, sr=sr)
                        # display audio
                        st.audio("test.wav", format='audio/wav', start_time=0)
                        path = "test.wav"
                        audio_file = "test"
            with col2:
                if audio_file is not None:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("Wave-form")
                    librosa.display.waveshow(wav, sr=44100, color="blue")
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.get_xaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    plt.gca().axes.spines["bottom"].set_visible(False)
                    plt.gca().axes.set_facecolor('#d1d1e0')
                    st.write(fig)
                else:
                    pass
                # st.write("Record audio file")
                # if st.button('Record'):
                #     with st.spinner(f'Recording for 5 seconds ....'):
                #         st.write("Recording...")
                #         time.sleep(3)
                #     st.success("Recording completed")
                #     st.write("Error while loading the file")
            with col3:
                st.title("Convert any MP3 audio file to .WAV")
                st.subheader("Convert audio file")
                link = '[File conversion](https://cloudconvert.com/mp3-to-wav)'
                st.markdown(link, unsafe_allow_html=True)

        if model_type == "mfccs":
            em3 = st.sidebar.checkbox("3 emotions", True)
            em6 = st.sidebar.checkbox("6 emotions", True)
            em7 = st.sidebar.checkbox("7 emotions")
            gender = st.sidebar.checkbox("gender")
        elif model_type == "mel-specs":
            st.sidebar.warning("This model is temporarily disabled")
        else:
            st.sidebar.warning("This model is temporarily disabled")

        # with st.sidebar.expander("Change colors"):
        #     st.sidebar.write("Use these options after you have the plots")
        #     col1, col2, col3, col4, col5, col6, col7 = st.columns(7)
        #     with col1:
        #         a = st.color_picker("Angry", value="#FF0000")
        #     with col2:
        #         f = st.color_picker("Fear", value="#800080")
        #     with col3:
        #         d = st.color_picker("Disgust", value="#A52A2A")
        #     with col4:
        #         sd = st.color_picker("Sad", value="#ADD8E6")
        #     with col5:
        #         n = st.color_picker("Neutral", value="#808080")
        #     with col6:
        #         sp = st.color_picker("Surprise", value="#FFA500")
        #     with col7:
        #         h = st.color_picker("Happy", value="#008000")
        #     if st.button("Update colors"):
        #         global COLOR_DICT
        #         COLOR_DICT = {"neutral": n,
        #                       "positive": h,
        #                       "happy": h,
        #                       "surprise": sp,
        #                       "fear": f,
        #                       "negative": a,
        #                       "angry": a,
        #                       "sad": sd,
        #                       "disgust": d}
        #         st.success(COLOR_DICT)

        if audio_file is not None:
            st.markdown("## Analyzing...")
            if audio_file != "test":
                st.sidebar.subheader("Audio file")
                file_details = {"Filename": audio_file.name, "FileSize": audio_file.size}
                st.sidebar.write(file_details)

            with st.container():
                col1, col2 = st.columns(2)
                with col1:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("MFCCs")
                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig)
                with col2:
                    fig2 = plt.figure(figsize=(10, 2))
                    fig2.set_facecolor('#d1d1e0')
                    plt.title("Mel-log-spectrogram")
                    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig2)
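
            # The 6-class scores below (CAT6 order: fear, angry, neutral, happy,
            # sad, surprise) are also collapsed to CAT3 by splitting "surprise"
            # between positive and neutral, and "sad" between neutral and negative.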
            if model_type == "mfccs":
                st.markdown("## Predictions")
                with st.container():
                    col1, col2, col3, col4 = st.columns(4)
                    mfccs = get_mfccs(path, model.input_shape[-1])
                    mfccs = mfccs.reshape(1, *mfccs.shape)
                    pred = model.predict(mfccs)[0]

                    with col1:
                        if em3:
                            pos = pred[3] + pred[5] * .5
                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
                            neg = pred[0] + pred[1] + pred[4] * .5
                            data3 = np.array([pos, neu, neg])
                            txt = "MFCCs\n" + get_title(data3, CAT3)
                            fig = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig, predictions=data3, categories=CAT3,
                                               title=txt, colors=COLORS)
                            # plot_polar(fig, predictions=data3, categories=CAT3,
                            #            title=txt, colors=COLORS)
                            st.write(fig)
                    with col2:
                        if em6:
                            txt = "MFCCs\n" + get_title(pred, CAT6)
                            fig2 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig2, predictions=pred, categories=CAT6,
                                               title=txt, colors=COLORS)
                            # plot_polar(fig2, predictions=pred, categories=CAT6,
                            #            title=txt, colors=COLORS)
                            st.write(fig2)
                    with col3:
                        if em7:
                            model_ = load_model("model4.h5")
                            # model4.h5 takes the features transposed to (time, n_mfcc)
                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
                            pred_ = model_.predict(mfccs_)[0]
                            txt = "MFCCs\n" + get_title(pred_, CAT7)
                            fig3 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig3, predictions=pred_, categories=CAT7,
                                               title=txt, colors=COLORS)
                            # plot_polar(fig3, predictions=pred_, categories=CAT7,
                            #            title=txt, colors=COLORS)
                            st.write(fig3)
                    with col4:
                        if gender:
                            with st.spinner('Wait for it...'):
                                gmodel = load_model("model_mw.h5")
                                gmfccs = get_mfccs(path, gmodel.input_shape[-1])
                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
                                gpred = gmodel.predict(gmfccs)[0]
                                gdict = [["female", "woman.png"], ["male", "man.png"]]
                                ind = gpred.argmax()
                                txt = "Predicted gender: " + gdict[ind][0]
                                img = Image.open("images/" + gdict[ind][1])

                                fig4 = plt.figure(figsize=(3, 3))
                                fig4.set_facecolor('#d1d1e0')
                                plt.title(txt)
                                plt.imshow(img)
                                plt.axis("off")
                                st.write(fig4)
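
            # Note: model4.h5 and model_mw.h5 above are re-loaded on every rerun.
            # A cached loader could avoid that - a sketch using the (deprecated)
            # st.cache API already used elsewhere in this file:
            #   @st.cache(allow_output_mutation=True)
            #   def load_model_cached(path):
            #       return load_model(path)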
            # if model_type == "mel-specs":
            #     st.markdown("## Predictions")
            #     st.warning("The model is in test mode. It may not be working properly.")
            #     if st.checkbox("I'm OK with it"):
            #         try:
            #             with st.spinner("Wait... It can take some time"):
            #                 global tmodel
            #                 tmodel = load_model_cache("tmodel_all.h5")
            #                 fig, tpred = plot_melspec(path, tmodel)
            #             col1, col2, col3 = st.columns(3)
            #             with col1:
            #                 st.markdown("### Emotional spectrum")
            #                 dimg = Image.open("images/spectrum.png")
            #                 st.image(dimg, use_column_width=True)
            #             with col2:
            #                 fig_, tpred_ = plot_melspec(path=path,
            #                                             tmodel=tmodel,
            #                                             three=True)
            #                 st.write(fig_, use_column_width=True)
            #             with col3:
            #                 st.write(fig, use_column_width=True)
            #         except Exception as e:
            #             st.error(f"Error {e}, model is not loaded")

    elif website_menu == "Project description":
        import pandas as pd
        import plotly.express as px

        st.title("Project description")

        st.subheader("Student Details")
        txt = """
        Student information includes:

        * Student Name: **Adewuyi Gbenga Kolawole**
        * Student Matric No: **HNDCOM/22/035**
        * Session: **2022/2023**
        * Class: **HND 2**
        * Level: **400L**

        This machine learning web application project is in partial fulfillment of the
        requirements for the Higher National Diploma (HND) in Computer Science at
        **The Federal College of Animal Health and Production Technology**
        **FCAHPTIB, 2023**.
        """
        st.markdown(txt, unsafe_allow_html=True)

        st.subheader("Theory")
        link = '[Theory behind the project (emotion recognition)]'
        st.markdown(link + ":clap::clap::clap:", unsafe_allow_html=True)
        with st.expander("See Wikipedia definition"):
            components.iframe("https://en.wikipedia.org/wiki/Emotion_recognition",
                              height=320, scrolling=True)

        st.subheader("Dataset")
        txt = """
        Datasets used in this project:

        * Crowd-sourced Emotional Multimodal Actors Dataset (**Crema-D**)
        ("https://www.kaggle.com/code/ejlok1/audio-emotion-part-1-explore-data")
        * Ryerson Audio-Visual Database of Emotional Speech and Song (**Ravdess**)
        ("https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio")
        * Surrey Audio-Visual Expressed Emotion (**Savee**)
        ("https://www.kaggle.com/datasets/ejlok1/surrey-audiovisual-expressed-emotion-savee")
        * Toronto emotional speech set (**Tess**)

        All datasets used can be found on **Kaggle**.
        The above datasets were used to train the models in this software before deployment.
        """
        st.markdown(txt, unsafe_allow_html=True)

        df = pd.read_csv("df_audio.csv")
        fig = px.violin(df, y="source", x="emotion4", color="actors", box=True,
                        points="all", hover_data=df.columns)
        st.plotly_chart(fig, use_container_width=True)

    else:
        pass


if __name__ == '__main__':
    main()
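
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py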