Upload 11 files

- .gitattributes +1 -0
- app.py +510 -0
- demo2.gif +3 -0
- df_audio.csv +0 -0
- melspec.py +113 -0
- model3.h5 +3 -0
- model4.h5 +3 -0
- model_mw.h5 +3 -0
- packages.txt +3 -0
- requirements.txt +21 -0
- setup.sh +20 -0
- test.wav +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+demo2.gif filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,510 @@
+import numpy as np
+import streamlit as st
+import cv2
+import librosa
+import librosa.display
+from tensorflow.keras.models import load_model
+import os
+from datetime import datetime
+import streamlit.components.v1 as components
+import matplotlib.pyplot as plt
+from PIL import Image
+from melspec import plot_colored_polar, plot_melspec
+
+# load models
+model = load_model("model3.h5")
+
+# constants
+starttime = datetime.now()
+
+CAT6 = ['fear', 'angry', 'neutral', 'happy', 'sad', 'surprise']
+CAT7 = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
+CAT3 = ["positive", "neutral", "negative"]
+
+COLOR_DICT = {"neutral": "grey",
+              "positive": "green",
+              "happy": "green",
+              "surprise": "orange",
+              "fear": "purple",
+              "negative": "red",
+              "angry": "red",
+              "sad": "lightblue",
+              "disgust": "brown"}
+
+TEST_CAT = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
+TEST_PRED = np.array([.3, .3, .4, .1, .6, .9, .1])
+
+# page settings
+st.set_page_config(page_title="SER web-app", page_icon=":speech_balloon:", layout="wide")
+# COLOR = "#1f1f2e"
+# BACKGROUND_COLOR = "#d1d1e0"
+
+
+# @st.cache(hash_funcs={tf_agents.utils.object_identity.ObjectIdentityDictionary: load_model})
+# def load_model_cache(model):
+#     return load_model(model)
+
+# @st.cache
+def log_file(txt=None):
+    # append a timestamped line to the feedback log
+    with open("log.txt", "a") as f:
+        datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+        f.write(f"{txt} - {datetoday};\n")
+
+
+# @st.cache
+def save_audio(file):
+    # returns 1 if the file is too large, 0 on success
+    if file.size > 4000000:
+        return 1
+    # if not os.path.exists("audio"):
+    #     os.makedirs("audio")
+    folder = "audio"
+    datetoday = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
+    # clear the folder to avoid storage overload
+    for filename in os.listdir(folder):
+        file_path = os.path.join(folder, filename)
+        try:
+            if os.path.isfile(file_path) or os.path.islink(file_path):
+                os.unlink(file_path)
+        except Exception as e:
+            print('Failed to delete %s. Reason: %s' % (file_path, e))
+
+    try:
+        with open("log0.txt", "a") as f:
+            f.write(f"{file.name} - {file.size} - {datetoday};\n")
+    except Exception:
+        pass
+
+    with open(os.path.join(folder, file.name), "wb") as f:
+        f.write(file.getbuffer())
+    return 0
+
+
+# @st.cache
+def get_melspec(audio):
+    y, sr = librosa.load(audio, sr=44100)
+    X = librosa.stft(y)
+    Xdb = librosa.amplitude_to_db(abs(X))
+    img = np.stack((Xdb,) * 3, -1)
+    img = img.astype(np.uint8)
+    grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    grayImage = cv2.resize(grayImage, (224, 224))
+    rgbImage = np.repeat(grayImage[..., np.newaxis], 3, -1)
+    return (rgbImage, Xdb)
+
+
+# @st.cache
+def get_mfccs(audio, limit):
+    # pad or truncate the MFCC matrix to `limit` frames
+    y, sr = librosa.load(audio)
+    a = librosa.feature.mfcc(y, sr=sr, n_mfcc=40)
+    if a.shape[1] > limit:
+        mfccs = a[:, :limit]
+    elif a.shape[1] < limit:
+        mfccs = np.zeros((a.shape[0], limit))
+        mfccs[:, :a.shape[1]] = a
+    else:
+        mfccs = a  # already exactly `limit` frames
+    return mfccs
+
+
+@st.cache
+def get_title(predictions, categories=CAT6):
+    title = f"Detected emotion: {categories[predictions.argmax()]} \
+    - {predictions.max() * 100:.2f}%"
+    return title
+
+
+@st.cache
+def color_dict(coldict=COLOR_DICT):
+    return COLOR_DICT
+
+
+@st.cache
+def plot_polar(fig, predictions=TEST_PRED, categories=TEST_CAT,
+               title="TEST", colors=COLOR_DICT):
+    # color_sector = "grey"
+
+    N = len(predictions)
+    ind = predictions.argmax()
+
+    COLOR = color_sector = colors[categories[ind]]
+    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
+    radii = np.zeros_like(predictions)
+    radii[predictions.argmax()] = predictions.max() * 10
+    width = np.pi / 1.8 * predictions
+    fig.set_facecolor("#d1d1e0")
+    ax = plt.subplot(111, polar=True)
+    ax.bar(theta, radii, width=width, bottom=0.0, color=color_sector, alpha=0.25)
+
+    angles = [i / float(N) * 2 * np.pi for i in range(N)]
+    angles += angles[:1]
+
+    data = list(predictions)
+    data += data[:1]
+    plt.polar(angles, data, color=COLOR, linewidth=2)
+    plt.fill(angles, data, facecolor=COLOR, alpha=0.25)
+
+    ax.spines['polar'].set_color('lightgrey')
+    ax.set_theta_offset(np.pi / 3)
+    ax.set_theta_direction(-1)
+    plt.xticks(angles[:-1], categories)
+    ax.set_rlabel_position(0)
+    plt.yticks([0, .25, .5, .75, 1], color="grey", size=8)
+    plt.suptitle(title, color="darkblue", size=12)
+    plt.title(f"BIG {N}\n", color=COLOR)
+    plt.ylim(0, 1)
+    plt.subplots_adjust(top=0.75)
+
+
+def main():
+    side_img = Image.open("images/emotion3.jpg")
+    with st.sidebar:
+        st.image(side_img, width=300)
+    st.sidebar.subheader("Menu")
+    website_menu = st.sidebar.selectbox("Menu", ("Emotion Recognition", "Project description", "Our team",
+                                                 "Leave feedback", "Relax"))
+    st.set_option('deprecation.showfileUploaderEncoding', False)
+
+    if website_menu == "Emotion Recognition":
+        st.sidebar.subheader("Model")
+        model_type = st.sidebar.selectbox("How would you like to predict?", ("mfccs", "mel-specs"))
+        em3 = em6 = em7 = gender = False
+        st.sidebar.subheader("Settings")
+
+        st.markdown("## Upload the file")
+        with st.container():
+            col1, col2 = st.columns(2)
+            # audio_file = None
+            # path = None
+            with col1:
+                audio_file = st.file_uploader("Upload audio file", type=['wav', 'mp3', 'ogg'])
+                if audio_file is not None:
+                    if not os.path.exists("audio"):
+                        os.makedirs("audio")
+                    path = os.path.join("audio", audio_file.name)
+                    if_save_audio = save_audio(audio_file)
+                    if if_save_audio == 1:
+                        st.warning("File size is too large. Try another file.")
+                    elif if_save_audio == 0:
+                        # extract features
+                        # display audio
+                        st.audio(audio_file, format='audio/wav', start_time=0)
+                        try:
+                            wav, sr = librosa.load(path, sr=44100)
+                            Xdb = get_melspec(path)[1]
+                            mfccs = librosa.feature.mfcc(wav, sr=sr)
+                            # # display audio
+                            # st.audio(audio_file, format='audio/wav', start_time=0)
+                        except Exception as e:
+                            audio_file = None
+                            st.error(f"Error {e} - wrong format of the file. Try another .wav file.")
+                    else:
+                        st.error("Unknown error")
+                else:
+                    if st.button("Try test file"):
+                        wav, sr = librosa.load("test.wav", sr=44100)
+                        Xdb = get_melspec("test.wav")[1]
+                        mfccs = librosa.feature.mfcc(wav, sr=sr)
+                        # display audio
+                        st.audio("test.wav", format='audio/wav', start_time=0)
+                        path = "test.wav"
+                        audio_file = "test"
+            with col2:
+                if audio_file is not None:
+                    fig = plt.figure(figsize=(10, 2))
+                    fig.set_facecolor('#d1d1e0')
+                    plt.title("Wave-form")
+                    librosa.display.waveplot(wav, sr=44100)
+                    plt.gca().axes.get_yaxis().set_visible(False)
+                    plt.gca().axes.get_xaxis().set_visible(False)
+                    plt.gca().axes.spines["right"].set_visible(False)
+                    plt.gca().axes.spines["left"].set_visible(False)
+                    plt.gca().axes.spines["top"].set_visible(False)
+                    plt.gca().axes.spines["bottom"].set_visible(False)
+                    plt.gca().axes.set_facecolor('#d1d1e0')
+                    st.write(fig)
+                else:
+                    pass
+            # st.write("Record audio file")
+            # if st.button('Record'):
+            #     with st.spinner(f'Recording for 5 seconds ....'):
+            #         st.write("Recording...")
+            #         time.sleep(3)
+            #     st.success("Recording completed")
+            #     st.write("Error while loading the file")
+
+        if model_type == "mfccs":
+            em3 = st.sidebar.checkbox("3 emotions", True)
+            em6 = st.sidebar.checkbox("6 emotions", True)
+            em7 = st.sidebar.checkbox("7 emotions")
+            gender = st.sidebar.checkbox("gender")
+
+        elif model_type == "mel-specs":
+            st.sidebar.warning("This model is temporarily disabled")
+
+        else:
+            st.sidebar.warning("This model is temporarily disabled")
+
+        # with st.sidebar.expander("Change colors"):
+        #     st.sidebar.write("Use these options after you get the plots")
+        #     col1, col2, col3, col4, col5, col6, col7 = st.columns(7)
+        #
+        #     with col1:
+        #         a = st.color_picker("Angry", value="#FF0000")
+        #     with col2:
+        #         f = st.color_picker("Fear", value="#800080")
+        #     with col3:
+        #         d = st.color_picker("Disgust", value="#A52A2A")
+        #     with col4:
+        #         sd = st.color_picker("Sad", value="#ADD8E6")
+        #     with col5:
+        #         n = st.color_picker("Neutral", value="#808080")
+        #     with col6:
+        #         sp = st.color_picker("Surprise", value="#FFA500")
+        #     with col7:
+        #         h = st.color_picker("Happy", value="#008000")
+        #     if st.button("Update colors"):
+        #         global COLOR_DICT
+        #         COLOR_DICT = {"neutral": n,
+        #                       "positive": h,
+        #                       "happy": h,
+        #                       "surprise": sp,
+        #                       "fear": f,
+        #                       "negative": a,
+        #                       "angry": a,
+        #                       "sad": sd,
+        #                       "disgust": d}
+        #         st.success(COLOR_DICT)
+
+        if audio_file is not None:
+            st.markdown("## Analyzing...")
+            if not audio_file == "test":
+                st.sidebar.subheader("Audio file")
+                file_details = {"Filename": audio_file.name, "FileSize": audio_file.size}
+                st.sidebar.write(file_details)
+
+            with st.container():
+                col1, col2 = st.columns(2)
+                with col1:
+                    fig = plt.figure(figsize=(10, 2))
+                    fig.set_facecolor('#d1d1e0')
+                    plt.title("MFCCs")
+                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
+                    plt.gca().axes.get_yaxis().set_visible(False)
+                    plt.gca().axes.spines["right"].set_visible(False)
+                    plt.gca().axes.spines["left"].set_visible(False)
+                    plt.gca().axes.spines["top"].set_visible(False)
+                    st.write(fig)
+                with col2:
+                    fig2 = plt.figure(figsize=(10, 2))
+                    fig2.set_facecolor('#d1d1e0')
+                    plt.title("Mel-log-spectrogram")
+                    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
+                    plt.gca().axes.get_yaxis().set_visible(False)
+                    plt.gca().axes.spines["right"].set_visible(False)
+                    plt.gca().axes.spines["left"].set_visible(False)
+                    plt.gca().axes.spines["top"].set_visible(False)
+                    st.write(fig2)
+
+            if model_type == "mfccs":
+                st.markdown("## Predictions")
+                with st.container():
+                    col1, col2, col3, col4 = st.columns(4)
+                    mfccs = get_mfccs(path, model.input_shape[-1])
+                    mfccs = mfccs.reshape(1, *mfccs.shape)
+                    pred = model.predict(mfccs)[0]
+
+                    with col1:
+                        if em3:
+                            # collapse the 6-way prediction into positive / neutral / negative
+                            pos = pred[3] + pred[5] * .5
+                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
+                            neg = pred[0] + pred[1] + pred[4] * .5
+                            data3 = np.array([pos, neu, neg])
+                            txt = "MFCCs\n" + get_title(data3, CAT3)
+                            fig = plt.figure(figsize=(5, 5))
+                            COLORS = color_dict(COLOR_DICT)
+                            plot_colored_polar(fig, predictions=data3, categories=CAT3,
+                                               title=txt, colors=COLORS)
+                            # plot_polar(fig, predictions=data3, categories=CAT3,
+                            #            title=txt, colors=COLORS)
+                            st.write(fig)
+                    with col2:
+                        if em6:
+                            txt = "MFCCs\n" + get_title(pred, CAT6)
+                            fig2 = plt.figure(figsize=(5, 5))
+                            COLORS = color_dict(COLOR_DICT)
+                            plot_colored_polar(fig2, predictions=pred, categories=CAT6,
+                                               title=txt, colors=COLORS)
+                            # plot_polar(fig2, predictions=pred, categories=CAT6,
+                            #            title=txt, colors=COLORS)
+                            st.write(fig2)
+                    with col3:
+                        if em7:
+                            model_ = load_model("model4.h5")
+                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
+                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
+                            pred_ = model_.predict(mfccs_)[0]
+                            txt = "MFCCs\n" + get_title(pred_, CAT7)
+                            fig3 = plt.figure(figsize=(5, 5))
+                            COLORS = color_dict(COLOR_DICT)
+                            plot_colored_polar(fig3, predictions=pred_, categories=CAT7,
+                                               title=txt, colors=COLORS)
+                            # plot_polar(fig3, predictions=pred_, categories=CAT7,
+                            #            title=txt, colors=COLORS)
+                            st.write(fig3)
+                    with col4:
+                        if gender:
+                            with st.spinner('Wait for it...'):
+                                gmodel = load_model("model_mw.h5")
+                                gmfccs = get_mfccs(path, gmodel.input_shape[-1])
+                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
+                                gpred = gmodel.predict(gmfccs)[0]
+                                gdict = [["female", "woman.png"], ["male", "man.png"]]
+                                ind = gpred.argmax()
+                                txt = "Predicted gender: " + gdict[ind][0]
+                                img = Image.open("images/" + gdict[ind][1])
+
+                                fig4 = plt.figure(figsize=(3, 3))
+                                fig4.set_facecolor('#d1d1e0')
+                                plt.title(txt)
+                                plt.imshow(img)
+                                plt.axis("off")
+                                st.write(fig4)
+
+            # if model_type == "mel-specs":
+            #     st.markdown("## Predictions")
+            #     st.warning("The model is in test mode. It may not be working properly.")
+            #     if st.checkbox("I'm OK with it"):
+            #         try:
+            #             with st.spinner("Wait... It can take some time"):
+            #                 global tmodel
+            #                 tmodel = load_model_cache("tmodel_all.h5")
+            #                 fig, tpred = plot_melspec(path, tmodel)
+            #             col1, col2, col3 = st.columns(3)
+            #             with col1:
+            #                 st.markdown("### Emotional spectrum")
+            #                 dimg = Image.open("images/spectrum.png")
+            #                 st.image(dimg, use_column_width=True)
+            #             with col2:
+            #                 fig_, tpred_ = plot_melspec(path=path,
+            #                                             tmodel=tmodel,
+            #                                             three=True)
+            #                 st.write(fig_, use_column_width=True)
+            #             with col3:
+            #                 st.write(fig, use_column_width=True)
+            #         except Exception as e:
+            #             st.error(f"Error {e}, model is not loaded")
+
+    elif website_menu == "Project description":
+        import pandas as pd
+        import plotly.express as px
+        st.title("Project description")
+        st.subheader("GitHub")
+        link = '[GitHub repository of the web-application]' \
+               '(https://github.com/CyberMaryVer/speech-emotion-webapp)'
+        st.markdown(link, unsafe_allow_html=True)
+
+        st.subheader("Theory")
+        link = '[Theory behind - Medium article]' \
+               '(https://talbaram3192.medium.com/classifying-emotions-using-audio-recordings-and-python-434e748a95eb)'
+        st.markdown(link + ":clap::clap::clap: Tal!", unsafe_allow_html=True)
+        with st.expander("See Wikipedia definition"):
+            components.iframe("https://en.wikipedia.org/wiki/Emotion_recognition",
+                              height=320, scrolling=True)
+
+        st.subheader("Dataset")
+        txt = """
+            This web-application is a part of the final **Data Mining** project for **ITC Fellow Program 2020**.
+
+            Datasets used in this project:
+            * Crowd-sourced Emotional Multimodal Actors Dataset (**Crema-D**)
+            * Ryerson Audio-Visual Database of Emotional Speech and Song (**Ravdess**)
+            * Surrey Audio-Visual Expressed Emotion (**Savee**)
+            * Toronto emotional speech set (**Tess**)
+            """
+        st.markdown(txt, unsafe_allow_html=True)
+
+        df = pd.read_csv("df_audio.csv")
+        fig = px.violin(df, y="source", x="emotion4", color="actors", box=True, points="all", hover_data=df.columns)
+        st.plotly_chart(fig, use_container_width=True)
+
+        st.subheader("FYI")
+        st.write("Since we are currently using a free tier instance of AWS, "
+                 "we disabled the mel-spec and ensemble models.\n\n"
+                 "If you want to try them, we recommend cloning our GitHub repo:")
+        st.code("git clone https://github.com/CyberMaryVer/speech-emotion-webapp.git", language='bash')
+
+        st.write("After that, just uncomment the relevant sections in the app.py file "
+                 "to use these models:")
+
+    elif website_menu == "Our team":
+        st.subheader("Our team")
+        st.balloons()
+        col1, col2 = st.columns([3, 2])
+        with col1:
+            st.info("[email protected]")
+            st.info("[email protected]")
+            st.info("[email protected]")
+        with col2:
+            liimg = Image.open("images/LI-Logo.png")
+            st.image(liimg)
+            st.markdown(f""":speech_balloon: [Maria Startseva](https://www.linkedin.com/in/maria-startseva)""",
+                        unsafe_allow_html=True)
+            st.markdown(f""":speech_balloon: [Tal Baram](https://www.linkedin.com/in/tal-baram-b00b66180)""",
+                        unsafe_allow_html=True)
+            st.markdown(f""":speech_balloon: [Asher Holder](https://www.linkedin.com/in/asher-holder-526a05173)""",
+                        unsafe_allow_html=True)
+
+    elif website_menu == "Leave feedback":
+        st.subheader("Leave feedback")
+        user_input = st.text_area("Your feedback is greatly appreciated")
+        user_name = st.selectbox("Choose your personality", ["checker1", "checker2", "checker3", "checker4"])
+
+        if st.button("Submit"):
+            st.success(f"Message\n\"\"\"{user_input}\"\"\"\nwas sent")
+
+            # magic strings let "checker4" read the logs from the UI
+            if user_input == "log123456" and user_name == "checker4":
+                with open("log0.txt", "r", encoding="utf8") as f:
+                    st.text(f.read())
+            elif user_input == "feedback123456" and user_name == "checker4":
+                with open("log.txt", "r", encoding="utf8") as f:
+                    st.text(f.read())
+            else:
+                log_file(user_name + " " + user_input)
+                thankimg = Image.open("images/sticky.png")
+                st.image(thankimg)
+
+    else:  # "Relax"
+        import requests
+        import json
+
+        url = 'http://api.quotable.io/random'
+        if st.button("get random mood"):
+            with st.container():
+                col1, col2 = st.columns(2)
+                n = np.random.randint(1, 1000, 1)[0]
+                with col1:
+                    quotes = {"Good job and almost done": "checker1",
+                              "Great start!!": "checker2",
+                              "Please make corrections based on the following observation": "checker3",
+                              "DO NOT train with test data": "folk wisdom",
+                              "good work, but no docstrings": "checker4",
+                              "Well done!": "checker3",
+                              "For the sake of reproducibility, I recommend setting the random seed": "checker1"}
+                    if n % 5 == 0:
+                        a = np.random.choice(list(quotes.keys()), 1)[0]
+                        quote, author = a, quotes[a]
+                    else:
+                        try:
+                            r = requests.get(url=url)
+                            text = json.loads(r.text)
+                            quote, author = text['content'], text['author']
+                        except Exception as e:
+                            a = np.random.choice(list(quotes.keys()), 1)[0]
+                            quote, author = a, quotes[a]
+                    st.markdown(f"## *{quote}*")
+                    st.markdown(f"### ***{author}***")
+                with col2:
+                    st.image(image=f"https://picsum.photos/800/600?random={n}")
+
+
+if __name__ == '__main__':
+    main()
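Note: the `em3` branch in app.py collapses the six-way softmax into positive/neutral/negative with bare indices into CAT6. For reference, a minimal standalone sketch of that mapping (the helper name `collapse_to_valence` is ours, not part of the commit):

import numpy as np

CAT6 = ['fear', 'angry', 'neutral', 'happy', 'sad', 'surprise']

def collapse_to_valence(pred):
    # pred: 6-way softmax output, ordered as CAT6
    p = dict(zip(CAT6, pred))
    pos = p['happy'] + .5 * p['surprise']                    # pred[3] + pred[5] * .5
    neu = p['neutral'] + .5 * p['surprise'] + .5 * p['sad']  # pred[2] + pred[5] * .5 + pred[4] * .5
    neg = p['fear'] + p['angry'] + .5 * p['sad']             # pred[0] + pred[1] + pred[4] * .5
    return np.array([pos, neu, neg])  # ordered as CAT3

# example: a clip scored mostly "sad" splits between neutral and negative
print(collapse_to_valence(np.array([.05, .05, .1, .1, .6, .1])))  # [0.15 0.45 0.4]

Surprise is split between positive and neutral, and sad between neutral and negative, so the three scores still sum to one.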
demo2.gif
ADDED
Git LFS Details
df_audio.csv
ADDED
The diff for this file is too large to render.
melspec.py
ADDED
@@ -0,0 +1,113 @@
+import numpy as np
+import cv2
+import librosa
+import librosa.display
+from tensorflow.keras.models import load_model
+from datetime import datetime
+import matplotlib.pyplot as plt
+
+# constants
+starttime = datetime.now()
+
+CAT6 = ['fear', 'angry', 'neutral', 'happy', 'sad', 'surprise']
+CAT7 = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
+CAT3 = ["positive", "neutral", "negative"]
+
+COLOR_DICT = {"neutral": "grey",
+              "positive": "green",
+              "happy": "green",
+              "surprise": "orange",
+              "fear": "purple",
+              "negative": "red",
+              "angry": "red",
+              "sad": "lightblue",
+              "disgust": "brown"}
+
+TEST_CAT = ['fear', 'disgust', 'neutral', 'happy', 'sad', 'surprise', 'angry']
+TEST_PRED = np.array([.3, .3, .4, .1, .6, .9, .1])
+
+# page settings
+# st.set_page_config(page_title="SER web-app", page_icon=":speech_balloon:", layout="wide")
+
+def get_melspec(audio):
+    # build a fixed-size (224, 224, 3) image from the dB-scaled STFT
+    y, sr = librosa.load(audio, sr=44100)
+    X = librosa.stft(y)
+    Xdb = librosa.amplitude_to_db(abs(X))
+    img = np.stack((Xdb,) * 3, -1)
+    img = img.astype(np.uint8)
+    grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    grayImage = cv2.resize(grayImage, (224, 224))
+    rgbImage = np.repeat(grayImage[..., np.newaxis], 3, -1)
+    return (rgbImage, Xdb)
+
+
+def get_title(predictions, categories, first_line=''):
+    txt = f"{first_line}\nDetected emotion: \
+    {categories[predictions.argmax()]} - {predictions.max() * 100:.2f}%"
+    return txt
+
+
+def plot_colored_polar(fig, predictions, categories,
+                       title="", colors=COLOR_DICT):
+    N = len(predictions)
+    ind = predictions.argmax()
+
+    COLOR = color_sector = colors[categories[ind]]
+    sector_colors = [colors[i] for i in categories]
+
+    fig.set_facecolor("#d1d1e0")
+    ax = plt.subplot(111, polar=True)
+
+    theta = np.linspace(0.0, 2 * np.pi, N, endpoint=False)
+    # one bar per emotion, each colored by its own sector color
+    for sector in range(predictions.shape[0]):
+        radii = np.zeros_like(predictions)
+        radii[sector] = predictions[sector] * 10
+        width = np.pi / 1.8 * predictions
+        c = sector_colors[sector]
+        ax.bar(theta, radii, width=width, bottom=0.0, color=c, alpha=0.25)
+
+    angles = [i / float(N) * 2 * np.pi for i in range(N)]
+    angles += angles[:1]
+
+    data = list(predictions)
+    data += data[:1]
+    plt.polar(angles, data, color=COLOR, linewidth=2)
+    plt.fill(angles, data, facecolor=COLOR, alpha=0.25)
+
+    ax.spines['polar'].set_color('lightgrey')
+    ax.set_theta_offset(np.pi / 3)
+    ax.set_theta_direction(-1)
+    plt.xticks(angles[:-1], categories)
+    ax.set_rlabel_position(0)
+    plt.yticks([0, .25, .5, .75, 1], color="grey", size=8)
+
+    plt.suptitle(title, color="darkblue", size=10)
+    plt.title(f"BIG {N}\n", color=COLOR)
+    plt.ylim(0, 1)
+    plt.subplots_adjust(top=0.75)
+
+
+def plot_melspec(path, tmodel=None, three=False,
+                 CAT3=CAT3, CAT6=CAT6):
+    # load model if it is not loaded
+    if tmodel is None:
+        tmodel = load_model("tmodel_all.h5")
+    # mel-spec model results
+    mel = get_melspec(path)[0]
+    mel = mel.reshape(1, *mel.shape)
+    tpred = tmodel.predict(mel)[0]
+    cat = CAT6
+
+    if three:
+        # collapse to positive / neutral / negative (same mapping as app.py)
+        pos = tpred[3] + tpred[5] * .5
+        neu = tpred[2] + tpred[5] * .5 + tpred[4] * .5
+        neg = tpred[0] + tpred[1] + tpred[4] * .5
+        tpred = np.array([pos, neu, neg])
+        cat = CAT3
+
+    txt = get_title(tpred, cat)
+    fig = plt.figure(figsize=(6, 4))
+    plot_colored_polar(fig, predictions=tpred, categories=cat, title=txt)
+    return (fig, tpred)
+
+
+if __name__ == "__main__":
+    plot_melspec("test.wav")
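A quick shape check for `get_melspec` (a hypothetical smoke test, not part of the commit; it assumes the test.wav from this upload is in the working directory):

from melspec import get_melspec

rgb, Xdb = get_melspec("test.wav")
print(rgb.shape, rgb.dtype)  # (224, 224, 3) uint8 - fixed-size pseudo-RGB input for the mel-spec CNN
print(Xdb.shape)             # (1025, n_frames) - dB-scaled STFT; frame count depends on clip length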
model3.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:723debe8cee53d5eb24de2cf1da5f14aa6914444bc839a932e6164123723bba5
+size 3519440
model4.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0edf230f79014586ad32b2ad53659c55667c0a8ec73e3f613109d50a8d88467d
+size 3325240
model_mw.h5
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d16bceb3b416b85c83cf7cd208ade1c6268c129d536a17f042d3353d501c4799
+size 3760168
packages.txt
ADDED
@@ -0,0 +1,3 @@
+libsndfile1-dev
+ffmpeg
+python3-cffi
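packages.txt supplies the system libraries (libsndfile, ffmpeg) that librosa relies on to decode uploaded audio. A hypothetical smoke test for the installed stack, using the test.wav shipped in this commit:

import librosa

# if libsndfile1/ffmpeg are missing, this load is the first thing that fails
y, sr = librosa.load("test.wav", sr=44100)
print(f"decoded {len(y) / sr:.2f}s of audio at {sr} Hz")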
requirements.txt
ADDED
@@ -0,0 +1,21 @@
+numpy==1.19.2
+pandas
+matplotlib
+Pillow
+opencv-python-headless==4.5.1.48
+plotly
+requests
+librosa==0.8.0
+streamlit
+tensorflow-cpu==2.4.1
+
+# numpy
+# pandas==1.2.4
+# matplotlib==3.3.4
+# Pillow==8.2.0
+# opencv-python-headless==4.5.1.48
+# plotly==4.14.3
+# requests==2.25.1
+# librosa==0.8.0
+# streamlit==0.81.1
+# tensorflow-cpu==2.4.1
setup.sh
ADDED
@@ -0,0 +1,20 @@
+mkdir -p ~/.streamlit/
+
+echo "\
+[general]\n\
+email = \"[email protected]\"\n\
+" > ~/.streamlit/credentials.toml
+
+echo "\
+[server]\n\
+headless = true\n\
+enableCORS=false\n\
+port = $PORT\n\
+[theme]\n\
+primaryColor = \"#d33682\"\n\
+backgroundColor = \"#d1d1e0\"\n\
+secondaryBackgroundColor = \"#586e75\"\n\
+textColor = \"#fafafa\"\n\
+font = \"sans serif\"\n\
+" > ~/.streamlit/config.toml
test.wav
ADDED
Binary file (237 kB).