Create app.py
Guhanselvam committed · ec8aa5d
Parent(s): 196c1f2
app.py ADDED
@@ -0,0 +1,155 @@
import librosa as lb
import soundfile as sf
import numpy as np
import os, glob, pickle
import sounddevice as sd
import time
import requests
import webbrowser
import random
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from scipy.io.wavfile import write

# RAVDESS emotion codes (the third dash-separated field of each file name)
emotion_labels = {
    '01':'neutral',
    '02':'calm',
    '03':'happy',
    '04':'sad',
    '05':'angry',
    '06':'fearful',
    '07':'disgust',
    '08':'surprised'
}

# Only these emotions are used for training and prediction
focused_emotion_labels = ['happy', 'sad', 'angry']

def audio_features(file_title, mfcc, chroma, mel):
    # Read the clip and its sample rate
    with sf.SoundFile(file_title) as audio_recording:
        audio = audio_recording.read(dtype="float32")
        sample_rate = audio_recording.samplerate

    if chroma:
        stft = np.abs(lb.stft(audio))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(lb.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # librosa 0.10+ takes keyword-only arguments here; passing the signal
        # positionally raises a TypeError
        mel = np.mean(lb.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    return result

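# With all three flags on and librosa's defaults (12 chroma bins, 128 mel
# bands), each clip collapses to a single 40 + 12 + 128 = 180-value vector,
# e.g. (the file path here is hypothetical):
#
#   vec = audio_features("data/Actor_01/03-01-03-01-01-01-01.wav",
#                        mfcc=True, chroma=True, mel=True)
#   assert vec.shape == (180,)
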
def get_emotion_from_file_name(file_name):
    return emotion_labels[file_name.split("-")[2]]

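# RAVDESS file names encode modality-channel-emotion-intensity-statement-
# repetition-actor, so e.g. "03-01-05-01-02-01-12.wav" splits to the emotion
# code "05", which emotion_labels maps to "angry".
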
def loading_audio_data():
    x = []  # Input - feature vectors
    y = []  # Output - emotion labels

    # Go through all sound files
    for file in glob.glob("data/Actor_*/*.wav"):
        file_name = os.path.basename(file)
        emotion = get_emotion_from_file_name(file_name)

        # Work only with sounds that are part of focused_emotion_labels
        if emotion in focused_emotion_labels:
            try:
                feature = audio_features(file, mfcc=True, chroma=True, mel=True)
                x.append(feature)
                y.append(emotion)
            except Exception:
                print("This file wasn't processed because of an error: " + file)

    # Split the dataset: most of it for training, the rest for testing accuracy
    final_dataset = train_test_split(np.array(x), y, test_size=0.1, random_state=9)
    return final_dataset

def record_sound():
    fs = 44100   # Sample rate
    seconds = 3  # Duration of recording

    # Countdown so the user knows when to start talking
    print("Recording in 3")
    time.sleep(1)
    print("Recording in 2")
    time.sleep(1)
    print("Recording in 1")
    time.sleep(1)

    # Record and save
    my_recording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
    print("Recording: Started")
    sd.wait()  # Block until the recording is finished
    print("Recording: Stopped")
    write('output.wav', fs, my_recording)

    return 'output.wav'

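# sd.rec() returns a float32 (N, 1) array by default; scipy's write() stores
# it as a 32-bit float mono WAV, which soundfile reads back as a 1-D float32
# signal in audio_features().
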
def get_playlist(mood):
    # Sign up to RapidAPI, subscribe to this endpoint, and obtain your headers (x-rapidapi-key)
    # https://rapidapi.com/shekhar1000.sc/api/unsa-unofficial-spotify-api/

    url = "https://unsa-unofficial-spotify-api.p.rapidapi.com/search"

    querystring = {"query": mood, "count": "10", "type": "playlists"}

    headers = {
        'x-rapidapi-key': "06b6013060msh678afa5c6a5cf22p116a90jsn8b2b444ad800",
        'x-rapidapi-host': "unsa-unofficial-spotify-api.p.rapidapi.com"
    }

    response = requests.request("GET", url, headers=headers, params=querystring)

    # Pick one of the ten returned playlists at random
    playlist_id = response.json()["Results"][random.randint(0, 9)]["id"]

    return playlist_id

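# The lookup above assumes the endpoint returns JSON shaped roughly like
# {"Results": [{"id": "<playlist id>", ...}, ...]} with at least ten entries;
# fewer results would make the random index raise an IndexError.
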
def open_playlist_in_browser(playlist_id):
    webbrowser.open('https://open.spotify.com/playlist/' + str(playlist_id))

def train_model():
    # Prep
    X_train, X_test, y_train, y_test = loading_audio_data()

    # Create and train the model
    model = MLPClassifier(hidden_layer_sizes=(200,), learning_rate="adaptive", max_iter=400)
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)

    # Model prediction accuracy score
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred) * 100
    print("Accuracy of Recognizer is: %.2f" % accuracy)

    return model, accuracy

def recognize_your_mood(model):
    # Keep recording until the user confirms the predicted mood
    while True:
        my_sound_file = record_sound()

        feature = audio_features(my_sound_file, mfcc=True, chroma=True, mel=True)
        mood_prediction = model.predict([feature])[0]

        print("Are you " + mood_prediction + "? type yes/no")
        if input() == "yes":
            return mood_prediction

def main():
    # Train the model (TODO: should probably save the result instead of
    # retraining on every run; see the caching sketch below)
    model, accuracy = train_model()

    # Only act on predictions from a reasonably accurate model
    if accuracy > 60:
        mood = recognize_your_mood(model)
        playlist_id = get_playlist(mood)
        open_playlist_in_browser(playlist_id)

if __name__ == "__main__":
    main()
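
# A minimal caching sketch for the TODO in main(), assuming a pickle file
# named "model.pkl" (a hypothetical name) next to app.py:
#
#   if os.path.exists("model.pkl"):
#       with open("model.pkl", "rb") as f:
#           model, accuracy = pickle.load(f)
#   else:
#       model, accuracy = train_model()
#       with open("model.pkl", "wb") as f:
#           pickle.dump((model, accuracy), f)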

# ----- Some Info -----
# Dataset: RAVDESS - 60 audio clips per actor x 24 actors = 1440 audio clips
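
# Dependencies implied by the imports above (an inferred list, versions unpinned):
#   librosa, soundfile, numpy, sounddevice, scipy, scikit-learn, requests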