import librosa as lb
import soundfile as sf
import numpy as np
import os, glob, pickle
import sounddevice as sd
import time
import requests
import webbrowser
import random
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from scipy.io.wavfile import write
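# Third-party dependencies (assuming a standard pip environment):
# pip install librosa soundfile numpy sounddevice requests scikit-learn scipy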
emotion_labels = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}

focused_emotion_labels = ['happy', 'sad', 'angry']
def audio_features(file_title, mfcc, chroma, mel):
    # Extract MFCC, chroma and mel-spectrogram features from a single audio file
    # and concatenate them into one feature vector.
    with sf.SoundFile(file_title) as audio_recording:
        audio = audio_recording.read(dtype="float32")
        sample_rate = audio_recording.samplerate
        if chroma:
            stft = np.abs(lb.stft(audio))
        result = np.array([])
        if mfcc:
            mfccs = np.mean(lb.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            chroma_features = np.mean(lb.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
            result = np.hstack((result, chroma_features))
        if mel:
            mel_features = np.mean(lb.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0)
            result = np.hstack((result, mel_features))
    return result
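# RAVDESS file names encode metadata as dash-separated fields
# (e.g. "03-01-05-01-02-01-12.wav"); the third field is the emotion code,
# which emotion_labels maps to a readable label.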
def get_emotion_from_file_name(file_name):
    return emotion_labels[file_name.split("-")[2]]
def loading_audio_data():
    x = []  # Input - features
    y = []  # Output - emotion labels
    # Go through all sound files
    for file in glob.glob("data/Actor_*/*.wav"):
        file_name = os.path.basename(file)
        emotion = get_emotion_from_file_name(file_name)
        # Only work with sounds that are part of focused_emotion_labels
        if emotion in focused_emotion_labels:
            try:
                feature = audio_features(file, mfcc=True, chroma=True, mel=True)
                x.append(feature)
                y.append(emotion)
            except Exception:
                print("This file wasn't processed because of an error: " + file)
    # Split the dataset: most for training, some for testing its accuracy
    final_dataset = train_test_split(np.array(x), y, test_size=0.1, random_state=9)
    return final_dataset
def record_sound():
    fs = 44100   # Sample rate
    seconds = 3  # Duration of recording
    print("Recording in 3")
    time.sleep(1)
    print("Recording in 2")
    time.sleep(1)
    print("Recording in 1")
    time.sleep(1)
    # Record and save
    my_recording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)
    print("Recording: Started")
    sd.wait()
    print("Recording: Stopped")
    write('output.wav', fs, my_recording)
    return 'output.wav'
def get_playlist(mood):
    # Sign up to RapidAPI, subscribe to this endpoint, and obtain your own headers (x-rapidapi-key):
    # https://rapidapi.com/shekhar1000.sc/api/unsa-unofficial-spotify-api/
    url = "https://unsa-unofficial-spotify-api.p.rapidapi.com/search"
    querystring = {"query": mood, "count": "10", "type": "playlists"}
    headers = {
        'x-rapidapi-key': "YOUR-RAPIDAPI-KEY",  # replace with your own key
        'x-rapidapi-host': "unsa-unofficial-spotify-api.p.rapidapi.com"
    }
    response = requests.get(url, headers=headers, params=querystring)
    # Pick one of the returned playlists at random
    playlist_id = random.choice(response.json()["Results"])["id"]
    return playlist_id
def open_playlist_in_browser(playlist_id):
    webbrowser.open('https://open.spotify.com/playlist/' + str(playlist_id))
def train_model():
    # Prep
    X_train, X_test, y_train, y_test = loading_audio_data()
    # Create and train the model
    model = MLPClassifier(hidden_layer_sizes=(200,), learning_rate="adaptive", max_iter=400)
    model.fit(X_train, y_train)
    # Predict
    y_pred = model.predict(X_test)
    # Model prediction accuracy score
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred) * 100
    print("Accuracy of Recognizer is: %.2f" % accuracy)
    return model, accuracy
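# A minimal sketch (not part of the original flow) addressing the TODO in main():
# persist the trained model with the already-imported pickle module so it doesn't
# have to be retrained on every run. The file name "mood_model.pkl" is just an example.
def save_model(model, path="mood_model.pkl"):
    with open(path, "wb") as f:
        pickle.dump(model, f)

def load_model(path="mood_model.pkl"):
    with open(path, "rb") as f:
        return pickle.load(f)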
def recognize_your_mood(model):
    while True:
        my_sound_file = record_sound()
        feature = audio_features(my_sound_file, mfcc=True, chroma=True, mel=True)
        mood_prediction = model.predict([feature])[0]
        print("Are you " + mood_prediction + "? type yes/no")
        if input() == "yes":
            return mood_prediction
def main():
    # Train the model (TODO: should probably save the result and not run every time)
    model, accuracy = train_model()
    if accuracy > 60:
        mood = recognize_your_mood(model)
        playlist_id = get_playlist(mood)
        open_playlist_in_browser(playlist_id)

if __name__ == "__main__":
    main()
# ----- Some Info -----
# Dataset: RAVDESS - 60 audio clips per actor x 24 actors = 1440 audio clips