# File size: 3,668 Bytes
# e2eef75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import gradio as gr
import keras
import librosa
import hopsworks
import os
import numpy as np
import shutil
from functions import log_mel_spectrogram, split_spectrogram, load_audio_file, image_transformer, save_spectrogram_as_png
from datasets import load_dataset

def empty_string() -> str:
    """Return an empty string.

    Used as a Gradio callback whose output blanks the answer text box.
    """
    return ""

def create_image_folder(folder):
    """Make sure the directory ``folder`` exists.

    Missing parent directories are created too, and a directory that already
    exists is left untouched.

    Args:
        folder: Path of the directory to create.

    Returns:
        None.

    Raises:
        OSError: If the directory cannot be created, for example because
            ``folder`` names an existing regular file or the process lacks
            permission.
    """
    # exist_ok=True tolerates only the "already a directory" case. The former
    # bare ``except`` hid every failure and merely instantiated an exception
    # object without raising or handling anything.
    os.makedirs(folder, exist_ok=True)

def delete_folder(folder):
    """Recursively delete the directory ``folder`` and everything inside it.

    A directory that does not exist is not treated as an error; the call is
    then a no-op.

    Args:
        folder: Path of the directory to remove.

    Returns:
        None.

    Raises:
        OSError: If the tree exists but cannot be removed (for example
            because of missing permissions or because ``folder`` is not a
            directory).
    """
    try:
        shutil.rmtree(folder)
    except FileNotFoundError:
        # Nothing to clean up. Only this case is ignored; the former bare
        # ``except`` silently swallowed every other failure as well.
        pass

 
def create_dataset(image_folder):
    """Build a TensorFlow dataset from the images stored in ``image_folder``.

    The folder is loaded with ``datasets.load_dataset`` and its ``"train"``
    split is taken. ``image_transformer`` is then mapped over the split in
    batches with ``mode="L"``, and the result is exposed as a TensorFlow
    dataset that yields only the ``"image"`` column with a batch size of 1.

    Args:
        image_folder: Path of the folder holding the images.

    Returns:
        The dataset returned by ``to_tf_dataset``.
    """
    all_splits = load_dataset(image_folder, split=None)
    train_images = all_splits["train"]
    # Kept for diagnostics: shows the features and row count of the loaded split.
    print(train_images)

    transformed = train_images.map(
        image_transformer,
        batched=True,
        fn_kwargs={"mode": "L"},
    )
    return transformed.to_tf_dataset(batch_size=1, columns="image")


def majority_vote(raw_predictions):
    """Return the genre name that receives the most votes.

    Every row of ``raw_predictions`` casts one vote for the index of its
    largest value. The index with the most votes is translated to a name
    through ``label_decoding``. On a tie the smallest index wins, because
    ``np.unique`` returns the labels sorted and ``argmax`` picks the first
    maximum.

    Args:
        raw_predictions: Two-dimensional array-like with one row of scores
            per prediction.

    Returns:
        The ``label_decoding`` entry of the winning class index.
    """
    votes = np.argmax(raw_predictions, axis=1)
    candidates, tallies = np.unique(votes, return_counts=True)
    return label_decoding[candidates[tallies.argmax()]]

def predict(audio):
    """Predict the genre of an uploaded audio file.

    The audio is turned into a log-mel spectrogram, the spectrogram is split
    into pieces, and every piece is saved as a PNG image in ``folder``. The
    model then classifies each image and the label with the most votes is
    reported.

    Args:
        audio: The value delivered by the Gradio audio component; it is
            passed unchanged to ``load_audio_file``.

    Returns:
        A user-facing message: either the predicted genre or an error text
        when the audio could not be loaded.
    """
    # Start from an empty folder. Images left behind by an earlier upload
    # with more splits would otherwise be loaded by create_dataset() and take
    # part in the vote for this upload.
    delete_folder(folder)
    create_image_folder(folder)

    try:
        audio_array = load_audio_file(audio, sample_rate, res_type, duration)
    except Exception:
        # UI boundary: any loading problem becomes a friendly message.
        # ``Exception`` (rather than a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate.
        return "Error when loading audio. Did you submit a file?"

    spectrogram = log_mel_spectrogram(audio_array, sample_rate, nfft, hop_length, window)
    spec_splits = split_spectrogram(spectrogram, output_shape)
    # File names are numbered from 1: "1_spec.png", "2_spec.png", ...
    for idx, split in enumerate(spec_splits, start=1):
        save_path = os.path.join(folder, f"{idx}_spec.png")
        save_spectrogram_as_png(split, save_path, sample_rate, nfft, hop_length)

    image_dataset = create_dataset(folder)
    raw_preds = model.predict(image_dataset, verbose=0)

    genre_pred = majority_vote(raw_preds)
    return f"The genre of the submitted audio is {genre_pred}!"

# Arguments handed to load_audio_file() in predict().
sample_rate = 22050
res_type = "kaiser_fast"
duration = 0  # NOTE(review): the meaning of 0 is defined by load_audio_file - confirm

# Spectrogram settings handed to log_mel_spectrogram() / save_spectrogram_as_png().
nfft = 2048
hop_length = 512
window = "hann"

# Shape passed to split_spectrogram() for each spectrogram piece.
output_shape = (128, 256)

# Working directory for the temporary spectrogram images.
folder = "images"

# Maps the class index produced by the model to a genre name.
label_decoding = dict(
    enumerate(
        (
            "Electronic",
            "Experimental",
            "Folk",
            "Hip-Hop",
            "Instrumental",
            "International",
            "Pop",
            "Rock",
        )
    )
)

# File name of the saved Keras model; being relative, it is resolved against
# the current working directory.
model_path = "best_model.keras"
# Loaded once at import time; predict() reuses this object for every request.
model = keras.models.load_model(model_path)


# The bare string literal below is never executed. It holds a disabled
# alternative that would download the model from the Hopsworks model registry
# instead of reading the local file above.
"""
model_version = 1
project = hopsworks.login()
mr = project.get_model_registry()
model = mr.get_model("cnn_genre_classifier", version=model_version)

model_dir = model.download()

model = keras.models.load_model(model_dir)
"""



# Layout: an introduction, then the upload widget next to the answer box,
# then the submit button that starts a prediction.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(
    """
    # Music Genre Classifier
    
    Hello!
    
    This is a prototype for a genre classification service, where you can upload an audio file, 
    and the model will predict which genre it belongs to!
    
    The model has been trained to predict 8 top-level genres, that each encompasses a multitude of sub-genres.
    
    Upload your favorite song and give it a try!
    """
        )
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources="upload", type="filepath", label="Upload your song here", format="mp3")
        with gr.Column():
            answer_box = gr.Text(label="Answer appears here", interactive=False)
    with gr.Row():
        submit_audio = gr.Button("Submit audio for prediction")
        # The three steps are chained with .then() so they run strictly one
        # after another: blank the answer box, predict, clean up. Registering
        # three independent click listeners gives no ordering guarantee.
        cleared = submit_audio.click(fn=empty_string, outputs=answer_box, trigger_mode="once")
        predicted = cleared.then(fn=predict, inputs=audio, outputs=answer_box)
        # delete_folder() needs the folder path, and Gradio passes no
        # arguments to a listener without inputs, so bind it here. .then()
        # runs whether or not the prediction succeeded, so the temporary
        # images are always removed.
        predicted.then(fn=lambda: delete_folder(folder))


demo.launch(share=True)