File size: 4,029 Bytes
e2eef75
 
 
cbbff9c
e2eef75
 
 
 
 
 
cbbff9c
 
e2eef75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223715d
e2eef75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbbff9c
 
 
 
 
 
 
 
 
 
 
 
e2eef75
 
 
 
223715d
e2eef75
cbbff9c
 
 
 
e2eef75
223715d
e2eef75
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
import keras
import librosa
#import hopsworks
import os
import numpy as np
import shutil
from functions import log_mel_spectrogram, split_spectrogram, load_audio_file, image_transformer, save_spectrogram_as_png
from datasets import load_dataset

def return_input(input):
    """Hand the received value straight back to the caller, unmodified.

    Serves as an identity callback: whatever object is passed in is the
    exact object that is returned.
    """
    # The parameter name shadows the builtin ``input`` but is kept because
    # it is part of this function's signature.
    return input

def create_image_folder(folder):
    """Create the directory ``folder`` unless it is already present.

    Args:
        folder: Path of the directory to create. Its parent must exist,
            because ``os.mkdir`` does not create intermediate directories.

    Returns:
        None.

    Raises:
        OSError: For any failure other than the path already existing
            (e.g. missing parent directory, insufficient permissions).
    """
    try:
        os.mkdir(folder)
    except FileExistsError:
        # An existing path is the one expected, harmless outcome; every
        # other error is a real problem and is allowed to propagate.
        pass

def delete_folder(folder):
    """Recursively remove the directory ``folder`` if it exists.

    Args:
        folder: Path of the directory tree to delete.

    Returns:
        None.

    Raises:
        OSError: For any failure other than the path being absent
            (e.g. the path is not a directory, insufficient permissions).
    """
    try:
        shutil.rmtree(folder)
    except FileNotFoundError:
        # Nothing to delete is the one expected, harmless outcome; other
        # errors are real problems and are allowed to propagate.
        pass

 
def create_dataset(image_folder):
    """Turn the images found in ``image_folder`` into a TensorFlow dataset.

    The folder is loaded with ``datasets.load_dataset`` and its ``"train"``
    split is selected. Each batch is passed through ``image_transformer``
    with ``mode="L"`` (presumably a grayscale conversion -- confirm against
    ``functions.image_transformer``). The ``"image"`` column is then exposed
    through ``to_tf_dataset`` with a batch size of one.

    Args:
        image_folder: Path handed to ``load_dataset``.

    Returns:
        The object produced by ``Dataset.to_tf_dataset``.
    """
    train_split = load_dataset(image_folder, split=None)["train"]
    # Dataset summary is echoed to stdout.
    print(train_split)

    transformed = train_split.map(
        image_transformer,
        batched=True,
        fn_kwargs={"mode": "L"},
    )
    return transformed.to_tf_dataset(batch_size=1, columns="image")


def majority_vote(raw_predictions):
    """Pick the genre name predicted most often across all rows.

    Every row of ``raw_predictions`` casts one vote for the index of its
    largest value (``argmax`` along axis 1). The index with the most votes
    is translated to a name via the module-level ``label_decoding`` mapping.
    On a tie the smallest index wins, because ``np.unique`` returns sorted
    labels and ``argmax`` returns the first maximum.

    Args:
        raw_predictions: Two-dimensional array-like of per-class scores.

    Returns:
        The ``label_decoding`` entry of the winning class index.
    """
    votes = np.argmax(raw_predictions, axis=1)
    candidates, tallies = np.unique(votes, return_counts=True)
    return label_decoding[candidates[np.argmax(tallies)]]

def predict(audio):
    """Classify the genre of an audio file and return a user-facing message.

    Pipeline: load the audio, compute its log-mel spectrogram, split the
    spectrogram into segments, store each segment as a PNG inside the
    module-level ``folder``, build a dataset from those images, run the
    module-level ``model`` on it and take a majority vote over the
    per-segment predictions.

    Args:
        audio: Value forwarded to ``load_audio_file`` (the Gradio audio
            component is configured with ``type="filepath"``).

    Returns:
        A sentence naming the predicted genre, or an error message when the
        audio cannot be loaded or yields no spectrogram segments.
    """
    # Load first so a failed upload does not leave an empty folder behind.
    try:
        audio_array = load_audio_file(audio, sample_rate, res_type, duration)
    except Exception:
        # Boundary handler for the UI: any loading failure becomes a friendly
        # message, while KeyboardInterrupt/SystemExit still propagate.
        return "Error when loading audio. Did you submit a file?"

    spectrogram = log_mel_spectrogram(audio_array, sample_rate, nfft, hop_length, window)
    spec_splits = split_spectrogram(spectrogram, output_shape)

    # Start from an empty folder: images left over from an earlier, longer
    # clip would otherwise be loaded too and take part in the vote.
    delete_folder(folder)
    create_image_folder(folder)
    try:
        saved_count = 0
        for idx, split in enumerate(spec_splits, start=1):
            save_path = os.path.join(folder, f"{idx}_spec.png")
            save_spectrogram_as_png(split, save_path, sample_rate, nfft, hop_length)
            saved_count = idx

        if saved_count == 0:
            # No images means nothing to load or vote on.
            return "Could not extract any spectrogram segments from the audio. Is the clip long enough?"

        image_dataset = create_dataset(folder)
        raw_preds = model.predict(image_dataset, verbose=0)
        genre_pred = majority_vote(raw_preds)
    finally:
        # Remove the temporary images whether or not prediction succeeded.
        delete_folder(folder)

    return f"The submitted audio belongs to the {genre_pred} genre!"

# --- Audio loading settings (forwarded to load_audio_file) -----------------
sample_rate = 22050
res_type = "kaiser_fast"
# NOTE(review): the meaning of 0 is defined by load_audio_file -- confirm.
duration = 0

# --- Spectrogram settings (forwarded to log_mel_spectrogram and friends) ---
nfft = 2048
hop_length = 512
window = "hann"
output_shape = (128, 256)

# Directory that receives the spectrogram PNGs written during prediction.
folder = "images"

# Maps the class index chosen by the model to its genre name.
label_decoding = {
    0: "Electronic",
    1: "Experimental",
    2: "Folk",
    3: "Hip-Hop",
    4: "Instrumental",
    5: "International",
    6: "Pop",
    7: "Rock",
}

# The classifier is loaded once, at import time, from a local file.
model_path = "best_model.keras"
model = keras.models.load_model(model_path)


# Disabled alternative that fetches the model from the Hopsworks model
# registry; kept as an inert string literal, so it is never executed.
"""
model_version = 1
project = hopsworks.login()
mr = project.get_model_registry()
model = mr.get_model("cnn_genre_classifier", version=model_version)

model_dir = model.download()

model = keras.models.load_model(model_dir)
"""



# Gradio interface: an intro text, an upload widget next to a browser for the
# bundled example songs, a read-only answer box and a submit button.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(
    """
    # Music Genre Classifier
    
    Hello!
    
    This is a prototype for a genre classification service, where you can upload an audio file, 
    and the model will predict which genre it belongs to!
    
    The model has been trained to predict 8 top-level genres, that each encompasses a multitude of sub-genres.
    
    The top-level genres are:
    
    1. Electronic
    2. Experimental
    3. Folk
    4. Hip-Hop
    5. Instrumental
    6. International
    7. Pop
    8. Rock
    
    Upload your favorite song or choose one of the example tracks and give it a try!
    """
        )
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources="upload", type="filepath", label="Upload your song here", format="wav")
        with gr.Column():
            files = gr.FileExplorer(label="Example songs", file_count="single", root="examples", interactive=True)
            # Picking an example copies its path into the audio component.
            files.change(fn=return_input, inputs=files, outputs=audio)
    with gr.Row():
        answer_box = gr.Text(label="Answer appears here", interactive=False)
    with gr.Row():
        submit_audio = gr.Button(value="Submit audio for prediction")
        prediction_event = submit_audio.click(
            fn=predict, inputs=audio, outputs=answer_box, trigger_mode="once"
        )
        # Clean up only after the prediction has finished. Registering
        # delete_folder as a second click listener would run it alongside
        # predict and, with no inputs wired, call it without its required
        # ``folder`` argument.
        prediction_event.then(fn=lambda: delete_folder(folder), inputs=None, outputs=None)


demo.launch(share=True)