ruben09 committed on
Commit
5d27c1e
·
1 Parent(s): 4781bf8

base gradio

Browse files
Files changed (2) hide show
  1. app.py +71 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import json
3
+ import os
4
+ import numpy as np
5
+ import gradio as gr
6
+ from tensorflow import keras
7
+ from huggingface_hub import hf_hub_download
8
+ import librosa
9
+
10
+
11
# Fetch the serialized classifier from the Hugging Face Hub; the returned
# value is handed to Keras below as the location of the file to load.
model_path = hf_hub_download(
    repo_id='ruben09/music_genre_classification',
    filename='music_genre_model.h5',
)

# Load the Keras model once at import time; process_audio() reads this
# module-level object on every call.
model = keras.models.load_model(model_path)
16
+
17
def process_audio(audio_file):
    """Classify the music genre of an audio file.

    The first 30 seconds of the recording are split into three equal
    segments. MFCC features of every complete segment are passed to the
    module-level Keras ``model`` and the per-segment outputs are averaged
    over the segments that were actually evaluated.

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        dict: ``"final_prediction"`` (label with the highest averaged
        score), ``"confidence"`` (that score rounded to two decimals) and
        ``"all_probabilities"`` (mapping of every label to its rounded
        averaged score).

    Raises:
        gr.Error: If no file was supplied, or if the recording is too short
            to contain a single complete segment.
    """
    if not audio_file:
        # Submitting without an upload yields an empty value; fail with a
        # readable message instead of an obscure loader error.
        raise gr.Error("Please upload an audio file first.")

    labels = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]
    sample_rate = 22050
    track_duration = 30  # seconds of audio that are analysed
    num_segments = 3
    n_fft = 2048
    hop_length = 512

    samples_per_segment = (sample_rate * track_duration) // num_segments
    # Number of MFCC frames a complete segment produces; segments with any
    # other frame count are not fed to the model.
    expected_frames = math.ceil(samples_per_segment / hop_length)

    signal, sr = librosa.load(audio_file, sr=sample_rate)

    summed_predictions = np.zeros(len(labels))
    evaluated_segments = 0

    for index in range(num_segments):
        start = samples_per_segment * index
        segment = signal[start:start + samples_per_segment]
        if segment.size == 0:
            # The recording ends before this segment starts, so every later
            # segment is empty as well.
            break

        mfcc = librosa.feature.mfcc(
            y=segment, sr=sr, n_mfcc=13, n_fft=n_fft, hop_length=hop_length
        )
        if mfcc.shape[1] != expected_frames:
            continue

        # Add a leading batch axis and a trailing channel axis:
        # (n_mfcc, frames) -> (1, n_mfcc, frames, 1).
        model_input = mfcc[np.newaxis, ..., np.newaxis]
        summed_predictions += model.predict(model_input)[0]
        evaluated_segments += 1

    if evaluated_segments == 0:
        raise gr.Error(
            "The audio is too short to classify: at least "
            f"{track_duration // num_segments} seconds are required."
        )

    # Average only over the segments that were really evaluated, so short
    # recordings do not get artificially low scores.
    averaged_predictions = summed_predictions / evaluated_segments

    best_index = int(np.argmax(averaged_predictions))

    return {
        "final_prediction": labels[best_index],
        "confidence": round(float(averaged_predictions[best_index]), 2),
        "all_probabilities": {
            label: round(float(score), 2)
            for label, score in zip(labels, averaged_predictions)
        },
    }
61
+
62
# Audio upload widget; type="filepath" makes Gradio hand the callback a path
# string rather than raw samples.
_audio_input = gr.Audio(type="filepath", label="Upload Audio (WAV, MP3, FLAC)")

# Wire the classifier into a simple web UI. The result dict returned by
# process_audio is rendered with the JSON output component.
iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs="json",
    title="Audio Classification",
    description="Upload an audio file (max 30 seconds) to get a genre classification.",
)

# Start the Gradio server as soon as the module is executed.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ tensorflow
3
+ huggingface_hub
4
+ librosa