gputrain committed on
Commit
074e471
·
1 Parent(s): 2c7b7b3
Files changed (1) hide show
  1. app.py +131 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ import gradio
8
+
9
+ from fastai.vision.all import *
10
+ from fastai.data.all import *
11
+ from pathlib import Path
12
+ import pandas as pd
13
+ from matplotlib.pyplot import specgram
14
+ import librosa
15
+ import librosa.display
16
+ from huggingface_hub import hf_hub_download
17
+ from fastai.learner import load_learner
18
+
19
+
20
+ # In[9]:
21
+
22
+
23
# Fetch the two artefacts this app needs from the Hugging Face Hub repository:
# the dataset metadata CSV and the exported fastai learner. Each call's result
# is later used as a file path (by pd.read_csv and load_learner respectively).
ref_file = hf_hub_download(
    repo_id="gputrain/UrbanSound8K-model",
    filename="UrbanSound8K.csv",
)

model_file = hf_hub_download(
    repo_id="gputrain/UrbanSound8K-model",
    filename="model.pkl",
)
26
+
27
+
28
+ # In[10]:
29
+
30
+
31
# Load the reference metadata CSV located at ``ref_file``.
df = pd.read_csv(ref_file)

# Derive the spectrogram image name for every audio slice by replacing the
# trailing 4-character extension with ".png".
# The previous expression called ``.strip()`` on the '.png' literal only
# (attribute access binds tighter than ``+``), so it never stripped anything;
# surrounding whitespace is now removed from the file name itself before the
# extension is swapped. The vectorised ``.str`` accessor also replaces the
# slower row-by-row ``apply(axis=1)``.
df['fname'] = df['slice_file_name'].str.strip().str[:-4] + '.png'

# Lookup table: spectrogram file name -> value of the ``class`` column.
my_dict = dict(zip(df['fname'], df['class']))
34
def label_func(f_name):
    """Return the class label registered for a spectrogram image.

    Args:
        f_name: Location of the image (``str`` or path-like). Only the final
            path component is used for the lookup.

    Returns:
        The value stored in ``my_dict`` under that file name.

    Raises:
        KeyError: If the file name is not a key of ``my_dict``.
    """
    # NOTE(review): this function is never called directly in this file;
    # presumably the pickled learner refers to it by name, so keep the name
    # and signature stable -- confirm before renaming.
    # ``Path.name`` follows the platform's path separator, unlike the former
    # split on a hard-coded '/'.
    return my_dict[Path(str(f_name)).name]
37
# NOTE(security): fastai's load_learner restores a pickled object, so
# ``model_file`` must come from a trusted source.
model = load_learner(model_file)

# Directory intended for example clips; not referenced elsewhere in the
# visible code.
EXAMPLES_PATH = Path("./examples")

# Class vocabulary of the learner; predict() indexes the probability vector
# with the same positions.
labels = model.dls.vocab
40
+
41
+
42
+ # In[11]:
43
+
44
+
45
# Markdown text passed to the interface as its "article" option.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default (assumes article.md is UTF-8 -- TODO confirm).
with open("article.md", encoding="utf-8") as f:
    article = f.read()
47
+
48
+
49
+ # In[12]:
50
+
51
+
52
# Keyword arguments forwarded to gradio.Interface when the demo is built.
interface_options = dict(
    title="Urban Sound 8K Classification",
    description="Fast AI example of using a pre-trained Resnet34 vision model for an audio classification task on the [Urban Sounds](https://urbansounddataset.weebly.com/urbansound8k.html) dataset. ",
    article=article,
    interpretation="default",
    layout="horizontal",
    # Example clips offered in the UI (audio from the validation files).
    examples=[
        "dog_bark.wav",
        "children_playing.wav",
        "air_conditioner.wav",
        "street_music.wav",
        "engine_idling.wav",
        "jackhammer.wav",
        "drilling.wav",
        "siren.wav",
        "car_horn.wav",
        "gun_shot.wav",
    ],
    allow_flagging="never",
)
63
+
64
+
65
+ # In[13]:
66
+
67
+
68
def convert_sounds_melspectogram(audio_file, output_path='temp.png'):
    """Render the mel spectrogram of an audio file to a PNG image on disk.

    Args:
        audio_file: Audio file to analyse; passed unchanged to
            ``librosa.load``.
        output_path: Destination of the rendered image. Defaults to
            ``'temp.png'``, the file that ``predict()`` reads.

    Returns:
        None. The only result is the image written to ``output_path``.
    """
    samples, sample_rate = librosa.load(audio_file)

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        # Axes, ticks and frame are hidden so the image holds only the
        # spectrogram itself.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_frame_on(False)

        mel_spec = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        # Draw on the explicit axes and save the explicit figure instead of
        # relying on pyplot's "current figure" global state.
        librosa.display.specshow(
            librosa.power_to_db(mel_spec, ref=np.max),
            ax=ax,
        )
        fig.savefig(output_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails;
        # otherwise every failed request would leave an open figure behind.
        plt.close(fig)

    return None
84
+
85
+
86
+ # In[14]:
87
+
88
+
89
def predict():
    """Classify the spectrogram image stored in ``'temp.png'``.

    Returns:
        dict: Maps every entry of ``labels`` to the probability (as a plain
        ``float``) found at the same index of the model's output.
    """
    img = PILImage.create('temp.png')
    # Only the third element of the prediction tuple (the probabilities) is
    # needed here.
    _, _, probs = model.predict(img)
    # The former second ``return labels_probs`` was unreachable and referred
    # to an undefined name; it has been removed.
    return {label: float(probs[i]) for i, label in enumerate(labels)}
94
+
95
+
96
+ # In[20]:
97
+
98
+
99
def end2endpipeline(filename):
    """Run the whole audio-to-prediction pipeline for one audio file.

    The audio is first rendered to a spectrogram image, then that image is
    classified. The two steps hand over through the fixed file 'temp.png'.

    Args:
        filename: Path of the audio file to classify.

    Returns:
        Whatever ``predict()`` returns for the freshly written image.
    """
    convert_sounds_melspectogram(filename)
    probabilities = predict()
    return probabilities
102
+
103
+
104
+ # In[16]:
105
+
106
+
107
# Input widget: an uploaded audio clip, handed to the function as a file path.
audio_input = gradio.inputs.Audio(source="upload", type="filepath")
# Output widget: a label component configured with num_top_classes=10.
label_output = gradio.outputs.Label(num_top_classes=10)

# Assemble the web demo around the end-to-end prediction function.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=audio_input,
    outputs=label_output,
    **interface_options,
)
113
+
114
+
115
+ # In[19]:
116
+
117
+
118
# Keyword arguments forwarded to demo.launch().
launch_options = dict(
    enable_queue=True,
    share=False,
    # cache_examples=True,  (left disabled, as in the original)
)

# Start the Gradio server for the demo.
demo.launch(**launch_options)
125
+
126
+
127
+ # In[ ]:
128
+
129
+
130
+
131
+