alvi123 committed
Commit 296d077 · 1 Parent(s): f7a2a43

Update app.py

Files changed (1)
  1. app.py +90 -91
app.py CHANGED
@@ -1,103 +1,102 @@
import gradio as gr
- import wave
- import matplotlib.pyplot as plt
- import numpy as np
- from extract_features import *
- import pickle
- import soundfile
import librosa
-
- classifier = pickle.load(open('finalized_rf.sav', 'rb'))
-
- def emotion_predict(input):
-     input_features = extract_feature(input, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
-     rf_prediction = classifier.predict(input_features.reshape(1,-1))
-     if rf_prediction == 'happy':
-         return 'kata-kerja '
-     elif rf_prediction == 'neutral':
-         return 'kata-benda '
-     elif rf_prediction == 'sad':
-         return 'kata-sifat '
-     else:
-         return 'kata-keterangan'
-
-
- def plot_fig(input):
-     wav = wave.open(input, 'r')
-
-     raw = wav.readframes(-1)
-     raw = np.frombuffer(raw, "int16")
-     sampleRate = wav.getframerate()
-
-     Time = np.linspace(0, len(raw)/sampleRate, num=len(raw))
-
-     fig = plt.figure()
-     plt.rcParams["figure.figsize"] = (50,15)
-     plt.title("Waveform Of the Audio", fontsize=25)
-     plt.xticks(fontsize=15)
-     plt.yticks(fontsize=15)
-     plt.ylabel("Amplitude", fontsize=25)
-     plt.plot(Time, raw, color='red')
-
-     return fig
-
-
- with gr.Blocks() as app:
-     gr.Markdown(
-         """
-         # Speech Detected 🎡😍
-         This application classifies inputted audio 🔊 according to the prediction into four categories:
-         1. kata-benda 😎
-         2. kata-kerja 😐
-         3. kata-sifat 😒
-         4. kata-keterangan 😀
-         """
-     )
-     with gr.Tab("Record Audio"):
-         record_input = gr.Audio(source="microphone", type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_record = gr.Button("Display Audio Signal")
-             plot_record_c = gr.Plot(label='Waveform Of the Audio')
-
-         record_button = gr.Button("Detect Emotion")
-         record_output = gr.Text(label='Emotion Detected')
-
-     with gr.Tab("Upload Audio File"):
-         gr.Markdown(
-             """
-             ## Uploaded Audio should be of .wav format
-             """
-         )
-
-         upload_input = gr.Audio(type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_upload = gr.Button("Display Audio Signal")
-             plot_upload_c = gr.Plot(label='Waveform Of the Audio')
-
-         upload_button = gr.Button("Detect Emotion")
-         upload_output = gr.Text(label='Emotion Detected')
-
-     record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
-     upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
-     plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
-     plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)
-
- app.launch()
 
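Note: the removed version depends on an extract_feature helper pulled in with `from extract_features import *`; that module is not part of this diff. For reference only, the sketch below shows what such a helper typically looks like in librosa-based speech-emotion pipelines, matching the flags used in the call above (mfcc, chroma, mel, contrast, tonnetz). The function body is an assumption, not the repository's actual code.

# Hypothetical sketch of extract_feature (not part of this commit).
# It averages several librosa features over time and concatenates them into one
# vector, which a classifier can consume after reshape(1, -1). Assumes mono audio.
import numpy as np
import soundfile
import librosa

def extract_feature(file_name, mfcc=False, chroma=False, mel=False, contrast=False, tonnetz=False):
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    result = np.array([])
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    if mfcc:
        result = np.hstack((result, np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)))
    if chroma:
        result = np.hstack((result, np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)))
    if mel:
        result = np.hstack((result, np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)))
    if contrast:
        result = np.hstack((result, np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)))
    if tonnetz:
        result = np.hstack((result, np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)))
    return result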
import gradio as gr
import librosa
+ import matplotlib.pyplot as plt
+ import plotly.express as px
+ from radar_chart import radar_factory
+
+ from keras.models import load_model
+ import os
+ import numpy as np
+
+
+ model = load_model(os.path.join("model", "Emotion_Voice_Detection_Model_tuned_2.h5"))
+
+
+ def convert_class_to_emotion(pred):
+     """
+     Method to convert the predictions (int) into human-readable strings.
+     """
+     # label_conversion = {0: 'neutral',
+     #                     1: 'calm',
+     #                     2: 'happy',
+     #                     3: 'sad',
+     #                     4: 'angry',
+     #                     5: 'fearful',
+     #                     6: 'disgust',
+     #                     7: 'surprised'}
+
+     label_conversion = {0: 'very happy',
+                         1: 'happy',
+                         2: 'very happy',
+                         3: 'very unhappy',
+                         4: 'very unhappy',
+                         5: 'unhappy',
+                         6: 'unhappy',
+                         7: 'happy'}
+
+     return label_conversion[int(pred)]
+
+
+ def make_predictions(file, micro=None):
+     """
+     Method to process the files and create the features.
+     """
+     if file is not None and micro is None:
+         input_audio = file
+     elif file is None and micro is not None:
+         input_audio = micro
+     else:
+         print("THERE IS A PROBLEM")
+         input_audio = file
+
+     data, sampling_rate = librosa.load(input_audio)
+     print(data)
+     print(f"THE SAMPLING RATE IS {sampling_rate}")
+     mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
+     x = np.expand_dims(mfccs, axis=1)
+     x = np.expand_dims(x, axis=0)
+     predictions = np.argmax(model.predict(x), axis=1)
+
+     N = 8
+     theta = radar_factory(N, frame='polygon')
+     spoke_labels = np.array(['neutral',
+                              'calm',
+                              'happy',
+                              'sad',
+                              'angry',
+                              'fearful',
+                              'disgust',
+                              'surprised'])
+     fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
+                                   subplot_kw=dict(projection='radar'))
+     vec = model.predict(x)[0]
+     axs.plot(theta, vec, color="b")
+     axs.fill(theta, vec, alpha=0.3)
+
+     axs.set_varlabels(spoke_labels)
+
+     fig = plt.figure()
+     plt.plot(data, alpha=0.8)
+     plt.xlabel("time")
+     plt.ylabel("amplitude")
+
+     return convert_class_to_emotion(predictions), fig, fig_radar
+
+
+ # Set the starting state to an empty string
+ iface = gr.Interface(
+     fn=make_predictions,
+     title="Identify the emotion of a chunk of audio speech",
+     description="A simple interface to perform emotion recognition from an audio file",
+     article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
+     inputs=[gr.Audio(source="upload", type="filepath", label="File"),
+             gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
+     examples=[[os.path.join("examples", filename)] for filename in os.listdir("examples")],
+     outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
+ )
+ iface.launch(debug=True)
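Note: the new version imports radar_factory from a local radar_chart module that is likewise not included in this commit. Judging from the calls above (frame='polygon', projection='radar', set_varlabels), it is presumably adapted from the Matplotlib gallery radar-chart example. The snippet below is a small smoke test of that assumed interface, using a dummy probability vector instead of the model output; treat it as a sketch of the expected contract, not as code from the repository.

# Assumed contract of radar_chart.radar_factory: it returns num_vars evenly
# spaced angles (radians) and registers a 'radar' projection whose axes support
# plot/fill over those angles plus set_varlabels() for the spoke labels.
import numpy as np
import matplotlib.pyplot as plt
from radar_chart import radar_factory  # local module, not included in this diff

theta = radar_factory(8, frame='polygon')   # one angle per emotion class
probs = np.full(8, 1 / 8)                   # dummy uniform "prediction"

fig, ax = plt.subplots(subplot_kw=dict(projection='radar'))
ax.plot(theta, probs, color='b')
ax.fill(theta, probs, alpha=0.3)
ax.set_varlabels(['neutral', 'calm', 'happy', 'sad',
                  'angry', 'fearful', 'disgust', 'surprised'])
fig.savefig('radar_smoke_test.png')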