import gradio as gr
import wave
import matplotlib.pyplot as plt
import numpy as np
from extract_features import *  # provides extract_feature (see fallback sketch below)
import pickle
import soundfile
import librosa

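# `extract_feature` comes from this repo's extract_features.py (pulled in by
# the star import above). In case that module does not provide it, the guard
# below installs a fallback sketch: an assumption based on the widely used
# librosa recipe for speech-emotion features, not necessarily the author's
# exact implementation.
if "extract_feature" not in globals():
  def extract_feature(file_name, mfcc=True, chroma=True, mel=True,
                      contrast=True, tonnetz=True):
    # Read the samples as float32; assumes a mono recording.
    with soundfile.SoundFile(file_name) as f:
      X = f.read(dtype="float32")
      sample_rate = f.samplerate
    stft = np.abs(librosa.stft(X))  # shared STFT for chroma and contrast
    result = np.array([])
    if mfcc:  # timbre: 40 Mel-frequency cepstral coefficients
      result = np.hstack((result, np.mean(
          librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)))
    if chroma:  # energy per pitch class
      result = np.hstack((result, np.mean(
          librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)))
    if mel:  # Mel-spectrogram band averages
      result = np.hstack((result, np.mean(
          librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)))
    if contrast:  # spectral peak/valley contrast
      result = np.hstack((result, np.mean(
          librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)))
    if tonnetz:  # tonal centroids of the harmonic component
      result = np.hstack((result, np.mean(librosa.feature.tonnetz(
          y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)))
    return result
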
# Load the pre-trained random-forest classifier from disk.
with open('finalized_rf.sav', 'rb') as f:
  classifier = pickle.load(f)

def emotion_predict(file_path):
  # Extract the full feature vector and reshape it into a single-sample batch.
  input_features = extract_feature(file_path, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
  # predict() returns an array; take its single element for the comparisons.
  rf_prediction = classifier.predict(input_features.reshape(1, -1))[0]
  # Map the model's emotion labels to the displayed categories (Indonesian
  # word-class names: noun, verb, adjective, adverb).
  if rf_prediction == 'happy':
    return 'kata-benda 😎'
  elif rf_prediction == 'neutral':
    return 'kata-kerja 😐'
  elif rf_prediction == 'sad':
    return 'kata-sifat 😒'
  else:
    return 'kata-keterangan 😀'

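# Example call (hypothetical file path):
#   emotion_predict("sample.wav")  # -> e.g. 'kata-benda 😎'
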
def plot_fig(file_path):
  # Read the raw frames; assumes mono 16-bit PCM, which is what the
  # recorder tab produces.
  with wave.open(file_path, 'r') as wav:
    raw = np.frombuffer(wav.readframes(-1), dtype="int16")
    sample_rate = wav.getframerate()

  time_axis = np.linspace(0, len(raw) / sample_rate, num=len(raw))

  # figsize must be set when the figure is created; assigning rcParams
  # afterwards does not resize an already-created figure.
  fig = plt.figure(figsize=(50, 15))
  plt.title("Waveform Of the Audio", fontsize=25)
  plt.xticks(fontsize=15)
  plt.yticks(fontsize=15)
  plt.xlabel("Time (s)", fontsize=25)
  plt.ylabel("Amplitude", fontsize=25)
  plt.plot(time_axis, raw, color='red')
  return fig

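# Note: the wave module only reads PCM .wav input. A more format-tolerant
# variant (a sketch, not the author's code) could use the already-imported
# soundfile instead:
#   data, sample_rate = soundfile.read(file_path)  # float samples, any libsndfile format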

with gr.Blocks() as app:
  gr.Markdown(
        """
    # Speech Predict Detector 🎡😍
    This application classifies the input audio 🔊 into one of four categories:
    1. kata-benda 😎
    2. kata-kerja 😐
    3. kata-sifat 😒
    4. kata-keterangan 😀
    """
  )
  with gr.Tab("Record Audio"):
    record_input = gr.Audio(source="microphone", type="filepath")
        
    with gr.Accordion("Audio Visualization", open=False):
      gr.Markdown(
          """
      ### Visualization will work only after audio has been submitted
      """
      )    
      plot_record = gr.Button("Display Audio Signal")
      plot_record_c = gr.Plot(label='Waveform Of the Audio')
    
    record_button = gr.Button("Detect Emotion")
    record_output = gr.Text(label='Emotion Detected')

  with gr.Tab("Upload Audio File"):
    gr.Markdown(
        """
    ## Uploaded audio must be in .wav format
    """
    )

    upload_input = gr.Audio(type="filepath")

    with gr.Accordion("Audio Visualization", open=False):
      gr.Markdown(
          """
      ### Visualization will work only after audio has been submitted
      """
      )
      plot_upload = gr.Button("Display Audio Signal")
      plot_upload_c = gr.Plot(label='Waveform Of the Audio')

    upload_button = gr.Button("Detect Emotion")
    upload_output = gr.Text(label='Emotion Detected')
    
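  # Wire up the event handlers: each button sends its tab's audio through
  # emotion_predict or plot_fig and routes the result to the matching output.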
  record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
  upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
  plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
  plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)

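# Start the local Gradio server (share=True could be passed for a public link).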
app.launch()