File size: 1,083 Bytes
0eddaee
56e6969
 
8b77c99
 
 
 
 
 
 
dc80bd6
8b77c99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56e6969
 
0eddaee
 
 
 
 
 
 
56e6969
 
 
64fa054
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import numpy as np
import gradio as gr

import pickle
import librosa

from sklearn.linear_model import LogisticRegression

# Load the pre-trained classifier once at import time so every request reuses
# the same object. The file is presumably a fitted LogisticRegression trained
# on mean-MFCC features (the annotation records that intent; pickle itself
# does not enforce it).
#
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file. Only ship a model file that comes from a trusted source.
with open("./lr_mfcc_model.pkl", "rb") as f:
    lr_model: LogisticRegression = pickle.load(f)

def extract_mfcc_gradio(audio):
    """Turn a Gradio audio value into a single mean-MFCC feature vector.

    Args:
        audio: A ``(sample_rate, samples)`` pair. ``samples`` is a NumPy
            array that is either 1-D (mono) or 2-D; for 2-D input only
            column 0 is used (presumably laid out as (samples, channels) --
            confirm against the Gradio audio component documentation).

    Returns:
        A 1-D NumPy array holding each MFCC coefficient averaged over all
        analysis frames.

    Raises:
        ValueError: If ``samples`` contains no data.
    """
    sample_rate, samples = audio

    samples = samples.astype(np.float32)
    if samples.size == 0:
        # np.max would fail on an empty array with an opaque reduction
        # error; report the actual problem instead.
        raise ValueError("audio input is empty")

    # Peak-normalize using the loudest sample across the whole array (all
    # channels), matching the original scaling. A silent clip has a peak of
    # zero; skip the division there so the signal stays all-zero instead of
    # becoming NaN.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples /= peak

    # Keep a single channel: mono input is used as is, otherwise column 0.
    data = samples if samples.ndim == 1 else samples[:, 0]

    # librosa returns one row per coefficient and one column per frame;
    # transpose and average over frames to get one value per coefficient.
    coefficients = librosa.feature.mfcc(y=data, sr=sample_rate)
    return np.mean(coefficients.T, axis=0)

def voice_mfcc_classification(audio):
    """Classify an audio clip as "engaging" or "boring" from its MFCC features.

    The clip is reduced to a feature vector by ``extract_mfcc_gradio`` and
    passed to the module-level ``lr_model``. A predicted label of 0 maps to
    "engaging"; any other label maps to "boring".
    """
    features = extract_mfcc_gradio(audio)

    # predict() takes a batch of samples, so wrap the single vector in a list
    # and read back the only prediction.
    label = lr_model.predict([features])[0]

    return "engaging" if label == 0 else "boring"


def voice_classification(audio):
    """Return a random "boring"/"engaging" label; ``audio`` is ignored.

    One uniform sample is drawn from [0, 1) via ``np.random.rand``. A value
    strictly greater than 0.5 yields "boring"; anything else yields
    "engaging".
    """
    return "boring" if np.random.rand() > 0.5 else "engaging"

# Build the web UI (audio in, text label out) around the MFCC classifier,
# then start serving it.
demo = gr.Interface(fn=voice_mfcc_classification, inputs="audio", outputs="text")
demo.launch()