KhadgaA commited on
Commit
38d869c
·
1 Parent(s): 9c3903e

gradio inference

Browse files
Files changed (3) hide show
  1. app.py +93 -0
  2. lda.pkl +3 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import librosa
3
+ import pickle
4
+ import numpy as np
5
+ import spafe
6
+ from spafe.frequencies import dominant_frequencies
7
+ from spafe.features.mfcc import mfcc, imfcc
8
+ from spafe.features.bfcc import bfcc
9
+ from spafe.features.cqcc import cqcc
10
+ from spafe.features.gfcc import erb_spectrogram
11
+ from spafe.features.lfcc import linear_spectrogram
12
+ from spafe.features.msrcc import msrcc
13
+ from spafe.features.ngcc import ngcc
14
+ from spafe.utils.preprocessing import SlidingWindow
15
+ from sklearn.metrics.pairwise import cosine_similarity
16
def dominant_freq_density(min_dom_freq, max_dom_freq, signal, sr):
    """Return a density histogram of the signal's dominant frequencies.

    Dominant frequencies are estimated with spafe, restricted to the open
    interval (min_dom_freq, max_dom_freq) Hz, then binned in 100 Hz steps.
    """
    freqs = dominant_frequencies.get_dominant_frequencies(
        signal, sr, nfft=512, butter_filter=True
    )
    in_band = freqs[(freqs > min_dom_freq) & (freqs < max_dom_freq)]
    # NOTE(review): range(min, max, 100) stops at max-100, so the last bin
    # edge is 900 for (100, 1000) and frequencies in 900-1000 Hz are dropped
    # even though the filter above keeps them. Left as-is: the pickled LDA
    # model was trained on this exact feature length — confirm before fixing.
    density, _edges = np.histogram(
        in_band, bins=range(min_dom_freq, max_dom_freq, 100), density=True
    )
    return density
21
+
22
def dominant_freq(x):
    """Dominant-frequency density feature for a sample dict {'y': samples, 'sr': rate}."""
    # Fixed 100-1000 Hz band; must stay in sync with the training pipeline.
    return dominant_freq_density(100, 1000, x['y'], x['sr'])
24
def apply_mfcc(x):
    """Frame-averaged MFCC feature vector for a sample dict {'y': samples, 'sr': rate}."""
    window = SlidingWindow(0.03, 0.015, "hamming")
    coeffs = mfcc(
        x['y'], fs=x['sr'], pre_emph=1, pre_emph_coeff=0.97, window=window,
        nfilts=128, nfft=512, low_freq=50, high_freq=4000, normalize="mvn",
    )
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
26
+
27
def apply_bfcc(x):
    """Frame-averaged BFCC feature vector for a sample dict {'y': samples, 'sr': rate}."""
    window = SlidingWindow(0.03, 0.015, "hamming")
    coeffs = bfcc(
        x['y'], fs=x['sr'], pre_emph=1, pre_emph_coeff=0.97, window=window,
        nfilts=128, nfft=512, low_freq=50, high_freq=4000, normalize="mvn",
    )
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
29
def apply_cqcc(x):
    """Frame-averaged CQCC feature vector for a sample dict {'y': samples, 'sr': rate}."""
    window = SlidingWindow(0.03, 0.015, "hamming")
    coeffs = cqcc(
        x['y'], fs=x['sr'], pre_emph=True, pre_emph_coeff=0.97, window=window,
        nfft=512, low_freq=0, high_freq=None,
        number_of_octaves=7, number_of_bins_per_octave=24,
        spectral_threshold=0.005, f0=120, q_rate=1.0,
    )
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
31
+
32
+
33
def apply_gfcc(x):
    """Frame-averaged ERB (gammatone) spectrogram feature vector.

    x is a sample dict {'y': samples, 'sr': rate}. Only the spectrogram
    (first element of the spafe return tuple) is used.
    """
    window = SlidingWindow(0.03, 0.015, "hamming")
    spectrogram = erb_spectrogram(
        x['y'], fs=x['sr'], pre_emph=True, pre_emph_coeff=0.97, window=window,
        nfilts=24, nfft=512, low_freq=0, high_freq=None,
        scale='constant', fbanks=None, conversion_approach='Glasberg',
    )[0]
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(spectrogram, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
35
+
36
def apply_lfcc(x):
    """Frame-averaged linear spectrogram feature vector.

    x is a sample dict {'y': samples, 'sr': rate}. Only the spectrogram
    (first element of the spafe return tuple) is used.
    """
    window = SlidingWindow(0.03, 0.015, "hamming")
    spectrogram = linear_spectrogram(
        x['y'], fs=x['sr'], pre_emph=True, pre_emph_coeff=0.97, window=window,
        nfilts=24, nfft=512, low_freq=0, high_freq=None,
        scale='constant', fbanks=None,
    )[0]
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(spectrogram, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
38
+
39
def apply_msrcc(x):
    """Frame-averaged MSRCC feature vector for a sample dict {'y': samples, 'sr': rate}."""
    window = SlidingWindow(0.03, 0.015, "hamming")
    coeffs = msrcc(
        x['y'], fs=x['sr'], num_ceps=13, pre_emph=True, pre_emph_coeff=0.97,
        window=window, nfilts=24, nfft=512, low_freq=0, high_freq=None,
        scale='ascendant', gamma=-0.14285714285714285, dct_type=2,
        use_energy=False, lifter=None, normalize=None, fbanks=None,
        conversion_approach='Oshaghnessy',
    )
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
41
+
42
def apply_ngcc(x):
    """Frame-averaged NGCC feature vector for a sample dict {'y': samples, 'sr': rate}."""
    window = SlidingWindow(0.03, 0.015, "hamming")
    coeffs = ngcc(
        x['y'], fs=x['sr'], num_ceps=13, pre_emph=True, pre_emph_coeff=0.97,
        window=window, nfilts=24, nfft=512, low_freq=0, high_freq=None,
        scale='constant', dct_type=2, use_energy=False, lifter=None,
        normalize=None, fbanks=None, conversion_approach='Glasberg',
    )
    # Replace NaN/inf frames with zeros before averaging over time.
    cleaned = np.nan_to_num(coeffs, posinf=0, neginf=0)
    return np.mean(cleaned, axis=0)
44
+
45
def load_model(checkpoint):
    """Deserialize and return a pickled model from the file at *checkpoint*.

    Uses a context manager so the file handle is closed even if
    unpickling raises (the original leaked the handle via
    ``pickle.load(open(...))``).

    SECURITY NOTE: ``pickle.load`` executes arbitrary code from the file;
    only load checkpoints you trust (here, the repo's own lda.pkl).
    """
    with open(checkpoint, 'rb') as f:
        return pickle.load(f)
48
+
49
def extract_features(audio):
    """Load an audio file and return its concatenated 1-D feature vector.

    Parameters
    ----------
    audio : str
        Path to an audio file readable by ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Flat concatenation of all per-extractor feature vectors.

    The original body assigned each result to locals named ``mfcc``,
    ``bfcc``, ``cqcc``, ``msrcc`` and ``ngcc``, shadowing the spafe
    functions imported under the same names; the loop below avoids that
    and removes the copy-pasted append blocks.
    """
    y, sr = librosa.load(audio)
    sample = {'y': y, 'sr': sr}
    # Order is significant: it must match the feature layout the LDA
    # model in lda.pkl was trained on.
    extractors = (
        dominant_freq,
        apply_mfcc,
        apply_bfcc,
        apply_cqcc,
        apply_gfcc,
        apply_lfcc,
        apply_msrcc,
        apply_ngcc,
    )
    features = [extract(sample) for extract in extractors]
    return np.concatenate(features).flatten()
71
+
72
def inference_Verification(audio_1, audio_2):
    """Cosine-similarity score between two audio files' LDA embeddings.

    Parameters
    ----------
    audio_1, audio_2 : str
        File paths supplied by the Gradio Audio components.

    Returns
    -------
    float-like
        Cosine similarity of the two embeddings, rounded to 4 decimals.
    """
    # Cache the deserialized model on the function object so repeated
    # inference calls don't re-read and unpickle lda.pkl every time
    # (the original reloaded the model on each request).
    model = getattr(inference_Verification, "_model", None)
    if model is None:
        model = load_model('lda.pkl')
        inference_Verification._model = model

    features1 = extract_features(audio_1)
    features2 = extract_features(audio_2)

    # transform expects a 2-D array: wrap each feature vector in a list.
    embed1 = model.transform([features1])
    embed2 = model.transform([features2])
    return cosine_similarity(embed1, embed2).flatten()[0].round(4)
83
+
84
# Gradio UI: two uploaded audio clips in, one similarity score out.
demo = gr.Interface(
    fn=inference_Verification,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio 1"),
        gr.Audio(sources="upload", type="filepath", label="Audio 2"),
    ],
    outputs=gr.Textbox(label="Similarity Score"),
    title="Speaker Verification",
    description="Speaker Verification on Multilingual dataset.",
)
demo.launch()
lda.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f8ec4727c15b390439f9a81a6a30098ba680af872a61d81b7a7f51e1822b3a
3
+ size 59305
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ numpy
2
+ spafe
3
+ librosa
4
+ scikit-learn