Cristian Tatu committed
Commit 1e50356 · 1 Parent(s): 179fb16
Files changed (14)
  1. .gitattributes +8 -0
  2. README.md +5 -4
  3. female_1.wav +3 -0
  4. female_2.wav +3 -0
  5. female_3.wav +3 -0
  6. female_4.wav +3 -0
  7. main.py +13 -0
  8. male_1.wav +3 -0
  9. male_2.wav +3 -0
  10. male_3.wav +3 -0
  11. male_4.wav +3 -0
  12. model.onnx +3 -0
  13. model.py +64 -0
  14. requirements.txt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ male_1.wav filter=lfs diff=lfs merge=lfs -text
+ male_2.wav filter=lfs diff=lfs merge=lfs -text
+ male_3.wav filter=lfs diff=lfs merge=lfs -text
+ male_4.wav filter=lfs diff=lfs merge=lfs -text
+ female_1.wav filter=lfs diff=lfs merge=lfs -text
+ female_2.wav filter=lfs diff=lfs merge=lfs -text
+ female_3.wav filter=lfs diff=lfs merge=lfs -text
+ female_4.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,14 @@
  ---
  title: Cnn Voice Classifier
- emoji: 👀
+ emoji: 🧑‍💼🤵‍♀️
- colorFrom: gray
+ colorFrom: yellow
- colorTo: blue
+ colorTo: indigo
  sdk: gradio
  sdk_version: 5.35.0
- app_file: app.py
+ app_file: main.py
  pinned: false
  license: apache-2.0
+ short_description: Voice gender classification model trained with many datasets
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
female_1.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:690abe805180c50c0784f8721f7577707fac609566fe789adf6573f28d627a38
+ size 331050
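
Note: the .wav and .onnx diffs in this commit are Git LFS pointer files, not the binary payloads; the three lines above are the complete pointer (spec version, SHA-256 object id, byte size). A minimal sketch of reading such a pointer back, assuming the file is still in pointer form (this helper is illustrative, not part of the commit):

    # Hypothetical helper: parse a Git LFS pointer file into its key/value fields.
    def parse_lfs_pointer(path):
        fields = {}
        with open(path, encoding='utf-8') as f:
            for line in f:
                key, _, value = line.strip().partition(' ')
                fields[key] = value  # keys: 'version', 'oid', 'size'
        return fields

    # parse_lfs_pointer('female_1.wav')['size'] -> '331050'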
female_2.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23367a63c4966046aa8626dabb486cda8dcdd5554f01d74c6fa08179efd1078b
+ size 67480
female_3.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:088ef297b02fbbb7f5606364b806cc4ac5dbf1ad0f6a7736584ca3b67cf34ca1
+ size 330828
female_4.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10d6dc49ad7bfa6e5da8f6203564e358fb367bc71215ab27a7037a4014147972
+ size 330828
main.py ADDED
@@ -0,0 +1,13 @@
+ import gradio as gr
+ from model import CnnVoiceClassifier
+
+ model = CnnVoiceClassifier()
+
+ audio_component = gr.Audio(type='filepath', label='Upload your audio file here')
+ label_component = gr.Label(label='Gender classification result')
+
+ sample_female = [f'female_{i}.wav' for i in range(1, 5)]
+ sample_male = [f'male_{i}.wav' for i in range(1, 5)]
+
+ demo = gr.Interface(fn=model.inference, inputs=audio_component, outputs=label_component, examples=sample_female + sample_male)
+ demo.launch()
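
Since the README front matter now points app_file at main.py, Spaces runs this script on startup. For a quick local smoke test without the Gradio UI, the classifier can be called directly; a minimal sketch, assuming model.onnx and the sample wavs sit in the working directory:

    from model import CnnVoiceClassifier

    model = CnnVoiceClassifier()
    # inference() takes a filepath, matching gr.Audio(type='filepath') above,
    # and returns a dict like {'Male': p, 'Female': 1 - p}.
    print(model.inference('female_1.wav'))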
male_1.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b3e3539feeb7acddd0648e49529d5dcd017da380abe06eb898d13ecbf8a7ad4
+ size 330828
male_2.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e4cad1f12d071951045f763b89e68166617a5187ee66676bfc007993c5268db
+ size 330828
male_3.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd2a730eb06133800165c03ec672da271430d236fa547dc71ec8270c17d0d6c7
+ size 330828
male_4.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd70dc5223a1acf0073eb3ab589c9699f0cabc1e4f3bd77d030054943846dd10
+ size 330828
model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2a828584699ca3b798aa47fa0e95447637e6607bf703b3cfde8cccc25974c46
+ size 4609037
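
The model's expected tensor shapes can be read back from the session metadata rather than guessed. A quick sketch; the commented shape is what model.py implies (1.5 s at 22050 Hz = 33075 samples per slice), not something stated in the commit:

    import onnxruntime as ort

    session = ort.InferenceSession('model.onnx')
    inp = session.get_inputs()[0]
    out = session.get_outputs()[0]
    # model.py feeds a (num_slices, 33075, 1) batch, so inp.shape should be
    # on the order of [None, 33075, 1]; print to confirm.
    print(inp.name, inp.shape, '->', out.name, out.shape)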
model.py ADDED
@@ -0,0 +1,64 @@
+ import onnxruntime as ort
+ import librosa
+ import numpy as np
+ import scipy.signal
+
+ SR = 22050
+ LENGTH_SEC = 1.5
+
+ def resample(audio_data, original_sr, target_sr):
+     num_samples = int(len(audio_data) * target_sr / original_sr)
+     return scipy.signal.resample(audio_data, num_samples)
+
+
+ def load_audio_slices(af):
+     audio, sr = librosa.load(af, sr=None)
+     if sr != SR:
+         audio = resample(audio, sr, SR)
+
+     LENGTH_SAMPLES = int(LENGTH_SEC * SR)
+
+     slices = []
+     if len(audio) <= LENGTH_SAMPLES:
+         # Short clip: zero-pad up to one full 1.5 s window.
+         padding_needed = LENGTH_SAMPLES - len(audio)
+         audio = np.pad(audio, (0, padding_needed), mode='constant')
+         slices.append(audio)
+     else:
+         num_chunks = len(audio) / LENGTH_SAMPLES
+         # Cap the number of evenly spaced windows taken from longer clips.
+         if num_chunks > 2:
+             num_chunks = 5
+         elif num_chunks > 1.5:
+             num_chunks = 3
+         elif num_chunks > 1:
+             num_chunks = 2
+
+         end = len(audio) - LENGTH_SAMPLES
+         # max(1, ...) guards against a zero step when end < num_chunks.
+         idxs_split = np.arange(0, end, max(1, end // num_chunks), dtype=int)
+         for idx_split in idxs_split:
+             sl = slice(idx_split, idx_split + LENGTH_SAMPLES)
+             slices.append(audio[sl])
+
+     slices = np.vstack(slices)
+
+     # Peak-normalize each slice independently.
+     return slices / np.max(slices, axis=1)[:, np.newaxis]
+
+ class CnnVoiceClassifier:
+     def __init__(self):
+         self.session = ort.InferenceSession('model.onnx')
+         self.input_name = self.session.get_inputs()[0].name
+         self.output_name = self.session.get_outputs()[0].name
+
+     def inference(self, audio_path):
+         audio = load_audio_slices(audio_path)
+         input_feed = {self.input_name: np.expand_dims(audio, axis=-1)}
+         outputs = self.session.run([self.output_name], input_feed)
+         probs = outputs[0].flatten()
+         # Weight each slice by its distance from the 0.5 decision boundary.
+         w = np.abs((probs - 0.5) * 2)
+         final_prob = np.average(probs, weights=w)
+
+         return {'Male': final_prob, 'Female': 1 - final_prob}
+
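
The slicing policy above yields one zero-padded window for clips at or under 1.5 s, and 2, 3, or 5 evenly spaced windows as duration grows. A self-contained check with a synthetic tone (hypothetical, not part of the commit; tone.wav is a throwaway file):

    import numpy as np
    import scipy.io.wavfile
    from model import load_audio_slices, SR

    # 4 s at 22050 Hz -> 4/1.5 ~ 2.7 raw chunks, which the policy caps at 5 windows.
    t = np.linspace(0, 4.0, int(4.0 * SR), endpoint=False)
    tone = (0.5 * np.sin(2 * np.pi * 220 * t)).astype(np.float32)
    scipy.io.wavfile.write('tone.wav', SR, tone)

    slices = load_audio_slices('tone.wav')
    print(slices.shape)  # expected: (5, 33075), one row per peak-normalized window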
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ huggingface_hub
+ librosa
+ onnxruntime