Update modules/vad/silero_vad.py
Browse files
- modules/vad/silero_vad.py +10 -3
modules/vad/silero_vad.py
CHANGED
@@ -106,10 +106,17 @@ class SileroVAD:
|
|
106 |
min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
|
107 |
|
108 |
audio_length_samples = len(audio)
|
|
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
triggered = False
|
115 |
speeches = []
|
|
|
106 |
min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
|
107 |
|
108 |
audio_length_samples = len(audio)
|
109 |
+
state, context = self.model.get_initial_states(batch_size=1)
|
110 |
|
111 |
+
speech_probs = []
|
112 |
+
for current_start_sample in range(0, audio_length_samples, window_size_samples):
|
113 |
+
progress(current_start_sample/audio_length_samples, desc="Detecting speeches only using VAD...")
|
114 |
+
|
115 |
+
chunk = audio[current_start_sample: current_start_sample + window_size_samples]
|
116 |
+
if len(chunk) < window_size_samples:
|
117 |
+
chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk))))
|
118 |
+
speech_prob, state, context = self.model(chunk, state, context, sampling_rate)
|
119 |
+
speech_probs.append(speech_prob)
|
120 |
|
121 |
triggered = False
|
122 |
speeches = []
|