LAP-DEV committed on
Commit
87fbd2b
·
verified ·
1 Parent(s): 2fe9892

Update modules/vad/silero_vad.py

Browse files
Files changed (1) hide show
  1. modules/vad/silero_vad.py +10 -3
modules/vad/silero_vad.py CHANGED
@@ -106,10 +106,17 @@ class SileroVAD:
106
  min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
107
 
108
  audio_length_samples = len(audio)
 
109
 
110
- padded_audio = np.pad(audio, (0, window_size_samples - audio.shape[0] % window_size_samples))
111
- padded_audio = padded_audio.reshape(1, -1)
112
- speech_probs = self.model(padded_audio).squeeze(0)
 
 
 
 
 
 
113
 
114
  triggered = False
115
  speeches = []
 
106
  min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
107
 
108
  audio_length_samples = len(audio)
109
+ state, context = self.model.get_initial_states(batch_size=1)
110
 
111
+ speech_probs = []
112
+ for current_start_sample in range(0, audio_length_samples, window_size_samples):
113
+ progress(current_start_sample/audio_length_samples, desc="Detecting speeches only using VAD...")
114
+
115
+ chunk = audio[current_start_sample: current_start_sample + window_size_samples]
116
+ if len(chunk) < window_size_samples:
117
+ chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk))))
118
+ speech_prob, state, context = self.model(chunk, state, context, sampling_rate)
119
+ speech_probs.append(speech_prob)
120
 
121
  triggered = False
122
  speeches = []