Xin Zhang committed on
Commit
750e8d5
·
1 Parent(s): 0c9fcfc

[fix]: parameter.

Browse files
Files changed (1) hide show
  1. transcribe/pipelines/pipe_vad.py +11 -11
transcribe/pipelines/pipe_vad.py CHANGED
@@ -18,25 +18,25 @@ class VadPipe(BasePipe):
18
  super().__init__(in_queue, out_queue)
19
  self._offset = 0 # 处理的frame size offset
20
  self._status = 'END'
21
-
22
 
23
  def reset(self):
24
- self._offset = 0
25
  self._status = 'END'
26
-
27
  @classmethod
28
  def init(cls):
29
  if cls.vac is None:
30
  cls.vac = FixedVADIterator(
31
- threshold=0.3,
32
- sampling_rate=cls.sample_rate,
33
  # speech_pad_ms=10
34
- min_silence_duration_ms = 100,
35
  # speech_pad_ms = 30,
36
- max_speech_duration_s=15
37
  )
38
  cls.vac.reset_states()
39
-
40
 
41
  # def reduce_noise(self, data):
42
  # return nr.reduce_noise(y=data, sr=self.sample_rate)
@@ -52,16 +52,16 @@ class VadPipe(BasePipe):
52
  if end_frame:
53
  relative_end_frame = end_frame - self._offset
54
  return relative_start_frame, relative_end_frame
55
-
56
  def process(self, in_data: MetaItem) -> MetaItem:
57
  if self._offset == 0:
58
  self.vac.reset_states()
59
  # silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
60
  source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
61
  speech_data = self._process_speech_chunk(source_audio)
62
-
63
  if speech_data: # 表示有音频的变化点出现
64
- rel_start_frame, rel_end_frame = speech_data
65
  if rel_start_frame is not None and rel_end_frame is None:
66
  self._status = "START" # 语音开始
67
  target_audio = source_audio[rel_start_frame:]
 
18
  super().__init__(in_queue, out_queue)
19
  self._offset = 0 # 处理的frame size offset
20
  self._status = 'END'
21
+
22
 
23
  def reset(self):
24
+ self._offset = 0
25
  self._status = 'END'
26
+
27
  @classmethod
28
  def init(cls):
29
  if cls.vac is None:
30
  cls.vac = FixedVADIterator(
31
+ threshold=0.5,
32
+ sampling_rate=cls.sample_rate,
33
  # speech_pad_ms=10
34
+ min_silence_duration_ms = 150,
35
  # speech_pad_ms = 30,
36
+ max_speech_duration_s=5.0,
37
  )
38
  cls.vac.reset_states()
39
+
40
 
41
  # def reduce_noise(self, data):
42
  # return nr.reduce_noise(y=data, sr=self.sample_rate)
 
52
  if end_frame:
53
  relative_end_frame = end_frame - self._offset
54
  return relative_start_frame, relative_end_frame
55
+
56
  def process(self, in_data: MetaItem) -> MetaItem:
57
  if self._offset == 0:
58
  self.vac.reset_states()
59
  # silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
60
  source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
61
  speech_data = self._process_speech_chunk(source_audio)
62
+
63
  if speech_data: # 表示有音频的变化点出现
64
+ rel_start_frame, rel_end_frame = speech_data
65
  if rel_start_frame is not None and rel_end_frame is None:
66
  self._status = "START" # 语音开始
67
  target_audio = source_audio[rel_start_frame:]