Xin Zhang
committed on
Commit
·
750e8d5
1
Parent(s):
0c9fcfc
[fix]: parameter.
Browse files - transcribe/pipelines/pipe_vad.py +11 -11
transcribe/pipelines/pipe_vad.py
CHANGED
@@ -18,25 +18,25 @@ class VadPipe(BasePipe):
|
|
18 |
super().__init__(in_queue, out_queue)
|
19 |
self._offset = 0 # 处理的frame size offset
|
20 |
self._status = 'END'
|
21 |
-
|
22 |
|
23 |
def reset(self):
|
24 |
-
self._offset = 0
|
25 |
self._status = 'END'
|
26 |
-
|
27 |
@classmethod
|
28 |
def init(cls):
|
29 |
if cls.vac is None:
|
30 |
cls.vac = FixedVADIterator(
|
31 |
-
threshold=0.
|
32 |
-
sampling_rate=cls.sample_rate,
|
33 |
# speech_pad_ms=10
|
34 |
-
min_silence_duration_ms =
|
35 |
# speech_pad_ms = 30,
|
36 |
-
max_speech_duration_s=
|
37 |
)
|
38 |
cls.vac.reset_states()
|
39 |
-
|
40 |
|
41 |
# def reduce_noise(self, data):
|
42 |
# return nr.reduce_noise(y=data, sr=self.sample_rate)
|
@@ -52,16 +52,16 @@ class VadPipe(BasePipe):
|
|
52 |
if end_frame:
|
53 |
relative_end_frame = end_frame - self._offset
|
54 |
return relative_start_frame, relative_end_frame
|
55 |
-
|
56 |
def process(self, in_data: MetaItem) -> MetaItem:
|
57 |
if self._offset == 0:
|
58 |
self.vac.reset_states()
|
59 |
# silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
|
60 |
source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
|
61 |
speech_data = self._process_speech_chunk(source_audio)
|
62 |
-
|
63 |
if speech_data: # 表示有音频的变化点出现
|
64 |
-
rel_start_frame, rel_end_frame = speech_data
|
65 |
if rel_start_frame is not None and rel_end_frame is None:
|
66 |
self._status = "START" # 语音开始
|
67 |
target_audio = source_audio[rel_start_frame:]
|
|
|
18 |
super().__init__(in_queue, out_queue)
|
19 |
self._offset = 0 # 处理的frame size offset
|
20 |
self._status = 'END'
|
21 |
+
|
22 |
|
23 |
def reset(self):
|
24 |
+
self._offset = 0
|
25 |
self._status = 'END'
|
26 |
+
|
27 |
@classmethod
|
28 |
def init(cls):
|
29 |
if cls.vac is None:
|
30 |
cls.vac = FixedVADIterator(
|
31 |
+
threshold=0.5,
|
32 |
+
sampling_rate=cls.sample_rate,
|
33 |
# speech_pad_ms=10
|
34 |
+
min_silence_duration_ms = 150,
|
35 |
# speech_pad_ms = 30,
|
36 |
+
max_speech_duration_s=5.0,
|
37 |
)
|
38 |
cls.vac.reset_states()
|
39 |
+
|
40 |
|
41 |
# def reduce_noise(self, data):
|
42 |
# return nr.reduce_noise(y=data, sr=self.sample_rate)
|
|
|
52 |
if end_frame:
|
53 |
relative_end_frame = end_frame - self._offset
|
54 |
return relative_start_frame, relative_end_frame
|
55 |
+
|
56 |
def process(self, in_data: MetaItem) -> MetaItem:
|
57 |
if self._offset == 0:
|
58 |
self.vac.reset_states()
|
59 |
# silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
|
60 |
source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
|
61 |
speech_data = self._process_speech_chunk(source_audio)
|
62 |
+
|
63 |
if speech_data: # 表示有音频的变化点出现
|
64 |
+
rel_start_frame, rel_end_frame = speech_data
|
65 |
if rel_start_frame is not None and rel_end_frame is None:
|
66 |
self._status = "START" # 语音开始
|
67 |
target_audio = source_audio[rel_start_frame:]
|