daihui.zhang committed on
Commit
dfb349e
·
1 Parent(s): 7e7b241

update whisper config

Browse files
config.py CHANGED
@@ -1,5 +1,5 @@
1
  import pathlib
2
-
3
  import logging
4
 
5
  logging.basicConfig(
@@ -18,6 +18,12 @@ ASSERT_DIR = BASE_DIR / "assets"
18
  SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
19
  PAUSE_END_MARKERS = [',', ',', '、']
20
 
 
 
 
 
 
 
21
  # whisper推理参数
22
  WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
23
  MAX_LENTH_ZH = 4
 
1
  import pathlib
2
+ import re
3
  import logging
4
 
5
  logging.basicConfig(
 
18
  SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
19
  PAUSE_END_MARKERS = [',', ',', '、']
20
 
21
+ sentence_end_chars = ''.join([re.escape(char) for char in SENTENCE_END_MARKERS])
22
+ SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')
23
+
24
+ # Method 2: Alternative approach with a character class
25
+ pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + ']'
26
+ PAUSEE_END_PATTERN = re.compile(pattern_string)
27
  # whisper推理参数
28
  WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
29
  MAX_LENTH_ZH = 4
transcribe/helpers/whisper.py CHANGED
@@ -17,7 +17,10 @@ class WhisperCPP:
17
  print_realtime=False,
18
  print_progress=False,
19
  print_timestamps=False,
20
- translate=False
 
 
 
21
  )
22
  if warmup:
23
  self.warmup()
 
17
  print_realtime=False,
18
  print_progress=False,
19
  print_timestamps=False,
20
+ translate=False,
21
+ # beam_search=1,
22
+ temperature=0.,
23
+ no_context=True
24
  )
25
  if warmup:
26
  self.warmup()
transcribe/strategy.py CHANGED
@@ -98,7 +98,7 @@ def segement_merge(segments):
98
 
99
  for seg in segments:
100
  temp_seq.append(seg)
101
- if any([mk in seg.text for mk in config.SENTENCE_END_MARKERS + config.PAUSE_END_MARKERS]):
102
  sequences.append(temp_seq.copy())
103
  temp_seq = []
104
  if temp_seq:
@@ -123,7 +123,8 @@ def segments_split(segments, audio_buffer: np.ndarray, sample_rate=16000):
123
  if seg.text and seg.text[-1] in markers:
124
  seg_index = int(seg.t1 / 100 * sample_rate)
125
  # rest_buffer_duration = (len(audio_buffer) - seg_index) / sample_rate
126
- # is_end = any(i in seg.text for i in config.SENTENCE_END_MARKERS)
 
127
  right_watch_sequences = segments[min(idx+1, len(segments)):]
128
  # if rest_buffer_duration >= 1.5:
129
  left_watch_idx = seg_index
 
98
 
99
  for seg in segments:
100
  temp_seq.append(seg)
101
+ if any([mk in seg.text for mk in config.SENTENCE_END_MARKERS]):
102
  sequences.append(temp_seq.copy())
103
  temp_seq = []
104
  if temp_seq:
 
123
  if seg.text and seg.text[-1] in markers:
124
  seg_index = int(seg.t1 / 100 * sample_rate)
125
  # rest_buffer_duration = (len(audio_buffer) - seg_index) / sample_rate
126
+ is_end = config.SENTENCE_END_PATTERN.search(seg.text)
127
+
128
  right_watch_sequences = segments[min(idx+1, len(segments)):]
129
  # if rest_buffer_duration >= 1.5:
130
  left_watch_idx = seg_index