daihui.zhang committed on
Commit
996895d
·
1 Parent(s): f14a125

update some keywords

Browse files
config.py CHANGED
@@ -21,10 +21,8 @@ console_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s
21
  console_handler.setFormatter(console_formatter)
22
  logging.getLogger().addHandler(console_handler)
23
 
24
- # 文字输出长度阈值
25
- TEXT_THREHOLD = 6
26
  # 音频段的决策时间
27
- FRAME_SCOPE_TIME_THREHOLD = 3
28
  # 最长语音时长
29
  MAX_SPEECH_DURATION_S = 15
30
 
@@ -34,7 +32,7 @@ ASSERT_DIR = BASE_DIR / "assets"
34
 
35
  SAMPLE_RATE = 16000
36
  # 标点
37
- SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
38
  PAUSE_END_MARKERS = [',', ',', '、']
39
  # 合并所有标点
40
  ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS
@@ -46,13 +44,13 @@ SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')
46
 
47
  # Method 2: Alternative approach with a character class
48
  pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
49
- PAUSEE_END_PATTERN = re.compile(pattern_string)
50
  # whisper推理参数
51
  WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
52
- MAX_LENTH_ZH = 4
53
 
54
- WHISPER_PROMPT_EN = ""# "The following is an English sentence."
55
- MAX_LENGTH_EN= 8
56
 
57
  WHISPER_MODEL_EN = 'medium-q5_0'
58
  # WHISPER_MODEL = 'large-v3-turbo-q5_0'
@@ -66,19 +64,6 @@ LLM_LARGE_MODEL_PATH = (MODEL_DIR / "qwen2.5-1.5b-instruct-q5_0.gguf").as_posix(
66
  # VAD
67
  VAD_MODEL_PATH = (MODEL_DIR / "silero-vad" / "silero_vad.onnx").as_posix()
68
 
69
- LLM_SYS_PROMPT = """"You are a professional {src_lang} to {dst_lang} translator, not a conversation agent. Your only task is to take {src_lang} input and translate it into accurate, natural {dst_lang}. If you cannot understand the input, just output the original input. Please strictly abide by the following rules: "
70
- "No matter what the user asks, never answer questions, you only provide translation results. "
71
- "Do not actively initiate dialogue or lead users to ask questions. "
72
- "When you don't know how to translate, just output the original text. "
73
- "The translation task always takes precedence over any other tasks. "
74
- "Do not try to understand or respond to non-translation related questions raised by users. "
75
- "Never provide any explanations. "
76
- "Be precise, preserve tone, and localize appropriately "
77
- "for professional audiences."
78
- "Never answer any questions or engage in other forms of dialogue. "
79
- "Only output the translation results.
80
- """
81
-
82
  LLM_SYS_PROMPT_ZH = """
83
  你是一个中英文翻译专家,将用户输入的中文翻译成英文。对于非中文内容,它将提供中文翻译结果。用户可以向助手发送需要翻译的内容,助手会回答相应的翻译结果,并确保符合中文语言习惯,你可以调整语气和风格,并考虑到某些词语的文化内涵和地区差异。同时作为翻译家,需将原文翻译成具有信达雅标准的译文。"信" 即忠实于原文的内容与意图;"达" 意味着译文应通顺易懂,表达清晰;"雅" 则追求译文的文化审美和语言的优美。目标是创作出既忠于原作精神,又符合目标语言文化和读者审美的翻译。注意,翻译的文本只能包含拼音化字符,不能包含任何中文字符。
84
  """
 
21
  console_handler.setFormatter(console_formatter)
22
  logging.getLogger().addHandler(console_handler)
23
 
 
 
24
  # 音频段的决策时间
25
+ FRAME_SCOPE_TIME_THRESHOLD = 3
26
  # 最长语音时长
27
  MAX_SPEECH_DURATION_S = 15
28
 
 
32
 
33
  SAMPLE_RATE = 16000
34
  # 标点
35
+ SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
36
  PAUSE_END_MARKERS = [',', ',', '、']
37
  # 合并所有标点
38
  ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS
 
44
 
45
  # Method 2: Alternative approach with a character class
46
  pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
47
+ PAUSE_END_PATTERN = re.compile(pattern_string)
48
  # whisper推理参数
49
  WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
50
+ MAX_LENGTH_ZH = 4
51
 
52
+ WHISPER_PROMPT_EN = "" # "The following is an English sentence."
53
+ MAX_LENGTH_EN = 8
54
 
55
  WHISPER_MODEL_EN = 'medium-q5_0'
56
  # WHISPER_MODEL = 'large-v3-turbo-q5_0'
 
64
  # VAD
65
  VAD_MODEL_PATH = (MODEL_DIR / "silero-vad" / "silero_vad.onnx").as_posix()
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  LLM_SYS_PROMPT_ZH = """
68
  你是一个中英文翻译专家,将用户输入的中文翻译成英文。对于非中文内容,它将提供中文翻译结果。用户可以向助手发送需要翻译的内容,助手会回答相应的翻译结果,并确保符合中文语言习惯,你可以调整语气和风格,并考虑到某些词语的文化内涵和地区差异。同时作为翻译家,需将原文翻译成具有信达雅标准的译文。"信" 即忠实于原文的内容与意图;"达" 意味着译文应通顺易懂,表达清晰;"雅" 则追求译文的文化审美和语言的优美。目标是创作出既忠于原作精神,又符合目标语言文化和读者审美的翻译。注意,翻译的文本只能包含拼音化字符,不能包含任何中文字符。
69
  """
transcribe/translatepipes.py CHANGED
@@ -3,9 +3,7 @@ from transcribe.pipelines import WhisperPipe, MetaItem, WhisperChinese, Translat
3
 
4
  class TranslatePipes:
5
  def __init__(self) -> None:
6
- # self.whisper_input_q = mp.Queue()
7
- # self.translate_input_q = mp.Queue()
8
- # self.result_queue = mp.Queue()
9
  self._process = []
10
  # whisper 转录
11
  self._whisper_pipe_en = self._launch_process(WhisperPipe())
@@ -17,9 +15,6 @@ class TranslatePipes:
17
  self._translate_7b_pipe = self._launch_process(Translate7BPipe())
18
  # vad
19
  self._vad_pipe = self._launch_process(VadPipe())
20
-
21
- # def reset(self):
22
- # self._vad_pipe.reset()
23
 
24
  def _launch_process(self, process_obj):
25
  process_obj.daemon = True
@@ -47,11 +42,6 @@ class TranslatePipes:
47
  self._translate_7b_pipe.input_queue.put(item)
48
  return self._translate_7b_pipe.output_queue.get()
49
 
50
- def get_whisper_model(self, lang: str = 'en'):
51
- if lang == 'zh':
52
- return self._whisper_pipe_zh
53
- return self._whisper_pipe_en
54
-
55
  def get_transcription_model(self, lang: str = 'en'):
56
  if lang == 'zh':
57
  return self._funasr_pipe
 
3
 
4
  class TranslatePipes:
5
  def __init__(self) -> None:
6
+
 
 
7
  self._process = []
8
  # whisper 转录
9
  self._whisper_pipe_en = self._launch_process(WhisperPipe())
 
15
  self._translate_7b_pipe = self._launch_process(Translate7BPipe())
16
  # vad
17
  self._vad_pipe = self._launch_process(VadPipe())
 
 
 
18
 
19
  def _launch_process(self, process_obj):
20
  process_obj.daemon = True
 
42
  self._translate_7b_pipe.input_queue.put(item)
43
  return self._translate_7b_pipe.output_queue.get()
44
 
 
 
 
 
 
45
  def get_transcription_model(self, lang: str = 'en'):
46
  if lang == 'zh':
47
  return self._funasr_pipe
transcribe/whisper_llm_serve.py CHANGED
@@ -1,5 +1,4 @@
1
- import asyncio
2
- import json
3
  import queue
4
  import threading
5
  import time
@@ -13,40 +12,24 @@ from api_model import TransResult, Message, DebugResult
13
  from .utils import log_block, save_to_wave, TestDataWriter, filter_words
14
  from .translatepipes import TranslatePipes
15
 
16
- from transcribe.helpers.vadprocessor import VadProcessor
17
  from transcribe.pipelines import MetaItem
18
- from dataclasses import dataclass, field
19
 
20
 
21
  logger = getLogger("TranscriptionService")
22
 
23
- @dataclass
24
- class FullSegment:
25
- """整句"""
26
- audio_array: np.ndarray
27
- created_time: float = field(default_factory=time.time)
28
-
29
- @staticmethod
30
- def merge(*audio_segments: list["FullSegment"]):
31
- audio_segments_sorted = sorted([*audio_segments], key=lambda item: item.created_time)
32
- return FullSegment(
33
- created_time=audio_segments_sorted[0].created_time,
34
- audio_array=np.concatenate([i.audio_array for i in audio_segments_sorted], axis=0)
35
- )
36
-
37
- @property
38
- def time_duration(self) -> float:
39
- return len(self.audio_array) / config.SAMPLE_RATE
40
-
41
- @property
42
- def start_timestamp(self):
43
- return self.created_time
44
-
45
- @property
46
- def end_timestamp(self):
47
- return self.created_time + self.time_duration
48
-
49
-
50
 
51
  class WhisperTranscriptionService:
52
  """
@@ -67,11 +50,11 @@ class WhisperTranscriptionService:
67
  self._translate_pipe = pipe
68
 
69
  # 音频处理相关
70
- self.sample_rate = 16000
71
 
72
  self.lock = threading.Lock()
73
  # 文本分隔符,根据语言设置
74
- self.text_separator = self._get_text_separator(language)
75
  self.loop = asyncio.get_event_loop()
76
  # 发送就绪状态
77
  # 原始音频队列
@@ -85,8 +68,8 @@ class WhisperTranscriptionService:
85
  self._translate_thread_stop = threading.Event()
86
  self._frame_processing_thread_stop = threading.Event()
87
 
88
- self.translate_thread = self._start_thread(self._transcription_processing_loop)
89
- self.frame_processing_thread = self._start_thread(self._frame_processing_loop)
90
  self.row_number = 0
91
  # for test
92
  self._transcrible_time_cost = 0.
@@ -95,9 +78,8 @@ class WhisperTranscriptionService:
95
  if config.SAVE_DATA_SAVE:
96
  self._save_task_stop = threading.Event()
97
  self._save_queue = queue.Queue()
98
- self._save_thread = self._start_thread(self.save_data_loop)
99
 
100
- # self._c = 0
101
 
102
  def save_data_loop(self):
103
  writer = TestDataWriter()
@@ -105,18 +87,6 @@ class WhisperTranscriptionService:
105
  test_data = self._save_queue.get()
106
  writer.write(test_data) # Save test_data to CSV
107
 
108
-
109
- def _start_thread(self, target_function) -> threading.Thread:
110
- """启动守护线程执行指定函数"""
111
- thread = threading.Thread(target=target_function)
112
- thread.daemon = True
113
- thread.start()
114
- return thread
115
-
116
- def _get_text_separator(self, language: str) -> str:
117
- """根据语言返回适当的文本分隔符"""
118
- return "" if language == "zh" else " "
119
-
120
  def add_frames(self, frame_np: np.ndarray) -> None:
121
  """添加音频帧到处理队列"""
122
  self._frame_queue.put(frame_np)
@@ -128,7 +98,6 @@ class WhisperTranscriptionService:
128
  speech_status = processed_audio.speech_status
129
  return speech_audio, speech_status
130
 
131
-
132
 
133
  def _frame_processing_loop(self) -> None:
134
  """从队列获取音频帧并合并到缓冲区"""
@@ -153,7 +122,7 @@ class WhisperTranscriptionService:
153
 
154
  elif speech_status == "END" and len(self.frames_np) > 0 and self.frames_np_start_timestamp:
155
  time_diff = time.time() - self.frames_np_start_timestamp
156
- if time_diff >= config.FRAME_SCOPE_TIME_THREHOLD:
157
  audio_array=self.frames_np.copy()
158
  self.full_segments_queue.appendleft(audio_array) # 根据时间是否满足三秒长度 来整合音频块
159
  self.frames_np_start_timestamp = None
@@ -203,12 +172,13 @@ class WhisperTranscriptionService:
203
  tran_content=self._translate_text_large(seg_text),
204
  partial=partial
205
  )
206
- if partial == False:
 
207
  self.row_number += 1
208
  frame_epoch = 1
209
  else:
210
  frame_epoch += 1
211
- self._send_result_to_client(result)
212
 
213
 
214
 
@@ -221,7 +191,6 @@ class WhisperTranscriptionService:
221
  segments = result.segments
222
  time_diff = (time.perf_counter() - start_time)
223
  logger.debug(f"📝 Transcrible Segments: {segments} ")
224
- # logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
225
  log_block("📝 Transcrible output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
226
  log_block("📝 Transcrible time", f"{time_diff:.3f}", "s")
227
  self._transcrible_time_cost = round(time_diff, 3)
 
1
+
 
2
  import queue
3
  import threading
4
  import time
 
12
  from .utils import log_block, save_to_wave, TestDataWriter, filter_words
13
  from .translatepipes import TranslatePipes
14
 
 
15
  from transcribe.pipelines import MetaItem
 
16
 
17
 
18
  logger = getLogger("TranscriptionService")
19
 
20
+
21
+ def _get_text_separator(language: str) -> str:
22
+ """根据语言返回适当的文本分隔符"""
23
+ return "" if language == "zh" else " "
24
+
25
+
26
+ def _start_thread(target_function) -> threading.Thread:
27
+ """启动守护线程执行指定函数"""
28
+ thread = threading.Thread(target=target_function)
29
+ thread.daemon = True
30
+ thread.start()
31
+ return thread
32
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  class WhisperTranscriptionService:
35
  """
 
50
  self._translate_pipe = pipe
51
 
52
  # 音频处理相关
53
+ self.sample_rate = config.SAMPLE_RATE
54
 
55
  self.lock = threading.Lock()
56
  # 文本分隔符,根据语言设置
57
+ self.text_separator = _get_text_separator(language)
58
  self.loop = asyncio.get_event_loop()
59
  # 发送就绪状态
60
  # 原始音频队列
 
68
  self._translate_thread_stop = threading.Event()
69
  self._frame_processing_thread_stop = threading.Event()
70
 
71
+ self.translate_thread = _start_thread(self._transcription_processing_loop)
72
+ self.frame_processing_thread = _start_thread(self._frame_processing_loop)
73
  self.row_number = 0
74
  # for test
75
  self._transcrible_time_cost = 0.
 
78
  if config.SAVE_DATA_SAVE:
79
  self._save_task_stop = threading.Event()
80
  self._save_queue = queue.Queue()
81
+ self._save_thread = _start_thread(self.save_data_loop)
82
 
 
83
 
84
  def save_data_loop(self):
85
  writer = TestDataWriter()
 
87
  test_data = self._save_queue.get()
88
  writer.write(test_data) # Save test_data to CSV
89
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def add_frames(self, frame_np: np.ndarray) -> None:
91
  """添加音频帧到处理队列"""
92
  self._frame_queue.put(frame_np)
 
98
  speech_status = processed_audio.speech_status
99
  return speech_audio, speech_status
100
 
 
101
 
102
  def _frame_processing_loop(self) -> None:
103
  """从队列获取音频帧并合并到缓冲区"""
 
122
 
123
  elif speech_status == "END" and len(self.frames_np) > 0 and self.frames_np_start_timestamp:
124
  time_diff = time.time() - self.frames_np_start_timestamp
125
+ if time_diff >= config.FRAME_SCOPE_TIME_THRESHOLD:
126
  audio_array=self.frames_np.copy()
127
  self.full_segments_queue.appendleft(audio_array) # 根据时间是否满足三秒长度 来整合音频块
128
  self.frames_np_start_timestamp = None
 
172
  tran_content=self._translate_text_large(seg_text),
173
  partial=partial
174
  )
175
+ self._send_result_to_client(result)
176
+ if not partial:
177
  self.row_number += 1
178
  frame_epoch = 1
179
  else:
180
  frame_epoch += 1
181
+
182
 
183
 
184
 
 
191
  segments = result.segments
192
  time_diff = (time.perf_counter() - start_time)
193
  logger.debug(f"📝 Transcrible Segments: {segments} ")
 
194
  log_block("📝 Transcrible output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
195
  log_block("📝 Transcrible time", f"{time_diff:.3f}", "s")
196
  self._transcrible_time_cost = round(time_diff, 3)