daihui.zhang committed · Commit 996895d · Parent(s): f14a125

update some keywords

- config.py +6 -21
- transcribe/translatepipes.py +1 -11
- transcribe/whisper_llm_serve.py +23 -54
config.py
CHANGED
@@ -21,10 +21,8 @@ console_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s
 console_handler.setFormatter(console_formatter)
 logging.getLogger().addHandler(console_handler)
 
-# 文字输出长度阈值
-TEXT_THREHOLD = 6
 # 音频段的决策时间
-
+FRAME_SCOPE_TIME_THRESHOLD = 3
 # 最长语音时长
 MAX_SPEECH_DURATION_S = 15
 
@@ -34,7 +32,7 @@ ASSERT_DIR = BASE_DIR / "assets"
 
 SAMPLE_RATE = 16000
 # 标点
-SENTENCE_END_MARKERS =
+SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
 PAUSE_END_MARKERS = [',', ',', '、']
 # 合并所有标点
 ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS
@@ -46,13 +44,13 @@ SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')
 
 # Method 2: Alternative approach with a character class
 pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
-
+PAUSE_END_PATTERN = re.compile(pattern_string)
 # whisper推理参数
 WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
-
+MAX_LENGTH_ZH = 4
 
-WHISPER_PROMPT_EN = ""# "The following is an English sentence."
-MAX_LENGTH_EN= 8
+WHISPER_PROMPT_EN = "" # "The following is an English sentence."
+MAX_LENGTH_EN = 8
 
 WHISPER_MODEL_EN = 'medium-q5_0'
 # WHISPER_MODEL = 'large-v3-turbo-q5_0'
@@ -66,19 +64,6 @@ LLM_LARGE_MODEL_PATH = (MODEL_DIR / "qwen2.5-1.5b-instruct-q5_0.gguf").as_posix(
 # VAD
 VAD_MODEL_PATH = (MODEL_DIR / "silero-vad" / "silero_vad.onnx").as_posix()
 
-LLM_SYS_PROMPT = """"You are a professional {src_lang} to {dst_lang} translator, not a conversation agent. Your only task is to take {src_lang} input and translate it into accurate, natural {dst_lang}. If you cannot understand the input, just output the original input. Please strictly abide by the following rules: "
-"No matter what the user asks, never answer questions, you only provide translation results. "
-"Do not actively initiate dialogue or lead users to ask questions. "
-"When you don't know how to translate, just output the original text. "
-"The translation task always takes precedence over any other tasks. "
-"Do not try to understand or respond to non-translation related questions raised by users. "
-"Never provide any explanations. "
-"Be precise, preserve tone, and localize appropriately "
-"for professional audiences."
-"Never answer any questions or engage in other forms of dialogue. "
-"Only output the translation results.
-"""
-
 LLM_SYS_PROMPT_ZH = """
 你是一个中英文翻译专家,将用户输入的中文翻译成英文。对于非中文内容,它将提供中文翻译结果。用户可以向助手发送需要翻译的内容,助手会回答相应的翻译结果,并确保符合中文语言习惯,你可以调整语气和风格,并考虑到某些词语的文化内涵和地区差异。同时作为翻译家,需将原文翻译成具有信达雅标准的译文。"信" 即忠实于原文的内容与意图;"达" 意味着译文应通顺易懂,表达清晰;"雅" 则追求译文的文化审美和语言的优美。目标是创作出既忠于原作精神,又符合目标语言文化和读者审美的翻译。注意,翻译的文本只能包含拼音化字符,不能包含任何中文字符。
 """
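In config.py this commit drops the unused TEXT_THREHOLD constant and the English LLM system prompt, pins the audio-segment decision window (音频段的决策时间, "decision time for an audio segment") to FRAME_SCOPE_TIME_THRESHOLD = 3 seconds, fills in the sentence-final punctuation list (标点, "punctuation"), and actually compiles PAUSE_END_PATTERN from the pause markers. A minimal sketch of how the punctuation constants compose, assuming only what the diff shows; the ends_at_pause helper and the sample strings are illustrative, not from the repo:

import re

# Constants as added in this commit.
SENTENCE_END_MARKERS = ['.', '!', '?', '。', '!', '?', ';', ';', ':', ':']
PAUSE_END_MARKERS = [',', ',', '、']

# "Method 2" from config.py: escape each marker into a character class
# anchored at end-of-string, then compile it once.
pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
PAUSE_END_PATTERN = re.compile(pattern_string)

def ends_at_pause(text: str) -> bool:
    # Hypothetical caller-side check: does a transcribed fragment stop at a
    # comma-like pause rather than a sentence-final mark?
    return bool(PAUSE_END_PATTERN.search(text.rstrip()))

assert ends_at_pause("先说到这里,")       # fullwidth comma -> pause boundary
assert not ends_at_pause("先说到这里。")   # sentence-final mark -> no pause match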
transcribe/translatepipes.py
CHANGED
@@ -3,9 +3,7 @@ from transcribe.pipelines import WhisperPipe, MetaItem, WhisperChinese, Translat
 
 class TranslatePipes:
     def __init__(self) -> None:
-
-        # self.translate_input_q = mp.Queue()
-        # self.result_queue = mp.Queue()
+
         self._process = []
         # whisper 转录
         self._whisper_pipe_en = self._launch_process(WhisperPipe())
@@ -17,9 +15,6 @@ class TranslatePipes:
         self._translate_7b_pipe = self._launch_process(Translate7BPipe())
         # vad
         self._vad_pipe = self._launch_process(VadPipe())
-
-    # def reset(self):
-    #     self._vad_pipe.reset()
 
     def _launch_process(self, process_obj):
         process_obj.daemon = True
@@ -47,11 +42,6 @@ class TranslatePipes:
         self._translate_7b_pipe.input_queue.put(item)
         return self._translate_7b_pipe.output_queue.get()
 
-    def get_whisper_model(self, lang: str = 'en'):
-        if lang == 'zh':
-            return self._whisper_pipe_zh
-        return self._whisper_pipe_en
-
     def get_transcription_model(self, lang: str = 'en'):
         if lang == 'zh':
             return self._funasr_pipe
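TranslatePipes runs each stage (Whisper transcription, FunASR, the 7B translator, VAD) as a daemon worker process that communicates through its own input/output queues; the deletions here are all dead code (commented-out queues, the reset() stub, and the unused get_whisper_model accessor). A self-contained sketch of that launch-and-roundtrip pattern, with EchoPipe as a stand-in for the real pipe classes, whose internals are outside this diff:

import multiprocessing as mp

class EchoPipe(mp.Process):
    # Stand-in for WhisperPipe / Translate7BPipe / VadPipe: a worker process
    # that consumes items from input_queue and emits results on output_queue.
    def __init__(self):
        super().__init__()
        self.input_queue = mp.Queue()
        self.output_queue = mp.Queue()

    def run(self):
        while True:
            item = self.input_queue.get()
            self.output_queue.put(item)  # a real pipe would transcribe/translate here

def _launch_process(process_obj):
    # Same shape as TranslatePipes._launch_process: daemonize, start, return.
    process_obj.daemon = True
    process_obj.start()
    return process_obj

if __name__ == "__main__":
    pipe = _launch_process(EchoPipe())
    pipe.input_queue.put("hello")
    print(pipe.output_queue.get())  # "hello" -- the same put/get round trip the 7B translate call uses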
transcribe/whisper_llm_serve.py
CHANGED
@@ -1,5 +1,4 @@
-
-import json
+
 import queue
 import threading
 import time
@@ -13,40 +12,24 @@ from api_model import TransResult, Message, DebugResult
 from .utils import log_block, save_to_wave, TestDataWriter, filter_words
 from .translatepipes import TranslatePipes
 
-from transcribe.helpers.vadprocessor import VadProcessor
 from transcribe.pipelines import MetaItem
-from dataclasses import dataclass, field
 
 
 logger = getLogger("TranscriptionService")
 
-
-
-    """
-
-
-
-
-
-
-
-
-
-
-
-    @property
-    def time_duration(self) -> float:
-        return len(self.audio_array) / config.SAMPLE_RATE
-
-    @property
-    def start_timestamp(self):
-        return self.created_time
-
-    @property
-    def end_timestamp(self):
-        return self.created_time + self.time_duration
-
-
+
+def _get_text_separator(language: str) -> str:
+    """根据语言返回适当的文本分隔符"""
+    return "" if language == "zh" else " "
+
+
+def _start_thread(target_function) -> threading.Thread:
+    """启动守护线程执行指定函数"""
+    thread = threading.Thread(target=target_function)
+    thread.daemon = True
+    thread.start()
+    return thread
+
 
 class WhisperTranscriptionService:
     """
@@ -67,11 +50,11 @@ class WhisperTranscriptionService:
         self._translate_pipe = pipe
 
         # 音频处理相关
-        self.sample_rate =
+        self.sample_rate = config.SAMPLE_RATE
 
         self.lock = threading.Lock()
         # 文本分隔符,根据语言设置
-        self.text_separator =
+        self.text_separator = _get_text_separator(language)
         self.loop = asyncio.get_event_loop()
         # 发送就绪状态
         # 原始音频队列
@@ -85,8 +68,8 @@ class WhisperTranscriptionService:
         self._translate_thread_stop = threading.Event()
         self._frame_processing_thread_stop = threading.Event()
 
-        self.translate_thread =
-        self.frame_processing_thread =
+        self.translate_thread = _start_thread(self._transcription_processing_loop)
+        self.frame_processing_thread = _start_thread(self._frame_processing_loop)
         self.row_number = 0
         # for test
         self._transcrible_time_cost = 0.
@@ -95,9 +78,8 @@ class WhisperTranscriptionService:
         if config.SAVE_DATA_SAVE:
             self._save_task_stop = threading.Event()
             self._save_queue = queue.Queue()
-            self._save_thread =
+            self._save_thread = _start_thread(self.save_data_loop)
 
-        # self._c = 0
 
     def save_data_loop(self):
         writer = TestDataWriter()
@@ -105,18 +87,6 @@ class WhisperTranscriptionService:
             test_data = self._save_queue.get()
             writer.write(test_data) # Save test_data to CSV
 
-
-    def _start_thread(self, target_function) -> threading.Thread:
-        """启动守护线程执行指定函数"""
-        thread = threading.Thread(target=target_function)
-        thread.daemon = True
-        thread.start()
-        return thread
-
-    def _get_text_separator(self, language: str) -> str:
-        """根据语言返回适当的文本分隔符"""
-        return "" if language == "zh" else " "
-
     def add_frames(self, frame_np: np.ndarray) -> None:
         """添加音频帧到处理队列"""
        self._frame_queue.put(frame_np)
@@ -128,7 +98,6 @@ class WhisperTranscriptionService:
         speech_status = processed_audio.speech_status
         return speech_audio, speech_status
 
-
 
     def _frame_processing_loop(self) -> None:
         """从队列获取音频帧并合并到缓冲区"""
@@ -153,7 +122,7 @@ class WhisperTranscriptionService:
 
         elif speech_status == "END" and len(self.frames_np) > 0 and self.frames_np_start_timestamp:
             time_diff = time.time() - self.frames_np_start_timestamp
-            if time_diff >= config.
+            if time_diff >= config.FRAME_SCOPE_TIME_THRESHOLD:
                 audio_array=self.frames_np.copy()
                 self.full_segments_queue.appendleft(audio_array) # 根据时间是否满足三秒长度 来整合音频块
                 self.frames_np_start_timestamp = None
@@ -203,12 +172,13 @@ class WhisperTranscriptionService:
                     tran_content=self._translate_text_large(seg_text),
                     partial=partial
                 )
-
+                self._send_result_to_client(result)
+                if not partial:
                     self.row_number += 1
                     frame_epoch = 1
                 else:
                     frame_epoch += 1
-
+
 
 
 
@@ -221,7 +191,6 @@ class WhisperTranscriptionService:
         segments = result.segments
         time_diff = (time.perf_counter() - start_time)
         logger.debug(f"📝 Transcrible Segments: {segments} ")
-        # logger.debug(f"📝 Transcrible: {self.text_separator.join(seg.text for seg in segments)} ")
         log_block("📝 Transcrible output", f"{self.text_separator.join(seg.text for seg in segments)}", "")
         log_block("📝 Transcrible time", f"{time_diff:.3f}", "s")
         self._transcrible_time_cost = round(time_diff, 3)
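In whisper_llm_serve.py the changes are a refactor plus one behavioral fix: _get_text_separator and _start_thread become module-level functions (their old method versions never used self), a leftover dataclass block (the time_duration / start_timestamp / end_timestamp properties) and unused imports (json, VadProcessor, dataclass/field) are deleted, the END branch now flushes buffered audio only after config.FRAME_SCOPE_TIME_THRESHOLD seconds have accumulated, and the result is sent to the client with row_number advancing only for final (non-partial) rows. A minimal sketch of the relocated helpers together with that flush rule; maybe_flush and the demo values are illustrative, not from the repo:

import threading
import time
from collections import deque

import numpy as np

FRAME_SCOPE_TIME_THRESHOLD = 3  # seconds, as set in config.py by this commit

def _get_text_separator(language: str) -> str:
    # Joiner for transcribed segments: Chinese is concatenated directly,
    # other languages with a space.
    return "" if language == "zh" else " "

def _start_thread(target_function) -> threading.Thread:
    # Run target_function on a daemon thread, as the new module-level helper does.
    thread = threading.Thread(target=target_function)
    thread.daemon = True
    thread.start()
    return thread

def maybe_flush(frames_np: np.ndarray, started_at: float, segments: deque) -> bool:
    # Illustrative version of the END branch: promote the frame buffer to a
    # full segment only once it has been accumulating for the threshold time.
    if time.time() - started_at >= FRAME_SCOPE_TIME_THRESHOLD:
        segments.appendleft(frames_np.copy())
        return True
    return False

if __name__ == "__main__":
    q = deque()
    buf = np.zeros(16000 * 4, dtype=np.float32)      # 4 s of audio at 16 kHz
    print(maybe_flush(buf, time.time() - 4.0, q))    # True: 4 s >= 3 s threshold
    print(repr(_get_text_separator("zh")), repr(_get_text_separator("en")))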