"""Shared configuration: logging setup, paths, punctuation patterns, model names.

Importing this module has side effects: it configures the root logger to
write to ``translator.log`` and attaches an additional console handler.
"""
import pathlib
import re
import logging
import json

# --- Logging ---------------------------------------------------------------
DEBUG = False
LOG_LEVEL = logging.DEBUG if DEBUG else logging.WARNING

# Cap the "pywhispercpp" logger at WARNING regardless of DEBUG.
logging.getLogger("pywhispercpp").setLevel(logging.WARNING)
logging.basicConfig(
    level=LOG_LEVEL,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filename='translator.log',
    datefmt="%H:%M:%S",
)

SAVE_DATA_SAVE = False

# Mirror log records to the console in addition to the log file.
# The console formatter has no datefmt, so its timestamps use logging's
# default date format rather than the "%H:%M:%S" used for the file.
console_handler = logging.StreamHandler()
console_handler.setLevel(LOG_LEVEL)
console_formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
console_handler.setFormatter(console_formatter)
logging.getLogger().addHandler(console_handler)

# --- Audio segmentation ----------------------------------------------------
# Decision time for an audio segment (unit presumably seconds -- TODO confirm).
FRAME_SCOPE_TIME_THRESHOLD = 6
# Maximum speech duration (the `_S` suffix suggests seconds -- TODO confirm).
MAX_SPEECH_DURATION_S = 15

# --- Paths -----------------------------------------------------------------
# Two levels up from this file.
BASE_DIR = pathlib.Path(__file__).parent.parent
MODEL_DIR = BASE_DIR / "moyoyo_asr_models"
# NOTE(review): the name looks like a typo for "ASSETS_DIR"; kept unchanged
# because other modules may import it under this name.
ASSERT_DIR = BASE_DIR / "assets"
CONFIG_DIR = BASE_DIR / "config"

SAMPLE_RATE = 16000

# --- Punctuation -----------------------------------------------------------
# Each ASCII mark is paired with its fullwidth (CJK) counterpart. The
# fullwidth forms are written as \u escapes so they cannot be silently folded
# back into their ASCII twins by Unicode normalization of this file.
SENTENCE_END_MARKERS = [
    '.', '!', '?', '。',
    '\uff01',       # fullwidth exclamation mark
    '\uff1f',       # fullwidth question mark
    ';', '\uff1b',  # semicolon / fullwidth semicolon
    ':', '\uff1a',  # colon / fullwidth colon
]
PAUSE_END_MARKERS = [
    ',',
    '\uff0c',  # fullwidth comma
    '、',
]
# All punctuation marks combined.
ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS

# Character class anchored at end-of-string: matches text ending in any marker.
REGEX_MARKERS = re.compile(r'[' + re.escape(''.join(ALL_MARKERS)) + r']$')

# Unanchored: matches a sentence-ending marker anywhere in the text.
sentence_end_chars = ''.join([re.escape(char) for char in SENTENCE_END_MARKERS])
SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')

# Anchored at end-of-string: matches text ending in a pause marker.
pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
PAUSE_END_PATTERN = re.compile(pattern_string)

# --- Whisper inference parameters -------------------------------------------
WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
MAX_LENGTH_ZH = 4

WHISPER_PROMPT_EN = ""  # "The following is an English sentence."
MAX_LENGTH_EN = 8

# WHISPER_MODEL_EN = 'medium-q5_0'
WHISPER_MODEL_EN = 'large-v3-turbo-q5_0'

# WHISPER_MODEL_ZH = 'small'
WHISPER_MODEL_ZH = 'large-v3-turbo-q5_0'

# --- LLM ---------------------------------------------------------------------
LLM_MODEL_PATH = (MODEL_DIR / "qwen2.5-1.5b-instruct-q5_0.gguf").as_posix()
# Currently points at the same file as LLM_MODEL_PATH; the 7B alternative is
# kept below for reference.
LLM_LARGE_MODEL_PATH = (MODEL_DIR / "qwen2.5-1.5b-instruct-q5_0.gguf").as_posix()
# LLM_LARGE_MODEL_PATH = (MODEL_DIR / "qwen2.5-7b-instruct-q5_0-00001-of-00002.gguf").as_posix()

# --- VAD ---------------------------------------------------------------------
VAD_MODEL_PATH = (MODEL_DIR / "silero-vad" / "silero_vad.onnx").as_posix()