from .pipelines import (
    FunASRPipe,
    MetaItem,
    Translate7BPipe,
    TranslatePipe,
    VadPipe,
    WhisperChinese,
    WhisperPipe,
)
from .utils import timer


class ProcessingPipes:
    """Start the transcription, translation and VAD workers and talk to them.

    Every worker is started once in ``__init__`` and is then addressed
    through its ``input_queue`` / ``output_queue`` pair: a request is put on
    the input queue and the next item taken from the output queue is
    returned to the caller.
    """

    def __init__(self) -> None:
        # Every launched worker, in start order; wait_ready() walks this list.
        self._process = []

        # Transcription workers (Whisper for English, FunASR for Chinese).
        self._whisper_pipe_en = self._launch_process(WhisperPipe())
        # self._whisper_pipe_zh = self._launch_process(WhisperChinese())
        self._funasr_pipe = self._launch_process(FunASRPipe())

        # LLM translation workers (default model and the 7B variant).
        self._translate_pipe = self._launch_process(TranslatePipe())
        self._translate_7b_pipe = self._launch_process(Translate7BPipe())

        # Voice activity detection worker.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Mark ``process_obj`` as a daemon, start it and remember it.

        Args:
            process_obj: A worker exposing ``daemon`` and ``start()``
                (presumably a ``multiprocessing.Process`` subclass; confirm
                in ``.pipelines``).

        Returns:
            The same object, now started.
        """
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    def wait_ready(self):
        """Call ``wait()`` on every launched worker, in launch order."""
        for worker in self._process:
            worker.wait()

    @staticmethod
    def _roundtrip(pipe, item: MetaItem) -> MetaItem:
        """Put ``item`` on ``pipe.input_queue`` and return the next output.

        Blocks on ``pipe.output_queue.get()`` until the worker produces an
        item.
        """
        pipe.input_queue.put(item)
        return pipe.output_queue.get()

    @timer(name="🐧 Translate")
    def translate(self, text, src_lang, dst_lang) -> MetaItem:
        """Send ``text`` to the default translation worker.

        Args:
            text: Content to translate; passed as ``transcribe_content``.
            src_lang: Source language code.
            dst_lang: Destination language code.

        Returns:
            The item the translation worker puts on its output queue.
        """
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang,
        )
        return self._roundtrip(self._translate_pipe, item)

    @timer(name="🐧 Translate-large")
    def translate_large(self, text, src_lang, dst_lang) -> MetaItem:
        """Send ``text`` to the 7B translation worker.

        Takes the same arguments as ``translate`` and returns the item the
        7B worker puts on its output queue.
        """
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang,
        )
        return self._roundtrip(self._translate_7b_pipe, item)

    def get_transcription_model(self, lang: str = 'en'):
        """Return the transcription worker used for ``lang``.

        ``'zh'`` selects the FunASR worker; every other value falls back to
        the English Whisper worker.
        """
        if lang == 'zh':
            return self._funasr_pipe
        return self._whisper_pipe_en

    @timer(name="📝 transcribe")
    def transcribe(self, audio_buffer: bytes, src_lang: str) -> MetaItem:
        """Transcribe ``audio_buffer`` with the worker chosen for ``src_lang``.

        Args:
            audio_buffer: Audio payload, passed to ``MetaItem`` as ``audio``.
            src_lang: Source language code; also selects the worker via
                ``get_transcription_model``.

        Returns:
            The item the transcription worker puts on its output queue.
        """
        transcription_model = self.get_transcription_model(src_lang)
        item = MetaItem(audio=audio_buffer, source_language=src_lang)
        return self._roundtrip(transcription_model, item)

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Run voice activity detection on ``audio_buffer``.

        Returns:
            The item the VAD worker puts on its output queue.
        """
        # NOTE(review): this uses ``source_audio`` while transcribe() uses
        # ``audio``; confirm against MetaItem that both fields are intended.
        item = MetaItem(source_audio=audio_buffer)
        return self._roundtrip(self._vad_pipe, item)


if __name__ == "__main__":
    import soundfile

    tp = ProcessingPipes()
    # Sample input below means "Hello, how is the weather today?".
    # result = tp.translate("你好,今天天气怎么样?", src_lang="zh", dst_lang="en")
    mel, _ = soundfile.read("assets/jfk.flac")
    # result = tp.transcribe(mel, 'en')
    result = tp.voice_detect(mel)
    print(result)