File size: 2,684 Bytes
83ea845
 
c6b44fd
9494251
485d8e3
996895d
269051f
485d8e3
31ad35a
ca5d527
3a0633a
 
485d8e3
f13dceb
2aed46a
c0447ed
ca5d527
3a0633a
c0447ed
 
 
269051f
c0447ed
485d8e3
6f13b8c
269051f
 
3a0633a
83ea845
485d8e3
 
2aed46a
3a0633a
2aed46a
 
 
 
83ea845
2aed46a
 
485d8e3
3a0633a
6f13b8c
31ad35a
 
3a0633a
 
 
 
 
 
83ea845
1c6c20c
3a0633a
485d8e3
3a0633a
 
 
 
c0447ed
 
 
 
485d8e3
 
 
3a0633a
485d8e3
 
 
1c6c20c
c0447ed
485d8e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from .pipelines import WhisperPipe, MetaItem, WhisperChinese, Translate7BPipe, FunASRPipe, VadPipe, TranslatePipe
from .utils import timer

class ProcessingPipes:
    """Facade over the transcription, translation and VAD worker pipes.

    Constructing an instance launches every pipe (marked as daemon, then
    started).  Each public request method wraps its arguments in a
    ``MetaItem``, puts it on the chosen pipe's ``input_queue`` and returns
    the next item taken from that pipe's ``output_queue``.
    """

    def __init__(self) -> None:
        # Every launched pipe is recorded here so wait_ready() can visit it.
        self._process = []

        # Speech-to-text: Whisper is the default model, FunASR serves 'zh'.
        # (WhisperChinese is currently not launched.)
        self._whisper_pipe_en = self._launch_process(WhisperPipe())
        self._funasr_pipe = self._launch_process(FunASRPipe())

        # LLM translation: the regular pipe and the 7B variant.
        self._translate_pipe = self._launch_process(TranslatePipe())
        self._translate_7b_pipe = self._launch_process(Translate7BPipe())

        # Voice activity detection.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Flag *process_obj* as a daemon, start it, track it and return it."""
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    @staticmethod
    def _exchange(pipe, item):
        """Put *item* on *pipe*'s input queue and return its next output item."""
        pipe.input_queue.put(item)
        return pipe.output_queue.get()

    def wait_ready(self):
        """Call ``wait()`` on every launched pipe, in launch order."""
        for worker in self._process:
            worker.wait()

    @timer(name="🐧 Translate")
    def translate(self, text, src_lang, dst_lang) -> MetaItem:
        """Send *text* through the regular translation pipe.

        Args:
            text: Content stored as ``transcribe_content`` on the request.
            src_lang: Stored as ``source_language``.
            dst_lang: Stored as ``destination_language``.

        Returns:
            The item read back from the translation pipe's output queue.
        """
        request = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang,
        )
        return self._exchange(self._translate_pipe, request)

    @timer(name="🐧 Translate-large")
    def translate_large(self, text, src_lang, dst_lang) -> MetaItem:
        """Send *text* through the 7B translation pipe.

        Takes the same arguments as ``translate`` and builds the same
        request; only the pipe that handles it differs.
        """
        request = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang,
        )
        return self._exchange(self._translate_7b_pipe, request)

    def get_transcription_model(self, lang: str = 'en'):
        """Return the FunASR pipe for ``'zh'``, the Whisper pipe otherwise."""
        return self._funasr_pipe if lang == 'zh' else self._whisper_pipe_en

    @timer(name="📝 transcribe")
    def transcribe(self, audio_buffer: bytes, src_lang: str) -> MetaItem:
        """Transcribe *audio_buffer* with the pipe selected for *src_lang*.

        Returns:
            The item read back from the selected pipe's output queue.
        """
        request = MetaItem(audio=audio_buffer, source_language=src_lang)
        return self._exchange(self.get_transcription_model(src_lang), request)

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Send *audio_buffer* (as ``source_audio``) through the VAD pipe.

        Returns:
            The item read back from the VAD pipe's output queue.
        """
        request = MetaItem(source_audio=audio_buffer)
        return self._exchange(self._vad_pipe, request)


if __name__ == "__main__":
    # Manual smoke test: launch the pipes and run one request through VAD.
    import soundfile

    # The class defined in this module is ProcessingPipes; the previous
    # name used here (TranslatePipes) does not exist and raised NameError.
    tp = ProcessingPipes()

    # soundfile.read returns (samples, sample_rate); only the samples are used.
    audio, _ = soundfile.read("assets/jfk.flac")

    # Alternative checks, kept for convenience:
    # result = tp.translate("你好,今天天气怎么样?", src_lang="zh", dst_lang="en")
    # result = tp.transcribe(audio, 'en')
    result = tp.voice_detect(audio)
    print(result)