File size: 1,315 Bytes

9f6a51c
3a0633a
485d8e3
c0447ed
485d8e3
3a0633a
485d8e3
 
 
 
 
 
31ad35a
485d8e3
 
 
 
 
31ad35a
485d8e3
9f6a51c
485d8e3
 
 
9f6a51c
 
 
 
 
 
 
62d476f
31ad35a

import unicodedata

from .base import MetaItem, BasePipe, Segment
from ..helpers.whisper import WhisperCPP


class WhisperPipe(BasePipe):
    """Pipeline stage that transcribes the audio of a ``MetaItem`` via WhisperCPP.

    The transcriber is stored as a class attribute and is created by
    :meth:`init`; :meth:`process` reads it through ``self.whisper``.
    """

    # Transcriber shared through the class; stays None until init() is called.
    whisper = None

    @classmethod
    def init(cls):
        """Create the class-level ``WhisperCPP`` instance if none is set yet."""
        if cls.whisper is None:
            cls.whisper = WhisperCPP()

    def process(self, in_data: MetaItem) -> MetaItem:
        """Transcribe ``in_data.audio`` and store the result on ``in_data``.

        Sets ``in_data.segments`` to ``Segment`` objects whose text has been
        passed through :meth:`filter_chinese_printable`, sets
        ``in_data.transcribe_content`` to the concatenation of the raw
        (unfiltered) segment texts, and replaces ``in_data.audio`` with
        ``b""``.

        Args:
            in_data: Item providing ``audio`` and ``source_language``.

        Returns:
            The same ``in_data`` object, mutated in place.
        """
        # Materialise once: the result is walked twice below, and a one-shot
        # iterable would otherwise be exhausted after the first pass.
        segments = list(
            self.whisper.transcribe(in_data.audio, in_data.source_language)
        )
        texts = "".join(s.text for s in segments)
        in_data.segments = [
            Segment(t0=s.t0, t1=s.t1, text=self.filter_chinese_printable(s.text))
            for s in segments
        ]
        in_data.transcribe_content = texts
        # Audio is cleared after transcription — presumably so later stages do
        # not carry the buffer around; confirm against the pipeline consumers.
        in_data.audio = b""
        return in_data

    def filter_chinese_printable(self, s):
        """Return ``s`` without control characters and surrounding whitespace.

        Characters whose Unicode general category is ``'Cc'`` (control
        characters) are dropped; every other character is kept, so despite
        the name the filter is not specific to Chinese text.

        Args:
            s: Text to clean.

        Returns:
            The cleaned text with leading/trailing whitespace stripped.
        """
        # Round-trip through UTF-8 with a tolerant *encoder*: a lone surrogate
        # becomes '?' instead of raising UnicodeEncodeError. The bytes produced
        # are always valid UTF-8, so decoding needs no error handler.
        sanitized = s.encode('utf-8', errors='replace').decode('utf-8')
        return ''.join(
            char for char in sanitized if unicodedata.category(char) != 'Cc'
        ).strip()


class WhisperChinese(WhisperPipe):
    """``WhisperPipe`` variant whose transcriber is built with ``source_lange='zh'``."""

    @classmethod
    def init(cls):
        """Create this class's own ``WhisperCPP(source_lange='zh')`` if missing.

        The check looks only at the attribute defined directly on ``cls``.
        A plain ``cls.whisper`` lookup would fall back to the transcriber
        already stored on ``WhisperPipe`` and skip creating the ``zh`` one.
        """
        # vars(cls) holds only attributes set on this class, not inherited ones.
        if vars(cls).get("whisper") is None:
            # NOTE: 'source_lange' is the keyword as spelled at the call site;
            # it must match the WhisperCPP signature, so it is left unchanged.
            cls.whisper = WhisperCPP(source_lange='zh')