File size: 2,727 Bytes
3a0633a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import unicodedata

from .base import MetaItem, BasePipe, Segment
from ..helpers.funasr import FunASR


class FunASRPipe(BasePipe):
    """Pipeline stage that transcribes a ``MetaItem``'s audio with FunASR.

    The engine is stored on the class attribute ``funasr`` and is created by
    ``init()``; ``process()`` expects it to be set already.
    """

    # Engine instance; remains None until init() has been called.
    funasr = None

    @classmethod
    def init(cls):
        """Create the FunASR engine unless ``cls.funasr`` is already set."""
        if cls.funasr is None:
            cls.funasr = FunASR()

    def process(self, in_data: MetaItem) -> MetaItem:
        """Transcribe ``in_data.audio`` and store the outcome on ``in_data``.

        The value returned by ``self.funasr.transcribe`` is handled as follows:

        * a ``str`` becomes the transcript and a single segment with
          ``t0 == t1 == 0``;
        * a non-empty sequence whose first element is a dict containing
          ``'text'``, or an object with a ``text`` attribute, is converted
          element by element into segments;
        * anything else yields an empty transcript and no segments.

        Segment text is passed through ``filter_chinese_printable``;
        ``transcribe_content`` is the concatenation of the unfiltered texts.

        Args:
            in_data: Item whose ``audio`` and ``source_language`` are handed
                to the engine.

        Returns:
            The same ``in_data`` object with ``segments`` and
            ``transcribe_content`` set and ``audio`` reset to ``b""``.
        """
        result = self.funasr.transcribe(in_data.audio, in_data.source_language)

        if isinstance(result, str):
            # A bare string carries no timing information, so it is wrapped
            # in one segment with zeroed timestamps.
            in_data.transcribe_content = result
            in_data.segments = [
                Segment(t0=0, t1=0, text=self.filter_chinese_printable(result))
            ]
        else:
            segments, texts = self._build_segments(result)
            in_data.segments = segments
            in_data.transcribe_content = "".join(texts)

        # The audio payload is cleared before the item is handed back.
        in_data.audio = b""
        return in_data

    def _build_segments(self, result):
        """Turn a sequence of dict/object items into ``(segments, raw_texts)``.

        Returns two empty lists when ``result`` is empty, cannot be indexed,
        or its first element has neither a ``'text'`` key nor a ``text``
        attribute.
        """
        try:
            first = result[0] if result else None
        except (TypeError, KeyError, IndexError):
            # Mappings without key 0 and non-indexable values are treated as
            # an unrecognised format rather than an error.
            first = None

        # The dict check must come before the membership test: `'text' in obj`
        # raises TypeError for plain objects and does a substring search on
        # strings, neither of which means "has a text field".
        if isinstance(first, dict):
            recognised = 'text' in first
        else:
            recognised = first is not None and hasattr(first, 'text')
        if not recognised:
            return [], []

        segments = []
        texts = []
        for item in result:
            text, start, end = self._item_fields(item)
            segments.append(
                Segment(t0=start, t1=end, text=self.filter_chinese_printable(text))
            )
            texts.append(text)
        return segments, texts

    @staticmethod
    def _item_fields(item):
        """Extract ``(text, start, end)`` from one dict or object item."""
        if isinstance(item, dict):
            text = item.get('text', '')
            start = item.get('start', 0)
            end = item.get('end', 0)
        else:
            text = item.text
            # Objects may expose their timestamps as start/end or as t0/t1.
            start = getattr(item, 'start', 0) or getattr(item, 't0', 0)
            end = getattr(item, 'end', 0) or getattr(item, 't1', 0)
        # A None text would break both the filter and the final join.
        if text is None:
            text = ''
        return text, start, end

    def filter_chinese_printable(self, s):
        """Return ``s`` without control characters, stripped of outer whitespace.

        Characters in Unicode category ``Cc`` (control characters, which
        include newline and tab) are removed. Lone surrogates (category
        ``Cs``) are removed as well, because they cannot be encoded as UTF-8.
        ``None`` and the empty string both give ``''``.
        """
        if not s:
            return ''
        kept = [ch for ch in s if unicodedata.category(ch) not in ('Cc', 'Cs')]
        return ''.join(kept).strip()


class FunASRChinese(FunASRPipe):
    """``FunASRPipe`` variant whose engine is built with ``source_lange='zh'``."""

    # Dedicated slot for this class. Without it, `cls.funasr` in init() is
    # looked up through inheritance and finds FunASRPipe.funasr; if the parent
    # was initialised first, the guard below would be false and the 'zh'
    # engine would never be created.
    funasr = None

    @classmethod
    def init(cls):
        """Create the 'zh' FunASR engine unless this class already has one."""
        if cls.funasr is None:
            # NOTE(review): the keyword is spelled `source_lange`; confirm it
            # matches the parameter name accepted by FunASR's constructor.
            cls.funasr = FunASR(source_lange='zh')