|
import unicodedata |
|
|
|
from .base import MetaItem, BasePipe, Segment |
|
from ..helpers.funasr import FunASR |
|
|
|
|
|
class FunASRPipe(BasePipe):
    """Pipeline stage that transcribes a MetaItem's audio through FunASR.

    The engine is stored on the class (``funasr``) and is created lazily by
    :meth:`init`; :meth:`process` expects it to be set before it is called.
    """

    # Engine instance stored at class level; populated by init().
    funasr = None

    @classmethod
    def init(cls):
        """Create the class-level FunASR engine if it has not been set yet."""
        if cls.funasr is None:
            cls.funasr = FunASR()

    def process(self, in_data: MetaItem) -> MetaItem:
        """Transcribe ``in_data.audio`` and store the result on ``in_data``.

        The value returned by ``self.funasr.transcribe`` is handled as:

        * ``str`` -- stored verbatim as ``transcribe_content`` with a single
          ``Segment`` at ``t0=0, t1=0``;
        * a list whose first item has a ``'text'`` key -- one ``Segment`` per
          item, read via ``'text'`` / ``'start'`` / ``'end'``;
        * a sequence whose first item has a ``text`` attribute -- one
          ``Segment`` per item, read via ``text`` and ``start``/``t0``,
          ``end``/``t1``;
        * anything else (including empty results) -- empty content and no
          segments.

        Segment text is passed through :meth:`filter_chinese_printable`;
        ``transcribe_content`` is the concatenation of the unfiltered texts.
        ``in_data.audio`` is reset to ``b""`` before returning.

        Returns:
            The same ``in_data`` object, mutated in place.
        """
        result = self.funasr.transcribe(in_data.audio, in_data.source_language)

        if isinstance(result, str):
            in_data.transcribe_content = result
            in_data.segments = [
                Segment(t0=0, t1=0, text=self.filter_chinese_printable(result))
            ]
        else:
            rows = self._extract_rows(result)
            in_data.segments = [
                Segment(t0=start, t1=end, text=self.filter_chinese_printable(text))
                for text, start, end in rows
            ]
            in_data.transcribe_content = "".join(text for text, _, _ in rows)

        # The audio payload is cleared once it has been transcribed.
        in_data.audio = b""
        return in_data

    @staticmethod
    def _has_text_key(item) -> bool:
        """Return True if *item* is a non-string container holding a 'text' key."""
        if isinstance(item, str):
            # `'text' in some_str` is a substring test, not a key lookup.
            return False
        try:
            return 'text' in item
        except TypeError:
            # Plain objects do not support `in`; they are handled by the
            # attribute-based branch instead of crashing here.
            return False

    @classmethod
    def _extract_rows(cls, result):
        """Normalize a non-string transcribe result to ``(text, start, end)`` tuples."""
        if not result:
            return []

        first = result[0]
        if isinstance(result, list) and cls._has_text_key(first):
            return [
                # `or ''` guards against an explicit None text value, which
                # would otherwise break the "".join() in process().
                (item.get('text') or '', item.get('start', 0), item.get('end', 0))
                for item in result
            ]
        if hasattr(first, 'text'):
            return [
                (
                    item.text or '',
                    getattr(item, 'start', 0) or getattr(item, 't0', 0),
                    getattr(item, 'end', 0) or getattr(item, 't1', 0),
                )
                for item in result
            ]
        return []

    def filter_chinese_printable(self, s):
        """Return *s* with Unicode control characters removed and ends stripped.

        Characters whose ``unicodedata.category`` is ``'Cc'`` are dropped; all
        other characters are kept. A falsy *s* (``None`` or ``''``) yields ``''``.
        """
        if not s:
            return ''
        return ''.join(ch for ch in s if unicodedata.category(ch) != 'Cc').strip()
|
|
|
|
|
class FunASRChinese(FunASRPipe):
    """FunASRPipe variant whose engine is built with ``source_lange='zh'``."""

    # Own class-level slot. Without it, `cls.funasr` resolves to the parent's
    # attribute, so an engine already created by FunASRPipe.init() would make
    # init() below a no-op and the 'zh' engine would never be constructed.
    funasr = None

    @classmethod
    def init(cls):
        """Create this class's FunASR engine if it has not been set yet."""
        if cls.funasr is None:
            # NOTE(review): the keyword is spelled 'source_lange' in the
            # original call; kept as-is since it must match the FunASR
            # constructor, which is not visible here -- confirm.
            cls.funasr = FunASR(source_lange='zh')