"""Transcribe the example English clip shipped with SenseVoiceSmall using FunASR.

Running this file loads the model, runs recognition on the bundled
``example/en.mp3`` and prints the post-processed transcript.
"""

from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

# Model identifier passed to AutoModel; resolved through the hub chosen below.
model_dir = "FunAudioLLM/SenseVoiceSmall"

model = AutoModel(
    model=model_dir,
    # A VAD front-end is loaded alongside the recognizer.
    vad_model="fsmn-vad",
    # NOTE(review): presumably the maximum VAD segment length in
    # milliseconds (i.e. 30 s) -- confirm against the FunASR docs.
    vad_kwargs={"max_single_segment_time": 30000},
    # Hard-coded to the first CUDA device; change this to run elsewhere.
    device="cuda:0",
    # NOTE(review): "hf" presumably selects the Hugging Face hub -- confirm.
    hub="hf",
)

res = model.generate(
    # The sample clip is read from the resolved model directory.
    input=f"{model.model_path}/example/en.mp3",
    cache={},
    language="auto",
    use_itn=True,
    # NOTE(review): the "_s" suffixes suggest these are durations in
    # seconds -- confirm against the FunASR docs.
    batch_size_s=60,
    merge_vad=True,
    merge_length_s=15,
)

# Only the first result entry is used; its "text" field is post-processed
# before printing.
text = rich_transcription_postprocess(res[0]["text"])
print(text)