"""Gradio demo that transcribes audio with a fine-tuned speech-recognition checkpoint."""

from transformers import pipeline
import gradio as gr

# Speech-recognition pipeline loaded from a local checkpoint directory.
# device=-1 selects the CPU in the transformers pipeline API.
transcribe = pipeline(
    task="automatic-speech-recognition",
    model="ckpt_large_v2/checkpoint-1740",
    tokenizer="ckpt_large_v2",
    chunk_length_s=30,
    device=-1,
)

# Use the tokenizer's decoder prompt for language "ja" / task "transcribe" as
# the model's forced decoder ids.
transcribe.model.config.forced_decoder_ids = transcribe.tokenizer.get_decoder_prompt_ids(
    language="ja", task="transcribe"
)


def main(audio_path):
    """Run the pipeline on *audio_path* and return the "text" field of its result."""
    return transcribe(audio_path)["text"]


# The README doubles as the description shown in the UI.
# Read it explicitly as UTF-8 so the result does not depend on the platform's
# default encoding (the app's text is Japanese).
with open('./README.md', encoding="utf-8") as f:
    md = f.readlines()

# Drop the first 11 lines — presumably a metadata header; confirm against README.md.
md = md[11:]
# NOTE(review): readlines() keeps each line's trailing newline, so this join
# puts a blank line between every pair of lines. Kept as-is to preserve the
# rendered description.
md = "\n".join(md)

# NOTE(review): the title string looks like it has a doubled "ン"; left
# untouched because it is user-facing text — confirm with the author.
# NOTE: `iface` is bound to the return value of launch(), not to the Interface.
iface = gr.Interface(
    fn=main,
    inputs=[gr.Audio(type='filepath', sources=['microphone', 'upload'])],
    description=md,
    outputs="text",
    title="CoCoCap-beta 日本語声質キャプショニンング with CocoNut Corpus",
).launch(share=True)