|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# Checkpoint identifier handed to both ``from_pretrained`` calls below, so the
# tokenizer and the model are always loaded from the same source.
_CHECKPOINT = "TwentyNine/byt5-ain-kana-latin-converter-v1"

# Loaded once at import time and reused by every conversion request.
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
|
|
|
def transcribe(input_str, *, max_length=256):
    """Convert Ainu text written in katakana to the Latin alphabet.

    Args:
        input_str: Text to convert. Leading and trailing whitespace is
            stripped before encoding. ``None`` is treated as empty input.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
            Defaults to 256, the limit that used to be hard-coded.

    Returns:
        The decoded model output with special tokens removed, or an empty
        string when the input is empty or whitespace-only.
    """
    text = (input_str or "").strip()
    if not text:
        # Nothing to convert: return early rather than asking the model to
        # generate output for an input with no content.
        return ""

    input_enc = tokenizer.encode(text, return_tensors='pt')
    output_enc = model.generate(input_enc, max_length=max_length)
    # generate() returns a batch; only one sequence was submitted.
    return tokenizer.decode(output_enc[0], skip_special_tokens=True)
|
|
|
# Sample katakana sentence, used both as the initial value and as the
# placeholder of the input box.
_SAMPLE_KANA = 'トゥイマ ヒ ワ エエㇰ ワ ヒオーイオイ。ピㇼカノ ヌカㇻ ヤン!'

# Editable input box for the katakana text.
_kana_box = gr.Textbox(
    label='Input (kana)',
    value=_SAMPLE_KANA,
    placeholder=_SAMPLE_KANA,
    info='Ainu text written in Japanese katakana (input).',
    interactive=True,
    autofocus=True,
)

# Output box that shows the value returned by ``transcribe``.
_latin_box = gr.Textbox(
    label='Output (alphabet)',
    info='Ainu text written in the Latin alphabet (output).',
)

# Single-input, single-output interface wired to ``transcribe``.
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=_kana_box,
    outputs=_latin_box,
    title="BYT5 Ainu Kana-Latin Converter (V1)",
)
|
|
|
def main():
    """Launch the Gradio interface, passing ``share=True`` to ``launch``."""
    gradio_app.launch(share=True)


# Only start the server when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()