File size: 1,125 Bytes
dbd0cd3 f7c5310 59c9f9b f7c5310 b042fbc 4aedf7f 45fdae9 f7c5310 b042fbc 6c82ba0 f7c5310 4aedf7f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Checkpoint identifier passed to both from_pretrained calls below.
MODEL_ID = "TwentyNine/byt5-ain-kana-latin-converter-v1"

# Loaded once at import time; the module-level objects are reused by every
# call that converts text.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
def transcribe(input_str):
    """Convert Ainu text written in katakana to the Latin alphabet.

    The text is stripped of surrounding whitespace, encoded with the
    module-level ``tokenizer``, passed through ``model.generate``, and the
    first generated sequence is decoded back into a string.

    Args:
        input_str: Text to convert. ``None`` is treated like an empty string.

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        the input is ``None``, empty, or contains only whitespace.
    """
    text = (input_str or "").strip()
    if not text:
        # Nothing to convert: skip inference rather than asking the model to
        # generate from an empty prompt.
        return ""

    input_enc = tokenizer.encode(text, return_tensors='pt')
    # max_length bounds the length of the generated sequence.
    output_enc = model.generate(input_enc, max_length=256)
    return tokenizer.decode(output_enc[0], skip_special_tokens=True)
# Sample katakana sentence used as both the pre-filled value and the
# placeholder of the input box.
_EXAMPLE_KANA = 'トゥイマ ヒ ワ エエㇰ ワ ヒオーイオイ。ピㇼカノ ヌカㇻ ヤン!'

# Editable, auto-focused text box that receives the katakana text.
_kana_input = gr.Textbox(
    label='Input (kana)',
    value=_EXAMPLE_KANA,
    placeholder=_EXAMPLE_KANA,
    info='Ainu text written in Japanese katakana (input).',
    interactive=True,
    autofocus=True,
)

# Text box that displays the string returned by transcribe.
_latin_output = gr.Textbox(
    label='Output (alphabet)',
    info='Ainu text written in the Latin alphabet (output).',
)

# Single-input, single-output interface wired to transcribe.
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=_kana_input,
    outputs=_latin_output,
    title="BYT5 Ainu Kana-Latin Converter (V1)",
)
if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    # share=True additionally requests a public share link from Gradio.
    gradio_app.launch(share=True)