|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
# One tokenizer (taken from the first checkpoint) is used for every model
# below; `transcribe` encodes and decodes with it regardless of which model
# is selected.
tokenizer = AutoTokenizer.from_pretrained(
    "TwentyNine/byt5-small-ainu-latinizer-cos_w_restarts"
)

# The three seq2seq checkpoints are loaded eagerly, in this order, at import
# time. Judging by their names they differ in the scheduler used for training
# (cosine with restarts / polynomial / linear).
model1, model2, model3 = (
    AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    for checkpoint in (
        "TwentyNine/byt5-small-ainu-latinizer-cos_w_restarts",
        "TwentyNine/byt5-small-ainu-latinizer-polynomial",
        "TwentyNine/byt5-small-ainu-latinizer-linear",
    )
)
|
|
|
def transcribe(input_str, model_index):
    """Convert kana text to Latin script with the selected model, line by line.

    Args:
        input_str: Text to convert. It is split on newline characters and
            each line is stripped of surrounding whitespace before being
            passed to the model.
        model_index: ``1``, ``2`` or ``3`` selects ``model1``, ``model2`` or
            ``model3`` respectively; any other value (including ``None``)
            falls back to ``model1``.

    Returns:
        The converted lines joined with newline characters. The result always
        has exactly one line per input line, so input and output stay aligned.
    """
    # Unknown or missing indices fall back to the first model.
    model = {1: model1, 2: model2, 3: model3}.get(model_index, model1)

    converted_lines = []
    for line in input_str.split('\n'):
        text = line.strip()
        if not text:
            # Nothing to convert: keep the blank line as-is instead of asking
            # the model to generate from an empty input.
            converted_lines.append('')
            continue

        input_enc = tokenizer.encode(text, return_tensors='pt')
        output_enc = model.generate(input_enc, max_length=256)
        converted_lines.append(
            tokenizer.decode(output_enc[0], skip_special_tokens=True)
        )

    # Joining the per-line results (rather than adding a separator only when
    # the accumulated output is non-empty) keeps the line count intact even
    # when the first lines produce empty output.
    return '\n'.join(converted_lines)
|
|
|
# The same example sentence serves as both the initial value and the
# placeholder of the input box.
_example_kana = 'トゥイマ ヒ ワ エエㇰ ワ ヒオーイオイ。ピㇼカノ ヌカㇻ ヤン!'

# Input widget: kana text to convert.
_kana_input = gr.Textbox(
    label='Input (kana)',
    value=_example_kana,
    placeholder=_example_kana,
    info='Ainu text written in Japanese katakana (input).',
    interactive=True,
    autofocus=True,
)

# Input widget: each choice maps a display label to the integer that
# `transcribe` receives as `model_index`.
_scheduler_choice = gr.Radio(
    label="Training scheduler type",
    choices=[
        ("Cosine with Restarts", 1),
        ("Polynomial", 2),
        ("Linear", 3),
    ],
)

# Output widget: converted text.
_latin_output = gr.Textbox(
    label='Output (alphabet)',
    info='Ainu text written in the Latin alphabet (output).',
)

# Web UI wiring the two inputs to `transcribe` and showing its return value.
gradio_app = gr.Interface(
    fn=transcribe,
    inputs=[_kana_input, _scheduler_choice],
    outputs=_latin_output,
    title='KIT/TIP ByT5 Ainu Kana-Latin Converter',
    article='<p>Example sentence borrowed from <a href="https://www.hakusuisha.co.jp/book/b584600.html">New Express Ainu-go</a> by <a href="https://researchmap.jp/read0064265/?lang=english">Professor NAKAGAWA Hiroshi</a> of Chiba University.</p>',
)
|
|
|
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    gradio_app.launch()