KoTAN / app.py
jisukim8873's picture
test
c968835
raw
history blame
4.2 kB
import os
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# Identifiers handed to ``from_pretrained`` for each of the three models.
# NOTE: the name ``style`` is rebound further down in this file to the list of
# selectable style names, so it only holds the model identifier up to there.
en2ko = "KoJLabs/nllb-finetuned-en2ko"
ko2en = "KoJLabs/nllb-finetuned-ko2en"
style = "KoJLabs/bart-speech-style-converter"

# Load everything once at import time so the request handlers can reuse the
# already-instantiated objects: first the three models, then the tokenizers.
en2ko_model, ko2en_model, style_model = (
    AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
)
en2ko_tokenizer, ko2en_tokenizer, style_tokenizer = (
    AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
)
def translate(source, target, text):
    """Translate ``text`` between English and Korean.

    Args:
        source: Display name of the input language, ``"English"`` or
            ``"Korean"``.
        target: Display name of the output language, ``"English"`` or
            ``"Korean"``.
        text: The text to translate.

    Returns:
        The ``translation_text`` field of the first pipeline result.

    Raises:
        KeyError: If ``source`` or ``target`` is not one of the supported
            display names.
    """
    # Map the UI display names to the language codes passed to the pipeline.
    formats = {"English": "eng_Latn", "Korean": "kor_Hang"}
    src = formats[source]
    tgt = formats[target]

    # The checkpoint is selected by the source language alone. Each model is
    # paired with its own tokenizer (the two were previously crossed over).
    if src == "eng_Latn":
        model, tokenizer = en2ko_model, en2ko_tokenizer
    else:  # "kor_Hang" is the only other value ``formats`` can yield
        model, tokenizer = ko2en_model, ko2en_tokenizer

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=src,
        tgt_lang=tgt,
    )
    output = translator(text)
    return output[0]['translation_text']
def conversion(source, text):
    """Rewrite ``text`` in the requested Korean speech style.

    Args:
        source: Style key chosen in the UI (e.g. ``"formal"``, ``"king"``).
        text: The text to convert.

    Returns:
        The ``generated_text`` field of the first pipeline result.

    Raises:
        KeyError: If ``source`` is not one of the supported style keys.
    """
    # Korean label for each style key; the label is embedded in the prompt.
    # These literals had been mis-encoded (UTF-8 bytes shown through a
    # single-byte code page) and are restored to proper Hangul here.
    # presumably these match the labels used when the model was fine-tuned;
    # verify against the model card.
    formats = {
        "formal": "문어체",        # written style
        "informal": "구어체",      # spoken style
        "android": "안드로이드",    # android
        "azae": "아재",            # middle-aged man
        "chat": "채팅",            # chat
        "choding": "초등학생",      # elementary school student
        "emoticon": "이모티콘",     # emoticon
        "enfp": "enfp",
        "gentle": "신사",          # gentleman
        "halbae": "할아버지",       # grandfather
        "halmae": "할머니",        # grandmother
        "joongding": "중학생",      # middle school student
        "king": "왕",              # king
        "naruto": "나루토",         # Naruto
        "seonbi": "선비",          # classical scholar
        "sosim": "소심한",         # timid
        "translator": "번역기",     # machine translator
    }
    style_label = formats[source]

    # Prompt layout: "<label> 형식으로 변환:" ("convert to <label> format:")
    # immediately followed by the user's text.
    input_text = f"{style_label} 형식으로 변환:" + text

    converter = pipeline(
        'text2text-generation',
        model=style_model,
        tokenizer=style_tokenizer,
    )
    output = converter(input_text)
    return output[0]['generated_text']
# Heading shown on both tabs.
title = 'KoTAN Translator & Speech-style converter'

# Choices offered by the language drop-downs; must match the keys that
# ``translate`` maps to language codes.
lang = ['English', 'Korean']

# Choices offered by the style drop-down; must match the keys that
# ``conversion`` maps to Korean style labels.
# NOTE: this rebinds ``style``, which earlier in the file held a model
# identifier that has already been consumed by then.
style = [
    'formal', 'informal', 'android', 'azae', 'chat', 'choding', 'emoticon',
    'enfp', 'gentle', 'halbae', 'halmae', 'joongding', 'king', 'naruto',
    'seonbi', 'sosim', 'translator',
]

with gr.Blocks() as demo:
    gr.Markdown(
        "KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. "
        "You can exercise translation tasks, and speech-style conversion. "
        "If you want to download as pip package, please visit our github. "
        "(https://github.com/KoJLabs/KoTAN)"
    )

    # Components come from the top-level ``gr`` namespace rather than the
    # legacy ``gr.inputs`` / ``gr.outputs`` namespaces.
    # NOTE(review): ``enable_queue`` is kept unchanged; confirm the installed
    # Gradio version still accepts it on ``gr.Interface``.
    with gr.Tab("Translation"):
        translator_app = gr.Interface(
            fn=translate,
            inputs=[
                gr.Dropdown(choices=lang, label='Source Language'),
                gr.Dropdown(choices=lang, label='Target Language'),
                gr.Textbox(lines=5, label='Text to Translate'),
            ],
            outputs=[gr.Textbox(label='Translated Text')],
            title=title,
            enable_queue=True,
        )

    with gr.Tab("Speech-style conversion"):
        # Bound to its own name so it no longer overwrites ``translator_app``.
        converter_app = gr.Interface(
            fn=conversion,
            inputs=[
                gr.Dropdown(choices=style, label='Speech Style'),
                gr.Textbox(lines=5, label='Text to style conversion'),
            ],
            outputs=[gr.Textbox(label='Converted Text')],
            title=title,
            enable_queue=True,
        )

demo.launch()