|
import os |
|
import gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline |
|
|
|
# Hugging Face Hub identifiers of the three fine-tuned checkpoints.
# NOTE: the name `style` is rebound further down in this file to the list of
# speech-style choices, so it only holds the checkpoint id during loading.
en2ko = "KoJLabs/nllb-finetuned-en2ko"
ko2en = "KoJLabs/nllb-finetuned-ko2en"
style = "KoJLabs/bart-speech-style-converter"

# Everything is loaded once at import time; translate() and conversion()
# read these module-level objects on every request.
en2ko_model, ko2en_model, style_model = [
    AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
]

en2ko_tokenizer, ko2en_tokenizer, style_tokenizer = [
    AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in (en2ko, ko2en, style)
]
|
|
|
# UI language name -> language code passed to the pipeline as src_lang/tgt_lang.
_LANGUAGE_CODES = {"English": "eng_Latn", "Korean": "kor_Hang"}


def translate(source, target, text):
    """Translate ``text`` between English and Korean.

    Args:
        source: Language of ``text``; ``"English"`` or ``"Korean"``.
        target: Language to translate into; ``"English"`` or ``"Korean"``.
        text: The text to translate.

    Returns:
        The ``translation_text`` of the first pipeline result, or ``""``
        when ``text`` is empty or whitespace-only.

    Raises:
        KeyError: If ``source`` or ``target`` is not a known language name.
    """
    # Look both codes up first so an unknown language still raises KeyError,
    # even when the text is blank.
    src = _LANGUAGE_CODES[source]
    tgt = _LANGUAGE_CODES[target]

    # Nothing to translate: skip the model instead of generating from nothing.
    if not text or not text.strip():
        return ""

    # The checkpoint is selected by the source language only. Each model is
    # paired with the tokenizer loaded from the same checkpoint; the earlier
    # code crossed them (en2ko model with the ko2en tokenizer and vice versa).
    if src == "eng_Latn":
        model, tokenizer = en2ko_model, en2ko_tokenizer
    else:
        model, tokenizer = ko2en_model, ko2en_tokenizer

    translator = pipeline(
        'translation',
        model=model,
        tokenizer=tokenizer,
        src_lang=src,
        tgt_lang=tgt,
    )

    output = translator(text)
    return output[0]['translation_text']
|
|
|
# UI style key -> Korean style label inserted into the converter prompt.
# These literals were mojibake in the file (UTF-8 Hangul rendered through a
# single-byte code page, with one label split across two lines); they are
# restored here to the Hangul reconstructed from those bytes.
# NOTE(review): confirm the labels against the model card of the
# style-converter checkpoint.
_STYLE_LABELS = {
    "formal": "문어체",
    "informal": "구어체",
    "android": "안드로이드",
    "azae": "아재",
    "chat": "채팅",
    "choding": "초등학생",
    "emoticon": "이모티콘",
    "enfp": "enfp",
    "gentle": "신사",
    "halbae": "할아버지",
    "halmae": "할머니",
    "joongding": "중학생",
    "king": "왕",
    "naruto": "나루토",
    "seonbi": "선비",
    "sosim": "소심한",
    "translator": "번역기",
}


def conversion(source, text):
    """Rewrite ``text`` in the requested Korean speech style.

    Args:
        source: Style key, e.g. ``"formal"`` or ``"king"``; must be one of
            the keys of ``_STYLE_LABELS``.
        text: The text to convert.

    Returns:
        The ``generated_text`` of the first pipeline result, or ``""`` when
        ``text`` is empty or whitespace-only.

    Raises:
        KeyError: If ``source`` is not a known style key.
    """
    # Look the label up first so an unknown style still raises KeyError.
    style_label = _STYLE_LABELS[source]

    # Nothing to convert: skip the model instead of generating from the
    # bare prompt prefix.
    if not text or not text.strip():
        return ""

    # Prompt format: "<style label> 형식으로 변환:" followed directly by the text.
    input_text = f"{style_label} 형식으로 변환:" + text

    converter = pipeline(
        'text2text-generation',
        model=style_model,
        tokenizer=style_tokenizer,
    )

    output = converter(input_text)
    return output[0]['generated_text']
|
|
|
# Heading shown on both tabs.
title = 'KoTAN Translator & Speech-style converter'

# Choices for the source/target language dropdowns; translate() receives the
# selected names.
lang = ['English', 'Korean']

# Choices for the speech-style dropdown; conversion() receives the selected
# key. This rebinds `style`, which earlier held the style checkpoint id.
style = [
    'formal', 'informal', 'android', 'azae', 'chat', 'choding',
    'emoticon', 'enfp', 'gentle', 'halbae', 'halmae', 'joongding',
    'king', 'naruto', 'seonbi', 'sosim', 'translator',
]

# NOTE(review): gr.inputs / gr.outputs and enable_queue are legacy Gradio
# APIs -- confirm the pinned Gradio version still accepts them.
with gr.Blocks() as demo:
    gr.Markdown(
        "KoTAN: Korean Translation and Augmentation with fine-tuned NLLB. "
        "You can exercise translation tasks, and speech-style conversion. "
        "If you want to download as pip package, please visit our github. "
        "(https://github.com/KoJLabs/KoTAN)"
    )

    # Tab 1: source language, target language and text in; translation out.
    with gr.Tab("Translation"):
        translator_app = gr.Interface(
            fn=translate,
            inputs=[
                gr.inputs.Dropdown(choices=lang, label='Source Language'),
                gr.inputs.Dropdown(choices=lang, label='Target Language'),
                gr.inputs.Textbox(lines=5, label='Text to Translate'),
            ],
            outputs=[gr.outputs.Textbox(label='Translated Text')],
            title=title,
            enable_queue=True,
        )

    # Tab 2: speech style and text in; converted text out.
    with gr.Tab("Speech-style conversion"):
        translator_app = gr.Interface(
            fn=conversion,
            inputs=[
                gr.inputs.Dropdown(choices=style, label='Speech Style'),
                gr.inputs.Textbox(lines=5, label='Text to style conversion'),
            ],
            outputs=[gr.outputs.Textbox(label='Converted Text')],
            title=title,
            enable_queue=True,
        )

# Start the web server as soon as the module is executed.
demo.launch()