Update app.py
Browse files
app.py
CHANGED
@@ -9,9 +9,23 @@ tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
|
|
9 |
|
10 |
LANG_CODES = {
|
11 |
"English":"en",
|
12 |
-
"
|
|
|
13 |
}
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def translate(text, src_lang, tgt_lang, candidates:int):
|
16 |
"""
|
17 |
Translate the text from source lang to target lang
|
@@ -43,37 +57,22 @@ def translate(text, src_lang, tgt_lang, candidates:int):
|
|
43 |
|
44 |
with gr.Blocks() as app:
|
45 |
markdown="""
|
46 |
-
#
|
47 |
|
48 |
-
###
|
49 |
|
50 |
-
This is an english to
|
51 |
|
52 |
-
Input your text to translate, a source language and target language, and desired number of return sequences!
|
53 |
|
54 |
-
|
55 |
-
An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
|
56 |
-
can be achieved by translating *from* **language A** *to* **language A**
|
57 |
-
|
58 |
-
Remember, this can ***approximate*** grammaticality, but it isn't always the best.
|
59 |
-
|
60 |
-
For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
|
61 |
-
- ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
|
62 |
-
- (Thus, the ungrammatical "li" is dropped)
|
63 |
|
64 |
### Model and Data
|
65 |
-
This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
|
66 |
|
67 |
-
|
68 |
-
we can jumpstart our transfer learning to accomplish machine translation for toki pona!
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
### This app is a work in progress and obviously not all translations will be perfect.
|
73 |
-
In addition to parameter quantity and the hyper-parameters used while training,
|
74 |
-
the *quality of data* found on Tatoeba directly influences the performance of projects like this!
|
75 |
-
|
76 |
-
If you wish to contribute, please add high quality and diverse translations to Tatoeba!
|
77 |
"""
|
78 |
|
79 |
with gr.Row():
|
@@ -82,7 +81,7 @@ with gr.Blocks() as app:
|
|
82 |
input_text = gr.components.Textbox(label="Input Text", value="Toad (Pit Crew) is a fun character you can try in Mario Kart Tour! Wow!")
|
83 |
source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
|
84 |
target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
|
85 |
-
return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=
|
86 |
|
87 |
inputs=[input_text, source_lang, target_lang, return_seqs]
|
88 |
outputs = gr.Textbox()
|
|
|
9 |
|
10 |
# Maps the language names shown in the UI dropdowns to the language codes
# handed to the tokenizer/model.
# NOTE(review): "tl" is used for Toki Pona here — presumably the fine-tuned
# checkpoint reuses that code slot; confirm against the model card.
# NOTE(review): the "Target Language" dropdown defaults to "toki pona"
# (lowercase), which does not match the "Toki Pona" key below — confirm the
# intended label.
LANG_CODES = {
    "English": "en",
    "Toki Pona": "tl",  # trailing comma was missing, which made the literal a syntax error
    "Romanian": "ro",
}
|
15 |
|
16 |
+
if tgt == tl and src == en:
|
17 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
18 |
+
else if tgt == en and src == tl:
|
19 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
20 |
+
else if tgt == en and src == en:
|
21 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
22 |
+
else if tgt == tl and src == tl:
|
23 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Jayyydyyy/m2m100_418m_tokipona").to(device)
|
24 |
+
else if tgt == en and src == ro:
|
25 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M").to(device)
|
26 |
+
else if tgt == ro and src == en:
|
27 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/m2m100_418M").to(device)
|
28 |
+
|
29 |
def translate(text, src_lang, tgt_lang, candidates:int):
|
30 |
"""
|
31 |
Translate the text from source lang to target lang
|
|
|
57 |
|
58 |
with gr.Blocks() as app:
|
59 |
markdown="""
|
60 |
+
# Translate any text to ANY language!
|
61 |
|
62 |
+
### Bună! 💬
|
63 |
|
64 |
+
This is an English to any language / any language to English neural machine translation app.
|
65 |
|
66 |
+
Input your text to translate, a source language and target language, and desired number of return sequences!
|
67 |
|
68 |
+
Right now, this only supports 3 languages. I will add more later! So stay tuned!
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
### Model and Data
|
71 |
+
This app utilizes BOTH a fine-tuned version of Facebook/Meta AI's M2M100 418M param model for Toki Pona and the original for other languages.
|
72 |
|
73 |
+
The Toki Pona variant of the model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
|
|
|
74 |
|
75 |
+
### This app relies on machine translation, so not all translations will be perfect.
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
"""
|
77 |
|
78 |
with gr.Row():
|
|
|
81 |
input_text = gr.components.Textbox(label="Input Text", value="Toad (Pit Crew) is a fun character you can try in Mario Kart Tour! Wow!")
|
82 |
source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
|
83 |
target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
|
84 |
+
return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=128, step=1)
|
85 |
|
86 |
inputs=[input_text, source_lang, target_lang, return_seqs]
|
87 |
outputs = gr.Textbox()
|