MihaiHuggingFace committed on
Commit
6d5dae6
·
verified ·
1 Parent(s): 0dcd084

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -25
app.py CHANGED
@@ -9,9 +9,23 @@ tokenizer = AutoTokenizer.from_pretrained("facebook/m2m100_418M")
9
 
10
  LANG_CODES = {
11
  "English":"en",
12
- "toki pona":"tl"
 
13
  }
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def translate(text, src_lang, tgt_lang, candidates:int):
16
  """
17
  Translate the text from source lang to target lang
@@ -43,37 +57,22 @@ def translate(text, src_lang, tgt_lang, candidates:int):
43
 
44
  with gr.Blocks() as app:
45
  markdown="""
46
- # An English / toki pona Neural Machine Translation App!
47
 
48
- ### toki a! 💬
49
 
50
- This is an english to toki pona / toki pona to english neural machine translation app.
51
 
52
- Input your text to translate, a source language and target language, and desired number of return sequences!
53
 
54
- ### Grammar Regularization
55
- An interesting quirk of training a many-to-many translation model is that pseudo-grammar correction
56
- can be achieved by translating *from* **language A** *to* **language A**
57
-
58
- Remember, this can ***approximate*** grammaticality, but it isn't always the best.
59
-
60
- For example, "mi li toki e toki pona" (Source Language: toki pona & Target Language: toki pona) will result in:
61
- - ['mi toki e toki pona.', 'mi toki pona.', 'mi toki e toki pona']
62
- - (Thus, the ungrammatical "li" is dropped)
63
 
64
  ### Model and Data
65
- This app utilizes a fine-tuned version of Facebook/Meta AI's M2M100 418M param model.
66
 
67
- By leveraging the pretrained weights of the massively multilingual M2M100 model,
68
- we can jumpstart our transfer learning to accomplish machine translation for toki pona!
69
 
70
- The model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
71
-
72
- ### This app is a work in progress and obviously not all translations will be perfect.
73
- In addition to parameter quantity and the hyper-parameters used while training,
74
- the *quality of data* found on Tatoeba directly influences the perfomance of projects like this!
75
-
76
- If you wish to contribute, please add high quality and diverse translations to Tatoeba!
77
  """
78
 
79
  with gr.Row():
@@ -82,7 +81,7 @@ with gr.Blocks() as app:
82
  input_text = gr.components.Textbox(label="Input Text", value="Toad (Pit Crew) is a fun character you can try in Mario Kart Tour! Wow!")
83
  source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
84
  target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
85
- return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=24, step=1)
86
 
87
  inputs=[input_text, source_lang, target_lang, return_seqs]
88
  outputs = gr.Textbox()
 
9
 
10
  LANG_CODES = {
11
  "English":"en",
12
+ "Toki Pona":"tl"
13
+ "Romanian":"ro"
14
  }
15
 
16
# Checkpoint fine-tuned for English <-> Toki Pona.
TOKI_PONA_CHECKPOINT = "Jayyydyyy/m2m100_418m_tokipona"
# Original multilingual checkpoint, used for every other supported pair.
BASE_CHECKPOINT = "facebook/m2m100_418M"

# (source code, target code) -> checkpoint to translate with.
# Only the pairs listed here are supported; the codes are the values of
# LANG_CODES ("en", "tl", "ro").
_CHECKPOINT_BY_PAIR = {
    ("en", "tl"): TOKI_PONA_CHECKPOINT,
    ("tl", "en"): TOKI_PONA_CHECKPOINT,
    ("en", "en"): TOKI_PONA_CHECKPOINT,
    ("tl", "tl"): TOKI_PONA_CHECKPOINT,
    ("ro", "en"): BASE_CHECKPOINT,
    ("en", "ro"): BASE_CHECKPOINT,
}

# Checkpoint name -> loaded model, so each checkpoint is loaded at most once
# no matter how many language pairs share it.
_LOADED_MODELS = {}


def checkpoint_for(src, tgt):
    """Return the checkpoint name to use for translating *src* -> *tgt*.

    Args:
        src: Source language code (a value of LANG_CODES, e.g. "en").
        tgt: Target language code (a value of LANG_CODES, e.g. "tl").

    Returns:
        The Hugging Face checkpoint name for that language pair.

    Raises:
        ValueError: If the pair is not one of the supported combinations.
    """
    try:
        return _CHECKPOINT_BY_PAIR[(src, tgt)]
    except KeyError:
        raise ValueError(
            f"Unsupported language pair: {src!r} -> {tgt!r}"
        ) from None


def load_model(src, tgt):
    """Return the model for translating *src* -> *tgt*, loading it on first use.

    The language pair is only known per request, so the model cannot be chosen
    once at import time; call this from the request handler with the codes
    looked up in LANG_CODES.

    Args:
        src: Source language code (e.g. "en").
        tgt: Target language code (e.g. "ro").

    Returns:
        The loaded model, moved to ``device``.

    Raises:
        ValueError: If the pair is not one of the supported combinations.
    """
    name = checkpoint_for(src, tgt)
    if name not in _LOADED_MODELS:
        _LOADED_MODELS[name] = AutoModelForSeq2SeqLM.from_pretrained(name).to(device)
    return _LOADED_MODELS[name]
28
+
29
  def translate(text, src_lang, tgt_lang, candidates:int):
30
  """
31
  Translate the text from source lang to target lang
 
57
 
58
  with gr.Blocks() as app:
59
  markdown="""
60
+ # Translate any text to ANY language!
61
 
62
+ ### Bună! 💬
63
 
64
+ This is an english to any language / any language to english neural machine translation app.
65
 
66
+ Input your text to translate, a source language and target language, and desired number of return sequences!
67
 
68
+ Right now, this only supports 3 languages. I will add more later! So stay tuned!
 
 
 
 
 
 
 
 
69
 
70
  ### Model and Data
71
+ This app utilizes BOTH a fine-tuned version of Facebook/Meta AI's M2M100 418M param model for Toki Pona and the original for other languages.
72
 
73
+ The Toki Pona variant of the model was fine-tuned on the English/toki pona bitexts found at [https://tatoeba.org/](https://tatoeba.org/)
 
74
 
75
+ ### This app is a machine and not all translations will be perfect.
 
 
 
 
 
 
76
  """
77
 
78
  with gr.Row():
 
81
  input_text = gr.components.Textbox(label="Input Text", value="Toad (Pit Crew) is a fun character you can try in Mario Kart Tour! Wow!")
82
  source_lang = gr.components.Dropdown(label="Source Language", value="English", choices=list(LANG_CODES.keys()))
83
  target_lang = gr.components.Dropdown(label="Target Language", value="toki pona", choices=list(LANG_CODES.keys()))
84
+ return_seqs = gr.Slider(label="Number of return sequences", value=3, minimum=1, maximum=128, step=1)
85
 
86
  inputs=[input_text, source_lang, target_lang, return_seqs]
87
  outputs = gr.Textbox()