SaranaAbidueva
committed on
Commit
·
ae36c31
1
Parent(s):
0fabc81
fix tokenizer
Browse files
- tokenizer_config.json +4 -3
tokenizer_config.json
CHANGED
@@ -51,7 +51,8 @@
|
|
51 |
"ur_PK",
|
52 |
"xh_ZA",
|
53 |
"gl_ES",
|
54 |
-
"sl_SI"
|
|
|
55 |
],
|
56 |
"bos_token": "<s>",
|
57 |
"clean_up_tokenization_spaces": true,
|
@@ -70,8 +71,8 @@
|
|
70 |
"pad_token": "<pad>",
|
71 |
"sep_token": "</s>",
|
72 |
"sp_model_kwargs": {},
|
73 |
-
"src_lang":
|
74 |
-
"tgt_lang":
|
75 |
"tokenizer_class": "MBart50Tokenizer",
|
76 |
"tokenizer_file": null,
|
77 |
"unk_token": "<unk>"
|
|
|
51 |
"ur_PK",
|
52 |
"xh_ZA",
|
53 |
"gl_ES",
|
54 |
+
"sl_SI",
|
55 |
+
"bxr_XX"
|
56 |
],
|
57 |
"bos_token": "<s>",
|
58 |
"clean_up_tokenization_spaces": true,
|
|
|
71 |
"pad_token": "<pad>",
|
72 |
"sep_token": "</s>",
|
73 |
"sp_model_kwargs": {},
|
74 |
+
"src_lang": "ru_RU",
|
75 |
+
"tgt_lang": "bxr_XX",
|
76 |
"tokenizer_class": "MBart50Tokenizer",
|
77 |
"tokenizer_file": null,
|
78 |
"unk_token": "<unk>"
|