seyoungsong committed on
Commit
68fdd8d
1 Parent(s): 5b36141
model_files/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
model_files/.gitignore ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ temp.*
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+
164
+ # General
165
+ .DS_Store
166
+ .AppleDouble
167
+ .LSOverride
168
+
169
+ # Icon must end with two \r
170
+ Icon
171
+
172
+ # Thumbnails
173
+ ._*
174
+
175
+ # Files that might appear in the root of a volume
176
+ .DocumentRevisions-V100
177
+ .fseventsd
178
+ .Spotlight-V100
179
+ .TemporaryItems
180
+ .Trashes
181
+ .VolumeIcon.icns
182
+ .com.apple.timemachine.donotpresent
183
+
184
+ # Directories potentially created on remote AFP share
185
+ .AppleDB
186
+ .AppleDesktop
187
+ Network Trash Folder
188
+ Temporary Items
189
+ .apdisk
190
+
191
+ *~
192
+
193
+ # temporary files which can be created if a process still has a handle open of a deleted file
194
+ .fuse_hidden*
195
+
196
+ # KDE directory preferences
197
+ .directory
198
+
199
+ # Linux trash folder which might appear on any partition or disk
200
+ .Trash-*
201
+
202
+ # .nfs files are created when an open file is removed but is still being accessed
203
+ .nfs*
model_files/README.md ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: translation
3
+ license: mit
4
+ language:
5
+ - multilingual
6
+ - af
7
+ - am
8
+ - ar
9
+ - as
10
+ - ast
11
+ - ay
12
+ - az
13
+ - ba
14
+ - be
15
+ - bg
16
+ - bn
17
+ - br
18
+ - bs
19
+ - ca
20
+ - ceb
21
+ - cjk
22
+ - cs
23
+ - cy
24
+ - da
25
+ - de
26
+ - dyu
27
+ - el
28
+ - en
29
+ - es
30
+ - et
31
+ - fa
32
+ - ff
33
+ - fi
34
+ - fr
35
+ - fy
36
+ - ga
37
+ - gd
38
+ - gl
39
+ - gu
40
+ - ha
41
+ - he
42
+ - hi
43
+ - hr
44
+ - ht
45
+ - hu
46
+ - hy
47
+ - id
48
+ - ig
49
+ - ilo
50
+ - is
51
+ - it
52
+ - ja
53
+ - jv
54
+ - ka
55
+ - kac
56
+ - kam
57
+ - kea
58
+ - kg
59
+ - kk
60
+ - km
61
+ - kmb
62
+ - kmr
63
+ - kn
64
+ - ko
65
+ - ku
66
+ - ky
67
+ - lb
68
+ - lg
69
+ - ln
70
+ - lo
71
+ - lt
72
+ - luo
73
+ - lv
74
+ - mg
75
+ - mi
76
+ - mk
77
+ - ml
78
+ - mn
79
+ - mr
80
+ - ms
81
+ - mt
82
+ - my
83
+ - ne
84
+ - nl
85
+ - no
86
+ - ns
87
+ - ny
88
+ - oc
89
+ - om
90
+ - or
91
+ - pa
92
+ - pl
93
+ - ps
94
+ - pt
95
+ - qu
96
+ - ro
97
+ - ru
98
+ - sd
99
+ - shn
100
+ - si
101
+ - sk
102
+ - sl
103
+ - sn
104
+ - so
105
+ - sq
106
+ - sr
107
+ - ss
108
+ - su
109
+ - sv
110
+ - sw
111
+ - ta
112
+ - te
113
+ - tg
114
+ - th
115
+ - ti
116
+ - tl
117
+ - tn
118
+ - tr
119
+ - uk
120
+ - umb
121
+ - ur
122
+ - uz
123
+ - vi
124
+ - wo
125
+ - xh
126
+ - yi
127
+ - yo
128
+ - zh
129
+ - zu
130
+ ---
131
+
132
+ # Flores101: Large-Scale Multilingual Machine Translation
133
+
134
+ `flores101_mm100_175M` is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation. It was introduced in this [paper](https://aclanthology.org/2022.tacl-1.30) and released in [this](https://github.com/facebookresearch/fairseq/tree/main/examples/flores101) repository.
135
+
136
+ The model architecture and config are the same as the [M2M100](https://huggingface.co/facebook/m2m100_418M) implementation, but the **tokenizer should be modified** to adjust the language codes.
137
+
138
+ ```python
139
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
140
+
141
+ hi_text = "जीवन एक चॉकलेट बॉक्स की तरह है।"
142
+ chinese_text = "生活就像一盒巧克力。"
143
+
144
+ model = M2M100ForConditionalGeneration.from_pretrained("seyoungsong/flores101_mm100_175M")
145
+ tokenizer: M2M100Tokenizer = M2M100Tokenizer.from_pretrained("seyoungsong/flores101_mm100_175M")
146
+
147
+ # FIX TOKENIZER!
148
+ tokenizer.lang_token_to_id = {t: i for t, i in zip(tokenizer.all_special_tokens, tokenizer.all_special_ids) if i > 5}
149
+ tokenizer.lang_code_to_token = {s.strip("_"): s for s in tokenizer.lang_token_to_id}
150
+ tokenizer.lang_code_to_id = {s.strip("_"): i for s, i in tokenizer.lang_token_to_id.items()}
151
+ tokenizer.id_to_lang_token = {i: s for s, i in tokenizer.lang_token_to_id.items()}
152
+
153
+ # translate Hindi to French
154
+ tokenizer.src_lang = "hi"
155
+ encoded_hi = tokenizer(hi_text, return_tensors="pt")
156
+ generated_tokens = model.generate(**encoded_hi, forced_bos_token_id=tokenizer.get_lang_id("fr"))
157
+ tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
158
+ # => "La vie est comme une boîte de chocolat."
159
+
160
+ # translate Chinese to English
161
+ tokenizer.src_lang = "zh"
162
+ encoded_zh = tokenizer(chinese_text, return_tensors="pt")
163
+ generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id("en"))
164
+ tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
165
+ # => "Life is like a chocolate box."
166
+ ```
167
+
168
+ ## Languages covered
169
+
170
+ | Language | lang code |
171
+ | ---------------- | --------- |
172
+ | Afrikaans | af |
173
+ | Amharic | am |
174
+ | Arabic | ar |
175
+ | Assamese | as |
176
+ | Asturian | ast |
177
+ | Aymara | ay |
178
+ | Azerbaijani | az |
179
+ | Bashkir | ba |
180
+ | Belarusian | be |
181
+ | Bulgarian | bg |
182
+ | Bengali | bn |
183
+ | Breton | br |
184
+ | Bosnian | bs |
185
+ | Catalan | ca |
186
+ | Cebuano | ceb |
187
+ | Chokwe | cjk |
188
+ | Czech | cs |
189
+ | Welsh | cy |
190
+ | Danish | da |
191
+ | German | de |
192
+ | Dyula | dyu |
193
+ | Greek | el |
194
+ | English | en |
195
+ | Spanish | es |
196
+ | Estonian | et |
197
+ | Persian | fa |
198
+ | Fulah | ff |
199
+ | Finnish | fi |
200
+ | French | fr |
201
+ | Western Frisian | fy |
202
+ | Irish | ga |
203
+ | Scottish Gaelic | gd |
204
+ | Galician | gl |
205
+ | Gujarati | gu |
206
+ | Hausa | ha |
207
+ | Hebrew | he |
208
+ | Hindi | hi |
209
+ | Croatian | hr |
210
+ | Haitian Creole | ht |
211
+ | Hungarian | hu |
212
+ | Armenian | hy |
213
+ | Indonesian | id |
214
+ | Igbo | ig |
215
+ | Iloko | ilo |
216
+ | Icelandic | is |
217
+ | Italian | it |
218
+ | Japanese | ja |
219
+ | Javanese | jv |
220
+ | Georgian | ka |
221
+ | Kachin | kac |
222
+ | Kamba | kam |
223
+ | Kabuverdianu | kea |
224
+ | Kongo | kg |
225
+ | Kazakh | kk |
226
+ | Central Khmer | km |
227
+ | Kimbundu | kmb |
228
+ | Northern Kurdish | kmr |
229
+ | Kannada | kn |
230
+ | Korean | ko |
231
+ | Kurdish | ku |
232
+ | Kyrgyz | ky |
233
+ | Luxembourgish | lb |
234
+ | Ganda | lg |
235
+ | Lingala | ln |
236
+ | Lao | lo |
237
+ | Lithuanian | lt |
238
+ | Luo | luo |
239
+ | Latvian | lv |
240
+ | Malagasy | mg |
241
+ | Maori | mi |
242
+ | Macedonian | mk |
243
+ | Malayalam | ml |
244
+ | Mongolian | mn |
245
+ | Marathi | mr |
246
+ | Malay | ms |
247
+ | Maltese | mt |
248
+ | Burmese | my |
249
+ | Nepali | ne |
250
+ | Dutch | nl |
251
+ | Norwegian | no |
252
+ | Northern Sotho | ns |
253
+ | Nyanja | ny |
254
+ | Occitan | oc |
255
+ | Oromo | om |
256
+ | Oriya | or |
257
+ | Punjabi | pa |
258
+ | Polish | pl |
259
+ | Pashto | ps |
260
+ | Portuguese | pt |
261
+ | Quechua | qu |
262
+ | Romanian | ro |
263
+ | Russian | ru |
264
+ | Sindhi | sd |
265
+ | Shan | shn |
266
+ | Sinhala | si |
267
+ | Slovak | sk |
268
+ | Slovenian | sl |
269
+ | Shona | sn |
270
+ | Somali | so |
271
+ | Albanian | sq |
272
+ | Serbian | sr |
273
+ | Swati | ss |
274
+ | Sundanese | su |
275
+ | Swedish | sv |
276
+ | Swahili | sw |
277
+ | Tamil | ta |
278
+ | Telugu | te |
279
+ | Tajik | tg |
280
+ | Thai | th |
281
+ | Tigrinya | ti |
282
+ | Tagalog | tl |
283
+ | Tswana | tn |
284
+ | Turkish | tr |
285
+ | Ukrainian | uk |
286
+ | Umbundu | umb |
287
+ | Urdu | ur |
288
+ | Uzbek | uz |
289
+ | Vietnamese | vi |
290
+ | Wolof | wo |
291
+ | Xhosa | xh |
292
+ | Yiddish | yi |
293
+ | Yoruba | yo |
294
+ | Chinese | zh |
295
+ | Zulu | zu |
model_files/added_tokens.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__af__": 256001,
3
+ "__am__": 256002,
4
+ "__ar__": 256003,
5
+ "__as__": 256004,
6
+ "__ast__": 256005,
7
+ "__ay__": 256006,
8
+ "__az__": 256007,
9
+ "__ba__": 256008,
10
+ "__be__": 256009,
11
+ "__bg__": 256010,
12
+ "__bn__": 256011,
13
+ "__br__": 256012,
14
+ "__bs__": 256013,
15
+ "__ca__": 256014,
16
+ "__ceb__": 256015,
17
+ "__cjk__": 256016,
18
+ "__cs__": 256017,
19
+ "__cy__": 256018,
20
+ "__da__": 256019,
21
+ "__de__": 256020,
22
+ "__dyu__": 256021,
23
+ "__el__": 256022,
24
+ "__en__": 256023,
25
+ "__es__": 256024,
26
+ "__et__": 256025,
27
+ "__fa__": 256026,
28
+ "__ff__": 256027,
29
+ "__fi__": 256028,
30
+ "__fr__": 256029,
31
+ "__fy__": 256030,
32
+ "__ga__": 256031,
33
+ "__gd__": 256032,
34
+ "__gl__": 256033,
35
+ "__gu__": 256034,
36
+ "__ha__": 256035,
37
+ "__he__": 256036,
38
+ "__hi__": 256037,
39
+ "__hr__": 256038,
40
+ "__ht__": 256039,
41
+ "__hu__": 256040,
42
+ "__hy__": 256041,
43
+ "__id__": 256042,
44
+ "__ig__": 256043,
45
+ "__ilo__": 256044,
46
+ "__is__": 256045,
47
+ "__it__": 256046,
48
+ "__ja__": 256047,
49
+ "__jv__": 256048,
50
+ "__ka__": 256049,
51
+ "__kac__": 256050,
52
+ "__kam__": 256051,
53
+ "__kea__": 256052,
54
+ "__kg__": 256053,
55
+ "__kk__": 256054,
56
+ "__km__": 256055,
57
+ "__kmb__": 256056,
58
+ "__kmr__": 256057,
59
+ "__kn__": 256058,
60
+ "__ko__": 256059,
61
+ "__ku__": 256060,
62
+ "__ky__": 256061,
63
+ "__lb__": 256062,
64
+ "__lg__": 256063,
65
+ "__ln__": 256064,
66
+ "__lo__": 256065,
67
+ "__lt__": 256066,
68
+ "__luo__": 256067,
69
+ "__lv__": 256068,
70
+ "__mg__": 256069,
71
+ "__mi__": 256070,
72
+ "__mk__": 256071,
73
+ "__ml__": 256072,
74
+ "__mn__": 256073,
75
+ "__mr__": 256074,
76
+ "__ms__": 256075,
77
+ "__mt__": 256076,
78
+ "__my__": 256077,
79
+ "__ne__": 256078,
80
+ "__nl__": 256079,
81
+ "__no__": 256080,
82
+ "__ns__": 256081,
83
+ "__ny__": 256082,
84
+ "__oc__": 256083,
85
+ "__om__": 256084,
86
+ "__or__": 256085,
87
+ "__pa__": 256086,
88
+ "__pl__": 256087,
89
+ "__ps__": 256088,
90
+ "__pt__": 256089,
91
+ "__qu__": 256090,
92
+ "__ro__": 256091,
93
+ "__ru__": 256092,
94
+ "__sd__": 256093,
95
+ "__shn__": 256094,
96
+ "__si__": 256095,
97
+ "__sk__": 256096,
98
+ "__sl__": 256097,
99
+ "__sn__": 256098,
100
+ "__so__": 256099,
101
+ "__sq__": 256100,
102
+ "__sr__": 256101,
103
+ "__ss__": 256102,
104
+ "__su__": 256103,
105
+ "__sv__": 256104,
106
+ "__sw__": 256105,
107
+ "__ta__": 256106,
108
+ "__te__": 256107,
109
+ "__tg__": 256108,
110
+ "__th__": 256109,
111
+ "__ti__": 256110,
112
+ "__tl__": 256111,
113
+ "__tn__": 256112,
114
+ "__tr__": 256113,
115
+ "__uk__": 256114,
116
+ "__umb__": 256115,
117
+ "__ur__": 256116,
118
+ "__uz__": 256117,
119
+ "__vi__": 256118,
120
+ "__wo__": 256119,
121
+ "__xh__": 256120,
122
+ "__yi__": 256121,
123
+ "__yo__": 256122,
124
+ "__zh__": 256123,
125
+ "__zu__": 256124
126
+ }
model_files/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "relu",
4
+ "architectures": [
5
+ "M2M100ForConditionalGeneration"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 0,
9
+ "d_model": 512,
10
+ "decoder_attention_heads": 16,
11
+ "decoder_ffn_dim": 2048,
12
+ "decoder_layerdrop": 0,
13
+ "decoder_layers": 6,
14
+ "decoder_start_token_id": 2,
15
+ "dropout": 0.1,
16
+ "encoder_attention_heads": 16,
17
+ "encoder_ffn_dim": 2048,
18
+ "encoder_layerdrop": 0,
19
+ "encoder_layers": 6,
20
+ "eos_token_id": 2,
21
+ "init_std": 0.02,
22
+ "is_encoder_decoder": true,
23
+ "max_position_embeddings": 1024,
24
+ "model_type": "m2m_100",
25
+ "num_hidden_layers": 6,
26
+ "pad_token_id": 1,
27
+ "scale_embedding": true,
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.35.2",
30
+ "use_cache": true,
31
+ "vocab_size": 256125
32
+ }
model_files/generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "decoder_start_token_id": 2,
5
+ "eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.35.2"
8
+ }
model_files/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1561d6c592b727a7bb5d229e2d72730bf662d83f8f1915803ca6b35b3c8399d8
3
+ size 701135520
model_files/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee4dc054a17c18fe81f76c0b1cda00e9fc1cfd9e0f1a16cb6d77009e2076653
3
+ size 4870365
model_files/special_tokens_map.json ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__af__",
4
+ "__am__",
5
+ "__ar__",
6
+ "__as__",
7
+ "__ast__",
8
+ "__ay__",
9
+ "__az__",
10
+ "__ba__",
11
+ "__be__",
12
+ "__bg__",
13
+ "__bn__",
14
+ "__br__",
15
+ "__bs__",
16
+ "__ca__",
17
+ "__ceb__",
18
+ "__cjk__",
19
+ "__cs__",
20
+ "__cy__",
21
+ "__da__",
22
+ "__de__",
23
+ "__dyu__",
24
+ "__el__",
25
+ "__en__",
26
+ "__es__",
27
+ "__et__",
28
+ "__fa__",
29
+ "__ff__",
30
+ "__fi__",
31
+ "__fr__",
32
+ "__fy__",
33
+ "__ga__",
34
+ "__gd__",
35
+ "__gl__",
36
+ "__gu__",
37
+ "__ha__",
38
+ "__he__",
39
+ "__hi__",
40
+ "__hr__",
41
+ "__ht__",
42
+ "__hu__",
43
+ "__hy__",
44
+ "__id__",
45
+ "__ig__",
46
+ "__ilo__",
47
+ "__is__",
48
+ "__it__",
49
+ "__ja__",
50
+ "__jv__",
51
+ "__ka__",
52
+ "__kac__",
53
+ "__kam__",
54
+ "__kea__",
55
+ "__kg__",
56
+ "__kk__",
57
+ "__km__",
58
+ "__kmb__",
59
+ "__kmr__",
60
+ "__kn__",
61
+ "__ko__",
62
+ "__ku__",
63
+ "__ky__",
64
+ "__lb__",
65
+ "__lg__",
66
+ "__ln__",
67
+ "__lo__",
68
+ "__lt__",
69
+ "__luo__",
70
+ "__lv__",
71
+ "__mg__",
72
+ "__mi__",
73
+ "__mk__",
74
+ "__ml__",
75
+ "__mn__",
76
+ "__mr__",
77
+ "__ms__",
78
+ "__mt__",
79
+ "__my__",
80
+ "__ne__",
81
+ "__nl__",
82
+ "__no__",
83
+ "__ns__",
84
+ "__ny__",
85
+ "__oc__",
86
+ "__om__",
87
+ "__or__",
88
+ "__pa__",
89
+ "__pl__",
90
+ "__ps__",
91
+ "__pt__",
92
+ "__qu__",
93
+ "__ro__",
94
+ "__ru__",
95
+ "__sd__",
96
+ "__shn__",
97
+ "__si__",
98
+ "__sk__",
99
+ "__sl__",
100
+ "__sn__",
101
+ "__so__",
102
+ "__sq__",
103
+ "__sr__",
104
+ "__ss__",
105
+ "__su__",
106
+ "__sv__",
107
+ "__sw__",
108
+ "__ta__",
109
+ "__te__",
110
+ "__tg__",
111
+ "__th__",
112
+ "__ti__",
113
+ "__tl__",
114
+ "__tn__",
115
+ "__tr__",
116
+ "__uk__",
117
+ "__umb__",
118
+ "__ur__",
119
+ "__uz__",
120
+ "__vi__",
121
+ "__wo__",
122
+ "__xh__",
123
+ "__yi__",
124
+ "__yo__",
125
+ "__zh__",
126
+ "__zu__"
127
+ ],
128
+ "bos_token": "<s>",
129
+ "eos_token": "</s>",
130
+ "pad_token": "<pad>",
131
+ "sep_token": "</s>",
132
+ "unk_token": "<unk>"
133
+ }
model_files/tokenizer_config.json ADDED
@@ -0,0 +1,1167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "256001": {
36
+ "content": "__af__",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "256002": {
44
+ "content": "__am__",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "256003": {
52
+ "content": "__ar__",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "256004": {
60
+ "content": "__as__",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "256005": {
68
+ "content": "__ast__",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "256006": {
76
+ "content": "__ay__",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "256007": {
84
+ "content": "__az__",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "256008": {
92
+ "content": "__ba__",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "256009": {
100
+ "content": "__be__",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "256010": {
108
+ "content": "__bg__",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "256011": {
116
+ "content": "__bn__",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "256012": {
124
+ "content": "__br__",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "256013": {
132
+ "content": "__bs__",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "256014": {
140
+ "content": "__ca__",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "256015": {
148
+ "content": "__ceb__",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "256016": {
156
+ "content": "__cjk__",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "256017": {
164
+ "content": "__cs__",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "256018": {
172
+ "content": "__cy__",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "256019": {
180
+ "content": "__da__",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "256020": {
188
+ "content": "__de__",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "256021": {
196
+ "content": "__dyu__",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "256022": {
204
+ "content": "__el__",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "256023": {
212
+ "content": "__en__",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "256024": {
220
+ "content": "__es__",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "256025": {
228
+ "content": "__et__",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "256026": {
236
+ "content": "__fa__",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "256027": {
244
+ "content": "__ff__",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "256028": {
252
+ "content": "__fi__",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "256029": {
260
+ "content": "__fr__",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "256030": {
268
+ "content": "__fy__",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "256031": {
276
+ "content": "__ga__",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "256032": {
284
+ "content": "__gd__",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "256033": {
292
+ "content": "__gl__",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "256034": {
300
+ "content": "__gu__",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "256035": {
308
+ "content": "__ha__",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "256036": {
316
+ "content": "__he__",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "256037": {
324
+ "content": "__hi__",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "256038": {
332
+ "content": "__hr__",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "256039": {
340
+ "content": "__ht__",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "256040": {
348
+ "content": "__hu__",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "256041": {
356
+ "content": "__hy__",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "256042": {
364
+ "content": "__id__",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "256043": {
372
+ "content": "__ig__",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "256044": {
380
+ "content": "__ilo__",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "256045": {
388
+ "content": "__is__",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "256046": {
396
+ "content": "__it__",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "256047": {
404
+ "content": "__ja__",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "256048": {
412
+ "content": "__jv__",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "256049": {
420
+ "content": "__ka__",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "256050": {
428
+ "content": "__kac__",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "256051": {
436
+ "content": "__kam__",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "256052": {
444
+ "content": "__kea__",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "256053": {
452
+ "content": "__kg__",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "256054": {
460
+ "content": "__kk__",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "256055": {
468
+ "content": "__km__",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "256056": {
476
+ "content": "__kmb__",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "256057": {
484
+ "content": "__kmr__",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "256058": {
492
+ "content": "__kn__",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "256059": {
500
+ "content": "__ko__",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "256060": {
508
+ "content": "__ku__",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "256061": {
516
+ "content": "__ky__",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "256062": {
524
+ "content": "__lb__",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "256063": {
532
+ "content": "__lg__",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "256064": {
540
+ "content": "__ln__",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "256065": {
548
+ "content": "__lo__",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "256066": {
556
+ "content": "__lt__",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "256067": {
564
+ "content": "__luo__",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "256068": {
572
+ "content": "__lv__",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "256069": {
580
+ "content": "__mg__",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "256070": {
588
+ "content": "__mi__",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "256071": {
596
+ "content": "__mk__",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "256072": {
604
+ "content": "__ml__",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "256073": {
612
+ "content": "__mn__",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "256074": {
620
+ "content": "__mr__",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "256075": {
628
+ "content": "__ms__",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "256076": {
636
+ "content": "__mt__",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "256077": {
644
+ "content": "__my__",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "256078": {
652
+ "content": "__ne__",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "256079": {
660
+ "content": "__nl__",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "256080": {
668
+ "content": "__no__",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "256081": {
676
+ "content": "__ns__",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "256082": {
684
+ "content": "__ny__",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "256083": {
692
+ "content": "__oc__",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "256084": {
700
+ "content": "__om__",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "256085": {
708
+ "content": "__or__",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "256086": {
716
+ "content": "__pa__",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "256087": {
724
+ "content": "__pl__",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "256088": {
732
+ "content": "__ps__",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "256089": {
740
+ "content": "__pt__",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "256090": {
748
+ "content": "__qu__",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "256091": {
756
+ "content": "__ro__",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "256092": {
764
+ "content": "__ru__",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "256093": {
772
+ "content": "__sd__",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "256094": {
780
+ "content": "__shn__",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "256095": {
788
+ "content": "__si__",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "256096": {
796
+ "content": "__sk__",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "256097": {
804
+ "content": "__sl__",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "256098": {
812
+ "content": "__sn__",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "256099": {
820
+ "content": "__so__",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "256100": {
828
+ "content": "__sq__",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "256101": {
836
+ "content": "__sr__",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "256102": {
844
+ "content": "__ss__",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "256103": {
852
+ "content": "__su__",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "256104": {
860
+ "content": "__sv__",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "256105": {
868
+ "content": "__sw__",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "256106": {
876
+ "content": "__ta__",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "256107": {
884
+ "content": "__te__",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "256108": {
892
+ "content": "__tg__",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "256109": {
900
+ "content": "__th__",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "256110": {
908
+ "content": "__ti__",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "256111": {
916
+ "content": "__tl__",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "256112": {
924
+ "content": "__tn__",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "256113": {
932
+ "content": "__tr__",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "256114": {
940
+ "content": "__uk__",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "256115": {
948
+ "content": "__umb__",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "256116": {
956
+ "content": "__ur__",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "256117": {
964
+ "content": "__uz__",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "256118": {
972
+ "content": "__vi__",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "256119": {
980
+ "content": "__wo__",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "256120": {
988
+ "content": "__xh__",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "256121": {
996
+ "content": "__yi__",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "256122": {
1004
+ "content": "__yo__",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "256123": {
1012
+ "content": "__zh__",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "256124": {
1020
+ "content": "__zu__",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ }
1027
+ },
1028
+ "additional_special_tokens": [
1029
+ "__af__",
1030
+ "__am__",
1031
+ "__ar__",
1032
+ "__as__",
1033
+ "__ast__",
1034
+ "__ay__",
1035
+ "__az__",
1036
+ "__ba__",
1037
+ "__be__",
1038
+ "__bg__",
1039
+ "__bn__",
1040
+ "__br__",
1041
+ "__bs__",
1042
+ "__ca__",
1043
+ "__ceb__",
1044
+ "__cjk__",
1045
+ "__cs__",
1046
+ "__cy__",
1047
+ "__da__",
1048
+ "__de__",
1049
+ "__dyu__",
1050
+ "__el__",
1051
+ "__en__",
1052
+ "__es__",
1053
+ "__et__",
1054
+ "__fa__",
1055
+ "__ff__",
1056
+ "__fi__",
1057
+ "__fr__",
1058
+ "__fy__",
1059
+ "__ga__",
1060
+ "__gd__",
1061
+ "__gl__",
1062
+ "__gu__",
1063
+ "__ha__",
1064
+ "__he__",
1065
+ "__hi__",
1066
+ "__hr__",
1067
+ "__ht__",
1068
+ "__hu__",
1069
+ "__hy__",
1070
+ "__id__",
1071
+ "__ig__",
1072
+ "__ilo__",
1073
+ "__is__",
1074
+ "__it__",
1075
+ "__ja__",
1076
+ "__jv__",
1077
+ "__ka__",
1078
+ "__kac__",
1079
+ "__kam__",
1080
+ "__kea__",
1081
+ "__kg__",
1082
+ "__kk__",
1083
+ "__km__",
1084
+ "__kmb__",
1085
+ "__kmr__",
1086
+ "__kn__",
1087
+ "__ko__",
1088
+ "__ku__",
1089
+ "__ky__",
1090
+ "__lb__",
1091
+ "__lg__",
1092
+ "__ln__",
1093
+ "__lo__",
1094
+ "__lt__",
1095
+ "__luo__",
1096
+ "__lv__",
1097
+ "__mg__",
1098
+ "__mi__",
1099
+ "__mk__",
1100
+ "__ml__",
1101
+ "__mn__",
1102
+ "__mr__",
1103
+ "__ms__",
1104
+ "__mt__",
1105
+ "__my__",
1106
+ "__ne__",
1107
+ "__nl__",
1108
+ "__no__",
1109
+ "__ns__",
1110
+ "__ny__",
1111
+ "__oc__",
1112
+ "__om__",
1113
+ "__or__",
1114
+ "__pa__",
1115
+ "__pl__",
1116
+ "__ps__",
1117
+ "__pt__",
1118
+ "__qu__",
1119
+ "__ro__",
1120
+ "__ru__",
1121
+ "__sd__",
1122
+ "__shn__",
1123
+ "__si__",
1124
+ "__sk__",
1125
+ "__sl__",
1126
+ "__sn__",
1127
+ "__so__",
1128
+ "__sq__",
1129
+ "__sr__",
1130
+ "__ss__",
1131
+ "__su__",
1132
+ "__sv__",
1133
+ "__sw__",
1134
+ "__ta__",
1135
+ "__te__",
1136
+ "__tg__",
1137
+ "__th__",
1138
+ "__ti__",
1139
+ "__tl__",
1140
+ "__tn__",
1141
+ "__tr__",
1142
+ "__uk__",
1143
+ "__umb__",
1144
+ "__ur__",
1145
+ "__uz__",
1146
+ "__vi__",
1147
+ "__wo__",
1148
+ "__xh__",
1149
+ "__yi__",
1150
+ "__yo__",
1151
+ "__zh__",
1152
+ "__zu__"
1153
+ ],
1154
+ "bos_token": "<s>",
1155
+ "clean_up_tokenization_spaces": true,
1156
+ "eos_token": "</s>",
1157
+ "language_codes": "m2m100",
1158
+ "model_max_length": 1024,
1159
+ "num_madeup_words": 0,
1160
+ "pad_token": "<pad>",
1161
+ "sep_token": "</s>",
1162
+ "sp_model_kwargs": {},
1163
+ "src_lang": "en",
1164
+ "tgt_lang": null,
1165
+ "tokenizer_class": "M2M100Tokenizer",
1166
+ "unk_token": "<unk>"
1167
+ }
model_files/vocab.json ADDED
The diff for this file is too large to render. See raw diff