houdini001 committed on
Commit
0ec6ef2
1 Parent(s): d524499

Upload tokenizer

Browse files
Files changed (3)
  1. README.md +1 -1
  2. special_tokens_map.json +1 -1
  3. tokenizer_config.json +32 -32
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  license: mit
3
- base_model: facebook/mbart-large-50
4
  tags:
5
  - generated_from_trainer
 
6
  model-index:
7
  - name: mBART_try_v1
8
  results: []
 
1
  ---
2
  license: mit
 
3
  tags:
4
  - generated_from_trainer
5
+ base_model: facebook/mbart-large-50
6
  model-index:
7
  - name: mBART_try_v1
8
  results: []
special_tokens_map.json CHANGED
@@ -59,7 +59,7 @@
59
  "mask_token": {
60
  "content": "<mask>",
61
  "lstrip": true,
62
- "normalized": false,
63
  "rstrip": false,
64
  "single_word": false
65
  },
 
59
  "mask_token": {
60
  "content": "<mask>",
61
  "lstrip": true,
62
+ "normalized": true,
63
  "rstrip": false,
64
  "single_word": false
65
  },
tokenizer_config.json CHANGED
@@ -233,15 +233,15 @@
233
  "special": true
234
  },
235
  "250026": {
236
- "content": "<mask>",
237
- "lstrip": true,
238
  "normalized": false,
239
  "rstrip": false,
240
  "single_word": false,
241
  "special": true
242
  },
243
  "250027": {
244
- "content": "af_ZA",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": true
250
  },
251
  "250028": {
252
- "content": "az_AZ",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": true
258
  },
259
  "250029": {
260
- "content": "bn_IN",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": true
266
  },
267
  "250030": {
268
- "content": "fa_IR",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": true
274
  },
275
  "250031": {
276
- "content": "he_IL",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": true
282
  },
283
  "250032": {
284
- "content": "hr_HR",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": true
290
  },
291
  "250033": {
292
- "content": "id_ID",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": true
298
  },
299
  "250034": {
300
- "content": "ka_GE",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": true
306
  },
307
  "250035": {
308
- "content": "km_KH",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": true
314
  },
315
  "250036": {
316
- "content": "mk_MK",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": true
322
  },
323
  "250037": {
324
- "content": "ml_IN",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": true
330
  },
331
  "250038": {
332
- "content": "mn_MN",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
@@ -337,7 +337,7 @@
337
  "special": true
338
  },
339
  "250039": {
340
- "content": "mr_IN",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
@@ -345,7 +345,7 @@
345
  "special": true
346
  },
347
  "250040": {
348
- "content": "pl_PL",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
@@ -353,7 +353,7 @@
353
  "special": true
354
  },
355
  "250041": {
356
- "content": "ps_AF",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  "special": true
362
  },
363
  "250042": {
364
- "content": "pt_XX",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
@@ -369,7 +369,7 @@
369
  "special": true
370
  },
371
  "250043": {
372
- "content": "sv_SE",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
@@ -377,7 +377,7 @@
377
  "special": true
378
  },
379
  "250044": {
380
- "content": "sw_KE",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
@@ -385,7 +385,7 @@
385
  "special": true
386
  },
387
  "250045": {
388
- "content": "ta_IN",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
@@ -393,7 +393,7 @@
393
  "special": true
394
  },
395
  "250046": {
396
- "content": "te_IN",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
@@ -401,7 +401,7 @@
401
  "special": true
402
  },
403
  "250047": {
404
- "content": "th_TH",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
@@ -409,7 +409,7 @@
409
  "special": true
410
  },
411
  "250048": {
412
- "content": "tl_XX",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
@@ -417,7 +417,7 @@
417
  "special": true
418
  },
419
  "250049": {
420
- "content": "uk_UA",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
@@ -425,7 +425,7 @@
425
  "special": true
426
  },
427
  "250050": {
428
- "content": "ur_PK",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
@@ -433,7 +433,7 @@
433
  "special": true
434
  },
435
  "250051": {
436
- "content": "xh_ZA",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
@@ -441,7 +441,7 @@
441
  "special": true
442
  },
443
  "250052": {
444
- "content": "gl_ES",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
@@ -449,9 +449,9 @@
449
  "special": true
450
  },
451
  "250053": {
452
- "content": "sl_SI",
453
- "lstrip": false,
454
- "normalized": false,
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
@@ -522,6 +522,6 @@
522
  "sp_model_kwargs": {},
523
  "src_lang": "en_XX",
524
  "tgt_lang": null,
525
- "tokenizer_class": "MBartTokenizer",
526
  "unk_token": "<unk>"
527
  }
 
233
  "special": true
234
  },
235
  "250026": {
236
+ "content": "af_ZA",
237
+ "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
240
  "single_word": false,
241
  "special": true
242
  },
243
  "250027": {
244
+ "content": "az_AZ",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
 
249
  "special": true
250
  },
251
  "250028": {
252
+ "content": "bn_IN",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
 
257
  "special": true
258
  },
259
  "250029": {
260
+ "content": "fa_IR",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
 
265
  "special": true
266
  },
267
  "250030": {
268
+ "content": "he_IL",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
 
273
  "special": true
274
  },
275
  "250031": {
276
+ "content": "hr_HR",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
 
281
  "special": true
282
  },
283
  "250032": {
284
+ "content": "id_ID",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
 
289
  "special": true
290
  },
291
  "250033": {
292
+ "content": "ka_GE",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
 
297
  "special": true
298
  },
299
  "250034": {
300
+ "content": "km_KH",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
 
305
  "special": true
306
  },
307
  "250035": {
308
+ "content": "mk_MK",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
 
313
  "special": true
314
  },
315
  "250036": {
316
+ "content": "ml_IN",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
 
321
  "special": true
322
  },
323
  "250037": {
324
+ "content": "mn_MN",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
 
329
  "special": true
330
  },
331
  "250038": {
332
+ "content": "mr_IN",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
 
337
  "special": true
338
  },
339
  "250039": {
340
+ "content": "pl_PL",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
 
345
  "special": true
346
  },
347
  "250040": {
348
+ "content": "ps_AF",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
 
353
  "special": true
354
  },
355
  "250041": {
356
+ "content": "pt_XX",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
 
361
  "special": true
362
  },
363
  "250042": {
364
+ "content": "sv_SE",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
 
369
  "special": true
370
  },
371
  "250043": {
372
+ "content": "sw_KE",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
 
377
  "special": true
378
  },
379
  "250044": {
380
+ "content": "ta_IN",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
 
385
  "special": true
386
  },
387
  "250045": {
388
+ "content": "te_IN",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
 
393
  "special": true
394
  },
395
  "250046": {
396
+ "content": "th_TH",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
 
401
  "special": true
402
  },
403
  "250047": {
404
+ "content": "tl_XX",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
 
409
  "special": true
410
  },
411
  "250048": {
412
+ "content": "uk_UA",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
 
417
  "special": true
418
  },
419
  "250049": {
420
+ "content": "ur_PK",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
 
425
  "special": true
426
  },
427
  "250050": {
428
+ "content": "xh_ZA",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
 
433
  "special": true
434
  },
435
  "250051": {
436
+ "content": "gl_ES",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
 
441
  "special": true
442
  },
443
  "250052": {
444
+ "content": "sl_SI",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
 
449
  "special": true
450
  },
451
  "250053": {
452
+ "content": "<mask>",
453
+ "lstrip": true,
454
+ "normalized": true,
455
  "rstrip": false,
456
  "single_word": false,
457
  "special": true
 
522
  "sp_model_kwargs": {},
523
  "src_lang": "en_XX",
524
  "tgt_lang": null,
525
+ "tokenizer_class": "MBart50Tokenizer",
526
  "unk_token": "<unk>"
527
  }