napatswift committed on
Commit
3a04818
·
1 Parent(s): 66ff5ab

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +2 -2
  3. vocab.json +27 -49
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 52,
3
- "<s>": 51
4
  }
 
1
  {
2
+ "</s>": 30,
3
+ "<s>": 29
4
  }
tokenizer_config.json CHANGED
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "51": {
12
  "content": "<s>",
13
  "lstrip": false,
14
  "normalized": true,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "52": {
20
  "content": "</s>",
21
  "lstrip": false,
22
  "normalized": true,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "29": {
12
  "content": "<s>",
13
  "lstrip": false,
14
  "normalized": true,
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "30": {
20
  "content": "</s>",
21
  "lstrip": false,
22
  "normalized": true,
vocab.json CHANGED
@@ -1,53 +1,31 @@
1
  {
2
- "\t": 20,
3
- "'": 11,
4
- "(": 43,
5
- ")": 15,
6
- "0": 36,
7
- "1": 44,
8
- "2": 13,
9
- "3": 1,
10
- "4": 8,
11
- "5": 14,
12
- "6": 7,
13
- "7": 26,
14
- "8": 3,
15
- "9": 24,
16
- "[": 38,
17
  "[PAD]": 2,
18
  "[UNK]": 2,
19
- "]": 22,
20
- "a": 37,
21
- "b": 29,
22
- "c": 19,
23
- "d": 35,
24
- "e": 42,
25
- "f": 39,
26
- "g": 28,
27
- "h": 33,
28
- "i": 46,
29
- "j": 6,
30
- "k": 2,
31
- "l": 16,
32
- "m": 47,
33
- "n": 5,
34
- "o": 18,
35
- "p": 25,
36
- "q": 27,
37
- "r": 0,
38
- "s": 12,
39
- "t": 21,
40
- "u": 34,
41
- "v": 41,
42
- "w": 31,
43
- "x": 10,
44
- "y": 4,
45
- "z": 32,
46
- "|": 9,
47
- "é": 48,
48
- "í": 45,
49
- "ó": 23,
50
- "е": 40,
51
- "fi": 30,
52
- "fl": 17
53
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "[PAD]": 2,
3
  "[UNK]": 2,
4
+ "a": 16,
5
+ "b": 5,
6
+ "c": 23,
7
+ "d": 13,
8
+ "e": 21,
9
+ "f": 17,
10
+ "g": 1,
11
+ "h": 10,
12
+ "i": 24,
13
+ "j": 11,
14
+ "k": 4,
15
+ "l": 20,
16
+ "m": 25,
17
+ "n": 9,
18
+ "o": 22,
19
+ "p": 0,
20
+ "q": 2,
21
+ "r": 3,
22
+ "s": 18,
23
+ "t": 26,
24
+ "u": 12,
25
+ "v": 19,
26
+ "w": 7,
27
+ "x": 15,
28
+ "y": 6,
29
+ "z": 8,
30
+ "|": 14
 
 
 
 
 
 
 
31
  }