davidilag committed on
Commit
8ac4635
·
verified ·
1 Parent(s): b63520d

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +68 -68
vocab.json CHANGED
@@ -1,73 +1,73 @@
1
  {
2
- "\n": 2,
3
- "&": 51,
4
- "'": 18,
5
- "(": 29,
6
- ")": 13,
7
- "/": 65,
8
- "0": 31,
9
- "1": 11,
10
- "2": 60,
11
- "3": 4,
12
- "4": 57,
13
- "5": 27,
14
- "6": 10,
15
- "7": 68,
16
- "8": 44,
17
- "9": 33,
18
- "[": 40,
19
  "[PAD]": 70,
20
  "[UNK]": 69,
21
- "]": 17,
22
- "a": 67,
23
- "b": 54,
24
- "c": 37,
25
- "d": 28,
26
- "e": 50,
27
- "f": 36,
28
- "g": 59,
29
- "h": 14,
30
- "i": 34,
31
- "j": 19,
32
- "k": 5,
33
- "l": 25,
34
- "m": 32,
35
- "n": 16,
36
- "o": 38,
37
- "p": 46,
38
- "q": 53,
39
- "r": 45,
40
- "s": 26,
41
- "t": 62,
42
- "u": 43,
43
- "v": 63,
44
- "w": 39,
45
- "x": 22,
46
- "y": 52,
47
- "z": 47,
48
- "|": 56,
49
- " ": 3,
50
- "´": 48,
51
- "ß": 42,
52
- "à": 64,
53
- "á": 24,
54
- "â": 23,
55
- "ä": 9,
56
- "ç": 66,
57
- "è": 41,
58
- "é": 6,
59
- "ê": 58,
60
- "ë": 15,
61
- "î": 61,
62
- "ï": 21,
63
- "ô": 55,
64
  "ö": 35,
65
- "û": 7,
66
- "ü": 49,
67
- "ě": 8,
68
- "ğ": 30,
69
- "‒": 20,
70
- "–": 1,
71
- "’": 0,
72
- "„": 12
73
  }
 
1
  {
2
+ "\n": 42,
3
+ "&": 66,
4
+ "'": 61,
5
+ "(": 32,
6
+ ")": 16,
7
+ "/": 11,
8
+ "0": 48,
9
+ "1": 1,
10
+ "2": 27,
11
+ "3": 20,
12
+ "4": 45,
13
+ "5": 43,
14
+ "6": 34,
15
+ "7": 21,
16
+ "8": 39,
17
+ "9": 54,
18
+ "[": 37,
19
  "[PAD]": 70,
20
  "[UNK]": 69,
21
+ "]": 40,
22
+ "a": 33,
23
+ "b": 58,
24
+ "c": 46,
25
+ "d": 7,
26
+ "e": 63,
27
+ "f": 57,
28
+ "g": 18,
29
+ "h": 15,
30
+ "i": 25,
31
+ "j": 31,
32
+ "k": 44,
33
+ "l": 59,
34
+ "m": 17,
35
+ "n": 10,
36
+ "o": 6,
37
+ "p": 13,
38
+ "q": 47,
39
+ "r": 22,
40
+ "s": 38,
41
+ "t": 67,
42
+ "u": 51,
43
+ "v": 30,
44
+ "w": 68,
45
+ "x": 14,
46
+ "y": 26,
47
+ "z": 5,
48
+ "|": 60,
49
+ " ": 36,
50
+ "´": 0,
51
+ "ß": 9,
52
+ "à": 52,
53
+ "á": 49,
54
+ "â": 55,
55
+ "ä": 23,
56
+ "ç": 56,
57
+ "è": 8,
58
+ "é": 4,
59
+ "ê": 29,
60
+ "ë": 28,
61
+ "î": 2,
62
+ "ï": 3,
63
+ "ô": 12,
64
  "ö": 35,
65
+ "û": 53,
66
+ "ü": 64,
67
+ "ě": 50,
68
+ "ğ": 24,
69
+ "‒": 19,
70
+ "–": 62,
71
+ "’": 65,
72
+ "„": 41
73
  }