samikhan121 committed on
Commit
895d23c
·
verified ·
1 Parent(s): 1df998a

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -13
  2. tokenizer_config.json +1 -97
  3. vocab.json +3 -0
added_tokens.json CHANGED
@@ -1,15 +1,3 @@
1
  {
2
- "<unk>": 74,
3
- "ড়": 76,
4
- "ঢ়": 86,
5
- "য়": 84,
6
- "১": 83,
7
- "২": 79,
8
- "৩": 85,
9
- "৪": 82,
10
- "৫": 75,
11
- "৬": 77,
12
- "৭": 81,
13
- "৮": 80,
14
- "৯": 78
15
  }
 
1
  {
2
+ "<unk>": 77
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
tokenizer_config.json CHANGED
@@ -9,109 +9,13 @@
9
  "single_word": false,
10
  "special": true
11
  },
12
- "74": {
13
  "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
- },
20
- "75": {
21
- "content": "৫",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": false
27
- },
28
- "76": {
29
- "content": "ড়",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": false
35
- },
36
- "77": {
37
- "content": "৬",
38
- "lstrip": false,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": false
43
- },
44
- "78": {
45
- "content": "৯",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": false
51
- },
52
- "79": {
53
- "content": "২",
54
- "lstrip": false,
55
- "normalized": true,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": false
59
- },
60
- "80": {
61
- "content": "৮",
62
- "lstrip": false,
63
- "normalized": true,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": false
67
- },
68
- "81": {
69
- "content": "৭",
70
- "lstrip": false,
71
- "normalized": true,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": false
75
- },
76
- "82": {
77
- "content": "৪",
78
- "lstrip": false,
79
- "normalized": true,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": false
83
- },
84
- "83": {
85
- "content": "১",
86
- "lstrip": false,
87
- "normalized": true,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": false
91
- },
92
- "84": {
93
- "content": "য়",
94
- "lstrip": false,
95
- "normalized": true,
96
- "rstrip": false,
97
- "single_word": false,
98
- "special": false
99
- },
100
- "85": {
101
- "content": "৩",
102
- "lstrip": false,
103
- "normalized": true,
104
- "rstrip": false,
105
- "single_word": false,
106
- "special": false
107
- },
108
- "86": {
109
- "content": "ঢ়",
110
- "lstrip": false,
111
- "normalized": true,
112
- "rstrip": false,
113
- "single_word": false,
114
- "special": false
115
  }
116
  },
117
  "clean_up_tokenization_spaces": true,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "77": {
13
  "content": "<unk>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  },
21
  "clean_up_tokenization_spaces": true,
vocab.json CHANGED
@@ -40,7 +40,9 @@
40
  "ট": 55,
41
  "ঠ": 73,
42
  "ড": 21,
 
43
  "ঢ": 22,
 
44
  "ণ": 51,
45
  "ত": 42,
46
  "থ": 14,
@@ -53,6 +55,7 @@
53
  "ভ": 16,
54
  "ম": 41,
55
  "য": 38,
 
56
  "র": 69,
57
  "ল": 10,
58
  "শ": 48,
 
40
  "ট": 55,
41
  "ঠ": 73,
42
  "ড": 21,
43
+ "ড়": 74,
44
  "ঢ": 22,
45
+ "ঢ়": 76,
46
  "ণ": 51,
47
  "ত": 42,
48
  "থ": 14,
 
55
  "ভ": 16,
56
  "ম": 41,
57
  "য": 38,
58
+ "য়": 75,
59
  "র": 69,
60
  "ল": 10,
61
  "শ": 48,