miscovery committed on
Commit
31b9596
·
1 Parent(s): ce91bfe

Updated Model

Browse files
config.json CHANGED
@@ -3,18 +3,18 @@
3
  "CustomTransformerModel"
4
  ],
5
  "bos_token_id": 2,
6
- "d_ff": 3072,
7
- "d_model": 768,
8
- "dropout": 0.1,
9
  "eos_token_id": 3,
10
- "max_position_embeddings": 512,
11
  "model_type": "miscovery",
12
- "num_decoder_layers": 12,
13
- "num_encoder_layers": 12,
14
- "num_heads": 12,
15
  "pad_token_id": 0,
16
  "torch_dtype": "float32",
17
- "transformers_version": "4.51.3",
18
  "use_flash_attn": true,
19
- "vocab_size": 100000
20
  }
 
3
  "CustomTransformerModel"
4
  ],
5
  "bos_token_id": 2,
6
+ "d_ff": 2048,
7
+ "d_model": 512,
8
+ "dropout": 0.05,
9
  "eos_token_id": 3,
10
+ "max_position_embeddings": 300,
11
  "model_type": "miscovery",
12
+ "num_decoder_layers": 8,
13
+ "num_encoder_layers": 8,
14
+ "num_heads": 8,
15
  "pad_token_id": 0,
16
  "torch_dtype": "float32",
17
+ "transformers_version": "4.35.2",
18
  "use_flash_attn": true,
19
+ "vocab_size": 50000
20
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 2,
4
  "eos_token_id": 3,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.51.3"
7
  }
 
3
  "bos_token_id": 2,
4
  "eos_token_id": 3,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.35.2"
7
  }
special_tokens_map.json CHANGED
@@ -2,7 +2,21 @@
2
  "additional_special_tokens": [
3
  "[LANG_EN]",
4
  "[LANG_AR]",
5
- "[LANG_AR_EG]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ],
7
  "cls_token": {
8
  "content": "[CLS]",
 
2
  "additional_special_tokens": [
3
  "[LANG_EN]",
4
  "[LANG_AR]",
5
+ "[LANG_AR_EG]",
6
+ "[TRANSLATION]",
7
+ "[SUMMARIZATION]",
8
+ "[PARAPHRASING]",
9
+ "[INSTRUCTIONS]",
10
+ "[CALCULATE]",
11
+ "[REORDER]",
12
+ "[QUESTION]",
13
+ "[ANSWER]",
14
+ "[CHOICES]",
15
+ "[START_OPTION]",
16
+ "[END_OPTION]",
17
+ "[REASONING]",
18
+ "[START_THINKING]",
19
+ "[END_THINKING]"
20
  ],
21
  "cls_token": {
22
  "content": "[CLS]",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -63,20 +63,145 @@
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "additional_special_tokens": [
69
  "[LANG_EN]",
70
  "[LANG_AR]",
71
- "[LANG_AR_EG]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  ],
73
  "clean_up_tokenization_spaces": true,
74
  "cls_token": "[CLS]",
75
- "extra_special_tokens": {},
76
  "mask_token": "[MASK]",
77
  "model_max_length": 1000000000000000019884624838656,
78
  "pad_token": "[PAD]",
79
  "sep_token": "[SEP]",
80
- "tokenizer_class": "PreTrainedTokenizer",
81
  "unk_token": "[UNK]"
82
  }
 
63
  "rstrip": false,
64
  "single_word": false,
65
  "special": true
66
+ },
67
+ "8": {
68
+ "content": "[TRANSLATION]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "[SUMMARIZATION]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "[PARAPHRASING]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "[INSTRUCTIONS]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "[CALCULATE]",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "[REORDER]",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "[QUESTION]",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "[ANSWER]",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "[CHOICES]",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "[START_OPTION]",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "[END_OPTION]",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "[REASONING]",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "[START_THINKING]",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "[END_THINKING]",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
  }
179
  },
180
  "additional_special_tokens": [
181
  "[LANG_EN]",
182
  "[LANG_AR]",
183
+ "[LANG_AR_EG]",
184
+ "[TRANSLATION]",
185
+ "[SUMMARIZATION]",
186
+ "[PARAPHRASING]",
187
+ "[INSTRUCTIONS]",
188
+ "[CALCULATE]",
189
+ "[REORDER]",
190
+ "[QUESTION]",
191
+ "[ANSWER]",
192
+ "[CHOICES]",
193
+ "[START_OPTION]",
194
+ "[END_OPTION]",
195
+ "[REASONING]",
196
+ "[START_THINKING]",
197
+ "[END_THINKING]"
198
  ],
199
  "clean_up_tokenization_spaces": true,
200
  "cls_token": "[CLS]",
 
201
  "mask_token": "[MASK]",
202
  "model_max_length": 1000000000000000019884624838656,
203
  "pad_token": "[PAD]",
204
  "sep_token": "[SEP]",
205
+ "tokenizer_class": "PreTrainedTokenizerFast",
206
  "unk_token": "[UNK]"
207
  }