add model
Browse files- README.md +204 -504
- config.json +1 -1
- tf_model.h5 +2 -2
README.md
CHANGED
@@ -13,9 +13,9 @@ probably proofread and complete it, then remove this comment. -->
|
|
13 |
|
14 |
This model is a fine-tuned version of [MJ199999/gpt3_model](https://huggingface.co/MJ199999/gpt3_model) on an unknown dataset.
|
15 |
It achieves the following results on the training set at the final epoch (no evaluation-set metrics are reported):
|
16 |
-
- Train Loss: 0.
|
17 |
-
- Train Lr:
|
18 |
-
- Epoch:
|
19 |
|
20 |
## Model description
|
21 |
|
@@ -34,513 +34,213 @@ More information needed
|
|
34 |
### Training hyperparameters
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
-
- optimizer: {'name': 'Adagrad', 'learning_rate':
|
38 |
- training_precision: float32
|
39 |
|
40 |
### Training results
|
41 |
|
42 |
| Train Loss | Train Lr | Epoch |
|
43 |
|:----------:|:------------:|:-----:|
|
44 |
-
| 6.
|
45 |
-
| 5.
|
46 |
-
| 5.
|
47 |
-
| 4.
|
48 |
-
| 3.
|
49 |
-
| 3.
|
50 |
-
|
|
51 |
-
| 2.
|
52 |
-
| 2.
|
53 |
-
| 2.
|
54 |
-
|
|
55 |
-
| 1.
|
56 |
-
| 1.
|
57 |
-
| 1.
|
58 |
-
| 1.
|
59 |
-
| 1.
|
60 |
-
| 1.
|
61 |
-
| 1.
|
62 |
-
| 1.
|
63 |
-
| 1.
|
64 |
-
| 1.
|
65 |
-
| 1.
|
66 |
-
| 1.
|
67 |
-
| 1.
|
68 |
-
| 1.
|
69 |
-
| 1.
|
70 |
-
| 1.
|
71 |
-
|
|
72 |
-
|
|
73 |
-
|
|
74 |
-
|
|
75 |
-
|
|
76 |
-
|
|
77 |
-
|
|
78 |
-
|
|
79 |
-
|
|
80 |
-
|
|
81 |
-
|
|
82 |
-
|
|
83 |
-
|
|
84 |
-
|
|
85 |
-
|
|
86 |
-
|
|
87 |
-
|
|
88 |
-
|
|
89 |
-
|
|
90 |
-
|
|
91 |
-
|
|
92 |
-
|
|
93 |
-
|
|
94 |
-
|
|
95 |
-
|
|
96 |
-
|
|
97 |
-
|
|
98 |
-
|
|
99 |
-
|
|
100 |
-
|
|
101 |
-
|
|
102 |
-
|
|
103 |
-
|
|
104 |
-
|
|
105 |
-
|
|
106 |
-
|
|
107 |
-
|
|
108 |
-
|
|
109 |
-
|
|
110 |
-
|
|
111 |
-
|
|
112 |
-
|
|
113 |
-
|
|
114 |
-
|
|
115 |
-
|
|
116 |
-
|
|
117 |
-
|
|
118 |
-
|
|
119 |
-
|
|
120 |
-
|
|
121 |
-
|
|
122 |
-
|
|
123 |
-
|
|
124 |
-
|
|
125 |
-
|
|
126 |
-
|
|
127 |
-
|
|
128 |
-
|
|
129 |
-
|
|
130 |
-
|
|
131 |
-
|
|
132 |
-
|
|
133 |
-
|
|
134 |
-
|
|
135 |
-
|
|
136 |
-
|
|
137 |
-
|
|
138 |
-
|
|
139 |
-
|
|
140 |
-
|
|
141 |
-
|
|
142 |
-
|
|
143 |
-
|
|
144 |
-
|
|
145 |
-
|
|
146 |
-
|
|
147 |
-
|
|
148 |
-
|
|
149 |
-
|
|
150 |
-
|
|
151 |
-
|
|
152 |
-
|
|
153 |
-
|
|
154 |
-
|
|
155 |
-
|
|
156 |
-
|
|
157 |
-
|
|
158 |
-
|
|
159 |
-
|
|
160 |
-
|
|
161 |
-
|
|
162 |
-
|
|
163 |
-
|
|
164 |
-
|
|
165 |
-
|
|
166 |
-
|
|
167 |
-
|
|
168 |
-
|
|
169 |
-
|
|
170 |
-
|
|
171 |
-
|
|
172 |
-
|
|
173 |
-
|
|
174 |
-
|
|
175 |
-
|
|
176 |
-
|
|
177 |
-
|
|
178 |
-
|
|
179 |
-
|
|
180 |
-
|
|
181 |
-
|
|
182 |
-
|
|
183 |
-
|
|
184 |
-
|
|
185 |
-
|
|
186 |
-
|
|
187 |
-
|
|
188 |
-
|
|
189 |
-
|
|
190 |
-
|
|
191 |
-
|
|
192 |
-
|
|
193 |
-
|
|
194 |
-
| 0.
|
195 |
-
| 0.
|
196 |
-
| 0.
|
197 |
-
| 0.
|
198 |
-
| 0.
|
199 |
-
| 0.
|
200 |
-
| 0.
|
201 |
-
| 0.
|
202 |
-
| 0.
|
203 |
-
| 0.
|
204 |
-
| 0.
|
205 |
-
| 0.
|
206 |
-
| 0.
|
207 |
-
| 0.
|
208 |
-
| 0.
|
209 |
-
| 0.
|
210 |
-
| 0.
|
211 |
-
| 0.
|
212 |
-
| 0.
|
213 |
-
| 0.
|
214 |
-
| 0.
|
215 |
-
| 0.
|
216 |
-
| 0.
|
217 |
-
| 0.
|
218 |
-
| 0.
|
219 |
-
| 0.
|
220 |
-
| 0.
|
221 |
-
| 0.
|
222 |
-
| 0.
|
223 |
-
| 0.
|
224 |
-
| 0.
|
225 |
-
| 0.
|
226 |
-
| 0.
|
227 |
-
| 0.
|
228 |
-
| 0.
|
229 |
-
| 0.
|
230 |
-
| 0.
|
231 |
-
| 0.
|
232 |
-
| 0.
|
233 |
-
| 0.
|
234 |
-
| 0.
|
235 |
-
| 0.
|
236 |
-
| 0.
|
237 |
-
| 0.
|
238 |
-
| 0.
|
239 |
-
| 0.
|
240 |
-
| 0.
|
241 |
-
| 0.
|
242 |
-
| 0.
|
243 |
-
| 0.
|
244 |
-
| 0.6017 | 0.0009999999 | 200 |
|
245 |
-
| 0.6024 | 0.0009999999 | 201 |
|
246 |
-
| 0.5955 | 0.0009999999 | 202 |
|
247 |
-
| 0.5990 | 0.0009999999 | 203 |
|
248 |
-
| 0.5967 | 0.0009999999 | 204 |
|
249 |
-
| 0.5994 | 0.0009999999 | 205 |
|
250 |
-
| 0.5956 | 0.0009999999 | 206 |
|
251 |
-
| 0.5982 | 0.0009999999 | 207 |
|
252 |
-
| 0.5931 | 0.0009999999 | 208 |
|
253 |
-
| 0.5974 | 0.0009999999 | 209 |
|
254 |
-
| 0.5947 | 0.0009999999 | 210 |
|
255 |
-
| 0.5957 | 0.0009999999 | 211 |
|
256 |
-
| 0.5972 | 0.0009999999 | 212 |
|
257 |
-
| 0.5922 | 0.0009999999 | 213 |
|
258 |
-
| 0.5977 | 0.0009999999 | 214 |
|
259 |
-
| 0.5925 | 0.0009999999 | 215 |
|
260 |
-
| 0.5922 | 0.0009999999 | 216 |
|
261 |
-
| 0.5965 | 0.0009999999 | 217 |
|
262 |
-
| 0.5949 | 0.0009999999 | 218 |
|
263 |
-
| 0.5950 | 0.0009999999 | 219 |
|
264 |
-
| 0.5972 | 0.0009999999 | 220 |
|
265 |
-
| 0.5936 | 0.0009999999 | 221 |
|
266 |
-
| 0.5909 | 0.0009999999 | 222 |
|
267 |
-
| 0.5920 | 0.0009999999 | 223 |
|
268 |
-
| 0.5923 | 0.0009999999 | 224 |
|
269 |
-
| 0.5895 | 0.0009999999 | 225 |
|
270 |
-
| 0.5932 | 0.0009999999 | 226 |
|
271 |
-
| 0.5915 | 0.0009999999 | 227 |
|
272 |
-
| 0.5967 | 0.0009999999 | 228 |
|
273 |
-
| 0.5935 | 0.0009999999 | 229 |
|
274 |
-
| 0.5957 | 0.0009999999 | 230 |
|
275 |
-
| 0.5926 | 0.0009999999 | 231 |
|
276 |
-
| 0.5938 | 0.0009999999 | 232 |
|
277 |
-
| 0.5921 | 0.0009999999 | 233 |
|
278 |
-
| 0.5943 | 0.0009999999 | 234 |
|
279 |
-
| 0.5939 | 0.0009999999 | 235 |
|
280 |
-
| 0.5951 | 0.0009999999 | 236 |
|
281 |
-
| 0.5918 | 0.0009999999 | 237 |
|
282 |
-
| 0.5942 | 0.0009999999 | 238 |
|
283 |
-
| 0.5919 | 0.0009999999 | 239 |
|
284 |
-
| 0.5950 | 0.0009999999 | 240 |
|
285 |
-
| 0.5921 | 0.0009999999 | 241 |
|
286 |
-
| 0.5919 | 0.0009999999 | 242 |
|
287 |
-
| 0.5930 | 0.0009999999 | 243 |
|
288 |
-
| 0.5893 | 0.0009999999 | 244 |
|
289 |
-
| 0.5912 | 0.0009999999 | 245 |
|
290 |
-
| 0.5911 | 0.0009999999 | 246 |
|
291 |
-
| 0.5903 | 0.0009999999 | 247 |
|
292 |
-
| 0.5913 | 0.0009999999 | 248 |
|
293 |
-
| 0.5916 | 0.0009999999 | 249 |
|
294 |
-
| 0.5912 | 0.0009999999 | 250 |
|
295 |
-
| 0.5893 | 0.0009999999 | 251 |
|
296 |
-
| 0.5917 | 0.0009999999 | 252 |
|
297 |
-
| 0.5930 | 0.0009999999 | 253 |
|
298 |
-
| 0.5891 | 0.0009999999 | 254 |
|
299 |
-
| 0.5901 | 0.0009999999 | 255 |
|
300 |
-
| 0.5894 | 0.0009999999 | 256 |
|
301 |
-
| 0.5908 | 0.0009999999 | 257 |
|
302 |
-
| 0.5904 | 0.0009999999 | 258 |
|
303 |
-
| 0.5890 | 0.0009999999 | 259 |
|
304 |
-
| 0.5913 | 0.0009999999 | 260 |
|
305 |
-
| 0.5896 | 0.0009999999 | 261 |
|
306 |
-
| 0.5876 | 0.0009999999 | 262 |
|
307 |
-
| 0.5914 | 0.0009999999 | 263 |
|
308 |
-
| 0.5882 | 0.0009999999 | 264 |
|
309 |
-
| 0.5926 | 0.0009999999 | 265 |
|
310 |
-
| 0.5926 | 0.0009999999 | 266 |
|
311 |
-
| 0.5929 | 0.0009999999 | 267 |
|
312 |
-
| 0.5915 | 0.0009999999 | 268 |
|
313 |
-
| 0.5896 | 0.0009999999 | 269 |
|
314 |
-
| 0.5900 | 0.0009999999 | 270 |
|
315 |
-
| 0.5910 | 0.0009999999 | 271 |
|
316 |
-
| 0.5935 | 0.0009999999 | 272 |
|
317 |
-
| 0.5914 | 0.0009999999 | 273 |
|
318 |
-
| 0.5885 | 0.0009999999 | 274 |
|
319 |
-
| 0.5907 | 0.0009999999 | 275 |
|
320 |
-
| 0.5911 | 0.0009999999 | 276 |
|
321 |
-
| 0.5878 | 0.0009999999 | 277 |
|
322 |
-
| 0.5908 | 0.0009999999 | 278 |
|
323 |
-
| 0.5928 | 0.0009999999 | 279 |
|
324 |
-
| 0.5957 | 0.0009999999 | 280 |
|
325 |
-
| 0.5901 | 0.0009999999 | 281 |
|
326 |
-
| 0.5899 | 0.0009999999 | 282 |
|
327 |
-
| 0.5906 | 0.0009999999 | 283 |
|
328 |
-
| 0.5911 | 0.0009999999 | 284 |
|
329 |
-
| 0.5935 | 0.0009999999 | 285 |
|
330 |
-
| 0.5860 | 0.0009999999 | 286 |
|
331 |
-
| 0.5863 | 0.0009999999 | 287 |
|
332 |
-
| 0.5866 | 0.0009999999 | 288 |
|
333 |
-
| 0.5916 | 0.0009999999 | 289 |
|
334 |
-
| 0.5914 | 0.0009999999 | 290 |
|
335 |
-
| 0.5866 | 0.0009999999 | 291 |
|
336 |
-
| 0.5904 | 0.0009999999 | 292 |
|
337 |
-
| 0.5917 | 0.0009999999 | 293 |
|
338 |
-
| 0.5864 | 0.0009999999 | 294 |
|
339 |
-
| 0.5857 | 0.0009999999 | 295 |
|
340 |
-
| 0.5902 | 0.0009999999 | 296 |
|
341 |
-
| 0.5874 | 0.0009999999 | 297 |
|
342 |
-
| 0.5912 | 0.0009999999 | 298 |
|
343 |
-
| 0.5904 | 0.0009999999 | 299 |
|
344 |
-
| 0.5847 | 9.999999e-05 | 300 |
|
345 |
-
| 0.5861 | 9.999999e-05 | 301 |
|
346 |
-
| 0.5844 | 9.999999e-05 | 302 |
|
347 |
-
| 0.5896 | 9.999999e-05 | 303 |
|
348 |
-
| 0.5837 | 9.999999e-05 | 304 |
|
349 |
-
| 0.5847 | 9.999999e-05 | 305 |
|
350 |
-
| 0.5854 | 9.999999e-05 | 306 |
|
351 |
-
| 0.5854 | 9.999999e-05 | 307 |
|
352 |
-
| 0.5865 | 9.999999e-05 | 308 |
|
353 |
-
| 0.5862 | 9.999999e-05 | 309 |
|
354 |
-
| 0.5857 | 9.999999e-05 | 310 |
|
355 |
-
| 0.5811 | 9.999999e-05 | 311 |
|
356 |
-
| 0.5843 | 9.999999e-05 | 312 |
|
357 |
-
| 0.5860 | 9.999999e-05 | 313 |
|
358 |
-
| 0.5874 | 9.999999e-05 | 314 |
|
359 |
-
| 0.5847 | 9.999999e-05 | 315 |
|
360 |
-
| 0.5834 | 9.999999e-05 | 316 |
|
361 |
-
| 0.5879 | 9.999999e-05 | 317 |
|
362 |
-
| 0.5826 | 9.999999e-05 | 318 |
|
363 |
-
| 0.5857 | 9.999999e-05 | 319 |
|
364 |
-
| 0.5861 | 9.999999e-05 | 320 |
|
365 |
-
| 0.5859 | 9.999999e-05 | 321 |
|
366 |
-
| 0.5833 | 9.999999e-05 | 322 |
|
367 |
-
| 0.5847 | 9.999999e-05 | 323 |
|
368 |
-
| 0.5873 | 9.999999e-05 | 324 |
|
369 |
-
| 0.5856 | 9.999999e-05 | 325 |
|
370 |
-
| 0.5865 | 9.999999e-05 | 326 |
|
371 |
-
| 0.5870 | 9.999999e-05 | 327 |
|
372 |
-
| 0.5842 | 9.999999e-05 | 328 |
|
373 |
-
| 0.5824 | 9.999999e-05 | 329 |
|
374 |
-
| 0.5832 | 9.999999e-05 | 330 |
|
375 |
-
| 0.5888 | 9.999999e-05 | 331 |
|
376 |
-
| 0.5857 | 9.999999e-05 | 332 |
|
377 |
-
| 0.5851 | 9.999999e-05 | 333 |
|
378 |
-
| 0.5866 | 9.999999e-05 | 334 |
|
379 |
-
| 0.5844 | 9.999999e-05 | 335 |
|
380 |
-
| 0.5806 | 9.999999e-05 | 336 |
|
381 |
-
| 0.5869 | 9.999999e-05 | 337 |
|
382 |
-
| 0.5842 | 9.999999e-05 | 338 |
|
383 |
-
| 0.5871 | 9.999999e-05 | 339 |
|
384 |
-
| 0.5843 | 9.999999e-05 | 340 |
|
385 |
-
| 0.5837 | 9.999999e-05 | 341 |
|
386 |
-
| 0.5830 | 9.999999e-05 | 342 |
|
387 |
-
| 0.5839 | 9.999999e-05 | 343 |
|
388 |
-
| 0.5831 | 9.999999e-05 | 344 |
|
389 |
-
| 0.5834 | 9.999999e-05 | 345 |
|
390 |
-
| 0.5844 | 9.999999e-05 | 346 |
|
391 |
-
| 0.5828 | 9.999999e-05 | 347 |
|
392 |
-
| 0.5871 | 9.999999e-05 | 348 |
|
393 |
-
| 0.5871 | 9.999999e-05 | 349 |
|
394 |
-
| 0.5881 | 9.999999e-05 | 350 |
|
395 |
-
| 0.5828 | 9.999999e-05 | 351 |
|
396 |
-
| 0.5857 | 9.999999e-05 | 352 |
|
397 |
-
| 0.5855 | 9.999999e-05 | 353 |
|
398 |
-
| 0.5879 | 9.999999e-05 | 354 |
|
399 |
-
| 0.5828 | 9.999999e-05 | 355 |
|
400 |
-
| 0.5868 | 9.999999e-05 | 356 |
|
401 |
-
| 0.5865 | 9.999999e-05 | 357 |
|
402 |
-
| 0.5858 | 9.999999e-05 | 358 |
|
403 |
-
| 0.5857 | 9.999999e-05 | 359 |
|
404 |
-
| 0.5860 | 9.999999e-05 | 360 |
|
405 |
-
| 0.5860 | 9.999999e-05 | 361 |
|
406 |
-
| 0.5852 | 9.999999e-05 | 362 |
|
407 |
-
| 0.5845 | 9.999999e-05 | 363 |
|
408 |
-
| 0.5831 | 9.999999e-05 | 364 |
|
409 |
-
| 0.5878 | 9.999999e-05 | 365 |
|
410 |
-
| 0.5877 | 9.999999e-05 | 366 |
|
411 |
-
| 0.5892 | 9.999999e-05 | 367 |
|
412 |
-
| 0.5845 | 9.999999e-05 | 368 |
|
413 |
-
| 0.5878 | 9.999999e-05 | 369 |
|
414 |
-
| 0.5831 | 9.999999e-05 | 370 |
|
415 |
-
| 0.5841 | 9.999999e-05 | 371 |
|
416 |
-
| 0.5856 | 9.999999e-05 | 372 |
|
417 |
-
| 0.5902 | 9.999999e-05 | 373 |
|
418 |
-
| 0.5850 | 9.999999e-05 | 374 |
|
419 |
-
| 0.5850 | 9.999999e-05 | 375 |
|
420 |
-
| 0.5864 | 9.999999e-05 | 376 |
|
421 |
-
| 0.5855 | 9.999999e-05 | 377 |
|
422 |
-
| 0.5850 | 9.999999e-05 | 378 |
|
423 |
-
| 0.5877 | 9.999999e-05 | 379 |
|
424 |
-
| 0.5836 | 9.999999e-05 | 380 |
|
425 |
-
| 0.5787 | 9.999999e-05 | 381 |
|
426 |
-
| 0.5847 | 9.999999e-05 | 382 |
|
427 |
-
| 0.5818 | 9.999999e-05 | 383 |
|
428 |
-
| 0.5854 | 9.999999e-05 | 384 |
|
429 |
-
| 0.5829 | 9.999999e-05 | 385 |
|
430 |
-
| 0.5841 | 9.999999e-05 | 386 |
|
431 |
-
| 0.5855 | 9.999999e-05 | 387 |
|
432 |
-
| 0.5830 | 9.999999e-05 | 388 |
|
433 |
-
| 0.5861 | 9.999999e-05 | 389 |
|
434 |
-
| 0.5833 | 9.999999e-05 | 390 |
|
435 |
-
| 0.5850 | 9.999999e-05 | 391 |
|
436 |
-
| 0.5832 | 9.999999e-05 | 392 |
|
437 |
-
| 0.5837 | 9.999999e-05 | 393 |
|
438 |
-
| 0.5830 | 9.999999e-05 | 394 |
|
439 |
-
| 0.5871 | 9.999999e-05 | 395 |
|
440 |
-
| 0.5858 | 9.999999e-05 | 396 |
|
441 |
-
| 0.5840 | 9.999999e-05 | 397 |
|
442 |
-
| 0.5827 | 9.999999e-05 | 398 |
|
443 |
-
| 0.5860 | 9.999999e-05 | 399 |
|
444 |
-
| 0.5847 | 9.999999e-06 | 400 |
|
445 |
-
| 0.5800 | 9.999999e-06 | 401 |
|
446 |
-
| 0.5867 | 9.999999e-06 | 402 |
|
447 |
-
| 0.5842 | 9.999999e-06 | 403 |
|
448 |
-
| 0.5865 | 9.999999e-06 | 404 |
|
449 |
-
| 0.5828 | 9.999999e-06 | 405 |
|
450 |
-
| 0.5832 | 9.999999e-06 | 406 |
|
451 |
-
| 0.5840 | 9.999999e-06 | 407 |
|
452 |
-
| 0.5841 | 9.999999e-06 | 408 |
|
453 |
-
| 0.5876 | 9.999999e-06 | 409 |
|
454 |
-
| 0.5856 | 9.999999e-06 | 410 |
|
455 |
-
| 0.5789 | 9.999999e-06 | 411 |
|
456 |
-
| 0.5846 | 9.999999e-06 | 412 |
|
457 |
-
| 0.5850 | 9.999999e-06 | 413 |
|
458 |
-
| 0.5847 | 9.999999e-06 | 414 |
|
459 |
-
| 0.5845 | 9.999999e-06 | 415 |
|
460 |
-
| 0.5854 | 9.999999e-06 | 416 |
|
461 |
-
| 0.5857 | 9.999999e-06 | 417 |
|
462 |
-
| 0.5842 | 9.999999e-06 | 418 |
|
463 |
-
| 0.5842 | 9.999999e-06 | 419 |
|
464 |
-
| 0.5795 | 9.999999e-06 | 420 |
|
465 |
-
| 0.5859 | 9.999999e-06 | 421 |
|
466 |
-
| 0.5848 | 9.999999e-06 | 422 |
|
467 |
-
| 0.5812 | 9.999999e-06 | 423 |
|
468 |
-
| 0.5850 | 9.999999e-06 | 424 |
|
469 |
-
| 0.5855 | 9.999999e-06 | 425 |
|
470 |
-
| 0.5863 | 9.999999e-06 | 426 |
|
471 |
-
| 0.5847 | 9.999999e-06 | 427 |
|
472 |
-
| 0.5858 | 9.999999e-06 | 428 |
|
473 |
-
| 0.5868 | 9.999999e-06 | 429 |
|
474 |
-
| 0.5789 | 9.999999e-06 | 430 |
|
475 |
-
| 0.5853 | 9.999999e-06 | 431 |
|
476 |
-
| 0.5822 | 9.999999e-06 | 432 |
|
477 |
-
| 0.5817 | 9.999999e-06 | 433 |
|
478 |
-
| 0.5844 | 9.999999e-06 | 434 |
|
479 |
-
| 0.5849 | 9.999999e-06 | 435 |
|
480 |
-
| 0.5835 | 9.999999e-06 | 436 |
|
481 |
-
| 0.5868 | 9.999999e-06 | 437 |
|
482 |
-
| 0.5831 | 9.999999e-06 | 438 |
|
483 |
-
| 0.5859 | 9.999999e-06 | 439 |
|
484 |
-
| 0.5818 | 9.999999e-06 | 440 |
|
485 |
-
| 0.5833 | 9.999999e-06 | 441 |
|
486 |
-
| 0.5888 | 9.999999e-06 | 442 |
|
487 |
-
| 0.5878 | 9.999999e-06 | 443 |
|
488 |
-
| 0.5847 | 9.999999e-06 | 444 |
|
489 |
-
| 0.5816 | 9.999999e-06 | 445 |
|
490 |
-
| 0.5864 | 9.999999e-06 | 446 |
|
491 |
-
| 0.5829 | 9.999999e-06 | 447 |
|
492 |
-
| 0.5835 | 9.999999e-06 | 448 |
|
493 |
-
| 0.5848 | 9.999999e-06 | 449 |
|
494 |
-
| 0.5852 | 9.999999e-06 | 450 |
|
495 |
-
| 0.5800 | 9.999999e-06 | 451 |
|
496 |
-
| 0.5834 | 9.999999e-06 | 452 |
|
497 |
-
| 0.5856 | 9.999999e-06 | 453 |
|
498 |
-
| 0.5866 | 9.999999e-06 | 454 |
|
499 |
-
| 0.5844 | 9.999999e-06 | 455 |
|
500 |
-
| 0.5846 | 9.999999e-06 | 456 |
|
501 |
-
| 0.5840 | 9.999999e-06 | 457 |
|
502 |
-
| 0.5857 | 9.999999e-06 | 458 |
|
503 |
-
| 0.5854 | 9.999999e-06 | 459 |
|
504 |
-
| 0.5836 | 9.999999e-06 | 460 |
|
505 |
-
| 0.5859 | 9.999999e-06 | 461 |
|
506 |
-
| 0.5866 | 9.999999e-06 | 462 |
|
507 |
-
| 0.5835 | 9.999999e-06 | 463 |
|
508 |
-
| 0.5866 | 9.999999e-06 | 464 |
|
509 |
-
| 0.5843 | 9.999999e-06 | 465 |
|
510 |
-
| 0.5868 | 9.999999e-06 | 466 |
|
511 |
-
| 0.5877 | 9.999999e-06 | 467 |
|
512 |
-
| 0.5841 | 9.999999e-06 | 468 |
|
513 |
-
| 0.5858 | 9.999999e-06 | 469 |
|
514 |
-
| 0.5847 | 9.999999e-06 | 470 |
|
515 |
-
| 0.5855 | 9.999999e-06 | 471 |
|
516 |
-
| 0.5832 | 9.999999e-06 | 472 |
|
517 |
-
| 0.5807 | 9.999999e-06 | 473 |
|
518 |
-
| 0.5818 | 9.999999e-06 | 474 |
|
519 |
-
| 0.5868 | 9.999999e-06 | 475 |
|
520 |
-
| 0.5856 | 9.999999e-06 | 476 |
|
521 |
-
| 0.5821 | 9.999999e-06 | 477 |
|
522 |
-
| 0.5791 | 9.999999e-06 | 478 |
|
523 |
-
| 0.5847 | 9.999999e-06 | 479 |
|
524 |
-
| 0.5881 | 9.999999e-06 | 480 |
|
525 |
-
| 0.5821 | 9.999999e-06 | 481 |
|
526 |
-
| 0.5844 | 9.999999e-06 | 482 |
|
527 |
-
| 0.5873 | 9.999999e-06 | 483 |
|
528 |
-
| 0.5854 | 9.999999e-06 | 484 |
|
529 |
-
| 0.5868 | 9.999999e-06 | 485 |
|
530 |
-
| 0.5810 | 9.999999e-06 | 486 |
|
531 |
-
| 0.5874 | 9.999999e-06 | 487 |
|
532 |
-
| 0.5849 | 9.999999e-06 | 488 |
|
533 |
-
| 0.5887 | 9.999999e-06 | 489 |
|
534 |
-
| 0.5870 | 9.999999e-06 | 490 |
|
535 |
-
| 0.5846 | 9.999999e-06 | 491 |
|
536 |
-
| 0.5800 | 9.999999e-06 | 492 |
|
537 |
-
| 0.5850 | 9.999999e-06 | 493 |
|
538 |
-
| 0.5857 | 9.999999e-06 | 494 |
|
539 |
-
| 0.5849 | 9.999999e-06 | 495 |
|
540 |
-
| 0.5818 | 9.999999e-06 | 496 |
|
541 |
-
| 0.5849 | 9.999999e-06 | 497 |
|
542 |
-
| 0.5861 | 9.999999e-06 | 498 |
|
543 |
-
| 0.5845 | 9.999999e-06 | 499 |
|
544 |
|
545 |
|
546 |
### Framework versions
|
|
|
13 |
|
14 |
This model is a fine-tuned version of [MJ199999/gpt3_model](https://huggingface.co/MJ199999/gpt3_model) on an unknown dataset.
|
15 |
It achieves the following results on the training set at the final epoch (no evaluation-set metrics are reported):
|
16 |
+
- Train Loss: 0.9684
|
17 |
+
- Train Lr: 0.0009999999
|
18 |
+
- Epoch: 199
|
19 |
|
20 |
## Model description
|
21 |
|
|
|
34 |
### Training hyperparameters
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
+
- optimizer: {'name': 'Adagrad', 'learning_rate': 0.0009999999, 'decay': 0.0, 'initial_accumulator_value': 0.1, 'epsilon': 1e-07}
|
38 |
- training_precision: float32
|
39 |
|
40 |
### Training results
|
41 |
|
42 |
| Train Loss | Train Lr | Epoch |
|
43 |
|:----------:|:------------:|:-----:|
|
44 |
+
| 6.9783 | 0.01 | 0 |
|
45 |
+
| 5.7078 | 0.01 | 1 |
|
46 |
+
| 5.1313 | 0.01 | 2 |
|
47 |
+
| 4.4967 | 0.01 | 3 |
|
48 |
+
| 3.9272 | 0.01 | 4 |
|
49 |
+
| 3.4594 | 0.01 | 5 |
|
50 |
+
| 3.0591 | 0.01 | 6 |
|
51 |
+
| 2.7073 | 0.01 | 7 |
|
52 |
+
| 2.4512 | 0.01 | 8 |
|
53 |
+
| 2.2231 | 0.01 | 9 |
|
54 |
+
| 2.0833 | 0.01 | 10 |
|
55 |
+
| 1.9429 | 0.01 | 11 |
|
56 |
+
| 1.8434 | 0.01 | 12 |
|
57 |
+
| 1.7522 | 0.01 | 13 |
|
58 |
+
| 1.6928 | 0.01 | 14 |
|
59 |
+
| 1.6394 | 0.01 | 15 |
|
60 |
+
| 1.5898 | 0.01 | 16 |
|
61 |
+
| 1.5477 | 0.01 | 17 |
|
62 |
+
| 1.5130 | 0.01 | 18 |
|
63 |
+
| 1.4822 | 0.01 | 19 |
|
64 |
+
| 1.4550 | 0.01 | 20 |
|
65 |
+
| 1.4375 | 0.01 | 21 |
|
66 |
+
| 1.4096 | 0.01 | 22 |
|
67 |
+
| 1.3952 | 0.01 | 23 |
|
68 |
+
| 1.3756 | 0.01 | 24 |
|
69 |
+
| 1.3603 | 0.01 | 25 |
|
70 |
+
| 1.3422 | 0.01 | 26 |
|
71 |
+
| 1.3329 | 0.01 | 27 |
|
72 |
+
| 1.3221 | 0.01 | 28 |
|
73 |
+
| 1.3155 | 0.01 | 29 |
|
74 |
+
| 1.3007 | 0.01 | 30 |
|
75 |
+
| 1.2915 | 0.01 | 31 |
|
76 |
+
| 1.2869 | 0.01 | 32 |
|
77 |
+
| 1.2744 | 0.01 | 33 |
|
78 |
+
| 1.2710 | 0.01 | 34 |
|
79 |
+
| 1.2578 | 0.01 | 35 |
|
80 |
+
| 1.2550 | 0.01 | 36 |
|
81 |
+
| 1.2458 | 0.01 | 37 |
|
82 |
+
| 1.2373 | 0.01 | 38 |
|
83 |
+
| 1.2364 | 0.01 | 39 |
|
84 |
+
| 1.2332 | 0.01 | 40 |
|
85 |
+
| 1.2177 | 0.01 | 41 |
|
86 |
+
| 1.2164 | 0.01 | 42 |
|
87 |
+
| 1.2176 | 0.01 | 43 |
|
88 |
+
| 1.2129 | 0.01 | 44 |
|
89 |
+
| 1.2088 | 0.01 | 45 |
|
90 |
+
| 1.2067 | 0.01 | 46 |
|
91 |
+
| 1.2013 | 0.01 | 47 |
|
92 |
+
| 1.1933 | 0.01 | 48 |
|
93 |
+
| 1.1920 | 0.01 | 49 |
|
94 |
+
| 1.1873 | 0.01 | 50 |
|
95 |
+
| 1.1831 | 0.01 | 51 |
|
96 |
+
| 1.1795 | 0.01 | 52 |
|
97 |
+
| 1.1777 | 0.01 | 53 |
|
98 |
+
| 1.1749 | 0.01 | 54 |
|
99 |
+
| 1.1719 | 0.01 | 55 |
|
100 |
+
| 1.1622 | 0.01 | 56 |
|
101 |
+
| 1.1621 | 0.01 | 57 |
|
102 |
+
| 1.1591 | 0.01 | 58 |
|
103 |
+
| 1.1582 | 0.01 | 59 |
|
104 |
+
| 1.1620 | 0.01 | 60 |
|
105 |
+
| 1.1528 | 0.01 | 61 |
|
106 |
+
| 1.1536 | 0.01 | 62 |
|
107 |
+
| 1.1479 | 0.01 | 63 |
|
108 |
+
| 1.1458 | 0.01 | 64 |
|
109 |
+
| 1.1429 | 0.01 | 65 |
|
110 |
+
| 1.1415 | 0.01 | 66 |
|
111 |
+
| 1.1452 | 0.01 | 67 |
|
112 |
+
| 1.1398 | 0.01 | 68 |
|
113 |
+
| 1.1360 | 0.01 | 69 |
|
114 |
+
| 1.1367 | 0.01 | 70 |
|
115 |
+
| 1.1303 | 0.01 | 71 |
|
116 |
+
| 1.1339 | 0.01 | 72 |
|
117 |
+
| 1.1298 | 0.01 | 73 |
|
118 |
+
| 1.1295 | 0.01 | 74 |
|
119 |
+
| 1.1240 | 0.01 | 75 |
|
120 |
+
| 1.1255 | 0.01 | 76 |
|
121 |
+
| 1.1218 | 0.01 | 77 |
|
122 |
+
| 1.1208 | 0.01 | 78 |
|
123 |
+
| 1.1214 | 0.01 | 79 |
|
124 |
+
| 1.1187 | 0.01 | 80 |
|
125 |
+
| 1.1186 | 0.01 | 81 |
|
126 |
+
| 1.1144 | 0.01 | 82 |
|
127 |
+
| 1.1126 | 0.01 | 83 |
|
128 |
+
| 1.1144 | 0.01 | 84 |
|
129 |
+
| 1.1124 | 0.01 | 85 |
|
130 |
+
| 1.1098 | 0.01 | 86 |
|
131 |
+
| 1.1112 | 0.01 | 87 |
|
132 |
+
| 1.1074 | 0.01 | 88 |
|
133 |
+
| 1.1061 | 0.01 | 89 |
|
134 |
+
| 1.1029 | 0.01 | 90 |
|
135 |
+
| 1.1058 | 0.01 | 91 |
|
136 |
+
| 1.1011 | 0.01 | 92 |
|
137 |
+
| 1.1054 | 0.01 | 93 |
|
138 |
+
| 1.0961 | 0.01 | 94 |
|
139 |
+
| 1.0991 | 0.01 | 95 |
|
140 |
+
| 1.0947 | 0.01 | 96 |
|
141 |
+
| 1.0995 | 0.01 | 97 |
|
142 |
+
| 1.0927 | 0.01 | 98 |
|
143 |
+
| 1.0952 | 0.01 | 99 |
|
144 |
+
| 1.0940 | 0.01 | 100 |
|
145 |
+
| 1.0883 | 0.01 | 101 |
|
146 |
+
| 1.0889 | 0.01 | 102 |
|
147 |
+
| 1.0934 | 0.01 | 103 |
|
148 |
+
| 1.0910 | 0.01 | 104 |
|
149 |
+
| 1.0883 | 0.01 | 105 |
|
150 |
+
| 1.0819 | 0.01 | 106 |
|
151 |
+
| 1.0851 | 0.01 | 107 |
|
152 |
+
| 1.0841 | 0.01 | 108 |
|
153 |
+
| 1.0817 | 0.01 | 109 |
|
154 |
+
| 1.0828 | 0.01 | 110 |
|
155 |
+
| 1.0821 | 0.01 | 111 |
|
156 |
+
| 1.0813 | 0.01 | 112 |
|
157 |
+
| 1.0800 | 0.01 | 113 |
|
158 |
+
| 1.0784 | 0.01 | 114 |
|
159 |
+
| 1.0780 | 0.01 | 115 |
|
160 |
+
| 1.0769 | 0.01 | 116 |
|
161 |
+
| 1.0795 | 0.01 | 117 |
|
162 |
+
| 1.0772 | 0.01 | 118 |
|
163 |
+
| 1.0753 | 0.01 | 119 |
|
164 |
+
| 1.0767 | 0.01 | 120 |
|
165 |
+
| 1.0764 | 0.01 | 121 |
|
166 |
+
| 1.0734 | 0.01 | 122 |
|
167 |
+
| 1.0755 | 0.01 | 123 |
|
168 |
+
| 1.0756 | 0.01 | 124 |
|
169 |
+
| 1.0717 | 0.01 | 125 |
|
170 |
+
| 1.0712 | 0.01 | 126 |
|
171 |
+
| 1.0688 | 0.01 | 127 |
|
172 |
+
| 1.0701 | 0.01 | 128 |
|
173 |
+
| 1.0672 | 0.01 | 129 |
|
174 |
+
| 1.0687 | 0.01 | 130 |
|
175 |
+
| 1.0683 | 0.01 | 131 |
|
176 |
+
| 1.0652 | 0.01 | 132 |
|
177 |
+
| 1.0659 | 0.01 | 133 |
|
178 |
+
| 1.0662 | 0.01 | 134 |
|
179 |
+
| 1.0644 | 0.01 | 135 |
|
180 |
+
| 1.0601 | 0.01 | 136 |
|
181 |
+
| 1.0605 | 0.01 | 137 |
|
182 |
+
| 1.0622 | 0.01 | 138 |
|
183 |
+
| 1.0622 | 0.01 | 139 |
|
184 |
+
| 1.0592 | 0.01 | 140 |
|
185 |
+
| 1.0624 | 0.01 | 141 |
|
186 |
+
| 1.0578 | 0.01 | 142 |
|
187 |
+
| 1.0618 | 0.01 | 143 |
|
188 |
+
| 1.0591 | 0.01 | 144 |
|
189 |
+
| 1.0573 | 0.01 | 145 |
|
190 |
+
| 1.0576 | 0.01 | 146 |
|
191 |
+
| 1.0590 | 0.01 | 147 |
|
192 |
+
| 1.0570 | 0.01 | 148 |
|
193 |
+
| 1.0539 | 0.01 | 149 |
|
194 |
+
| 0.9785 | 0.0009999999 | 150 |
|
195 |
+
| 0.9782 | 0.0009999999 | 151 |
|
196 |
+
| 0.9751 | 0.0009999999 | 152 |
|
197 |
+
| 0.9738 | 0.0009999999 | 153 |
|
198 |
+
| 0.9747 | 0.0009999999 | 154 |
|
199 |
+
| 0.9723 | 0.0009999999 | 155 |
|
200 |
+
| 0.9721 | 0.0009999999 | 156 |
|
201 |
+
| 0.9726 | 0.0009999999 | 157 |
|
202 |
+
| 0.9718 | 0.0009999999 | 158 |
|
203 |
+
| 0.9716 | 0.0009999999 | 159 |
|
204 |
+
| 0.9730 | 0.0009999999 | 160 |
|
205 |
+
| 0.9711 | 0.0009999999 | 161 |
|
206 |
+
| 0.9711 | 0.0009999999 | 162 |
|
207 |
+
| 0.9721 | 0.0009999999 | 163 |
|
208 |
+
| 0.9718 | 0.0009999999 | 164 |
|
209 |
+
| 0.9688 | 0.0009999999 | 165 |
|
210 |
+
| 0.9713 | 0.0009999999 | 166 |
|
211 |
+
| 0.9736 | 0.0009999999 | 167 |
|
212 |
+
| 0.9711 | 0.0009999999 | 168 |
|
213 |
+
| 0.9700 | 0.0009999999 | 169 |
|
214 |
+
| 0.9680 | 0.0009999999 | 170 |
|
215 |
+
| 0.9692 | 0.0009999999 | 171 |
|
216 |
+
| 0.9692 | 0.0009999999 | 172 |
|
217 |
+
| 0.9718 | 0.0009999999 | 173 |
|
218 |
+
| 0.9679 | 0.0009999999 | 174 |
|
219 |
+
| 0.9716 | 0.0009999999 | 175 |
|
220 |
+
| 0.9703 | 0.0009999999 | 176 |
|
221 |
+
| 0.9655 | 0.0009999999 | 177 |
|
222 |
+
| 0.9680 | 0.0009999999 | 178 |
|
223 |
+
| 0.9685 | 0.0009999999 | 179 |
|
224 |
+
| 0.9663 | 0.0009999999 | 180 |
|
225 |
+
| 0.9690 | 0.0009999999 | 181 |
|
226 |
+
| 0.9683 | 0.0009999999 | 182 |
|
227 |
+
| 0.9681 | 0.0009999999 | 183 |
|
228 |
+
| 0.9688 | 0.0009999999 | 184 |
|
229 |
+
| 0.9658 | 0.0009999999 | 185 |
|
230 |
+
| 0.9699 | 0.0009999999 | 186 |
|
231 |
+
| 0.9666 | 0.0009999999 | 187 |
|
232 |
+
| 0.9690 | 0.0009999999 | 188 |
|
233 |
+
| 0.9700 | 0.0009999999 | 189 |
|
234 |
+
| 0.9675 | 0.0009999999 | 190 |
|
235 |
+
| 0.9691 | 0.0009999999 | 191 |
|
236 |
+
| 0.9671 | 0.0009999999 | 192 |
|
237 |
+
| 0.9692 | 0.0009999999 | 193 |
|
238 |
+
| 0.9692 | 0.0009999999 | 194 |
|
239 |
+
| 0.9653 | 0.0009999999 | 195 |
|
240 |
+
| 0.9668 | 0.0009999999 | 196 |
|
241 |
+
| 0.9684 | 0.0009999999 | 197 |
|
242 |
+
| 0.9682 | 0.0009999999 | 198 |
|
243 |
+
| 0.9684 | 0.0009999999 | 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
|
246 |
### Framework versions
|
config.json
CHANGED
@@ -30,5 +30,5 @@
|
|
30 |
"summary_use_proj": true,
|
31 |
"transformers_version": "4.21.3",
|
32 |
"use_cache": true,
|
33 |
-
"vocab_size":
|
34 |
}
|
|
|
30 |
"summary_use_proj": true,
|
31 |
"transformers_version": "4.21.3",
|
32 |
"use_cache": true,
|
33 |
+
"vocab_size": 51202
|
34 |
}
|
tf_model.h5
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7aefdac0796093d4c4bd26411b52a4cd872c0fbdf0002c78eb4bb4f2c00fbe03
|
3 |
+
size 5043830472
|