nRuaif commited on
Commit
2ec64ca
·
1 Parent(s): d666458

Upload folder using huggingface_hub

Browse files
Untitled.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
  "outputs": [
@@ -16,278 +16,12 @@
16
  {
17
  "data": {
18
  "application/vnd.jupyter.widget-view+json": {
19
- "model_id": "5f308b1ddb654821afe650b708175cc2",
20
  "version_major": 2,
21
  "version_minor": 0
22
  },
23
  "text/plain": [
24
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
25
- ]
26
- },
27
- "metadata": {},
28
- "output_type": "display_data"
29
- },
30
- {
31
- "data": {
32
- "application/vnd.jupyter.widget-view+json": {
33
- "model_id": "6aaef8ad6f75454991e62a4d1e8ff05e",
34
- "version_major": 2,
35
- "version_minor": 0
36
- },
37
- "text/plain": [
38
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
39
- ]
40
- },
41
- "metadata": {},
42
- "output_type": "display_data"
43
- },
44
- {
45
- "data": {
46
- "application/vnd.jupyter.widget-view+json": {
47
- "model_id": "43f479d5951b4c08848897dbda7a9483",
48
- "version_major": 2,
49
- "version_minor": 0
50
- },
51
- "text/plain": [
52
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
53
- ]
54
- },
55
- "metadata": {},
56
- "output_type": "display_data"
57
- },
58
- {
59
- "data": {
60
- "application/vnd.jupyter.widget-view+json": {
61
- "model_id": "491a2b982ebe4269a0a1b302938fd82f",
62
- "version_major": 2,
63
- "version_minor": 0
64
- },
65
- "text/plain": [
66
- "Upload 43 LFS files: 0%| | 0/43 [00:00<?, ?it/s]"
67
- ]
68
- },
69
- "metadata": {},
70
- "output_type": "display_data"
71
- },
72
- {
73
- "data": {
74
- "application/vnd.jupyter.widget-view+json": {
75
- "model_id": "de17bf15a4df42fea8781b73b667c4e7",
76
- "version_major": 2,
77
- "version_minor": 0
78
- },
79
- "text/plain": [
80
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
81
- ]
82
- },
83
- "metadata": {},
84
- "output_type": "display_data"
85
- },
86
- {
87
- "data": {
88
- "application/vnd.jupyter.widget-view+json": {
89
- "model_id": "ca5035dfd1e146138cdea7b14d3af6c8",
90
- "version_major": 2,
91
- "version_minor": 0
92
- },
93
- "text/plain": [
94
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
95
- ]
96
- },
97
- "metadata": {},
98
- "output_type": "display_data"
99
- },
100
- {
101
- "data": {
102
- "application/vnd.jupyter.widget-view+json": {
103
- "model_id": "47a61e2634844e1ca0c556626ff249c2",
104
- "version_major": 2,
105
- "version_minor": 0
106
- },
107
- "text/plain": [
108
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
109
- ]
110
- },
111
- "metadata": {},
112
- "output_type": "display_data"
113
- },
114
- {
115
- "data": {
116
- "application/vnd.jupyter.widget-view+json": {
117
- "model_id": "6a9b2c58d70b4ecfa879aaf3caea5edc",
118
- "version_major": 2,
119
- "version_minor": 0
120
- },
121
- "text/plain": [
122
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
123
- ]
124
- },
125
- "metadata": {},
126
- "output_type": "display_data"
127
- },
128
- {
129
- "data": {
130
- "application/vnd.jupyter.widget-view+json": {
131
- "model_id": "1d48ef30c59244f09a69b208b000ceee",
132
- "version_major": 2,
133
- "version_minor": 0
134
- },
135
- "text/plain": [
136
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
137
- ]
138
- },
139
- "metadata": {},
140
- "output_type": "display_data"
141
- },
142
- {
143
- "data": {
144
- "application/vnd.jupyter.widget-view+json": {
145
- "model_id": "94b212b03565444e8f7cedf2688feb24",
146
- "version_major": 2,
147
- "version_minor": 0
148
- },
149
- "text/plain": [
150
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
151
- ]
152
- },
153
- "metadata": {},
154
- "output_type": "display_data"
155
- },
156
- {
157
- "data": {
158
- "application/vnd.jupyter.widget-view+json": {
159
- "model_id": "0ba7f609f57b4cc5af970e364bb3ee27",
160
- "version_major": 2,
161
- "version_minor": 0
162
- },
163
- "text/plain": [
164
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
165
- ]
166
- },
167
- "metadata": {},
168
- "output_type": "display_data"
169
- },
170
- {
171
- "data": {
172
- "application/vnd.jupyter.widget-view+json": {
173
- "model_id": "8be371a76bb14e2caa2ec672c112e23e",
174
- "version_major": 2,
175
- "version_minor": 0
176
- },
177
- "text/plain": [
178
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
179
- ]
180
- },
181
- "metadata": {},
182
- "output_type": "display_data"
183
- },
184
- {
185
- "data": {
186
- "application/vnd.jupyter.widget-view+json": {
187
- "model_id": "555efd66b20c49eab3edd2518af34f57",
188
- "version_major": 2,
189
- "version_minor": 0
190
- },
191
- "text/plain": [
192
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
193
- ]
194
- },
195
- "metadata": {},
196
- "output_type": "display_data"
197
- },
198
- {
199
- "data": {
200
- "application/vnd.jupyter.widget-view+json": {
201
- "model_id": "dd48f67cda094c958349a2f0a3107a3e",
202
- "version_major": 2,
203
- "version_minor": 0
204
- },
205
- "text/plain": [
206
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
207
- ]
208
- },
209
- "metadata": {},
210
- "output_type": "display_data"
211
- },
212
- {
213
- "data": {
214
- "application/vnd.jupyter.widget-view+json": {
215
- "model_id": "bdf4c85a50c54a1d8ee0c4805538cedc",
216
- "version_major": 2,
217
- "version_minor": 0
218
- },
219
- "text/plain": [
220
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
221
- ]
222
- },
223
- "metadata": {},
224
- "output_type": "display_data"
225
- },
226
- {
227
- "data": {
228
- "application/vnd.jupyter.widget-view+json": {
229
- "model_id": "6294a1cf6111457ab2722f5570eaf5e9",
230
- "version_major": 2,
231
- "version_minor": 0
232
- },
233
- "text/plain": [
234
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
235
- ]
236
- },
237
- "metadata": {},
238
- "output_type": "display_data"
239
- },
240
- {
241
- "data": {
242
- "application/vnd.jupyter.widget-view+json": {
243
- "model_id": "8d444fba036549dc8d5a86c9678c6f10",
244
- "version_major": 2,
245
- "version_minor": 0
246
- },
247
- "text/plain": [
248
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
249
- ]
250
- },
251
- "metadata": {},
252
- "output_type": "display_data"
253
- },
254
- {
255
- "data": {
256
- "application/vnd.jupyter.widget-view+json": {
257
- "model_id": "c042cf944dde4b6ab62fb374b4525c6e",
258
- "version_major": 2,
259
- "version_minor": 0
260
- },
261
- "text/plain": [
262
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
263
- ]
264
- },
265
- "metadata": {},
266
- "output_type": "display_data"
267
- },
268
- {
269
- "data": {
270
- "application/vnd.jupyter.widget-view+json": {
271
- "model_id": "1068ab04d0274c03904d387bdcdf5de7",
272
- "version_major": 2,
273
- "version_minor": 0
274
- },
275
- "text/plain": [
276
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
277
- ]
278
- },
279
- "metadata": {},
280
- "output_type": "display_data"
281
- },
282
- {
283
- "data": {
284
- "application/vnd.jupyter.widget-view+json": {
285
- "model_id": "2a95d0b7842748dabd9fc1f8c18deb39",
286
- "version_major": 2,
287
- "version_minor": 0
288
- },
289
- "text/plain": [
290
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
291
  ]
292
  },
293
  "metadata": {},
@@ -296,7 +30,7 @@
296
  {
297
  "data": {
298
  "application/vnd.jupyter.widget-view+json": {
299
- "model_id": "f7797fab17a3420f95232a252c8d772c",
300
  "version_major": 2,
301
  "version_minor": 0
302
  },
@@ -310,7 +44,7 @@
310
  {
311
  "data": {
312
  "application/vnd.jupyter.widget-view+json": {
313
- "model_id": "1bafc37d5dca444bb7c6ef711f7861ae",
314
  "version_major": 2,
315
  "version_minor": 0
316
  },
@@ -324,63 +58,7 @@
324
  {
325
  "data": {
326
  "application/vnd.jupyter.widget-view+json": {
327
- "model_id": "293fda7d9cd14487b770342136ed25d0",
328
- "version_major": 2,
329
- "version_minor": 0
330
- },
331
- "text/plain": [
332
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
333
- ]
334
- },
335
- "metadata": {},
336
- "output_type": "display_data"
337
- },
338
- {
339
- "data": {
340
- "application/vnd.jupyter.widget-view+json": {
341
- "model_id": "c36233f77fb14c2698087dd20522d4eb",
342
- "version_major": 2,
343
- "version_minor": 0
344
- },
345
- "text/plain": [
346
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
347
- ]
348
- },
349
- "metadata": {},
350
- "output_type": "display_data"
351
- },
352
- {
353
- "data": {
354
- "application/vnd.jupyter.widget-view+json": {
355
- "model_id": "3cef58ea84be4f3dae5e848ff5a6f27c",
356
- "version_major": 2,
357
- "version_minor": 0
358
- },
359
- "text/plain": [
360
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
361
- ]
362
- },
363
- "metadata": {},
364
- "output_type": "display_data"
365
- },
366
- {
367
- "data": {
368
- "application/vnd.jupyter.widget-view+json": {
369
- "model_id": "e952241ba9be4c9fa8c9b2eeb1e3d9e4",
370
- "version_major": 2,
371
- "version_minor": 0
372
- },
373
- "text/plain": [
374
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
375
- ]
376
- },
377
- "metadata": {},
378
- "output_type": "display_data"
379
- },
380
- {
381
- "data": {
382
- "application/vnd.jupyter.widget-view+json": {
383
- "model_id": "c0dce134d40942ab999de97456fe9b32",
384
  "version_major": 2,
385
  "version_minor": 0
386
  },
@@ -394,21 +72,7 @@
394
  {
395
  "data": {
396
  "application/vnd.jupyter.widget-view+json": {
397
- "model_id": "23a3b1c6a43f497b815f163b35460238",
398
- "version_major": 2,
399
- "version_minor": 0
400
- },
401
- "text/plain": [
402
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
403
- ]
404
- },
405
- "metadata": {},
406
- "output_type": "display_data"
407
- },
408
- {
409
- "data": {
410
- "application/vnd.jupyter.widget-view+json": {
411
- "model_id": "1b971333e98e46ba9a10b38349a51f8f",
412
  "version_major": 2,
413
  "version_minor": 0
414
  },
@@ -422,7 +86,7 @@
422
  {
423
  "data": {
424
  "application/vnd.jupyter.widget-view+json": {
425
- "model_id": "bf03b21ac07a4980aecb4de18e18827c",
426
  "version_major": 2,
427
  "version_minor": 0
428
  },
@@ -433,209 +97,13 @@
433
  "metadata": {},
434
  "output_type": "display_data"
435
  },
436
- {
437
- "data": {
438
- "application/vnd.jupyter.widget-view+json": {
439
- "model_id": "9fb30b3564964dc796e0499f15d99c34",
440
- "version_major": 2,
441
- "version_minor": 0
442
- },
443
- "text/plain": [
444
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
445
- ]
446
- },
447
- "metadata": {},
448
- "output_type": "display_data"
449
- },
450
- {
451
- "data": {
452
- "application/vnd.jupyter.widget-view+json": {
453
- "model_id": "64204effaf14419f9519fa67dbcdffac",
454
- "version_major": 2,
455
- "version_minor": 0
456
- },
457
- "text/plain": [
458
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
459
- ]
460
- },
461
- "metadata": {},
462
- "output_type": "display_data"
463
- },
464
- {
465
- "data": {
466
- "application/vnd.jupyter.widget-view+json": {
467
- "model_id": "d904d221326142dab018eef1e192ece5",
468
- "version_major": 2,
469
- "version_minor": 0
470
- },
471
- "text/plain": [
472
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
473
- ]
474
- },
475
- "metadata": {},
476
- "output_type": "display_data"
477
- },
478
- {
479
- "data": {
480
- "application/vnd.jupyter.widget-view+json": {
481
- "model_id": "b378d36e28dd4d3b8624b25461fd74a6",
482
- "version_major": 2,
483
- "version_minor": 0
484
- },
485
- "text/plain": [
486
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
487
- ]
488
- },
489
- "metadata": {},
490
- "output_type": "display_data"
491
- },
492
- {
493
- "data": {
494
- "application/vnd.jupyter.widget-view+json": {
495
- "model_id": "df37b47bf87746de81f69960f9cfe4e8",
496
- "version_major": 2,
497
- "version_minor": 0
498
- },
499
- "text/plain": [
500
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
501
- ]
502
- },
503
- "metadata": {},
504
- "output_type": "display_data"
505
- },
506
- {
507
- "data": {
508
- "application/vnd.jupyter.widget-view+json": {
509
- "model_id": "20da2b3cbbe44b2e8da8435d693d28f2",
510
- "version_major": 2,
511
- "version_minor": 0
512
- },
513
- "text/plain": [
514
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
515
- ]
516
- },
517
- "metadata": {},
518
- "output_type": "display_data"
519
- },
520
- {
521
- "data": {
522
- "application/vnd.jupyter.widget-view+json": {
523
- "model_id": "953085469ce34c21b5d9f55849780e25",
524
- "version_major": 2,
525
- "version_minor": 0
526
- },
527
- "text/plain": [
528
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
529
- ]
530
- },
531
- "metadata": {},
532
- "output_type": "display_data"
533
- },
534
- {
535
- "data": {
536
- "application/vnd.jupyter.widget-view+json": {
537
- "model_id": "4b4a15ec387b4ec3a2bcb652daa81651",
538
- "version_major": 2,
539
- "version_minor": 0
540
- },
541
- "text/plain": [
542
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
543
- ]
544
- },
545
- "metadata": {},
546
- "output_type": "display_data"
547
- },
548
- {
549
- "data": {
550
- "application/vnd.jupyter.widget-view+json": {
551
- "model_id": "b3beba6e06114e45891d074c1df6bfc8",
552
- "version_major": 2,
553
- "version_minor": 0
554
- },
555
- "text/plain": [
556
- "adapter_model.bin: 0%| | 0.00/62.8M [00:00<?, ?B/s]"
557
- ]
558
- },
559
- "metadata": {},
560
- "output_type": "display_data"
561
- },
562
- {
563
- "data": {
564
- "application/vnd.jupyter.widget-view+json": {
565
- "model_id": "902a27cfae9843b5968b145adf334838",
566
- "version_major": 2,
567
- "version_minor": 0
568
- },
569
- "text/plain": [
570
- "optimizer.pt: 0%| | 0.00/251M [00:00<?, ?B/s]"
571
- ]
572
- },
573
- "metadata": {},
574
- "output_type": "display_data"
575
- },
576
- {
577
- "data": {
578
- "application/vnd.jupyter.widget-view+json": {
579
- "model_id": "34bf777998ea4384841e047233995bd4",
580
- "version_major": 2,
581
- "version_minor": 0
582
- },
583
- "text/plain": [
584
- "rng_state.pth: 0%| | 0.00/14.6k [00:00<?, ?B/s]"
585
- ]
586
- },
587
- "metadata": {},
588
- "output_type": "display_data"
589
- },
590
- {
591
- "data": {
592
- "application/vnd.jupyter.widget-view+json": {
593
- "model_id": "2be773b9220844fdbaf85ae04bcf6b5c",
594
- "version_major": 2,
595
- "version_minor": 0
596
- },
597
- "text/plain": [
598
- "scheduler.pt: 0%| | 0.00/627 [00:00<?, ?B/s]"
599
- ]
600
- },
601
- "metadata": {},
602
- "output_type": "display_data"
603
- },
604
- {
605
- "data": {
606
- "application/vnd.jupyter.widget-view+json": {
607
- "model_id": "b651756e28e8438a921ab786a3b4ca68",
608
- "version_major": 2,
609
- "version_minor": 0
610
- },
611
- "text/plain": [
612
- "training_args.bin: 0%| | 0.00/4.22k [00:00<?, ?B/s]"
613
- ]
614
- },
615
- "metadata": {},
616
- "output_type": "display_data"
617
- },
618
- {
619
- "data": {
620
- "application/vnd.jupyter.widget-view+json": {
621
- "model_id": "382b072e12f347de8318abac597ae873",
622
- "version_major": 2,
623
- "version_minor": 0
624
- },
625
- "text/plain": [
626
- "tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
627
- ]
628
- },
629
- "metadata": {},
630
- "output_type": "display_data"
631
- },
632
  {
633
  "data": {
634
  "text/plain": [
635
  "'https://huggingface.co/nRuaif/Blind-test01/tree/main/'"
636
  ]
637
  },
638
- "execution_count": 3,
639
  "metadata": {},
640
  "output_type": "execute_result"
641
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 4,
6
  "id": "127668ee-44f0-4438-9337-e7c4a486aea3",
7
  "metadata": {},
8
  "outputs": [
 
16
  {
17
  "data": {
18
  "application/vnd.jupyter.widget-view+json": {
19
+ "model_id": "2529d1e8ef8c41e7acffc3b4b361e2d1",
20
  "version_major": 2,
21
  "version_minor": 0
22
  },
23
  "text/plain": [
24
+ "Upload 5 LFS files: 0%| | 0/5 [00:00<?, ?it/s]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  ]
26
  },
27
  "metadata": {},
 
30
  {
31
  "data": {
32
  "application/vnd.jupyter.widget-view+json": {
33
+ "model_id": "829fa95283aa4168978a8e583d10f286",
34
  "version_major": 2,
35
  "version_minor": 0
36
  },
 
44
  {
45
  "data": {
46
  "application/vnd.jupyter.widget-view+json": {
47
+ "model_id": "3c2f0437b42a4fa997c9427f891609a6",
48
  "version_major": 2,
49
  "version_minor": 0
50
  },
 
58
  {
59
  "data": {
60
  "application/vnd.jupyter.widget-view+json": {
61
+ "model_id": "4ecfcef382ee44a1acc18a868e3dbef3",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "version_major": 2,
63
  "version_minor": 0
64
  },
 
72
  {
73
  "data": {
74
  "application/vnd.jupyter.widget-view+json": {
75
+ "model_id": "818bf55fe9124089b9378164fcb70319",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  "version_major": 2,
77
  "version_minor": 0
78
  },
 
86
  {
87
  "data": {
88
  "application/vnd.jupyter.widget-view+json": {
89
+ "model_id": "a067333d049c449b9b1a31a7eba92b40",
90
  "version_major": 2,
91
  "version_minor": 0
92
  },
 
97
  "metadata": {},
98
  "output_type": "display_data"
99
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  {
101
  "data": {
102
  "text/plain": [
103
  "'https://huggingface.co/nRuaif/Blind-test01/tree/main/'"
104
  ]
105
  },
106
+ "execution_count": 4,
107
  "metadata": {},
108
  "output_type": "execute_result"
109
  }
checkpoint-90/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-90/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-90/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab80d580d2473359497c3477b018ceeeea5cc05dea5431d27aa4603c281d9485
3
+ size 62788109
checkpoint-90/adapter_model/README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: bfloat16
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.6.0.dev0
checkpoint-90/adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "NousResearch/Llama-2-13b-hf",
4
+ "bias": "none",
5
+ "fan_in_fan_out": null,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj",
19
+ "up_proj",
20
+ "gate_proj",
21
+ "o_proj",
22
+ "k_proj",
23
+ "down_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
checkpoint-90/adapter_model/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab80d580d2473359497c3477b018ceeeea5cc05dea5431d27aa4603c281d9485
3
+ size 62788109
checkpoint-90/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8316370ea618cc284e1ae2c314512562c3104ec39229694b796724e1b84f85
3
+ size 250681597
checkpoint-90/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b128c0e1005d711adf0a8fefe90fdd8507d128f203b84f36d690b97c27d739
3
+ size 14575
checkpoint-90/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1973c9badcac6f2b6522a65b2ddc2c01351eef8d7355ffdf2f6e10ecab6ff1
3
+ size 627
checkpoint-90/trainer_state.json ADDED
@@ -0,0 +1,631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.0155017375946045,
3
+ "best_model_checkpoint": "./qlora-out-kimiko-test2/checkpoint-90",
4
+ "epoch": 2.3263327948303716,
5
+ "eval_steps": 10,
6
+ "global_step": 90,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "learning_rate": 1e-05,
14
+ "loss": 1.5707,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.05,
19
+ "learning_rate": 2e-05,
20
+ "loss": 1.5621,
21
+ "step": 2
22
+ },
23
+ {
24
+ "epoch": 0.08,
25
+ "learning_rate": 3e-05,
26
+ "loss": 1.4812,
27
+ "step": 3
28
+ },
29
+ {
30
+ "epoch": 0.1,
31
+ "learning_rate": 4e-05,
32
+ "loss": 1.5197,
33
+ "step": 4
34
+ },
35
+ {
36
+ "epoch": 0.13,
37
+ "learning_rate": 5e-05,
38
+ "loss": 1.5567,
39
+ "step": 5
40
+ },
41
+ {
42
+ "epoch": 0.16,
43
+ "learning_rate": 5e-05,
44
+ "loss": 1.4645,
45
+ "step": 6
46
+ },
47
+ {
48
+ "epoch": 0.18,
49
+ "learning_rate": 5e-05,
50
+ "loss": 1.6122,
51
+ "step": 7
52
+ },
53
+ {
54
+ "epoch": 0.21,
55
+ "learning_rate": 5e-05,
56
+ "loss": 1.5596,
57
+ "step": 8
58
+ },
59
+ {
60
+ "epoch": 0.23,
61
+ "learning_rate": 5e-05,
62
+ "loss": 1.5608,
63
+ "step": 9
64
+ },
65
+ {
66
+ "epoch": 0.26,
67
+ "learning_rate": 5e-05,
68
+ "loss": 1.5456,
69
+ "step": 10
70
+ },
71
+ {
72
+ "epoch": 0.26,
73
+ "eval_loss": 2.098437547683716,
74
+ "eval_runtime": 119.6161,
75
+ "eval_samples_per_second": 1.555,
76
+ "eval_steps_per_second": 0.777,
77
+ "step": 10
78
+ },
79
+ {
80
+ "epoch": 0.28,
81
+ "learning_rate": 5e-05,
82
+ "loss": 1.5645,
83
+ "step": 11
84
+ },
85
+ {
86
+ "epoch": 0.31,
87
+ "learning_rate": 5e-05,
88
+ "loss": 1.538,
89
+ "step": 12
90
+ },
91
+ {
92
+ "epoch": 0.34,
93
+ "learning_rate": 5e-05,
94
+ "loss": 1.6388,
95
+ "step": 13
96
+ },
97
+ {
98
+ "epoch": 0.36,
99
+ "learning_rate": 5e-05,
100
+ "loss": 1.4943,
101
+ "step": 14
102
+ },
103
+ {
104
+ "epoch": 0.39,
105
+ "learning_rate": 5e-05,
106
+ "loss": 1.5469,
107
+ "step": 15
108
+ },
109
+ {
110
+ "epoch": 0.41,
111
+ "learning_rate": 5e-05,
112
+ "loss": 1.6149,
113
+ "step": 16
114
+ },
115
+ {
116
+ "epoch": 0.44,
117
+ "learning_rate": 5e-05,
118
+ "loss": 1.5345,
119
+ "step": 17
120
+ },
121
+ {
122
+ "epoch": 0.47,
123
+ "learning_rate": 5e-05,
124
+ "loss": 1.4903,
125
+ "step": 18
126
+ },
127
+ {
128
+ "epoch": 0.49,
129
+ "learning_rate": 5e-05,
130
+ "loss": 1.5499,
131
+ "step": 19
132
+ },
133
+ {
134
+ "epoch": 0.52,
135
+ "learning_rate": 5e-05,
136
+ "loss": 1.5934,
137
+ "step": 20
138
+ },
139
+ {
140
+ "epoch": 0.52,
141
+ "eval_loss": 2.066981077194214,
142
+ "eval_runtime": 119.5781,
143
+ "eval_samples_per_second": 1.555,
144
+ "eval_steps_per_second": 0.778,
145
+ "step": 20
146
+ },
147
+ {
148
+ "epoch": 0.54,
149
+ "learning_rate": 5e-05,
150
+ "loss": 1.4554,
151
+ "step": 21
152
+ },
153
+ {
154
+ "epoch": 0.57,
155
+ "learning_rate": 5e-05,
156
+ "loss": 1.5512,
157
+ "step": 22
158
+ },
159
+ {
160
+ "epoch": 0.59,
161
+ "learning_rate": 5e-05,
162
+ "loss": 1.4636,
163
+ "step": 23
164
+ },
165
+ {
166
+ "epoch": 0.62,
167
+ "learning_rate": 5e-05,
168
+ "loss": 1.5398,
169
+ "step": 24
170
+ },
171
+ {
172
+ "epoch": 0.65,
173
+ "learning_rate": 5e-05,
174
+ "loss": 1.5623,
175
+ "step": 25
176
+ },
177
+ {
178
+ "epoch": 0.67,
179
+ "learning_rate": 5e-05,
180
+ "loss": 1.4658,
181
+ "step": 26
182
+ },
183
+ {
184
+ "epoch": 0.7,
185
+ "learning_rate": 5e-05,
186
+ "loss": 1.4723,
187
+ "step": 27
188
+ },
189
+ {
190
+ "epoch": 0.72,
191
+ "learning_rate": 5e-05,
192
+ "loss": 1.432,
193
+ "step": 28
194
+ },
195
+ {
196
+ "epoch": 0.75,
197
+ "learning_rate": 5e-05,
198
+ "loss": 1.4814,
199
+ "step": 29
200
+ },
201
+ {
202
+ "epoch": 0.78,
203
+ "learning_rate": 5e-05,
204
+ "loss": 1.4924,
205
+ "step": 30
206
+ },
207
+ {
208
+ "epoch": 0.78,
209
+ "eval_loss": 2.046339750289917,
210
+ "eval_runtime": 119.5771,
211
+ "eval_samples_per_second": 1.555,
212
+ "eval_steps_per_second": 0.778,
213
+ "step": 30
214
+ },
215
+ {
216
+ "epoch": 0.8,
217
+ "learning_rate": 5e-05,
218
+ "loss": 1.5809,
219
+ "step": 31
220
+ },
221
+ {
222
+ "epoch": 0.83,
223
+ "learning_rate": 5e-05,
224
+ "loss": 1.4803,
225
+ "step": 32
226
+ },
227
+ {
228
+ "epoch": 0.85,
229
+ "learning_rate": 5e-05,
230
+ "loss": 1.4878,
231
+ "step": 33
232
+ },
233
+ {
234
+ "epoch": 0.88,
235
+ "learning_rate": 5e-05,
236
+ "loss": 1.3871,
237
+ "step": 34
238
+ },
239
+ {
240
+ "epoch": 0.9,
241
+ "learning_rate": 5e-05,
242
+ "loss": 1.5151,
243
+ "step": 35
244
+ },
245
+ {
246
+ "epoch": 0.93,
247
+ "learning_rate": 5e-05,
248
+ "loss": 1.4212,
249
+ "step": 36
250
+ },
251
+ {
252
+ "epoch": 0.96,
253
+ "learning_rate": 5e-05,
254
+ "loss": 1.6284,
255
+ "step": 37
256
+ },
257
+ {
258
+ "epoch": 0.98,
259
+ "learning_rate": 5e-05,
260
+ "loss": 1.5002,
261
+ "step": 38
262
+ },
263
+ {
264
+ "epoch": 1.01,
265
+ "learning_rate": 5e-05,
266
+ "loss": 1.4452,
267
+ "step": 39
268
+ },
269
+ {
270
+ "epoch": 1.03,
271
+ "learning_rate": 5e-05,
272
+ "loss": 1.4399,
273
+ "step": 40
274
+ },
275
+ {
276
+ "epoch": 1.03,
277
+ "eval_loss": 2.0354697704315186,
278
+ "eval_runtime": 119.5875,
279
+ "eval_samples_per_second": 1.555,
280
+ "eval_steps_per_second": 0.778,
281
+ "step": 40
282
+ },
283
+ {
284
+ "epoch": 1.06,
285
+ "learning_rate": 5e-05,
286
+ "loss": 1.465,
287
+ "step": 41
288
+ },
289
+ {
290
+ "epoch": 1.09,
291
+ "learning_rate": 5e-05,
292
+ "loss": 1.4199,
293
+ "step": 42
294
+ },
295
+ {
296
+ "epoch": 1.11,
297
+ "learning_rate": 5e-05,
298
+ "loss": 1.5403,
299
+ "step": 43
300
+ },
301
+ {
302
+ "epoch": 1.14,
303
+ "learning_rate": 5e-05,
304
+ "loss": 1.4499,
305
+ "step": 44
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 5e-05,
310
+ "loss": 1.5751,
311
+ "step": 45
312
+ },
313
+ {
314
+ "epoch": 1.19,
315
+ "learning_rate": 5e-05,
316
+ "loss": 1.4809,
317
+ "step": 46
318
+ },
319
+ {
320
+ "epoch": 1.21,
321
+ "learning_rate": 5e-05,
322
+ "loss": 1.5022,
323
+ "step": 47
324
+ },
325
+ {
326
+ "epoch": 1.24,
327
+ "learning_rate": 5e-05,
328
+ "loss": 1.4663,
329
+ "step": 48
330
+ },
331
+ {
332
+ "epoch": 1.27,
333
+ "learning_rate": 5e-05,
334
+ "loss": 1.4435,
335
+ "step": 49
336
+ },
337
+ {
338
+ "epoch": 1.29,
339
+ "learning_rate": 5e-05,
340
+ "loss": 1.4246,
341
+ "step": 50
342
+ },
343
+ {
344
+ "epoch": 1.29,
345
+ "eval_loss": 2.0276732444763184,
346
+ "eval_runtime": 119.5811,
347
+ "eval_samples_per_second": 1.555,
348
+ "eval_steps_per_second": 0.778,
349
+ "step": 50
350
+ },
351
+ {
352
+ "epoch": 1.32,
353
+ "learning_rate": 5e-05,
354
+ "loss": 1.4877,
355
+ "step": 51
356
+ },
357
+ {
358
+ "epoch": 1.34,
359
+ "learning_rate": 5e-05,
360
+ "loss": 1.4066,
361
+ "step": 52
362
+ },
363
+ {
364
+ "epoch": 1.37,
365
+ "learning_rate": 5e-05,
366
+ "loss": 1.3559,
367
+ "step": 53
368
+ },
369
+ {
370
+ "epoch": 1.4,
371
+ "learning_rate": 5e-05,
372
+ "loss": 1.5591,
373
+ "step": 54
374
+ },
375
+ {
376
+ "epoch": 1.42,
377
+ "learning_rate": 5e-05,
378
+ "loss": 1.4942,
379
+ "step": 55
380
+ },
381
+ {
382
+ "epoch": 1.45,
383
+ "learning_rate": 5e-05,
384
+ "loss": 1.4685,
385
+ "step": 56
386
+ },
387
+ {
388
+ "epoch": 1.47,
389
+ "learning_rate": 5e-05,
390
+ "loss": 1.4165,
391
+ "step": 57
392
+ },
393
+ {
394
+ "epoch": 1.5,
395
+ "learning_rate": 5e-05,
396
+ "loss": 1.3995,
397
+ "step": 58
398
+ },
399
+ {
400
+ "epoch": 1.53,
401
+ "learning_rate": 5e-05,
402
+ "loss": 1.3931,
403
+ "step": 59
404
+ },
405
+ {
406
+ "epoch": 1.55,
407
+ "learning_rate": 5e-05,
408
+ "loss": 1.4234,
409
+ "step": 60
410
+ },
411
+ {
412
+ "epoch": 1.55,
413
+ "eval_loss": 2.0228564739227295,
414
+ "eval_runtime": 119.5715,
415
+ "eval_samples_per_second": 1.556,
416
+ "eval_steps_per_second": 0.778,
417
+ "step": 60
418
+ },
419
+ {
420
+ "epoch": 1.58,
421
+ "learning_rate": 5e-05,
422
+ "loss": 1.4732,
423
+ "step": 61
424
+ },
425
+ {
426
+ "epoch": 1.6,
427
+ "learning_rate": 5e-05,
428
+ "loss": 1.4349,
429
+ "step": 62
430
+ },
431
+ {
432
+ "epoch": 1.63,
433
+ "learning_rate": 5e-05,
434
+ "loss": 1.4548,
435
+ "step": 63
436
+ },
437
+ {
438
+ "epoch": 1.65,
439
+ "learning_rate": 5e-05,
440
+ "loss": 1.48,
441
+ "step": 64
442
+ },
443
+ {
444
+ "epoch": 1.68,
445
+ "learning_rate": 5e-05,
446
+ "loss": 1.3789,
447
+ "step": 65
448
+ },
449
+ {
450
+ "epoch": 1.71,
451
+ "learning_rate": 5e-05,
452
+ "loss": 1.3915,
453
+ "step": 66
454
+ },
455
+ {
456
+ "epoch": 1.73,
457
+ "learning_rate": 5e-05,
458
+ "loss": 1.3789,
459
+ "step": 67
460
+ },
461
+ {
462
+ "epoch": 1.76,
463
+ "learning_rate": 5e-05,
464
+ "loss": 1.5206,
465
+ "step": 68
466
+ },
467
+ {
468
+ "epoch": 1.78,
469
+ "learning_rate": 5e-05,
470
+ "loss": 1.4851,
471
+ "step": 69
472
+ },
473
+ {
474
+ "epoch": 1.81,
475
+ "learning_rate": 5e-05,
476
+ "loss": 1.5251,
477
+ "step": 70
478
+ },
479
+ {
480
+ "epoch": 1.81,
481
+ "eval_loss": 2.0199856758117676,
482
+ "eval_runtime": 119.5994,
483
+ "eval_samples_per_second": 1.555,
484
+ "eval_steps_per_second": 0.778,
485
+ "step": 70
486
+ },
487
+ {
488
+ "epoch": 1.84,
489
+ "learning_rate": 5e-05,
490
+ "loss": 1.4152,
491
+ "step": 71
492
+ },
493
+ {
494
+ "epoch": 1.86,
495
+ "learning_rate": 5e-05,
496
+ "loss": 1.4262,
497
+ "step": 72
498
+ },
499
+ {
500
+ "epoch": 1.89,
501
+ "learning_rate": 5e-05,
502
+ "loss": 1.5563,
503
+ "step": 73
504
+ },
505
+ {
506
+ "epoch": 1.91,
507
+ "learning_rate": 5e-05,
508
+ "loss": 1.42,
509
+ "step": 74
510
+ },
511
+ {
512
+ "epoch": 1.94,
513
+ "learning_rate": 5e-05,
514
+ "loss": 1.4407,
515
+ "step": 75
516
+ },
517
+ {
518
+ "epoch": 1.96,
519
+ "learning_rate": 5e-05,
520
+ "loss": 1.3953,
521
+ "step": 76
522
+ },
523
+ {
524
+ "epoch": 1.99,
525
+ "learning_rate": 5e-05,
526
+ "loss": 1.4186,
527
+ "step": 77
528
+ },
529
+ {
530
+ "epoch": 2.02,
531
+ "learning_rate": 5e-05,
532
+ "loss": 1.3863,
533
+ "step": 78
534
+ },
535
+ {
536
+ "epoch": 2.04,
537
+ "learning_rate": 5e-05,
538
+ "loss": 1.3907,
539
+ "step": 79
540
+ },
541
+ {
542
+ "epoch": 2.07,
543
+ "learning_rate": 5e-05,
544
+ "loss": 1.5147,
545
+ "step": 80
546
+ },
547
+ {
548
+ "epoch": 2.07,
549
+ "eval_loss": 2.0174262523651123,
550
+ "eval_runtime": 119.6245,
551
+ "eval_samples_per_second": 1.555,
552
+ "eval_steps_per_second": 0.777,
553
+ "step": 80
554
+ },
555
+ {
556
+ "epoch": 2.09,
557
+ "learning_rate": 5e-05,
558
+ "loss": 1.3683,
559
+ "step": 81
560
+ },
561
+ {
562
+ "epoch": 2.12,
563
+ "learning_rate": 5e-05,
564
+ "loss": 1.4289,
565
+ "step": 82
566
+ },
567
+ {
568
+ "epoch": 2.15,
569
+ "learning_rate": 5e-05,
570
+ "loss": 1.4033,
571
+ "step": 83
572
+ },
573
+ {
574
+ "epoch": 2.17,
575
+ "learning_rate": 5e-05,
576
+ "loss": 1.384,
577
+ "step": 84
578
+ },
579
+ {
580
+ "epoch": 2.2,
581
+ "learning_rate": 5e-05,
582
+ "loss": 1.3379,
583
+ "step": 85
584
+ },
585
+ {
586
+ "epoch": 2.22,
587
+ "learning_rate": 5e-05,
588
+ "loss": 1.3916,
589
+ "step": 86
590
+ },
591
+ {
592
+ "epoch": 2.25,
593
+ "learning_rate": 5e-05,
594
+ "loss": 1.5267,
595
+ "step": 87
596
+ },
597
+ {
598
+ "epoch": 2.27,
599
+ "learning_rate": 5e-05,
600
+ "loss": 1.4465,
601
+ "step": 88
602
+ },
603
+ {
604
+ "epoch": 2.3,
605
+ "learning_rate": 5e-05,
606
+ "loss": 1.4349,
607
+ "step": 89
608
+ },
609
+ {
610
+ "epoch": 2.33,
611
+ "learning_rate": 5e-05,
612
+ "loss": 1.4927,
613
+ "step": 90
614
+ },
615
+ {
616
+ "epoch": 2.33,
617
+ "eval_loss": 2.0155017375946045,
618
+ "eval_runtime": 119.6021,
619
+ "eval_samples_per_second": 1.555,
620
+ "eval_steps_per_second": 0.778,
621
+ "step": 90
622
+ }
623
+ ],
624
+ "logging_steps": 1,
625
+ "max_steps": 114,
626
+ "num_train_epochs": 3,
627
+ "save_steps": 10,
628
+ "total_flos": 4.119054979969843e+17,
629
+ "trial_name": null,
630
+ "trial_params": null
631
+ }
checkpoint-90/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca362c6e6a1bbe2523b2190a501c92d6dbb3db6186bef551619d83852cca3df1
3
+ size 4219