saqidr commited on
Commit
e5c6038
·
verified ·
1 Parent(s): b9a3361

Training in progress, step 2000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94dd719ae3e79ab6db163a2c0423be61fef278b4f047585040f2170742065552
3
  size 268290900
run-19/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3887df0e32383a82f3b0357fdcf7cac716fe58c3bca0cb51a05145c01acb312e
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b03a1ebe26778ef7584f3b5ce91a82d5e0801ce12ac307b0ba8cfa388c6a7e6
3
  size 268290900
run-19/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b896ce92b3581b32be5c7e0153413b6337020bc73a2697e90f53af4c15a8f76c
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a75832910e0d6e49b011d70bbfb59a4e505d4daa94e60189c9b25f32a120c8d
3
  size 536643898
run-19/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c794bc4c67ef18245dd516031ce405ab557e4d551d225d8dd1e1abc0f2be8e33
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a4307cca951ef6d42c7f184774e8b1259c78b2bf130b8443ec0b62f6dbf689
3
  size 1064
run-19/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ad3e5230508df5eafe3b8cc807f8ce5e9543207331b4b6a4fd8cdd4dbc0b67
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-19/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c06f4ccbf57c650ccdb1224936e67d8fc278df187f4e6c1f87e0ad04c278aaa8
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c373ab5344044dbcedc500f5f4ed80aa030fd333733b5c8b253cc11ad7a863a1
3
  size 268290900
run-19/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce355d2fe1d9a3c0fd09dffee8abf0d60c2605e6199ead5d1e08c5bfb4b2a13f
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:651b5537b74b24320558f06d8d9ca5688caf04f8adec3300e852e5928a7a3c40
3
  size 536643898
run-19/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71453465aad25f4c5a0a948496c64b1f74df850abda497954afe3695c00756ee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693ec081eb0fa0af7d13e289edb194450dd19fdada23346e2af4292ea228535f
3
  size 1064
run-19/checkpoint-1500/trainer_state.json CHANGED
@@ -10,66 +10,66 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6229032258064516,
14
- "eval_loss": 0.2478133589029312,
15
- "eval_runtime": 1.3731,
16
- "eval_samples_per_second": 2257.605,
17
- "eval_steps_per_second": 47.337,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6151512861251831,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.398,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.844516129032258,
30
- "eval_loss": 0.10843393951654434,
31
- "eval_runtime": 1.3751,
32
- "eval_samples_per_second": 2254.368,
33
- "eval_steps_per_second": 47.269,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.89,
39
- "eval_loss": 0.06599755585193634,
40
- "eval_runtime": 1.3802,
41
- "eval_samples_per_second": 2246.112,
42
- "eval_steps_per_second": 47.096,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5515937209129333,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1268,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9038709677419355,
55
- "eval_loss": 0.048167161643505096,
56
- "eval_runtime": 1.3675,
57
- "eval_samples_per_second": 2266.877,
58
- "eval_steps_per_second": 47.531,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3498728275299072,
64
- "learning_rate": 1.0566037735849058e-05,
65
- "loss": 0.0739,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 3180,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 10,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
@@ -83,12 +83,12 @@
83
  "attributes": {}
84
  }
85
  },
86
- "total_flos": 450371359983132.0,
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.878459838807545,
91
- "num_train_epochs": 10,
92
  "temperature": 4
93
  }
94
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.62,
14
+ "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.3797,
16
+ "eval_samples_per_second": 2246.797,
17
+ "eval_steps_per_second": 47.11,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6149903535842896,
23
+ "learning_rate": 1.650593990216632e-05,
24
+ "loss": 0.3991,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8422580645161291,
30
+ "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.3583,
32
+ "eval_samples_per_second": 2282.186,
33
+ "eval_steps_per_second": 47.852,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8883870967741936,
39
+ "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.3701,
41
+ "eval_samples_per_second": 2262.648,
42
+ "eval_steps_per_second": 47.443,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5547620058059692,
48
+ "learning_rate": 1.3011879804332637e-05,
49
+ "loss": 0.1288,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.902258064516129,
55
+ "eval_loss": 0.04977014288306236,
56
+ "eval_runtime": 1.3689,
57
+ "eval_samples_per_second": 2264.534,
58
+ "eval_steps_per_second": 47.482,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3585783839225769,
64
+ "learning_rate": 9.517819706498952e-06,
65
+ "loss": 0.076,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
+ "max_steps": 2862,
71
  "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 9,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
 
83
  "attributes": {}
84
  }
85
  },
86
+ "total_flos": 427228618667844.0,
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.8975938459167363,
91
+ "num_train_epochs": 9,
92
  "temperature": 4
93
  }
94
  }
run-19/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ad3e5230508df5eafe3b8cc807f8ce5e9543207331b4b6a4fd8cdd4dbc0b67
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-19/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68e477ef0cac5c502dfc55efabbd37889ae5459f261261350bad1f0a1af2d813
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94dd719ae3e79ab6db163a2c0423be61fef278b4f047585040f2170742065552
3
  size 268290900
run-19/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93a68cea6bab40ac40103238390c1c1c318905d57e2e10700933f42e3b73a977
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5783a258c7988741343c6e90bad6b3a278da69f943ad33c6fcd9b700e55572b0
3
  size 536643898
run-19/checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43d45918bfeb622ba899798234ec7646c7a90dd3f5771db086f5b7dee1d5a530
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d1a8a9215f18c672cf798b531337b9e0636922d8c4373e76080f8fec19ceb2
3
  size 1064
run-19/checkpoint-2000/trainer_state.json CHANGED
@@ -10,91 +10,91 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6229032258064516,
14
- "eval_loss": 0.2478133589029312,
15
- "eval_runtime": 1.3731,
16
- "eval_samples_per_second": 2257.605,
17
- "eval_steps_per_second": 47.337,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6151512861251831,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.398,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.844516129032258,
30
- "eval_loss": 0.10843393951654434,
31
- "eval_runtime": 1.3751,
32
- "eval_samples_per_second": 2254.368,
33
- "eval_steps_per_second": 47.269,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.89,
39
- "eval_loss": 0.06599755585193634,
40
- "eval_runtime": 1.3802,
41
- "eval_samples_per_second": 2246.112,
42
- "eval_steps_per_second": 47.096,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5515937209129333,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1268,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9038709677419355,
55
- "eval_loss": 0.048167161643505096,
56
- "eval_runtime": 1.3675,
57
- "eval_samples_per_second": 2266.877,
58
- "eval_steps_per_second": 47.531,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3498728275299072,
64
- "learning_rate": 1.0566037735849058e-05,
65
- "loss": 0.0739,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.9174193548387096,
71
- "eval_loss": 0.03836863115429878,
72
- "eval_runtime": 1.3687,
73
- "eval_samples_per_second": 2264.915,
74
- "eval_steps_per_second": 47.49,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9238709677419354,
80
- "eval_loss": 0.03342122584581375,
81
- "eval_runtime": 1.369,
82
- "eval_samples_per_second": 2264.444,
83
- "eval_steps_per_second": 47.48,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.2557421624660492,
89
- "learning_rate": 7.421383647798742e-06,
90
- "loss": 0.0563,
91
  "step": 2000
92
  }
93
  ],
94
  "logging_steps": 500,
95
- "max_steps": 3180,
96
  "num_input_tokens_seen": 0,
97
- "num_train_epochs": 10,
98
  "save_steps": 500,
99
  "stateful_callbacks": {
100
  "TrainerControl": {
@@ -108,12 +108,12 @@
108
  "attributes": {}
109
  }
110
  },
111
- "total_flos": 579993747211956.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.878459838807545,
116
- "num_train_epochs": 10,
117
  "temperature": 4
118
  }
119
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.62,
14
+ "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.3797,
16
+ "eval_samples_per_second": 2246.797,
17
+ "eval_steps_per_second": 47.11,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6149903535842896,
23
+ "learning_rate": 1.650593990216632e-05,
24
+ "loss": 0.3991,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8422580645161291,
30
+ "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.3583,
32
+ "eval_samples_per_second": 2282.186,
33
+ "eval_steps_per_second": 47.852,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8883870967741936,
39
+ "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.3701,
41
+ "eval_samples_per_second": 2262.648,
42
+ "eval_steps_per_second": 47.443,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5547620058059692,
48
+ "learning_rate": 1.3011879804332637e-05,
49
+ "loss": 0.1288,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.902258064516129,
55
+ "eval_loss": 0.04977014288306236,
56
+ "eval_runtime": 1.3689,
57
+ "eval_samples_per_second": 2264.534,
58
+ "eval_steps_per_second": 47.482,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3585783839225769,
64
+ "learning_rate": 9.517819706498952e-06,
65
+ "loss": 0.076,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9167741935483871,
71
+ "eval_loss": 0.039991483092308044,
72
+ "eval_runtime": 1.3644,
73
+ "eval_samples_per_second": 2272.037,
74
+ "eval_steps_per_second": 47.639,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9232258064516129,
80
+ "eval_loss": 0.034951966255903244,
81
+ "eval_runtime": 1.3576,
82
+ "eval_samples_per_second": 2283.496,
83
+ "eval_steps_per_second": 47.88,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.27236634492874146,
89
+ "learning_rate": 6.02375960866527e-06,
90
+ "loss": 0.0585,
91
  "step": 2000
92
  }
93
  ],
94
  "logging_steps": 500,
95
+ "max_steps": 2862,
96
  "num_input_tokens_seen": 0,
97
+ "num_train_epochs": 9,
98
  "save_steps": 500,
99
  "stateful_callbacks": {
100
  "TrainerControl": {
 
108
  "attributes": {}
109
  }
110
  },
111
+ "total_flos": 556851005896668.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.8975938459167363,
116
+ "num_train_epochs": 9,
117
  "temperature": 4
118
  }
119
  }
run-19/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ad3e5230508df5eafe3b8cc807f8ce5e9543207331b4b6a4fd8cdd4dbc0b67
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-19/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42c0e227629e2036d030b67c2e4414cd3185c70af8bb3a2e6af908313c2b143d
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c523e1752fda46381be0344a31afc01362450b48bbf8705a2c2bdc36cbf5d0d2
3
  size 268290900
run-19/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f632c364b0e8bb9c5aaa1a1d92dc11251e021768e8319ab85e38045e3c31cf61
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d08d8a9628761ec315d94e75629b6b7ab53f4005f41e207ed0e3588ae544ab03
3
  size 536643898
run-19/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98041bd7cae455426e290a1a0ee683bd5dd30893f7451fec3a464ae8995b17e4
3
  size 1064
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1255ca729d01717aa414fa6756e0d6f9030540b62191575288bf9e49b890e64b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176