saqidr commited on
Commit
4aec20c
·
verified ·
1 Parent(s): 8485a44

Training in progress, step 500

Browse files
run-10/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176
run-10/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176
run-10/checkpoint-2000/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.4022,
16
- "eval_samples_per_second": 2210.798,
17
- "eval_steps_per_second": 46.355,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
- "eval_runtime": 1.4384,
32
- "eval_samples_per_second": 2155.104,
33
- "eval_steps_per_second": 45.188,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
- "eval_runtime": 1.3662,
41
- "eval_samples_per_second": 2269.06,
42
- "eval_steps_per_second": 47.577,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
- "eval_runtime": 1.3887,
57
- "eval_samples_per_second": 2232.374,
58
- "eval_steps_per_second": 46.808,
59
  "step": 1272
60
  },
61
  {
@@ -69,18 +69,18 @@
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
- "eval_runtime": 1.4597,
73
- "eval_samples_per_second": 2123.769,
74
- "eval_steps_per_second": 44.531,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
- "eval_runtime": 1.3818,
82
- "eval_samples_per_second": 2243.399,
83
- "eval_steps_per_second": 47.039,
84
  "step": 1908
85
  },
86
  {
@@ -108,11 +108,11 @@
108
  "attributes": {}
109
  }
110
  },
111
- "total_flos": 722985755140992.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.7807938087018197,
116
  "num_train_epochs": 10,
117
  "temperature": 2
118
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2498,
16
+ "eval_samples_per_second": 2480.484,
17
+ "eval_steps_per_second": 52.01,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
+ "eval_runtime": 1.253,
32
+ "eval_samples_per_second": 2473.996,
33
+ "eval_steps_per_second": 51.874,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
+ "eval_runtime": 1.2561,
41
+ "eval_samples_per_second": 2467.887,
42
+ "eval_steps_per_second": 51.746,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
+ "eval_runtime": 1.257,
57
+ "eval_samples_per_second": 2466.139,
58
+ "eval_steps_per_second": 51.709,
59
  "step": 1272
60
  },
61
  {
 
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
+ "eval_runtime": 1.2556,
73
+ "eval_samples_per_second": 2468.853,
74
+ "eval_steps_per_second": 51.766,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
+ "eval_runtime": 1.2547,
82
+ "eval_samples_per_second": 2470.711,
83
+ "eval_steps_per_second": 51.805,
84
  "step": 1908
85
  },
86
  {
 
108
  "attributes": {}
109
  }
110
  },
111
+ "total_flos": 603995663965932.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.9803719554379057,
116
  "num_train_epochs": 10,
117
  "temperature": 2
118
  }
run-10/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176
run-10/checkpoint-2500/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.4022,
16
- "eval_samples_per_second": 2210.798,
17
- "eval_steps_per_second": 46.355,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
- "eval_runtime": 1.4384,
32
- "eval_samples_per_second": 2155.104,
33
- "eval_steps_per_second": 45.188,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
- "eval_runtime": 1.3662,
41
- "eval_samples_per_second": 2269.06,
42
- "eval_steps_per_second": 47.577,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
- "eval_runtime": 1.3887,
57
- "eval_samples_per_second": 2232.374,
58
- "eval_steps_per_second": 46.808,
59
  "step": 1272
60
  },
61
  {
@@ -69,18 +69,18 @@
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
- "eval_runtime": 1.4597,
73
- "eval_samples_per_second": 2123.769,
74
- "eval_steps_per_second": 44.531,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
- "eval_runtime": 1.3818,
82
- "eval_samples_per_second": 2243.399,
83
- "eval_steps_per_second": 47.039,
84
  "step": 1908
85
  },
86
  {
@@ -94,9 +94,9 @@
94
  "epoch": 7.0,
95
  "eval_accuracy": 0.9319354838709677,
96
  "eval_loss": 0.03430590778589249,
97
- "eval_runtime": 1.3689,
98
- "eval_samples_per_second": 2264.53,
99
- "eval_steps_per_second": 47.482,
100
  "step": 2226
101
  },
102
  {
@@ -124,11 +124,11 @@
124
  "attributes": {}
125
  }
126
  },
127
- "total_flos": 853150315276476.0,
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
- "alpha": 0.7807938087018197,
132
  "num_train_epochs": 10,
133
  "temperature": 2
134
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2498,
16
+ "eval_samples_per_second": 2480.484,
17
+ "eval_steps_per_second": 52.01,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
+ "eval_runtime": 1.253,
32
+ "eval_samples_per_second": 2473.996,
33
+ "eval_steps_per_second": 51.874,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
+ "eval_runtime": 1.2561,
41
+ "eval_samples_per_second": 2467.887,
42
+ "eval_steps_per_second": 51.746,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
+ "eval_runtime": 1.257,
57
+ "eval_samples_per_second": 2466.139,
58
+ "eval_steps_per_second": 51.709,
59
  "step": 1272
60
  },
61
  {
 
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
+ "eval_runtime": 1.2556,
73
+ "eval_samples_per_second": 2468.853,
74
+ "eval_steps_per_second": 51.766,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
+ "eval_runtime": 1.2547,
82
+ "eval_samples_per_second": 2470.711,
83
+ "eval_steps_per_second": 51.805,
84
  "step": 1908
85
  },
86
  {
 
94
  "epoch": 7.0,
95
  "eval_accuracy": 0.9319354838709677,
96
  "eval_loss": 0.03430590778589249,
97
+ "eval_runtime": 1.2578,
98
+ "eval_samples_per_second": 2464.687,
99
+ "eval_steps_per_second": 51.679,
100
  "step": 2226
101
  },
102
  {
 
124
  "attributes": {}
125
  }
126
  },
127
+ "total_flos": 734160224101416.0,
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
+ "alpha": 0.9803719554379057,
132
  "num_train_epochs": 10,
133
  "temperature": 2
134
  }
run-10/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176
run-10/checkpoint-3000/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.4022,
16
- "eval_samples_per_second": 2210.798,
17
- "eval_steps_per_second": 46.355,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
- "eval_runtime": 1.4384,
32
- "eval_samples_per_second": 2155.104,
33
- "eval_steps_per_second": 45.188,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
- "eval_runtime": 1.3662,
41
- "eval_samples_per_second": 2269.06,
42
- "eval_steps_per_second": 47.577,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
- "eval_runtime": 1.3887,
57
- "eval_samples_per_second": 2232.374,
58
- "eval_steps_per_second": 46.808,
59
  "step": 1272
60
  },
61
  {
@@ -69,18 +69,18 @@
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
- "eval_runtime": 1.4597,
73
- "eval_samples_per_second": 2123.769,
74
- "eval_steps_per_second": 44.531,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
- "eval_runtime": 1.3818,
82
- "eval_samples_per_second": 2243.399,
83
- "eval_steps_per_second": 47.039,
84
  "step": 1908
85
  },
86
  {
@@ -94,9 +94,9 @@
94
  "epoch": 7.0,
95
  "eval_accuracy": 0.9319354838709677,
96
  "eval_loss": 0.03430590778589249,
97
- "eval_runtime": 1.3689,
98
- "eval_samples_per_second": 2264.53,
99
- "eval_steps_per_second": 47.482,
100
  "step": 2226
101
  },
102
  {
@@ -110,18 +110,18 @@
110
  "epoch": 8.0,
111
  "eval_accuracy": 0.9332258064516129,
112
  "eval_loss": 0.032527994364500046,
113
- "eval_runtime": 1.361,
114
- "eval_samples_per_second": 2277.734,
115
- "eval_steps_per_second": 47.759,
116
  "step": 2544
117
  },
118
  {
119
  "epoch": 9.0,
120
  "eval_accuracy": 0.9325806451612904,
121
  "eval_loss": 0.03162752836942673,
122
- "eval_runtime": 1.3732,
123
- "eval_samples_per_second": 2257.525,
124
- "eval_steps_per_second": 47.335,
125
  "step": 2862
126
  },
127
  {
@@ -149,11 +149,11 @@
149
  "attributes": {}
150
  }
151
  },
152
- "total_flos": 982733271748452.0,
153
  "train_batch_size": 48,
154
  "trial_name": null,
155
  "trial_params": {
156
- "alpha": 0.7807938087018197,
157
  "num_train_epochs": 10,
158
  "temperature": 2
159
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2498,
16
+ "eval_samples_per_second": 2480.484,
17
+ "eval_steps_per_second": 52.01,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
+ "eval_runtime": 1.253,
32
+ "eval_samples_per_second": 2473.996,
33
+ "eval_steps_per_second": 51.874,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
+ "eval_runtime": 1.2561,
41
+ "eval_samples_per_second": 2467.887,
42
+ "eval_steps_per_second": 51.746,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
+ "eval_runtime": 1.257,
57
+ "eval_samples_per_second": 2466.139,
58
+ "eval_steps_per_second": 51.709,
59
  "step": 1272
60
  },
61
  {
 
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9241935483870968,
71
  "eval_loss": 0.04163844883441925,
72
+ "eval_runtime": 1.2556,
73
+ "eval_samples_per_second": 2468.853,
74
+ "eval_steps_per_second": 51.766,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9283870967741935,
80
  "eval_loss": 0.037362001836299896,
81
+ "eval_runtime": 1.2547,
82
+ "eval_samples_per_second": 2470.711,
83
+ "eval_steps_per_second": 51.805,
84
  "step": 1908
85
  },
86
  {
 
94
  "epoch": 7.0,
95
  "eval_accuracy": 0.9319354838709677,
96
  "eval_loss": 0.03430590778589249,
97
+ "eval_runtime": 1.2578,
98
+ "eval_samples_per_second": 2464.687,
99
+ "eval_steps_per_second": 51.679,
100
  "step": 2226
101
  },
102
  {
 
110
  "epoch": 8.0,
111
  "eval_accuracy": 0.9332258064516129,
112
  "eval_loss": 0.032527994364500046,
113
+ "eval_runtime": 1.2604,
114
+ "eval_samples_per_second": 2459.579,
115
+ "eval_steps_per_second": 51.572,
116
  "step": 2544
117
  },
118
  {
119
  "epoch": 9.0,
120
  "eval_accuracy": 0.9325806451612904,
121
  "eval_loss": 0.03162752836942673,
122
+ "eval_runtime": 1.2585,
123
+ "eval_samples_per_second": 2463.197,
124
+ "eval_steps_per_second": 51.648,
125
  "step": 2862
126
  },
127
  {
 
149
  "attributes": {}
150
  }
151
  },
152
+ "total_flos": 863743180573392.0,
153
  "train_batch_size": 48,
154
  "trial_name": null,
155
  "trial_params": {
156
+ "alpha": 0.9803719554379057,
157
  "num_train_epochs": 10,
158
  "temperature": 2
159
  }
run-10/checkpoint-3000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176
run-11/checkpoint-1000/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.3612,
16
- "eval_samples_per_second": 2277.395,
17
- "eval_steps_per_second": 47.752,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
- "eval_runtime": 1.356,
32
- "eval_samples_per_second": 2286.155,
33
- "eval_steps_per_second": 47.936,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
- "eval_runtime": 1.3531,
41
- "eval_samples_per_second": 2291.022,
42
- "eval_steps_per_second": 48.038,
43
  "step": 954
44
  },
45
  {
@@ -71,7 +71,7 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.8252766996315095,
75
  "num_train_epochs": 10,
76
  "temperature": 2
77
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2636,
16
+ "eval_samples_per_second": 2453.311,
17
+ "eval_steps_per_second": 51.44,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
+ "eval_runtime": 1.2607,
32
+ "eval_samples_per_second": 2458.94,
33
+ "eval_steps_per_second": 51.558,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
+ "eval_runtime": 1.2596,
41
+ "eval_samples_per_second": 2461.115,
42
+ "eval_steps_per_second": 51.604,
43
  "step": 954
44
  },
45
  {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.9689147754277356,
75
  "num_train_epochs": 10,
76
  "temperature": 2
77
  }
run-11/checkpoint-1500/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.3612,
16
- "eval_samples_per_second": 2277.395,
17
- "eval_steps_per_second": 47.752,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
- "eval_runtime": 1.356,
32
- "eval_samples_per_second": 2286.155,
33
- "eval_steps_per_second": 47.936,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
- "eval_runtime": 1.3531,
41
- "eval_samples_per_second": 2291.022,
42
- "eval_steps_per_second": 48.038,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
- "eval_runtime": 1.3624,
57
- "eval_samples_per_second": 2275.392,
58
- "eval_steps_per_second": 47.71,
59
  "step": 1272
60
  },
61
  {
@@ -87,7 +87,7 @@
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.8252766996315095,
91
  "num_train_epochs": 10,
92
  "temperature": 2
93
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2636,
16
+ "eval_samples_per_second": 2453.311,
17
+ "eval_steps_per_second": 51.44,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8451612903225807,
30
  "eval_loss": 0.13748767971992493,
31
+ "eval_runtime": 1.2607,
32
+ "eval_samples_per_second": 2458.94,
33
+ "eval_steps_per_second": 51.558,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8974193548387097,
39
  "eval_loss": 0.07043754309415817,
40
+ "eval_runtime": 1.2596,
41
+ "eval_samples_per_second": 2461.115,
42
+ "eval_steps_per_second": 51.604,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.9145161290322581,
55
  "eval_loss": 0.05076972395181656,
56
+ "eval_runtime": 1.3318,
57
+ "eval_samples_per_second": 2327.592,
58
+ "eval_steps_per_second": 48.804,
59
  "step": 1272
60
  },
61
  {
 
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.9689147754277356,
91
  "num_train_epochs": 10,
92
  "temperature": 2
93
  }
run-11/checkpoint-500/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
- "eval_runtime": 1.3612,
16
- "eval_samples_per_second": 2277.395,
17
- "eval_steps_per_second": 47.752,
18
  "step": 318
19
  },
20
  {
@@ -46,7 +46,7 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.8252766996315095,
50
  "num_train_epochs": 10,
51
  "temperature": 2
52
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.6741935483870968,
14
  "eval_loss": 0.4054252803325653,
15
+ "eval_runtime": 1.2636,
16
+ "eval_samples_per_second": 2453.311,
17
+ "eval_steps_per_second": 51.44,
18
  "step": 318
19
  },
20
  {
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.9689147754277356,
50
  "num_train_epochs": 10,
51
  "temperature": 2
52
  }
run-11/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53b16b25375514918da67404b3403c850c15a69162ae13812852117b14e664d6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c3d81cf1e94136cf69f7afe09173f6fff54ee913216fd1109c21e652a8bd239
3
  size 5176