Kira-Floris commited on
Commit
4bd3b3e
1 Parent(s): 0c60331

Training in progress, epoch 8

Browse files
logs/events.out.tfevents.1719305887.852b1e905a9a.223.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c91cfc7c3ff26707ffc20d2eeaa9e3f6123a16f616fc80e23516a73b694763d
3
- size 8634
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9712295d445764a17d0b9ea3d073188b8a0c340524e920a8f682615d175fdadd
3
+ size 9168
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86bafd1ad69646e1b2b90542839513ba91c261f65984761709b70377635a061a
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0256dbbbe2c5bbfa0e2ed08ea537f2eaa57f31e70581600481ae42aceef1c20b
3
  size 17549312
run-0/checkpoint-4216/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7e49bfe6cfd2d96a860484be039fa136e6054d9cecdbb8fbda97a759707d985
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0256dbbbe2c5bbfa0e2ed08ea537f2eaa57f31e70581600481ae42aceef1c20b
3
  size 17549312
run-0/checkpoint-4216/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6325c8e0b4ceb30845b52ec38bc93b7ffd274b42f395494ec024deeb4de7775b
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0724687bc06517df25ee266dbb2d9e98ff4202787f122997f9cc3080e3cc5959
3
  size 35123898
run-0/checkpoint-4216/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b973f0b00352a0713dda5aec6e5ca7daebeb18a4763bbd29ea4b99c134e7365
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e77749fc39d449c66a2d4dd7f37e3d2bd1c2bc0057e14632da6fbbef1bec35
3
  size 1064
run-0/checkpoint-4216/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8302752293577982,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-4216",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
@@ -10,137 +10,137 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 16.55626678466797,
14
- "learning_rate": 5.1383294230414005e-05,
15
- "loss": 1.5907,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8061926605504587,
21
- "eval_loss": 1.2464169263839722,
22
- "eval_runtime": 2.4007,
23
- "eval_samples_per_second": 363.232,
24
- "eval_steps_per_second": 2.916,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": Infinity,
30
- "learning_rate": 4.405675241279466e-05,
31
- "loss": 0.9038,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.8107798165137615,
37
- "eval_loss": 1.1235342025756836,
38
- "eval_runtime": 2.3533,
39
- "eval_samples_per_second": 370.54,
40
- "eval_steps_per_second": 2.975,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 48.55740737915039,
46
- "learning_rate": 3.67162818084498e-05,
47
- "loss": 0.6946,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.8176605504587156,
53
- "eval_loss": 1.1027159690856934,
54
- "eval_runtime": 2.3719,
55
- "eval_samples_per_second": 367.637,
56
- "eval_steps_per_second": 2.951,
57
  "step": 1581
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 16.267213821411133,
62
- "learning_rate": 2.9375811204104943e-05,
63
- "loss": 0.595,
64
  "step": 2108
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.8256880733944955,
69
- "eval_loss": 1.0700539350509644,
70
- "eval_runtime": 2.3638,
71
- "eval_samples_per_second": 368.893,
72
- "eval_steps_per_second": 2.961,
73
  "step": 2108
74
  },
75
  {
76
  "epoch": 5.0,
77
- "grad_norm": 10.482604026794434,
78
- "learning_rate": 2.2035340599760084e-05,
79
- "loss": 0.5308,
80
  "step": 2635
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_accuracy": 0.8291284403669725,
85
- "eval_loss": 1.1128482818603516,
86
- "eval_runtime": 2.3733,
87
- "eval_samples_per_second": 367.419,
88
- "eval_steps_per_second": 2.949,
89
  "step": 2635
90
  },
91
  {
92
  "epoch": 6.0,
93
- "grad_norm": 24.14480209350586,
94
- "learning_rate": 1.4694869995415228e-05,
95
- "loss": 0.4895,
96
  "step": 3162
97
  },
98
  {
99
  "epoch": 6.0,
100
- "eval_accuracy": 0.8291284403669725,
101
- "eval_loss": 1.1355042457580566,
102
- "eval_runtime": 2.3485,
103
- "eval_samples_per_second": 371.294,
104
- "eval_steps_per_second": 2.981,
105
  "step": 3162
106
  },
107
  {
108
  "epoch": 7.0,
109
- "grad_norm": 25.623193740844727,
110
- "learning_rate": 7.35439939107037e-06,
111
- "loss": 0.4606,
112
  "step": 3689
113
  },
114
  {
115
  "epoch": 7.0,
116
- "eval_accuracy": 0.8268348623853211,
117
- "eval_loss": 1.1558915376663208,
118
- "eval_runtime": 2.3654,
119
- "eval_samples_per_second": 368.653,
120
- "eval_steps_per_second": 2.959,
121
  "step": 3689
122
  },
123
  {
124
  "epoch": 8.0,
125
- "grad_norm": 11.750224113464355,
126
- "learning_rate": 1.3928786725512063e-08,
127
- "loss": 0.449,
128
  "step": 4216
129
  },
130
  {
131
  "epoch": 8.0,
132
- "eval_accuracy": 0.8302752293577982,
133
- "eval_loss": 1.155808687210083,
134
- "eval_runtime": 2.3688,
135
- "eval_samples_per_second": 368.125,
136
- "eval_steps_per_second": 2.955,
137
  "step": 4216
138
  }
139
  ],
140
  "logging_steps": 500,
141
- "max_steps": 4216,
142
  "num_input_tokens_seen": 0,
143
- "num_train_epochs": 8,
144
  "save_steps": 500,
145
  "stateful_callbacks": {
146
  "TrainerControl": {
@@ -149,7 +149,7 @@
149
  "should_evaluate": false,
150
  "should_log": false,
151
  "should_save": true,
152
- "should_training_stop": true
153
  },
154
  "attributes": {}
155
  }
@@ -158,9 +158,9 @@
158
  "train_batch_size": 128,
159
  "trial_name": null,
160
  "trial_params": {
161
- "alpha": 0.5167874928728581,
162
- "learning_rate": 5.872376483475886e-05,
163
- "num_train_epochs": 8,
164
- "temperature": 5
165
  }
166
  }
 
1
  {
2
+ "best_metric": 0.7958715596330275,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-4216",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 5.95961856842041,
14
+ "learning_rate": 9.55389368279823e-06,
15
+ "loss": 1.5369,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7339449541284404,
21
+ "eval_loss": 1.2773902416229248,
22
+ "eval_runtime": 2.5957,
23
+ "eval_samples_per_second": 335.939,
24
+ "eval_steps_per_second": 2.697,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 15.252978324890137,
30
+ "learning_rate": 8.492349940265094e-06,
31
+ "loss": 1.2159,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.7786697247706422,
37
+ "eval_loss": 1.022659182548523,
38
+ "eval_runtime": 2.5741,
39
+ "eval_samples_per_second": 338.753,
40
+ "eval_steps_per_second": 2.719,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 18.01114845275879,
46
+ "learning_rate": 7.430806197731956e-06,
47
+ "loss": 1.0132,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.7844036697247706,
53
+ "eval_loss": 0.9622328281402588,
54
+ "eval_runtime": 2.5865,
55
+ "eval_samples_per_second": 337.137,
56
+ "eval_steps_per_second": 2.706,
57
  "step": 1581
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 15.912079811096191,
62
+ "learning_rate": 6.371276769700781e-06,
63
+ "loss": 0.9206,
64
  "step": 2108
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.7901376146788991,
69
+ "eval_loss": 0.9278557896614075,
70
+ "eval_runtime": 2.5808,
71
+ "eval_samples_per_second": 337.878,
72
+ "eval_steps_per_second": 2.712,
73
  "step": 2108
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "grad_norm": 10.149016380310059,
78
+ "learning_rate": 5.3097330271676446e-06,
79
+ "loss": 0.8564,
80
  "step": 2635
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.7901376146788991,
85
+ "eval_loss": 0.9079629182815552,
86
+ "eval_runtime": 2.5658,
87
+ "eval_samples_per_second": 339.852,
88
+ "eval_steps_per_second": 2.728,
89
  "step": 2635
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "grad_norm": 16.081409454345703,
94
+ "learning_rate": 4.248189284634507e-06,
95
+ "loss": 0.8139,
96
  "step": 3162
97
  },
98
  {
99
  "epoch": 6.0,
100
+ "eval_accuracy": 0.7947247706422018,
101
+ "eval_loss": 0.8956273794174194,
102
+ "eval_runtime": 2.6131,
103
+ "eval_samples_per_second": 333.701,
104
+ "eval_steps_per_second": 2.679,
105
  "step": 3162
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "grad_norm": 11.411503791809082,
110
+ "learning_rate": 3.1866455421013703e-06,
111
+ "loss": 0.7798,
112
  "step": 3689
113
  },
114
  {
115
  "epoch": 7.0,
116
+ "eval_accuracy": 0.7924311926605505,
117
+ "eval_loss": 0.8861347436904907,
118
+ "eval_runtime": 2.6019,
119
+ "eval_samples_per_second": 335.138,
120
+ "eval_steps_per_second": 2.69,
121
  "step": 3689
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "grad_norm": Infinity,
126
+ "learning_rate": 2.127116114070194e-06,
127
+ "loss": 0.7583,
128
  "step": 4216
129
  },
130
  {
131
  "epoch": 8.0,
132
+ "eval_accuracy": 0.7958715596330275,
133
+ "eval_loss": 0.8804019093513489,
134
+ "eval_runtime": 2.543,
135
+ "eval_samples_per_second": 342.909,
136
+ "eval_steps_per_second": 2.753,
137
  "step": 4216
138
  }
139
  ],
140
  "logging_steps": 500,
141
+ "max_steps": 5270,
142
  "num_input_tokens_seen": 0,
143
+ "num_train_epochs": 10,
144
  "save_steps": 500,
145
  "stateful_callbacks": {
146
  "TrainerControl": {
 
149
  "should_evaluate": false,
150
  "should_log": false,
151
  "should_save": true,
152
+ "should_training_stop": false
153
  },
154
  "attributes": {}
155
  }
 
158
  "train_batch_size": 128,
159
  "trial_name": null,
160
  "trial_params": {
161
+ "alpha": 0.27608826195592573,
162
+ "learning_rate": 1.0615437425331367e-05,
163
+ "num_train_epochs": 10,
164
+ "temperature": 2
165
  }
166
  }
run-0/checkpoint-4216/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f28149fe21091b257234d7cbe1611ee6ca88e3a7cef675e40e6d90410e6fc1a6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b6a60f7b85b38fa45cddf1a417ee51250fe5822237403416bf2406ff2cdb84
3
  size 5176