Kira-Floris commited on
Commit
3e8ad85
1 Parent(s): c959a8e

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1719316946.49cbd00d2005.382.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32027f51491766380aa4b56b9780645239e4a1be2cee3a370fbedf1eeff9b8b9
3
- size 5428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc06a89bf4f3acb77b416b957edf4b8d0c7487ba1d7b08f8ca6b28909e0e8659
3
+ size 5962
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27cee0faa93c8ebfdb171e60acd1f01bcf52efd270df1bebe4847e474aab2212
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b81af07e0ebeae51ba086b61ef32b5b3d7f690b9b86ffede2609f3651486fd3
3
  size 17549312
run-1/checkpoint-1054/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4db5ce53872114bd04603e9f4cf8a2c9fff8b5eba2e6c35fa6175cd08190265
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b81af07e0ebeae51ba086b61ef32b5b3d7f690b9b86ffede2609f3651486fd3
3
  size 17549312
run-1/checkpoint-1054/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1dd136a0ee7ffbf29f6a85dc5e2e539fdd79d2195a573a0abab8048ddf9905f
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ca175f0a3f0d3b6098f2ce36a4acbfd95d7d212be030eec4dc090dfb722108
3
  size 35123898
run-1/checkpoint-1054/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec5be829c4dd6c4d8b768ac32866ba5e4919b044a4f27c321cee266e36d17472
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51af740af6cdafe257b8aaac73fb8a3ce20ed9c05fc5b7d8ec20c8584be4a5b
3
  size 1064
run-1/checkpoint-1054/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8268348623853211,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-527",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,34 +10,34 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 20.18393898010254,
14
- "learning_rate": 0.0004400348679504635,
15
- "loss": 1.3749,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8268348623853211,
21
- "eval_loss": 1.6724135875701904,
22
- "eval_runtime": 2.5582,
23
- "eval_samples_per_second": 340.862,
24
- "eval_steps_per_second": 2.736,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 4.33894681930542,
30
- "learning_rate": 0.00022001743397523176,
31
- "loss": 0.5714,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.8222477064220184,
37
- "eval_loss": 1.7203704118728638,
38
- "eval_runtime": 2.5544,
39
- "eval_samples_per_second": 341.369,
40
- "eval_steps_per_second": 2.74,
41
  "step": 1054
42
  }
43
  ],
@@ -62,9 +62,9 @@
62
  "train_batch_size": 128,
63
  "trial_name": null,
64
  "trial_params": {
65
- "alpha": 0.22319309854701086,
66
- "learning_rate": 0.0006600523019256953,
67
  "num_train_epochs": 3,
68
- "temperature": 16
69
  }
70
  }
 
1
  {
2
+ "best_metric": 0.8130733944954128,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-527",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.467271327972412,
14
+ "learning_rate": 0.0005246601812780973,
15
+ "loss": 0.349,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8130733944954128,
21
+ "eval_loss": 0.5175108313560486,
22
+ "eval_runtime": 2.3764,
23
+ "eval_samples_per_second": 366.943,
24
+ "eval_steps_per_second": 2.946,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 4.971531867980957,
30
+ "learning_rate": 0.00026233009063904864,
31
+ "loss": 0.1805,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.8027522935779816,
37
+ "eval_loss": 0.6036062240600586,
38
+ "eval_runtime": 2.3772,
39
+ "eval_samples_per_second": 366.815,
40
+ "eval_steps_per_second": 2.945,
41
  "step": 1054
42
  }
43
  ],
 
62
  "train_batch_size": 128,
63
  "trial_name": null,
64
  "trial_params": {
65
+ "alpha": 0.9438943885488221,
66
+ "learning_rate": 0.0007869902719171459,
67
  "num_train_epochs": 3,
68
+ "temperature": 2
69
  }
70
  }
run-1/checkpoint-1054/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee9990afd96f204321e9e33fc4071f73d76355111015fd2ec723f4a4e4849ce7
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13747d4a4002463522d34a00fd1d294fed1696984634001cefecc3ef3eb702bf
3
  size 5176