Kira-Floris committed on
Commit
d4eb0eb
1 Parent(s): 789077e

Training in progress, epoch 3

Browse files
logs/events.out.tfevents.1719305887.852b1e905a9a.223.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3563337f111b88b6da01083577e56dfa749019aaf0c65354784829fe6a3eb876
3
- size 5964
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccce78a6de7b7ead06a2eaeb7095288a4e8dd2066b568e59b6f75294e9e29337
3
+ size 6498
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ffba682ed7f803f9b0e6756d1d7262c0a8ff7725ee17d25a4400fb904738bf0
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1ea81d68885299a8a06cce03ef9b210fe932d9c8ee4477cb9e366ba6369340
3
  size 17549312
run-0/checkpoint-1581/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d6f0e2214ce09dc211d998069eb98b5432e4531fbba06af7e853a06ef6117f7
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc1ea81d68885299a8a06cce03ef9b210fe932d9c8ee4477cb9e366ba6369340
3
  size 17549312
run-0/checkpoint-1581/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f01e060fe45621661f203c80a7b2c4ce2cfe51d9e2364cc0bd7ebc8aaac5cb9
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcf0da333c75e9d06d87facf836a5f02471b9309fad9123564e7885013fa80b9
3
  size 35123898
run-0/checkpoint-1581/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c41b5dd740096ccbf68fc1a9ad0d5790124f7751117ba763aa9751a461999e7d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e32e718ffb75105dc8303c02e19996108f0b226cb213ae68dc70ae12ff0a9eb1
3
  size 1064
run-0/checkpoint-1581/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8176605504587156,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1581",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,57 +10,57 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 16.55626678466797,
14
- "learning_rate": 5.1383294230414005e-05,
15
- "loss": 1.5907,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8061926605504587,
21
- "eval_loss": 1.2464169263839722,
22
- "eval_runtime": 2.4007,
23
- "eval_samples_per_second": 363.232,
24
- "eval_steps_per_second": 2.916,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": Infinity,
30
- "learning_rate": 4.405675241279466e-05,
31
- "loss": 0.9038,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.8107798165137615,
37
- "eval_loss": 1.1235342025756836,
38
- "eval_runtime": 2.3533,
39
- "eval_samples_per_second": 370.54,
40
- "eval_steps_per_second": 2.975,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 48.55740737915039,
46
- "learning_rate": 3.67162818084498e-05,
47
- "loss": 0.6946,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.8176605504587156,
53
- "eval_loss": 1.1027159690856934,
54
- "eval_runtime": 2.3719,
55
- "eval_samples_per_second": 367.637,
56
- "eval_steps_per_second": 2.951,
57
  "step": 1581
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 4216,
62
  "num_input_tokens_seen": 0,
63
- "num_train_epochs": 8,
64
  "save_steps": 500,
65
  "stateful_callbacks": {
66
  "TrainerControl": {
@@ -78,9 +78,9 @@
78
  "train_batch_size": 128,
79
  "trial_name": null,
80
  "trial_params": {
81
- "alpha": 0.5167874928728581,
82
- "learning_rate": 5.872376483475886e-05,
83
- "num_train_epochs": 8,
84
- "temperature": 5
85
  }
86
  }
 
1
  {
2
+ "best_metric": 0.7844036697247706,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1581",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 5.95961856842041,
14
+ "learning_rate": 9.55389368279823e-06,
15
+ "loss": 1.5369,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7339449541284404,
21
+ "eval_loss": 1.2773902416229248,
22
+ "eval_runtime": 2.5957,
23
+ "eval_samples_per_second": 335.939,
24
+ "eval_steps_per_second": 2.697,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 15.252978324890137,
30
+ "learning_rate": 8.492349940265094e-06,
31
+ "loss": 1.2159,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.7786697247706422,
37
+ "eval_loss": 1.022659182548523,
38
+ "eval_runtime": 2.5741,
39
+ "eval_samples_per_second": 338.753,
40
+ "eval_steps_per_second": 2.719,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 18.01114845275879,
46
+ "learning_rate": 7.430806197731956e-06,
47
+ "loss": 1.0132,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.7844036697247706,
53
+ "eval_loss": 0.9622328281402588,
54
+ "eval_runtime": 2.5865,
55
+ "eval_samples_per_second": 337.137,
56
+ "eval_steps_per_second": 2.706,
57
  "step": 1581
58
  }
59
  ],
60
  "logging_steps": 500,
61
+ "max_steps": 5270,
62
  "num_input_tokens_seen": 0,
63
+ "num_train_epochs": 10,
64
  "save_steps": 500,
65
  "stateful_callbacks": {
66
  "TrainerControl": {
 
78
  "train_batch_size": 128,
79
  "trial_name": null,
80
  "trial_params": {
81
+ "alpha": 0.27608826195592573,
82
+ "learning_rate": 1.0615437425331367e-05,
83
+ "num_train_epochs": 10,
84
+ "temperature": 2
85
  }
86
  }
run-0/checkpoint-1581/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f28149fe21091b257234d7cbe1611ee6ca88e3a7cef675e40e6d90410e6fc1a6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8b6a60f7b85b38fa45cddf1a417ee51250fe5822237403416bf2406ff2cdb84
3
  size 5176