Slava committed
Commit 55d36cb · verified · 1 Parent(s): baa99d8

Training in progress, epoch 2

logs/events.out.tfevents.1706267654.A-FVFFG2C4Q05P.18450.25 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52e8a45c581fb5b3516f4cb4889b959907d751513b182ded83870e5a483005e9
-size 4916
+oid sha256:7b747605f0133808a1c3fb7171fd8d456496eafe3abfe5be2883f33c808489f6
+size 5396
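All of the binaries in this commit are tracked with Git LFS, so the diffs here only show pointer files (spec version, sha256 oid, byte size), not the payloads themselves. After fetching the real objects, the recorded oid and size can be checked against what is on disk; a minimal Python sketch, assuming the file has already been pulled to the path shown (verify_lfs_pointer is an illustrative helper, not part of this repo):

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Compare a Git LFS pointer's recorded oid/size with the blob on disk."""
    # Skip the "version ..." line, then parse "oid sha256:<hex>" and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines()[1:])
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    blob = Path(blob_path).read_bytes()
    return hashlib.sha256(blob).hexdigest() == expected_oid and len(blob) == expected_size

# Pointer contents taken from the new version of the event log above.
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:7b747605f0133808a1c3fb7171fd8d456496eafe3abfe5be2883f33c808489f6\n"
    "size 5396\n"
)
print(verify_lfs_pointer(pointer, "logs/events.out.tfevents.1706267654.A-FVFFG2C4Q05P.18450.25"))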
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:87568c0afc5ae5e4ff718507f68ad4943997f13f2f2353ed7480c5872143ba54
+oid sha256:a8ca26b38440b24ac09b4108e4fad32a203e7fb86f22a35fb7ee11404f1a1e04
 size 17549312
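Only the content hash of model.safetensors changed; the byte size stays at 17549312, as expected when the same architecture is re-saved with updated weights. Assuming the LFS object has been fetched locally, a short sketch of inspecting the checkpoint with the safetensors library (illustration only, not code from this repository):

from safetensors import safe_open

# Lazily open the checkpoint and list tensor names, shapes and dtypes
# without materializing all weights at once.
with safe_open("model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        print(name, tuple(t.shape), t.dtype)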
run-1/checkpoint-1054/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a48f92c46c7e5026b38f2b3bf26873dd3ae9f5952ec78dfa9724da5d9e0ab0c2
+oid sha256:a8ca26b38440b24ac09b4108e4fad32a203e7fb86f22a35fb7ee11404f1a1e04
 size 17549312
run-1/checkpoint-1054/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b28bbcc0546d31f0198832865f505c9cf6e29ab2fd20d07ff939ad80196014e
+oid sha256:6a833337f09e72dc86573785f7bcf879bfb8368c000dfb8bc8fad2aef129deb2
 size 35122373
run-1/checkpoint-1054/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7ac61b823b02e9f174aebe61188f30c7365422c82ea4dfaaa2d4e8479e62b71
+oid sha256:11d09bc903f95d6f79c61520bda6bb10b74f15510dab87c8eabb5924934da6f3
 size 627
run-1/checkpoint-1054/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.6995412844036697,
+  "best_metric": 0.8130733944954128,
   "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-1054",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -10,47 +10,47 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "learning_rate": 5.34694014828574e-06,
-      "loss": 4.485,
+      "learning_rate": 0.0006875917567735723,
+      "loss": 0.9069,
       "step": 527
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.6513761467889908,
-      "eval_loss": 3.9686293601989746,
-      "eval_runtime": 9.1719,
-      "eval_samples_per_second": 95.073,
-      "eval_steps_per_second": 0.763,
+      "eval_accuracy": 0.801605504587156,
+      "eval_loss": 1.1160032749176025,
+      "eval_runtime": 9.4507,
+      "eval_samples_per_second": 92.269,
+      "eval_steps_per_second": 0.741,
       "step": 527
     },
     {
       "epoch": 2.0,
-      "learning_rate": 0.0,
-      "loss": 4.0805,
+      "learning_rate": 0.0006111926726876199,
+      "loss": 0.4218,
       "step": 1054
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.6995412844036697,
-      "eval_loss": 3.6462810039520264,
-      "eval_runtime": 9.4456,
-      "eval_samples_per_second": 92.318,
-      "eval_steps_per_second": 0.741,
+      "eval_accuracy": 0.8130733944954128,
+      "eval_loss": 1.181414246559143,
+      "eval_runtime": 9.1074,
+      "eval_samples_per_second": 95.746,
+      "eval_steps_per_second": 0.769,
       "step": 1054
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1054,
+  "max_steps": 5270,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
+  "num_train_epochs": 10,
   "save_steps": 500,
   "total_flos": 16162868129520.0,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.10705118382199963,
-    "learning_rate": 1.069388029657148e-05,
-    "num_train_epochs": 2,
-    "temperature": 16
+    "alpha": 0.5682681228180307,
+    "learning_rate": 0.0007639908408595248,
+    "num_train_epochs": 10,
+    "temperature": 8
   }
 }
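The new trial_params drawn for this hyperparameter-search trial are alpha ≈ 0.568, temperature = 8, learning_rate ≈ 7.64e-4 and num_train_epochs = 10 (hence max_steps = 5270 at 527 steps per epoch). An alpha/temperature pair like this suggests the usual knowledge-distillation objective that blends hard-label cross-entropy with a temperature-softened KL term; the sketch below shows only that common convention, as an assumption, since the actual training script is not part of this commit and may weight the terms differently:

import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      alpha: float = 0.5682681228180307,
                      temperature: float = 8.0):
    """Conventional distillation objective; the repo's exact definition may differ."""
    # Hard-label term on the student's raw logits.
    ce = F.cross_entropy(student_logits, labels)

    # Soft-label term: KL between temperature-softened distributions,
    # scaled by T^2 to keep gradient magnitudes comparable.
    kl = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * (temperature ** 2)

    return alpha * ce + (1.0 - alpha) * kl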
run-1/checkpoint-1054/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70d09d03f84ec5e6f07e718b975ff8effb1d82efe0ca25ef5c8642902ec419c8
+oid sha256:d99816c97dcbe7250039b2fe72e8bb2c24840867dcd296d9eb6598b2afd2670d
 size 4283