ngwgsang commited on
Commit
f59e172
·
verified ·
1 Parent(s): b0f137e

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77661cf122b30a04f4aa6cd000dd6533ddd4353ab75c3d3d10d582b37f3b49d
3
  size 1681761380
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f17784ea5fb67c5913ce425ef6308b1353852a8e0c79a2174d95ed8cba70ae7f
3
  size 1681761380
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65a3cdefc8505797145911727202f35a8cdaeabfa9952d729a2ba3560541d9e7
3
  size 3363321371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc52de097e9782e1e826f34ec13cf5024771b56821572ba62cd75ffcf6e08c9
3
  size 3363321371
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90929af1eba940155952091f85bda02adb1f6fd2b869f62220ba76e4e0de291c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:854c8f736a290926978c84338e8ea8816f0f1656561b7d42e716e67daa1a7464
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79a65e50ebd7c88e6afe05bce60e1437447659f0ab3d763f77217140057a93ad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7eaa191070e51793fbf344cd35501716ecc7578f20c1ced375c0d7bb5bee549
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.4237809181213379,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-863",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 863,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -22,6 +22,28 @@
22
  "eval_samples_per_second": 201.191,
23
  "eval_steps_per_second": 5.039,
24
  "step": 863
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 500,
@@ -41,7 +63,7 @@
41
  "attributes": {}
42
  }
43
  },
44
- "total_flos": 2628948507623424.0,
45
  "train_batch_size": 40,
46
  "trial_name": null,
47
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4237809181213379,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-863",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 1726,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
22
  "eval_samples_per_second": 201.191,
23
  "eval_steps_per_second": 5.039,
24
  "step": 863
25
+ },
26
+ {
27
+ "epoch": 1.1587485515643106,
28
+ "grad_norm": 0.8257110714912415,
29
+ "learning_rate": 3.0687524140594824e-05,
30
+ "loss": 0.1866,
31
+ "step": 1000
32
+ },
33
+ {
34
+ "epoch": 1.7381228273464657,
35
+ "grad_norm": 0.8043908476829529,
36
+ "learning_rate": 2.103128621089224e-05,
37
+ "loss": 0.1303,
38
+ "step": 1500
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "eval_loss": 0.4571397602558136,
43
+ "eval_runtime": 107.1835,
44
+ "eval_samples_per_second": 201.169,
45
+ "eval_steps_per_second": 5.038,
46
+ "step": 1726
47
  }
48
  ],
49
  "logging_steps": 500,
 
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 5257897015246848.0,
67
  "train_batch_size": 40,
68
  "trial_name": null,
69
  "trial_params": null