ngwgsang commited on
Commit
3416d36
·
verified ·
1 Parent(s): d3d91e3

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8bf5052cabb23e9dcf452aa143aa34ae321d54f843f8247ad8e3927c4c149c3
3
  size 600177236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:139fec45154ca33906c2903e3974cf5bb7b5d00a3eef28beee8d1f2ed03c7c7d
3
  size 600177236
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b57597d0067e7abffddcf181ec99b30a24399213ec61ed08eda3db8aebfeb0c
3
  size 1200001786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3202d3b24f8b5a2935b12e801a57e12580102cf9a946ee725dcc7abf8cbb966b
3
  size 1200001786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45bb083c87006f454b2f544360e3b1b61397e295b76720295d40e74907c141a1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b03b11193656c1a3d536bab7a0679a12f7b4ef3b480229b7a017067ad82448d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbaf1fe07c08cae80261838683ae5bba41ac45905af686e74e6d05f19f13430d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e60debad2e0820d2d64238ae4a209b8d40fdcf351db5f0394509952de535c45
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.12813186645507812,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 6570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -114,6 +114,48 @@
114
  "eval_samples_per_second": 193.121,
115
  "eval_steps_per_second": 5.371,
116
  "step": 6570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
118
  ],
119
  "logging_steps": 500,
@@ -128,12 +170,12 @@
128
  "should_evaluate": false,
129
  "should_log": false,
130
  "should_save": true,
131
- "should_training_stop": false
132
  },
133
  "attributes": {}
134
  }
135
  },
136
- "total_flos": 1.40801080983552e+16,
137
  "train_batch_size": 36,
138
  "trial_name": null,
139
  "trial_params": null
 
1
  {
2
  "best_metric": 0.12813186645507812,
3
  "best_model_checkpoint": "./vit5_qqp/checkpoint-6570",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 9855,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
114
  "eval_samples_per_second": 193.121,
115
  "eval_steps_per_second": 5.371,
116
  "step": 6570
117
+ },
118
+ {
119
+ "epoch": 2.13089802130898,
120
+ "grad_norm": 0.4530036449432373,
121
+ "learning_rate": 1.4485032978183663e-05,
122
+ "loss": 0.1129,
123
+ "step": 7000
124
+ },
125
+ {
126
+ "epoch": 2.2831050228310503,
127
+ "grad_norm": 0.3842392861843109,
128
+ "learning_rate": 1.1948249619482495e-05,
129
+ "loss": 0.1092,
130
+ "step": 7500
131
+ },
132
+ {
133
+ "epoch": 2.43531202435312,
134
+ "grad_norm": 0.4166797697544098,
135
+ "learning_rate": 9.411466260781329e-06,
136
+ "loss": 0.1086,
137
+ "step": 8000
138
+ },
139
+ {
140
+ "epoch": 2.5875190258751903,
141
+ "grad_norm": 0.5072170495986938,
142
+ "learning_rate": 6.874682902080162e-06,
143
+ "loss": 0.1075,
144
+ "step": 8500
145
+ },
146
+ {
147
+ "epoch": 2.73972602739726,
148
+ "grad_norm": 0.4664016664028168,
149
+ "learning_rate": 4.337899543378996e-06,
150
+ "loss": 0.1089,
151
+ "step": 9000
152
+ },
153
+ {
154
+ "epoch": 2.8919330289193304,
155
+ "grad_norm": 0.48263782262802124,
156
+ "learning_rate": 1.8011161846778284e-06,
157
+ "loss": 0.1069,
158
+ "step": 9500
159
  }
160
  ],
161
  "logging_steps": 500,
 
170
  "should_evaluate": false,
171
  "should_log": false,
172
  "should_save": true,
173
+ "should_training_stop": true
174
  },
175
  "attributes": {}
176
  }
177
  },
178
+ "total_flos": 2.11201621475328e+16,
179
  "train_batch_size": 36,
180
  "trial_name": null,
181
  "trial_params": null