ccore committed on
Commit
d30244c
·
verified ·
1 Parent(s): da89c88

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c13b7b19ef580ad42a2f99f24213ccaa4472c6b4b059a794da90412c9ac0b4
3
  size 1447317080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76fb97c1964b635d7be1205ccef676d0caa1dd98ed0ac8b59e7c755533e32712
3
  size 1447317080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a758a18680bfdfc600ed0491e712113dd602c7edc2f701d2cf8f3ef60a372168
3
  size 2894813242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2323db34f8f05f4d09cb6d1278b45c569a36b0afe0f5fd1eacf32a59e4fbd607
3
  size 2894813242
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2709c70a3faf4f234941f28397ed1c327ceb35087b102618458597e5d291411
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:238f89c7881608a89c144adfd8d2cd6859c09e7f2d8a8e8db71c7c86758fd97b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.344450980424881,
3
  "best_model_checkpoint": "./opt_trained2/checkpoint-803",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 2409,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -59,6 +59,28 @@
59
  "eval_samples_per_second": 10.462,
60
  "eval_steps_per_second": 2.615,
61
  "step": 2409
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 500,
@@ -78,7 +100,7 @@
78
  "attributes": {}
79
  }
80
  },
81
- "total_flos": 9.386164249704e+16,
82
  "train_batch_size": 4,
83
  "trial_name": null,
84
  "trial_params": null
 
1
  {
2
  "best_metric": 0.344450980424881,
3
  "best_model_checkpoint": "./opt_trained2/checkpoint-803",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 3212,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
59
  "eval_samples_per_second": 10.462,
60
  "eval_steps_per_second": 2.615,
61
  "step": 2409
62
+ },
63
+ {
64
+ "epoch": 3.1133250311332503,
65
+ "grad_norm": 1.2075772285461426,
66
+ "learning_rate": 0.000337733499377335,
67
+ "loss": 2.1429,
68
+ "step": 2500
69
+ },
70
+ {
71
+ "epoch": 3.7359900373599,
72
+ "grad_norm": 1.2370275259017944,
73
+ "learning_rate": 0.00032528019925280203,
74
+ "loss": 1.7732,
75
+ "step": 3000
76
+ },
77
+ {
78
+ "epoch": 4.0,
79
+ "eval_loss": 0.41189202666282654,
80
+ "eval_runtime": 272.6721,
81
+ "eval_samples_per_second": 10.474,
82
+ "eval_steps_per_second": 2.619,
83
+ "step": 3212
84
  }
85
  ],
86
  "logging_steps": 500,
 
100
  "attributes": {}
101
  }
102
  },
103
+ "total_flos": 1.2514885666272e+17,
104
  "train_batch_size": 4,
105
  "trial_name": null,
106
  "trial_params": null