kooff11 commited on
Commit
05bc034
1 Parent(s): 72799d5

Training in progress, step 8, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a1723d01ce24d971f62ea48862a0a37f5374749bb447c5476496b0f9c142753
3
  size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d88bcc6395f0d510811e42b166748e9b9dfa3c199f8a5dc055add965a9f14533
3
  size 82460660
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:676e8f07a7a19ff56e2cb1c80802de090e9ab0fc592fe12b6a3de424395d8113
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907e832f4fa9407ac43cb9b73dcdec990eb83b4cdf8b2280c4d7fd827bea7fe1
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af9e5c0c831587c3533d61cb36995aac83a9f40922ce4df496a867bb67d84b7e
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb39198e1c03ba0971e8b1588d211a6c43121869b71254d225961454badbceb1
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0a47c057106d2ddc8c27b996cfcdb012431f97bae35e43b5668738d196070d4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:535bb91ea586ea928fe6012763740a2245e4922b02b18519cc231e354a7d01a3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2807017543859649,
5
  "eval_steps": 4,
6
- "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -51,6 +51,42 @@
51
  "eval_samples_per_second": 9.954,
52
  "eval_steps_per_second": 2.566,
53
  "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
  ],
56
  "logging_steps": 1,
@@ -70,7 +106,7 @@
70
  "attributes": {}
71
  }
72
  },
73
- "total_flos": 8.94769505983529e+16,
74
  "train_batch_size": 2,
75
  "trial_name": null,
76
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5614035087719298,
5
  "eval_steps": 4,
6
+ "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
51
  "eval_samples_per_second": 9.954,
52
  "eval_steps_per_second": 2.566,
53
  "step": 4
54
+ },
55
+ {
56
+ "epoch": 0.3508771929824561,
57
+ "grad_norm": NaN,
58
+ "learning_rate": 8.742553740855506e-05,
59
+ "loss": 0.0,
60
+ "step": 5
61
+ },
62
+ {
63
+ "epoch": 0.42105263157894735,
64
+ "grad_norm": NaN,
65
+ "learning_rate": 7.840323733655778e-05,
66
+ "loss": 0.0,
67
+ "step": 6
68
+ },
69
+ {
70
+ "epoch": 0.49122807017543857,
71
+ "grad_norm": NaN,
72
+ "learning_rate": 6.773024435212678e-05,
73
+ "loss": 0.0,
74
+ "step": 7
75
+ },
76
+ {
77
+ "epoch": 0.5614035087719298,
78
+ "grad_norm": NaN,
79
+ "learning_rate": 5.602683401276615e-05,
80
+ "loss": 0.0,
81
+ "step": 8
82
+ },
83
+ {
84
+ "epoch": 0.5614035087719298,
85
+ "eval_loss": NaN,
86
+ "eval_runtime": 9.7423,
87
+ "eval_samples_per_second": 9.957,
88
+ "eval_steps_per_second": 2.566,
89
+ "step": 8
90
  }
91
  ],
92
  "logging_steps": 1,
 
106
  "attributes": {}
107
  }
108
  },
109
+ "total_flos": 1.789539011967058e+17,
110
  "train_batch_size": 2,
111
  "trial_name": null,
112
  "trial_params": null