mika5883 commited on
Commit
25ab6a8
1 Parent(s): 57166e7

Training in progress, step 6500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a4009e19ac3f932b12f74b5e5961fb8a1a2da79ef0beb6278bcbe4110281f58
3
  size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab28ecb94b7a818a5a7fd1c75030bb7191001272b6094d37a909690e2363f392
3
  size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2cdc3d74d2f035f1f3fcaf7445452a3b282c680da8a9098675ce7b6aef4afd7
3
  size 1783444357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f96d7c8ef3b30034ac3cc33cc9b776b72fa413fbfee63fe75839a96f5fc8817
3
  size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5460bfda360ec6bf006c4e70c03c4e6dd92ee461733554c5eae970f8ca47273c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d57fdcf5f9a2ba7a66cb157a536592aea4bc319138ee95de3e9ae0ff3c98c0
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75b4b746e7e71a283788a34656838ee1f277007f7a1a22835377cbaf8957d3ae
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8aa76edc747fa1f55c383a900f2044d0133f29781c0dba0a687e1604db66b1
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0064,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,6 +21,83 @@
21
  "learning_rate": 4.9840800000000006e-05,
22
  "loss": 0.7566,
23
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 500,
@@ -40,7 +117,7 @@
40
  "attributes": {}
41
  }
42
  },
43
- "total_flos": 4871663124480000.0,
44
  "train_batch_size": 64,
45
  "trial_name": null,
46
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0416,
5
  "eval_steps": 500,
6
+ "global_step": 6500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "learning_rate": 4.9840800000000006e-05,
22
  "loss": 0.7566,
23
  "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.0096,
27
+ "grad_norm": 1.2925927639007568,
28
+ "learning_rate": 4.97608e-05,
29
+ "loss": 0.6764,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.0128,
34
+ "grad_norm": 1.286004900932312,
35
+ "learning_rate": 4.968080000000001e-05,
36
+ "loss": 0.6304,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.016,
41
+ "grad_norm": 1.2140214443206787,
42
+ "learning_rate": 4.96008e-05,
43
+ "loss": 0.5981,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.0192,
48
+ "grad_norm": 1.2525482177734375,
49
+ "learning_rate": 4.95208e-05,
50
+ "loss": 0.5767,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.0224,
55
+ "grad_norm": 1.2310410737991333,
56
+ "learning_rate": 4.94408e-05,
57
+ "loss": 0.5597,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.0256,
62
+ "grad_norm": 1.1735206842422485,
63
+ "learning_rate": 4.9360800000000004e-05,
64
+ "loss": 0.5418,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.0288,
69
+ "grad_norm": 1.114688754081726,
70
+ "learning_rate": 4.9280800000000004e-05,
71
+ "loss": 0.5335,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.032,
76
+ "grad_norm": 0.8874593377113342,
77
+ "learning_rate": 4.9200800000000005e-05,
78
+ "loss": 0.5237,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.0352,
83
+ "grad_norm": 1.1261299848556519,
84
+ "learning_rate": 4.91208e-05,
85
+ "loss": 0.5135,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.0384,
90
+ "grad_norm": 0.9994556307792664,
91
+ "learning_rate": 4.9040800000000007e-05,
92
+ "loss": 0.5059,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.0416,
97
+ "grad_norm": 1.2349673509597778,
98
+ "learning_rate": 4.89608e-05,
99
+ "loss": 0.4939,
100
+ "step": 6500
101
  }
102
  ],
103
  "logging_steps": 500,
 
117
  "attributes": {}
118
  }
119
  },
120
+ "total_flos": 3.166581030912e+16,
121
  "train_batch_size": 64,
122
  "trial_name": null,
123
  "trial_params": null