ccore committed on
Commit
2669048
·
verified ·
1 Parent(s): 816f9a0

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/opt-125m",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
 
1
  {
2
+ "_name_or_path": "ccore/ccore-v3",
3
  "_remove_final_layer_norm": false,
4
  "activation_dropout": 0.0,
5
  "activation_function": "relu",
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a40dec7e30253e75e422b47ab951addd60730e7951035834622549ac801367e
3
  size 500979600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4013af1bfe8d529a0921d3154adc965111f5f1e71176b73e3b888780d625357
3
  size 500979600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc053d38b879409c8034df7340972100796763466dd69d9c7b037470814f772
3
  size 1002078330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:757da7d4f6896bd7517037c1352fee52b788211aeff771636b1687b889bfad65
3
  size 1002078330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebdf656e7a36761328b5fb073dd978028ede2fdcdc132424304899457e59ad39
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574024fd0bef58b4ca8af0606c1be9cf07d7494e9e30913df874b1f6896f01c2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a9e472ef49de9b8b8e7b375a6d84ff735f7d72dc421adbe419aa4444b7c6057
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d4270c057ffbe7d94d3a4a46e15a61be9cfe5f3368f1f48731c220c26ede867
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,113 +1,24 @@
1
  {
2
- "best_metric": 0.9929500818252563,
3
- "best_model_checkpoint": "./opt_trained3/checkpoint-3654",
4
- "epoch": 5.998769987699877,
5
  "eval_steps": 500,
6
- "global_step": 3654,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8200082000820008,
13
- "grad_norm": 3.336149215698242,
14
- "learning_rate": 9.835913054681877e-05,
15
- "loss": 1.0546,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.998769987699877,
20
- "eval_loss": 1.013350248336792,
21
- "eval_runtime": 259.9403,
22
- "eval_samples_per_second": 50.035,
23
- "eval_steps_per_second": 12.511,
24
- "step": 609
25
- },
26
- {
27
- "epoch": 1.6412464124641246,
28
- "grad_norm": 2.491156578063965,
29
- "learning_rate": 9.351884318196129e-05,
30
- "loss": 1.0221,
31
- "step": 1000
32
- },
33
- {
34
- "epoch": 1.998769987699877,
35
- "eval_loss": 1.0091875791549683,
36
- "eval_runtime": 259.5356,
37
- "eval_samples_per_second": 50.113,
38
- "eval_steps_per_second": 12.53,
39
- "step": 1218
40
- },
41
- {
42
- "epoch": 2.4624846248462484,
43
- "grad_norm": 2.6502525806427,
44
- "learning_rate": 8.579934515124202e-05,
45
- "loss": 1.0175,
46
- "step": 1500
47
- },
48
- {
49
- "epoch": 2.998769987699877,
50
- "eval_loss": 1.0066354274749756,
51
- "eval_runtime": 262.6041,
52
- "eval_samples_per_second": 49.527,
53
- "eval_steps_per_second": 12.384,
54
- "step": 1827
55
- },
56
- {
57
- "epoch": 3.2837228372283724,
58
- "grad_norm": 1.8426792621612549,
59
- "learning_rate": 7.571135915407219e-05,
60
- "loss": 1.0148,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 3.998769987699877,
65
- "eval_loss": 1.003442406654358,
66
- "eval_runtime": 261.9157,
67
- "eval_samples_per_second": 49.657,
68
- "eval_steps_per_second": 12.416,
69
- "step": 2436
70
- },
71
- {
72
- "epoch": 4.1049610496104965,
73
- "grad_norm": 1.7222294807434082,
74
- "learning_rate": 6.39223072714725e-05,
75
- "loss": 1.0105,
76
- "step": 2500
77
- },
78
- {
79
- "epoch": 4.924969249692497,
80
- "grad_norm": 3.702568292617798,
81
- "learning_rate": 5.121215425983256e-05,
82
- "loss": 1.0048,
83
- "step": 3000
84
- },
85
- {
86
- "epoch": 4.998769987699877,
87
- "eval_loss": 0.9986960887908936,
88
- "eval_runtime": 258.2811,
89
- "eval_samples_per_second": 50.356,
90
- "eval_steps_per_second": 12.591,
91
- "step": 3045
92
- },
93
- {
94
- "epoch": 5.74620746207462,
95
- "grad_norm": 1.9525970220565796,
96
- "learning_rate": 3.842180501092163e-05,
97
- "loss": 1.0013,
98
- "step": 3500
99
- },
100
- {
101
- "epoch": 5.998769987699877,
102
- "eval_loss": 0.9929500818252563,
103
- "eval_runtime": 260.4505,
104
- "eval_samples_per_second": 49.937,
105
- "eval_steps_per_second": 12.486,
106
- "step": 3654
107
  }
108
  ],
109
  "logging_steps": 500,
110
- "max_steps": 6090,
111
  "num_input_tokens_seen": 0,
112
  "num_train_epochs": 10,
113
  "save_steps": 500,
@@ -123,7 +34,7 @@
123
  "attributes": {}
124
  }
125
  },
126
- "total_flos": 1.4515315375104e+17,
127
  "train_batch_size": 24,
128
  "trial_name": null,
129
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7184422016143799,
3
+ "best_model_checkpoint": "./opt_trained3/checkpoint-15",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 15,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.7184422016143799,
14
+ "eval_runtime": 5.9548,
15
+ "eval_samples_per_second": 50.38,
16
+ "eval_steps_per_second": 12.595,
17
+ "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  }
19
  ],
20
  "logging_steps": 500,
21
+ "max_steps": 140,
22
  "num_input_tokens_seen": 0,
23
  "num_train_epochs": 10,
24
  "save_steps": 500,
 
34
  "attributes": {}
35
  }
36
  },
37
+ "total_flos": 556605613440000.0,
38
  "train_batch_size": 24,
39
  "trial_name": null,
40
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:432b24e0a454c6ba118d533189ffa32a4ef3f0654798d68a647380ab38384532
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5b37466798b22cd1c58fa164a2deb26fd86f95d9733731d6588e8abd389d38
3
  size 5368