fpadovani committed
Commit 8bbdea9 (verified) · Parent: 28c4a07

Training in progress, step 12000, checkpoint

checkpoint-12000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d6594bcda7c37f46af5a7034a0a3a5e5404b400fd58503a826192b4a17a7e26
+oid sha256:1cad5ac5a5ac9ae4ebcc434ed367cd166b5c8d9198470a60a6a4734ba58b34d8
 size 51007160
checkpoint-12000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d074f60296c3775bf8ee10baa8786f755def3ce442bd21e24fa06620063edec6
+oid sha256:af955ba6d5a07870556a93eaed58863f5b28875d0e0c95dc1b635384e7ca9c7b
 size 102078202
checkpoint-12000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:830ab2b6136d8d2aab64443305d7325934223d7dc3a59a6214ea6709b98b0520
+oid sha256:5dee6be081b446e09b1bd96d3dd8c7d6f80a5e3fc6ada987b1d58900a7916323
 size 14308
checkpoint-12000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5463b724c2cd6909cd79c85b289d00a369e4fb6bb042482b75620c59845c7360
+oid sha256:a300d405a31ed9a6a3aacb8615e61e340e96b3c687b201bcf6ed4ae9f299ec86
 size 1000
checkpoint-12000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-12000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 4.1580729484558105,
-  "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/fr_clm/childes_30/checkpoint-12000",
-  "epoch": 18.867924528301888,
+  "best_metric": 4.983631134033203,
+  "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/de_clm/childes_30/checkpoint-12000",
+  "epoch": 12.578616352201259,
   "eval_steps": 2000,
   "global_step": 12000,
   "is_hyper_param_search": false,
@@ -9,79 +9,79 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 3.1446540880503147,
-      "eval_loss": 6.67229700088501,
-      "eval_runtime": 0.6568,
-      "eval_samples_per_second": 1386.938,
-      "eval_steps_per_second": 86.779,
+      "epoch": 2.0964360587002098,
+      "eval_loss": 7.102903366088867,
+      "eval_runtime": 0.9708,
+      "eval_samples_per_second": 1416.286,
+      "eval_steps_per_second": 88.582,
       "step": 2000
     },
     {
-      "epoch": 6.289308176100629,
-      "grad_norm": 1.4630517959594727,
+      "epoch": 4.1928721174004195,
+      "grad_norm": 1.3964662551879883,
       "learning_rate": 1e-05,
-      "loss": 6.5359,
+      "loss": 6.9987,
       "step": 4000
     },
     {
-      "epoch": 6.289308176100629,
-      "eval_loss": 5.102311611175537,
-      "eval_runtime": 0.6383,
-      "eval_samples_per_second": 1427.13,
-      "eval_steps_per_second": 89.294,
+      "epoch": 4.1928721174004195,
+      "eval_loss": 5.884151935577393,
+      "eval_runtime": 0.966,
+      "eval_samples_per_second": 1423.408,
+      "eval_steps_per_second": 89.028,
       "step": 4000
     },
     {
-      "epoch": 9.433962264150944,
-      "eval_loss": 4.675341606140137,
-      "eval_runtime": 0.6396,
-      "eval_samples_per_second": 1424.427,
-      "eval_steps_per_second": 89.124,
+      "epoch": 6.289308176100629,
+      "eval_loss": 5.54873514175415,
+      "eval_runtime": 0.9657,
+      "eval_samples_per_second": 1423.84,
+      "eval_steps_per_second": 89.055,
       "step": 6000
     },
     {
-      "epoch": 12.578616352201259,
-      "grad_norm": 2.3778915405273438,
+      "epoch": 8.385744234800839,
+      "grad_norm": 2.7172107696533203,
       "learning_rate": 2e-05,
-      "loss": 4.34,
+      "loss": 5.2204,
       "step": 8000
     },
     {
-      "epoch": 12.578616352201259,
-      "eval_loss": 4.42986536026001,
-      "eval_runtime": 0.646,
-      "eval_samples_per_second": 1410.229,
-      "eval_steps_per_second": 88.236,
+      "epoch": 8.385744234800839,
+      "eval_loss": 5.2793288230896,
+      "eval_runtime": 0.9644,
+      "eval_samples_per_second": 1425.779,
+      "eval_steps_per_second": 89.176,
       "step": 8000
     },
     {
-      "epoch": 15.723270440251572,
-      "eval_loss": 4.2789154052734375,
-      "eval_runtime": 0.6485,
-      "eval_samples_per_second": 1404.856,
-      "eval_steps_per_second": 87.9,
+      "epoch": 10.482180293501049,
+      "eval_loss": 5.10486364364624,
+      "eval_runtime": 0.9641,
+      "eval_samples_per_second": 1426.204,
+      "eval_steps_per_second": 89.203,
       "step": 10000
     },
     {
-      "epoch": 18.867924528301888,
-      "grad_norm": 2.512312889099121,
-      "learning_rate": 2.9995e-05,
-      "loss": 3.8692,
+      "epoch": 12.578616352201259,
+      "grad_norm": 2.500443458557129,
+      "learning_rate": 2.99925e-05,
+      "loss": 4.7358,
       "step": 12000
     },
     {
-      "epoch": 18.867924528301888,
-      "eval_loss": 4.1580729484558105,
-      "eval_runtime": 0.6403,
-      "eval_samples_per_second": 1422.769,
-      "eval_steps_per_second": 89.021,
+      "epoch": 12.578616352201259,
+      "eval_loss": 4.983631134033203,
+      "eval_runtime": 0.9644,
+      "eval_samples_per_second": 1425.809,
+      "eval_steps_per_second": 89.178,
       "step": 12000
     }
   ],
   "logging_steps": 4000,
   "max_steps": 100000,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 158,
+  "num_train_epochs": 105,
   "save_steps": 4000,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -95,7 +95,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3102504217214976.0,
+  "total_flos": 3102794094231552.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
checkpoint-12000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6af9de70859d62cbd3b61f71e8a5bc95702dbc6ddb62d2994641a31953e4ea9b
+oid sha256:0c779122676b54107edc62ae0b9293c062733193c5f82a36a2bc097bca192814
 size 5368
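
The trainer_state.json updated above records the evaluation-loss history and best metric for this run. A minimal sketch for inspecting it, assuming the checkpoint directory has been fetched locally (e.g. via git lfs pull); the path below is illustrative:

import json

# Read the trainer state saved alongside checkpoint-12000 and print the
# best metric plus the evaluation-loss curve logged every eval_steps.
with open("checkpoint-12000/trainer_state.json") as f:
    state = json.load(f)

print("best_metric:", state["best_metric"])
print("best_model_checkpoint:", state["best_model_checkpoint"])

for entry in state["log_history"]:
    # Evaluation entries carry "eval_loss"; training entries log "loss" instead.
    if "eval_loss" in entry:
        print(f"step {entry['step']:>6}  epoch {entry['epoch']:.2f}  eval_loss {entry['eval_loss']:.4f}")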