fpadovani committed on
Commit
1277ef0
·
verified ·
1 Parent(s): 6b1ad3e

Training in progress, step 8000, checkpoint

Browse files
checkpoint-8000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:627229742a2d293c1beac9304f1d11ed17fcd6fcdf22b770ecb36237829f130a
3
  size 51007160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d3ebe97ee50f546614ac9f949feee247bc5dc408ceaad89fcce6a53e1752f38
3
  size 51007160
checkpoint-8000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0fb22c8ee61e10feb3524488f9ce5fb88ae53870247546b683a4718d13b1daf
3
  size 102078202
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f462633d16d687e09aeea034e4d00318608d3344167292227cee0583a4f48de
3
  size 102078202
checkpoint-8000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9cbde3cb69d8ca158786a74c764490a016e8910b7620e6dd0143fffdadddefa
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189d6e84bc40b3cc8958937f6d3c0e2a60ff9b6d811dec81418c92589744536a
3
  size 14308
checkpoint-8000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-8000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 4.42986536026001,
3
- "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/fr_clm/childes_30/checkpoint-8000",
4
- "epoch": 12.578616352201259,
5
  "eval_steps": 2000,
6
  "global_step": 8000,
7
  "is_hyper_param_search": false,
@@ -9,56 +9,56 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 3.1446540880503147,
13
- "eval_loss": 6.67229700088501,
14
- "eval_runtime": 0.6568,
15
- "eval_samples_per_second": 1386.938,
16
- "eval_steps_per_second": 86.779,
17
  "step": 2000
18
  },
19
  {
20
- "epoch": 6.289308176100629,
21
- "grad_norm": 1.4630517959594727,
22
  "learning_rate": 1e-05,
23
- "loss": 6.5359,
24
  "step": 4000
25
  },
26
  {
27
- "epoch": 6.289308176100629,
28
- "eval_loss": 5.102311611175537,
29
- "eval_runtime": 0.6383,
30
- "eval_samples_per_second": 1427.13,
31
- "eval_steps_per_second": 89.294,
32
  "step": 4000
33
  },
34
  {
35
- "epoch": 9.433962264150944,
36
- "eval_loss": 4.675341606140137,
37
- "eval_runtime": 0.6396,
38
- "eval_samples_per_second": 1424.427,
39
- "eval_steps_per_second": 89.124,
40
  "step": 6000
41
  },
42
  {
43
- "epoch": 12.578616352201259,
44
- "grad_norm": 2.3778915405273438,
45
  "learning_rate": 2e-05,
46
- "loss": 4.34,
47
  "step": 8000
48
  },
49
  {
50
- "epoch": 12.578616352201259,
51
- "eval_loss": 4.42986536026001,
52
- "eval_runtime": 0.646,
53
- "eval_samples_per_second": 1410.229,
54
- "eval_steps_per_second": 88.236,
55
  "step": 8000
56
  }
57
  ],
58
  "logging_steps": 4000,
59
  "max_steps": 100000,
60
  "num_input_tokens_seen": 0,
61
- "num_train_epochs": 158,
62
  "save_steps": 4000,
63
  "stateful_callbacks": {
64
  "TrainerControl": {
@@ -72,7 +72,7 @@
72
  "attributes": {}
73
  }
74
  },
75
- "total_flos": 2068336144809984.0,
76
  "train_batch_size": 16,
77
  "trial_name": null,
78
  "trial_params": null
 
1
  {
2
+ "best_metric": 5.2793288230896,
3
+ "best_model_checkpoint": "/home/p318482/babyLM_controlled/models_trained/de_clm/childes_30/checkpoint-8000",
4
+ "epoch": 8.385744234800839,
5
  "eval_steps": 2000,
6
  "global_step": 8000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 2.0964360587002098,
13
+ "eval_loss": 7.102903366088867,
14
+ "eval_runtime": 0.9708,
15
+ "eval_samples_per_second": 1416.286,
16
+ "eval_steps_per_second": 88.582,
17
  "step": 2000
18
  },
19
  {
20
+ "epoch": 4.1928721174004195,
21
+ "grad_norm": 1.3964662551879883,
22
  "learning_rate": 1e-05,
23
+ "loss": 6.9987,
24
  "step": 4000
25
  },
26
  {
27
+ "epoch": 4.1928721174004195,
28
+ "eval_loss": 5.884151935577393,
29
+ "eval_runtime": 0.966,
30
+ "eval_samples_per_second": 1423.408,
31
+ "eval_steps_per_second": 89.028,
32
  "step": 4000
33
  },
34
  {
35
+ "epoch": 6.289308176100629,
36
+ "eval_loss": 5.54873514175415,
37
+ "eval_runtime": 0.9657,
38
+ "eval_samples_per_second": 1423.84,
39
+ "eval_steps_per_second": 89.055,
40
  "step": 6000
41
  },
42
  {
43
+ "epoch": 8.385744234800839,
44
+ "grad_norm": 2.7172107696533203,
45
  "learning_rate": 2e-05,
46
+ "loss": 5.2204,
47
  "step": 8000
48
  },
49
  {
50
+ "epoch": 8.385744234800839,
51
+ "eval_loss": 5.2793288230896,
52
+ "eval_runtime": 0.9644,
53
+ "eval_samples_per_second": 1425.779,
54
+ "eval_steps_per_second": 89.176,
55
  "step": 8000
56
  }
57
  ],
58
  "logging_steps": 4000,
59
  "max_steps": 100000,
60
  "num_input_tokens_seen": 0,
61
+ "num_train_epochs": 105,
62
  "save_steps": 4000,
63
  "stateful_callbacks": {
64
  "TrainerControl": {
 
72
  "attributes": {}
73
  }
74
  },
75
+ "total_flos": 2068529059479552.0,
76
  "train_batch_size": 16,
77
  "trial_name": null,
78
  "trial_params": null
checkpoint-8000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6af9de70859d62cbd3b61f71e8a5bc95702dbc6ddb62d2994641a31953e4ea9b
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c779122676b54107edc62ae0b9293c062733193c5f82a36a2bc097bca192814
3
  size 5368