trapoom555 committed
Commit 86fda2e · 1 Parent(s): ea96d68

delete optimizer state

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .DS_Store +0 -0
  2. checkpoint-10/optimizer.pt +0 -3
  3. checkpoint-10/rng_state_0.pth +0 -3
  4. checkpoint-10/rng_state_1.pth +0 -3
  5. checkpoint-10/rng_state_2.pth +0 -3
  6. checkpoint-10/rng_state_3.pth +0 -3
  7. checkpoint-10/scheduler.pt +0 -3
  8. checkpoint-10/trainer_state.json +0 -28
  9. checkpoint-100/optimizer.pt +0 -3
  10. checkpoint-100/rng_state_0.pth +0 -3
  11. checkpoint-100/rng_state_1.pth +0 -3
  12. checkpoint-100/rng_state_2.pth +0 -3
  13. checkpoint-100/rng_state_3.pth +0 -3
  14. checkpoint-100/scheduler.pt +0 -3
  15. checkpoint-100/trainer_state.json +0 -91
  16. checkpoint-110/optimizer.pt +0 -3
  17. checkpoint-110/rng_state_0.pth +0 -3
  18. checkpoint-110/rng_state_1.pth +0 -3
  19. checkpoint-110/rng_state_2.pth +0 -3
  20. checkpoint-110/rng_state_3.pth +0 -3
  21. checkpoint-110/scheduler.pt +0 -3
  22. checkpoint-110/trainer_state.json +0 -98
  23. checkpoint-120/optimizer.pt +0 -3
  24. checkpoint-120/rng_state_0.pth +0 -3
  25. checkpoint-120/rng_state_1.pth +0 -3
  26. checkpoint-120/rng_state_2.pth +0 -3
  27. checkpoint-120/rng_state_3.pth +0 -3
  28. checkpoint-120/scheduler.pt +0 -3
  29. checkpoint-120/trainer_state.json +0 -105
  30. checkpoint-130/optimizer.pt +0 -3
  31. checkpoint-130/rng_state_0.pth +0 -3
  32. checkpoint-130/rng_state_1.pth +0 -3
  33. checkpoint-130/rng_state_2.pth +0 -3
  34. checkpoint-130/rng_state_3.pth +0 -3
  35. checkpoint-130/scheduler.pt +0 -3
  36. checkpoint-130/trainer_state.json +0 -112
  37. checkpoint-140/optimizer.pt +0 -3
  38. checkpoint-140/rng_state_0.pth +0 -3
  39. checkpoint-140/rng_state_1.pth +0 -3
  40. checkpoint-140/rng_state_2.pth +0 -3
  41. checkpoint-140/rng_state_3.pth +0 -3
  42. checkpoint-140/scheduler.pt +0 -3
  43. checkpoint-140/trainer_state.json +0 -119
  44. checkpoint-150/optimizer.pt +0 -3
  45. checkpoint-150/rng_state_0.pth +0 -3
  46. checkpoint-150/rng_state_1.pth +0 -3
  47. checkpoint-150/rng_state_2.pth +0 -3
  48. checkpoint-150/rng_state_3.pth +0 -3
  49. checkpoint-150/scheduler.pt +0 -3
  50. checkpoint-150/trainer_state.json +0 -126
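
The files deleted above are the auxiliary state the Hugging Face Trainer writes next to each checkpoint so a run can be resumed: optimizer and scheduler state, one RNG snapshot per process, and the trainer_state.json log. Below is a minimal sketch of this kind of cleanup; it is an illustration only, not a script from this repository, and the repository path is a placeholder.

```python
# Illustrative sketch -- this repository does not ship such a script.
# It mirrors the cleanup visible in this commit: for every checkpoint-* folder,
# drop optimizer/scheduler state, per-rank RNG snapshots, and trainer_state.json,
# keeping the model weights.
from pathlib import Path

# Resume-only files; the exact names are taken from the file list above.
RESUME_ONLY = ("optimizer.pt", "scheduler.pt", "trainer_state.json")


def prune_checkpoint(ckpt_dir: Path) -> None:
    """Delete resume-only state from one checkpoint directory."""
    for name in RESUME_ONLY:
        target = ckpt_dir / name
        if target.exists():
            target.unlink()
    # Per-process RNG snapshots (rng_state_0.pth ... rng_state_3.pth in this repo).
    for rng_file in ckpt_dir.glob("rng_state_*.pth"):
        rng_file.unlink()


if __name__ == "__main__":
    repo_root = Path(".")  # placeholder: run from a local clone of the repo
    for ckpt_dir in sorted(repo_root.glob("checkpoint-*")):
        if ckpt_dir.is_dir():
            prune_checkpoint(ckpt_dir)
            print(f"pruned {ckpt_dir}")
```
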
.DS_Store ADDED
Binary file (6.15 kB)
checkpoint-10/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:182e3a17774463e0c6bd5a37fdd0e3c3019690160e9e56a5105f298808efdc4c
- size 11930938

checkpoint-10/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:279c231f7db5849b53ea6f61278709c8be27bcc46fc1b36100377bf36c55cfb9
- size 15024

checkpoint-10/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:35738ebb9e53709608b7f4feaf1edbde1a19901d813f15922153ded80ead6540
- size 15024

checkpoint-10/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:3f411b8c60d90c0733bb03c4955ea2e40ab35464f214cb47cc4d6d0eaa83bc79
- size 15024

checkpoint-10/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7224ff493b87486a3e2c3001115ad539913e8fe95cf25f4bcae3236f97e83f41
- size 15024

checkpoint-10/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:f83a43a1702074407e12e3670badb2ee9d4306d850fa20aefab82c78c45597c3
- size 1064

checkpoint-10/trainer_state.json DELETED
@@ -1,28 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.0007256367462448298,
- "eval_steps": 500,
- "global_step": 10,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-100/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:4cb35f5dac123d790c75782fc60bc45b28afd76cdf742d7d98cceda5ae4d25a4
- size 11930938

checkpoint-100/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:85f43e42ff30186bb51f3d90dcd7d261d6e09960636961fd696f9478303d1331
- size 15024

checkpoint-100/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:daf210db9013b20416569b6811b878570fbbf461f867de41a8a69fd07f0d2c8c
- size 15024

checkpoint-100/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:132e0dad8f05cba3da38386b81951c801df7c5c2c1cf9e06b5d359b7b92422da
- size 15024

checkpoint-100/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:441e2aab46e3935d5d49029fda3ebaf07053ac3a8e8a6eb7aca038ab1127bea1
- size 15024

checkpoint-100/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:dc19241d2a41bda65817df0018620c7c8c7fc240c9e0ee55d613dab2e95b85b4
- size 1064

checkpoint-100/trainer_state.json DELETED
@@ -1,91 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.007256367462448298,
- "eval_steps": 500,
- "global_step": 100,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-110/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:1bb407bfe2800cc6fdca75862cafe072c48ae7906dc275dfa7e98ff5edaf0e08
- size 11930938

checkpoint-110/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:ac95db18ae6a1e414f19563e15335ec1a3d44d5b26a3896a591a42bf53daac57
- size 15024

checkpoint-110/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:9ad2adf8c9d84012d5c08bc34b7d7b7bd8f571238b97deba7b563bc8579f284e
- size 15024

checkpoint-110/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d02fc29e95ce367f0b8273bbbf6e41186c317282c9a486968d768ffcb716f8dd
- size 15024

checkpoint-110/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:540b9cf222feb8019c875aee3fd37ce5b892ea395b93ddd0b75459462687e321
- size 15024

checkpoint-110/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:74829487ac903b3ed35b75a02da621979cc3f366050c3017755d2bfa4119fb65
- size 1064

checkpoint-110/trainer_state.json DELETED
@@ -1,98 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.007982004208693128,
- "eval_steps": 500,
- "global_step": 110,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- },
- {
- "epoch": 0.007982004208693128,
- "grad_norm": 1.5859375,
- "learning_rate": 0.000499999340865746,
- "loss": 0.2922,
- "step": 110
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-120/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8c2d97d46a86ec0f3e481bffb226c09dfb3c7a1af0595f32b22e685a0a0c3a54
- size 11930938

checkpoint-120/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e010d5c5e5ff459e09cee093e035058bce80bd0e562b9008cf49e37a37c4a265
- size 15024

checkpoint-120/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8ceaec8c84867fda1405ca685c206ff5498d51b755970edb435f4777d1649c24
- size 15024

checkpoint-120/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:791eebd4302125380e3da4e87668a4bb1db8af54a2e9f9519cb225a5eefb78b6
- size 15024

checkpoint-120/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7b5e8577ac55da749e71c292571c66ba7068eaeeac8f69a2d9ecb004c4ea24df
- size 15024

checkpoint-120/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:ed7d425e862f330c41856fd5fe8baa40e1998a2b4162a2709a8d1aba7a072f7e
- size 1064

checkpoint-120/trainer_state.json DELETED
@@ -1,105 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.008707640954937958,
- "eval_steps": 500,
- "global_step": 120,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- },
- {
- "epoch": 0.007982004208693128,
- "grad_norm": 1.5859375,
- "learning_rate": 0.000499999340865746,
- "loss": 0.2922,
- "step": 110
- },
- {
- "epoch": 0.008707640954937958,
- "grad_norm": 3.8125,
- "learning_rate": 0.0004999973634664594,
- "loss": 0.3996,
- "step": 120
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-130/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7b0cd305b6dc9a4a71a0c0e01f6956397839e506c56dbc71ad6dcacaa60e54b6
- size 11930938

checkpoint-130/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:7ced0c6ba13f477a0dbd44034592fe000f226e71898cbab5bf87ce59dc6bde36
- size 15024

checkpoint-130/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e97d793be909b79220b59b211d87fda9d35184d2305c00641e9b4531b73b8441
- size 15024

checkpoint-130/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:60416c656b12aaecd01e32e964532f371c0a6b02a4b9b91ccfdc35d45dce0050
- size 15024

checkpoint-130/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:218ddffb5978f25094e6ad3cfbfc85ad7b807a183e3bc9f6f15bd471542d7273
- size 15024

checkpoint-130/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:28e2fee165899e7e8256fbe1ade72b87b60a73a6f8ec65162793c9cfaf3c5dea
- size 1064

checkpoint-130/trainer_state.json DELETED
@@ -1,112 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.009433277701182788,
- "eval_steps": 500,
- "global_step": 130,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- },
- {
- "epoch": 0.007982004208693128,
- "grad_norm": 1.5859375,
- "learning_rate": 0.000499999340865746,
- "loss": 0.2922,
- "step": 110
- },
- {
- "epoch": 0.008707640954937958,
- "grad_norm": 3.8125,
- "learning_rate": 0.0004999973634664594,
- "loss": 0.3996,
- "step": 120
- },
- {
- "epoch": 0.009433277701182788,
- "grad_norm": 2.0625,
- "learning_rate": 0.0004999940678125673,
- "loss": 0.2841,
- "step": 130
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-140/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:a2655f85f4e3e2b20b017eb2bb5965d4d222442c92cc04d3028defc26b55eab7
- size 11930938

checkpoint-140/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6ad67dbef2a21b26f3117ca45d621957bf72b1116535cf6e524b17661b94b1a9
- size 15024

checkpoint-140/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:ca3afd4f067268e4c6ff34242266c9e70bce106dd4d7365781bb893119a4033d
- size 15024

checkpoint-140/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:f7a56e9bc058e763d68d477e80d923c2fe559a75d518ac8d5d693397a88304b3
- size 15024

checkpoint-140/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8aef494a54b19f2a6c92fb251d8acadbfc7c21bcba926f5a7f5fa134981bb678
- size 15024

checkpoint-140/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:30859c02f6b970089c798019b31d9595b893cd0ad7ef1e694d1a5869622b8738
- size 1064

checkpoint-140/trainer_state.json DELETED
@@ -1,119 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.010158914447427617,
- "eval_steps": 500,
- "global_step": 140,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- },
- {
- "epoch": 0.007982004208693128,
- "grad_norm": 1.5859375,
- "learning_rate": 0.000499999340865746,
- "loss": 0.2922,
- "step": 110
- },
- {
- "epoch": 0.008707640954937958,
- "grad_norm": 3.8125,
- "learning_rate": 0.0004999973634664594,
- "loss": 0.3996,
- "step": 120
- },
- {
- "epoch": 0.009433277701182788,
- "grad_norm": 2.0625,
- "learning_rate": 0.0004999940678125673,
- "loss": 0.2841,
- "step": 130
- },
- {
- "epoch": 0.010158914447427617,
- "grad_norm": 3.390625,
- "learning_rate": 0.000499989453921448,
- "loss": 0.3003,
- "step": 140
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }
checkpoint-150/optimizer.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b75ce68a3a6892278035b4a46b87384dd6eb3f0270ba1243b5db34285ebf6b93
- size 11930938

checkpoint-150/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c342af2ce35811f7314d04dcf27fe047ef7a2c2c65a53827cf5bfa3bbef9abbb
- size 15024

checkpoint-150/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:732cda9265841aab840d0742ab54e81d4890cc436da4ad72a7491a2de6e456cd
- size 15024

checkpoint-150/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b9e93aaf91c3d45dc0a00b2862a0b23147bc87200884e67202507624081ba206
- size 15024

checkpoint-150/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:bde38e9c7ebd4dcc6310f8e51cdb47e2f01b8ae902f2ef5613c6f4a36b2b5231
- size 15024

checkpoint-150/scheduler.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:45654c01dd78f553696a346db1ede715fbcdf68458bb3234128c8ff7cd662376
- size 1064

checkpoint-150/trainer_state.json DELETED
@@ -1,126 +0,0 @@
- {
- "best_metric": null,
- "best_model_checkpoint": null,
- "epoch": 0.010884551193672447,
- "eval_steps": 500,
- "global_step": 150,
- "is_hyper_param_search": false,
- "is_local_process_zero": true,
- "is_world_process_zero": true,
- "log_history": [
- {
- "epoch": 0.0007256367462448298,
- "grad_norm": 12.0,
- "learning_rate": 5e-05,
- "loss": 3.0993,
- "step": 10
- },
- {
- "epoch": 0.0014512734924896596,
- "grad_norm": 10.4375,
- "learning_rate": 0.0001,
- "loss": 2.208,
- "step": 20
- },
- {
- "epoch": 0.0021769102387344894,
- "grad_norm": 6.5625,
- "learning_rate": 0.00015,
- "loss": 1.3285,
- "step": 30
- },
- {
- "epoch": 0.0029025469849793192,
- "grad_norm": 6.21875,
- "learning_rate": 0.0002,
- "loss": 0.6895,
- "step": 40
- },
- {
- "epoch": 0.003628183731224149,
- "grad_norm": 4.15625,
- "learning_rate": 0.00025,
- "loss": 0.714,
- "step": 50
- },
- {
- "epoch": 0.004353820477468979,
- "grad_norm": 4.71875,
- "learning_rate": 0.0003,
- "loss": 0.4849,
- "step": 60
- },
- {
- "epoch": 0.005079457223713809,
- "grad_norm": 3.828125,
- "learning_rate": 0.00035,
- "loss": 0.3671,
- "step": 70
- },
- {
- "epoch": 0.0058050939699586385,
- "grad_norm": 4.03125,
- "learning_rate": 0.0004,
- "loss": 0.5693,
- "step": 80
- },
- {
- "epoch": 0.006530730716203468,
- "grad_norm": 3.296875,
- "learning_rate": 0.00045000000000000004,
- "loss": 0.4133,
- "step": 90
- },
- {
- "epoch": 0.007256367462448298,
- "grad_norm": 2.640625,
- "learning_rate": 0.0005,
- "loss": 0.2668,
- "step": 100
- },
- {
- "epoch": 0.007982004208693128,
- "grad_norm": 1.5859375,
- "learning_rate": 0.000499999340865746,
- "loss": 0.2922,
- "step": 110
- },
- {
- "epoch": 0.008707640954937958,
- "grad_norm": 3.8125,
- "learning_rate": 0.0004999973634664594,
- "loss": 0.3996,
- "step": 120
- },
- {
- "epoch": 0.009433277701182788,
- "grad_norm": 2.0625,
- "learning_rate": 0.0004999940678125673,
- "loss": 0.2841,
- "step": 130
- },
- {
- "epoch": 0.010158914447427617,
- "grad_norm": 3.390625,
- "learning_rate": 0.000499989453921448,
- "loss": 0.3003,
- "step": 140
- },
- {
- "epoch": 0.010884551193672447,
- "grad_norm": 1.65625,
- "learning_rate": 0.0004999835218174307,
- "loss": 0.2747,
- "step": 150
- }
- ],
- "logging_steps": 10,
- "max_steps": 13781,
- "num_input_tokens_seen": 0,
- "num_train_epochs": 1,
- "save_steps": 10,
- "total_flos": 0.0,
- "train_batch_size": 5,
- "trial_name": null,
- "trial_params": null
- }