caijanfeng committed on
Commit
79c44a2
·
verified ·
1 Parent(s): cbe2667

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/caijf-USTC/huggingface/runs/8dhrl9y4)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -35,9 +35,9 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
35
  ### Framework versions
36
 
37
  - TRL: 0.16.0.dev0
38
- - Transformers: 4.49.0.dev0
39
  - Pytorch: 2.5.1
40
- - Datasets: 3.3.0
41
  - Tokenizers: 0.21.0
42
 
43
  ## Citations
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/caijf-USTC/huggingface/runs/2l0ywuxv)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
35
  ### Framework versions
36
 
37
  - TRL: 0.16.0.dev0
38
+ - Transformers: 4.49.0
39
  - Pytorch: 2.5.1
40
+ - Datasets: 3.3.2
41
  - Tokenizers: 0.21.0
42
 
43
  ## Citations
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0014648210027220997,
4
- "train_runtime": 13165.6558,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.57,
7
  "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.04403019331170824,
4
+ "train_runtime": 12949.6635,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.579,
7
  "train_steps_per_second": 0.004
8
  }
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "sliding_window": 4096,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.49.0.dev0",
26
  "use_cache": false,
27
  "use_mrope": false,
28
  "use_sliding_window": false,
 
22
  "sliding_window": 4096,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0",
26
  "use_cache": false,
27
  "use_mrope": false,
28
  "use_sliding_window": false,
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "bos_token_id": 151643,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
- "transformers_version": "4.49.0.dev0"
6
  }
 
2
  "bos_token_id": 151643,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
+ "transformers_version": "4.49.0"
6
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7547a9cb1541f12d9eccc5eed04484ea49cf6453d1310c36fda81f400b382ff
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3797589222016a3a611e96dc5a69737991986dc2c060c32d13e9c000f56bea5
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e8ade0b82fa0b5f3b7e336330c6bf7ca2932caab1f9621c96ba76e39f3841d3
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8049419da69fd759a2e43e1aba7e8fde6fe4f81c376c18fd58697db119a219ee
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9c7c319881619560c296c1f5fc148928ac2c00014ace3e621d3940537fa882d
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6101942e8e37c192f1b8bdc504842bafb68a1b7914889a5e162468c241e03fc6
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af4c5874389c104b9ba33995242e2383d7578a5e938b4865f9c70130e569a0c0
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff45b93884d6dd43ac45c9567a2caf807d9245490744f7698612ac41f84b94e
3
  size 1089994880
tokenizer_config.json CHANGED
@@ -202,7 +202,6 @@
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
204
  "pad_token": "<|endoftext|>",
205
- "padding_side": "left",
206
  "split_special_tokens": false,
207
  "tokenizer_class": "Qwen2Tokenizer",
208
  "unk_token": null
 
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
204
  "pad_token": "<|endoftext|>",
 
205
  "split_special_tokens": false,
206
  "tokenizer_class": "Qwen2Tokenizer",
207
  "unk_token": null
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0014648210027220997,
4
- "train_runtime": 13165.6558,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.57,
7
  "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.04403019331170824,
4
+ "train_runtime": 12949.6635,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.579,
7
  "train_steps_per_second": 0.004
8
  }
trainer_state.json CHANGED
@@ -9,161 +9,187 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 614.1698936462402,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "epoch": 0.08528784648187633,
14
- "grad_norm": 1.6420103311538696,
15
- "kl": 0.00021245479583740235,
16
  "learning_rate": 2.5e-06,
17
- "loss": 0.0,
18
- "reward": 0.6379464596509934,
19
- "reward_std": 0.3210102315992117,
20
- "rewards/accuracy_reward": 0.6375000312924385,
21
- "rewards/format_reward": 0.00044642859138548373,
22
  "step": 5
23
  },
24
  {
25
- "completion_length": 624.264535522461,
 
26
  "epoch": 0.17057569296375266,
27
- "grad_norm": 1526.457275390625,
28
- "kl": 0.14271965026855468,
29
  "learning_rate": 2.956412726139078e-06,
30
- "loss": 0.0057,
31
- "reward": 0.6959821730852127,
32
- "reward_std": 0.28622329905629157,
33
- "rewards/accuracy_reward": 0.6955357447266579,
34
- "rewards/format_reward": 0.00044642859138548373,
35
  "step": 10
36
  },
37
  {
38
- "completion_length": 616.0730186462403,
 
39
  "epoch": 0.255863539445629,
40
- "grad_norm": 0.11626364290714264,
41
- "kl": 0.0034526824951171876,
42
  "learning_rate": 2.7836719084521715e-06,
43
- "loss": 0.0001,
44
- "reward": 0.7558036059141159,
45
- "reward_std": 0.22891067173331975,
46
- "rewards/accuracy_reward": 0.7555803924798965,
47
- "rewards/format_reward": 0.00022321429569274187,
48
  "step": 15
49
  },
50
  {
51
- "completion_length": 602.7257011413574,
 
52
  "epoch": 0.3411513859275053,
53
- "grad_norm": 0.3678954541683197,
54
- "kl": 0.0035373687744140623,
55
  "learning_rate": 2.4946839873611927e-06,
56
- "loss": 0.0001,
57
- "reward": 0.7736607491970062,
58
- "reward_std": 0.1879386292770505,
59
- "rewards/accuracy_reward": 0.7736607491970062,
60
  "rewards/format_reward": 0.0,
61
  "step": 20
62
  },
63
  {
64
- "completion_length": 606.3076171875,
 
65
  "epoch": 0.42643923240938164,
66
- "grad_norm": 0.08185073733329773,
67
- "kl": 0.008475685119628906,
68
  "learning_rate": 2.1156192081791355e-06,
69
- "loss": 0.0003,
70
- "reward": 0.7618303924798966,
71
- "reward_std": 0.18782664239406585,
72
- "rewards/accuracy_reward": 0.7618303924798966,
73
  "rewards/format_reward": 0.0,
74
  "step": 25
75
  },
76
  {
77
- "completion_length": 615.8890892028809,
 
78
  "epoch": 0.511727078891258,
79
- "grad_norm": 0.1566159576177597,
80
- "kl": 0.0039794921875,
81
  "learning_rate": 1.6808050203829845e-06,
82
- "loss": 0.0002,
83
- "reward": 0.7493303924798965,
84
- "reward_std": 0.18519791485741735,
85
- "rewards/accuracy_reward": 0.7493303924798965,
86
  "rewards/format_reward": 0.0,
87
  "step": 30
88
  },
89
  {
90
- "completion_length": 610.3998023986817,
 
91
  "epoch": 0.5970149253731343,
92
- "grad_norm": 0.07447274774312973,
93
- "kl": 0.004097747802734375,
94
  "learning_rate": 1.2296174432791415e-06,
95
- "loss": 0.0002,
96
- "reward": 0.7495536029338836,
97
- "reward_std": 0.1825955007225275,
98
- "rewards/accuracy_reward": 0.7495536029338836,
99
  "rewards/format_reward": 0.0,
100
  "step": 35
101
  },
102
  {
103
- "completion_length": 595.3044914245605,
 
104
  "epoch": 0.6823027718550106,
105
- "grad_norm": 0.07800718396902084,
106
- "kl": 0.0040496826171875,
107
  "learning_rate": 8.029152419343472e-07,
108
- "loss": 0.0002,
109
- "reward": 0.7689732506871223,
110
- "reward_std": 0.17793030026368797,
111
- "rewards/accuracy_reward": 0.7689732506871223,
112
  "rewards/format_reward": 0.0,
113
  "step": 40
114
  },
115
  {
116
- "completion_length": 614.7270347595215,
 
117
  "epoch": 0.767590618336887,
118
- "grad_norm": 0.07937229424715042,
119
- "kl": 0.004020309448242188,
120
  "learning_rate": 4.3933982822017883e-07,
121
- "loss": 0.0002,
122
- "reward": 0.7448661088943481,
123
- "reward_std": 0.20116904862225055,
124
- "rewards/accuracy_reward": 0.7446428954601287,
125
- "rewards/format_reward": 0.00022321429569274187,
126
  "step": 45
127
  },
128
  {
129
- "completion_length": 618.0094017028808,
 
130
  "epoch": 0.8528784648187633,
131
- "grad_norm": 0.074642114341259,
132
- "kl": 0.003514862060546875,
133
  "learning_rate": 1.718159615201853e-07,
134
- "loss": 0.0001,
135
- "reward": 0.743750037252903,
136
- "reward_std": 0.18902343986555933,
137
- "rewards/accuracy_reward": 0.743750037252903,
138
  "rewards/format_reward": 0.0,
139
  "step": 50
140
  },
141
  {
142
- "completion_length": 605.6493591308594,
 
143
  "epoch": 0.9381663113006397,
144
- "grad_norm": 0.08093011379241943,
145
- "kl": 0.0038265228271484376,
146
  "learning_rate": 2.4570139579284723e-08,
147
- "loss": 0.0002,
148
- "reward": 0.7801339656114579,
149
- "reward_std": 0.19253778588026763,
150
- "rewards/accuracy_reward": 0.7801339656114579,
151
  "rewards/format_reward": 0.0,
152
  "step": 55
153
  },
154
  {
155
- "completion_length": 596.0093282063802,
 
156
  "epoch": 0.9893390191897654,
157
- "kl": 0.003498077392578125,
158
- "reward": 0.7726934800545374,
159
- "reward_std": 0.18784288999934992,
160
- "rewards/accuracy_reward": 0.7726934800545374,
161
  "rewards/format_reward": 0.0,
162
  "step": 58,
163
  "total_flos": 0.0,
164
- "train_loss": 0.0014648210027220997,
165
- "train_runtime": 13165.6558,
166
- "train_samples_per_second": 0.57,
167
  "train_steps_per_second": 0.004
168
  }
169
  ],
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "clip_ratio": 0.0,
13
+ "completion_length": 594.6719055175781,
14
+ "epoch": 0.017057569296375266,
15
+ "grad_norm": 0.39008715748786926,
16
+ "kl": 0.0,
17
+ "learning_rate": 5e-07,
18
+ "loss": -0.0015,
19
+ "reward": 0.5937500223517418,
20
+ "reward_std": 0.36258383840322495,
21
+ "rewards/accuracy_reward": 0.5937500223517418,
22
+ "rewards/format_reward": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "clip_ratio": 0.0,
27
+ "completion_length": 605.4260902404785,
28
  "epoch": 0.08528784648187633,
29
+ "grad_norm": 1.0314300060272217,
30
+ "kl": 0.00019499659538269043,
31
  "learning_rate": 2.5e-06,
32
+ "loss": 0.0241,
33
+ "reward": 0.6032366305589676,
34
+ "reward_std": 0.3533420516178012,
35
+ "rewards/accuracy_reward": 0.6032366305589676,
36
+ "rewards/format_reward": 0.0,
37
  "step": 5
38
  },
39
  {
40
+ "clip_ratio": 0.0,
41
+ "completion_length": 614.3067253112793,
42
  "epoch": 0.17057569296375266,
43
+ "grad_norm": 0.17284299433231354,
44
+ "kl": 0.005330562591552734,
45
  "learning_rate": 2.956412726139078e-06,
46
+ "loss": 0.069,
47
+ "reward": 0.6810268200933933,
48
+ "reward_std": 0.3049736712127924,
49
+ "rewards/accuracy_reward": 0.6810268200933933,
50
+ "rewards/format_reward": 0.0,
51
  "step": 10
52
  },
53
  {
54
+ "clip_ratio": 0.0,
55
+ "completion_length": 606.5038230895996,
56
  "epoch": 0.255863539445629,
57
+ "grad_norm": 0.15556961297988892,
58
+ "kl": 0.05679264068603516,
59
  "learning_rate": 2.7836719084521715e-06,
60
+ "loss": 0.0754,
61
+ "reward": 0.7587053939700127,
62
+ "reward_std": 0.22350936699658633,
63
+ "rewards/accuracy_reward": 0.7587053939700127,
64
+ "rewards/format_reward": 0.0,
65
  "step": 15
66
  },
67
  {
68
+ "clip_ratio": 0.0,
69
+ "completion_length": 584.5225677490234,
70
  "epoch": 0.3411513859275053,
71
+ "grad_norm": 0.2594759166240692,
72
+ "kl": 0.005410385131835937,
73
  "learning_rate": 2.4946839873611927e-06,
74
+ "loss": 0.0497,
75
+ "reward": 0.7783482521772385,
76
+ "reward_std": 0.19184589060023427,
77
+ "rewards/accuracy_reward": 0.7783482521772385,
78
  "rewards/format_reward": 0.0,
79
  "step": 20
80
  },
81
  {
82
+ "clip_ratio": 0.0,
83
+ "completion_length": 595.5861892700195,
84
  "epoch": 0.42643923240938164,
85
+ "grad_norm": 0.1489226222038269,
86
+ "kl": 0.007462882995605468,
87
  "learning_rate": 2.1156192081791355e-06,
88
+ "loss": 0.0395,
89
+ "reward": 0.7607143223285675,
90
+ "reward_std": 0.18709109332412482,
91
+ "rewards/accuracy_reward": 0.7607143223285675,
92
  "rewards/format_reward": 0.0,
93
  "step": 25
94
  },
95
  {
96
+ "clip_ratio": 0.0,
97
+ "completion_length": 602.0279273986816,
98
  "epoch": 0.511727078891258,
99
+ "grad_norm": 0.17840267717838287,
100
+ "kl": 0.008930206298828125,
101
  "learning_rate": 1.6808050203829845e-06,
102
+ "loss": 0.0342,
103
+ "reward": 0.7479911029338837,
104
+ "reward_std": 0.1881474507972598,
105
+ "rewards/accuracy_reward": 0.7479911029338837,
106
  "rewards/format_reward": 0.0,
107
  "step": 30
108
  },
109
  {
110
+ "clip_ratio": 0.0,
111
+ "completion_length": 595.2819473266602,
112
  "epoch": 0.5970149253731343,
113
+ "grad_norm": 0.19424788653850555,
114
+ "kl": 0.007519149780273437,
115
  "learning_rate": 1.2296174432791415e-06,
116
+ "loss": 0.0401,
117
+ "reward": 0.7430803917348385,
118
+ "reward_std": 0.19397471882402897,
119
+ "rewards/accuracy_reward": 0.7430803917348385,
120
  "rewards/format_reward": 0.0,
121
  "step": 35
122
  },
123
  {
124
+ "clip_ratio": 0.0,
125
+ "completion_length": 580.3154273986817,
126
  "epoch": 0.6823027718550106,
127
+ "grad_norm": 0.1329621970653534,
128
+ "kl": 0.009706497192382812,
129
  "learning_rate": 8.029152419343472e-07,
130
+ "loss": 0.0334,
131
+ "reward": 0.7716518208384514,
132
+ "reward_std": 0.18079692414030432,
133
+ "rewards/accuracy_reward": 0.7716518208384514,
134
  "rewards/format_reward": 0.0,
135
  "step": 40
136
  },
137
  {
138
+ "clip_ratio": 0.0,
139
+ "completion_length": 599.7227966308594,
140
  "epoch": 0.767590618336887,
141
+ "grad_norm": 0.16628068685531616,
142
+ "kl": 0.005841827392578125,
143
  "learning_rate": 4.3933982822017883e-07,
144
+ "loss": 0.0443,
145
+ "reward": 0.7444196790456772,
146
+ "reward_std": 0.209815969876945,
147
+ "rewards/accuracy_reward": 0.7444196790456772,
148
+ "rewards/format_reward": 0.0,
149
  "step": 45
150
  },
151
  {
152
+ "clip_ratio": 0.0,
153
+ "completion_length": 600.9355201721191,
154
  "epoch": 0.8528784648187633,
155
+ "grad_norm": 0.13309380412101746,
156
+ "kl": 0.006610107421875,
157
  "learning_rate": 1.718159615201853e-07,
158
+ "loss": 0.0371,
159
+ "reward": 0.7435268238186836,
160
+ "reward_std": 0.1858388701453805,
161
+ "rewards/accuracy_reward": 0.7435268238186836,
162
  "rewards/format_reward": 0.0,
163
  "step": 50
164
  },
165
  {
166
+ "clip_ratio": 0.0,
167
+ "completion_length": 593.021898651123,
168
  "epoch": 0.9381663113006397,
169
+ "grad_norm": 0.74787837266922,
170
+ "kl": 0.007943344116210938,
171
  "learning_rate": 2.4570139579284723e-08,
172
+ "loss": 0.0361,
173
+ "reward": 0.7767857477068901,
174
+ "reward_std": 0.19855766519904136,
175
+ "rewards/accuracy_reward": 0.7767857477068901,
176
  "rewards/format_reward": 0.0,
177
  "step": 55
178
  },
179
  {
180
+ "clip_ratio": 0.0,
181
+ "completion_length": 587.1831906636556,
182
  "epoch": 0.9893390191897654,
183
+ "kl": 0.006526947021484375,
184
+ "reward": 0.7529762213428816,
185
+ "reward_std": 0.1964037980263432,
186
+ "rewards/accuracy_reward": 0.7529762213428816,
187
  "rewards/format_reward": 0.0,
188
  "step": 58,
189
  "total_flos": 0.0,
190
+ "train_loss": 0.04403019331170824,
191
+ "train_runtime": 12949.6635,
192
+ "train_samples_per_second": 0.579,
193
  "train_steps_per_second": 0.004
194
  }
195
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f3f39420d245a590d0c8b6f2cf6719714e3ffd9bc8e87f97fa901117125e1a3
3
- size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1aec54fa7ce2ffb19eeee2f51172031d4e6e3c7ee69c82e7fef40dbe3218e62
3
+ size 7992