susumuota committed on
Commit
b12f132
·
verified ·
1 Parent(s): e8f9a72

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/llm-m_wandb-weblab/Qwen2.5-7B-Instruct-GRPO/runs/07i0f523)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -35,9 +35,9 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
35
  ### Framework versions
36
 
37
  - TRL: 0.16.0.dev0
38
- - Transformers: 4.50.0.dev0
39
  - Pytorch: 2.5.1
40
- - Datasets: 3.3.1
41
  - Tokenizers: 0.21.0
42
 
43
  ## Citations
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/llm-m_wandb-weblab/Qwen2.5-7B-Instruct-GRPO/runs/potnc7q9)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
35
  ### Framework versions
36
 
37
  - TRL: 0.16.0.dev0
38
+ - Transformers: 4.49.0
39
  - Pytorch: 2.5.1
40
+ - Datasets: 3.3.2
41
  - Tokenizers: 0.21.0
42
 
43
  ## Citations
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.02936485406525176,
4
- "train_runtime": 6394.9086,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 1.173,
7
  "train_steps_per_second": 0.009
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.020674003962555837,
4
+ "train_runtime": 6417.2756,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.169,
7
  "train_steps_per_second": 0.009
8
  }
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "sliding_window": 131072,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.50.0.dev0",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 152064
 
22
  "sliding_window": 131072,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.49.0",
26
  "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 152064
generation_config.json CHANGED
@@ -10,5 +10,5 @@
10
  "temperature": 0.7,
11
  "top_k": 20,
12
  "top_p": 0.8,
13
- "transformers_version": "4.50.0.dev0"
14
  }
 
10
  "temperature": 0.7,
11
  "top_k": 20,
12
  "top_p": 0.8,
13
+ "transformers_version": "4.49.0"
14
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc60d9a7495d0aa48dc90505ebf3d104535956d4f3458df65220089242913819
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247735adddbbd9944bf2dc1cc35ccc9b4cfd5e79ef3d8c3ab7340c7f26a0955e
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:303ef5188ca1847519cc09888a6cf60e6d29c993c31beb5f9bc46192297c602a
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9ef086092554c80dab98bd0bfbaa398b21a3d2e367bf94ad86f1e05c6ce509
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39e887afa83ae21afba2f860b89e41bfdeb3836eaa05be27e78109801f5dea72
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e80ee5064fdd08d096850130cfbb55ce4521b8c8f8f019740bac43298665148f
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01566002591b0833a84065e44ef7d3477dde51cb6125ca752f17d1877f864e26
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe67c93ef45e03e5b117ec79ebbbcc459cf2fe2e878531f0a57c2da06c2fc0ef
3
  size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.02936485406525176,
4
- "train_runtime": 6394.9086,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 1.173,
7
  "train_steps_per_second": 0.009
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.020674003962555837,
4
+ "train_runtime": 6417.2756,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 1.169,
7
  "train_steps_per_second": 0.009
8
  }
trainer_state.json CHANGED
@@ -9,174 +9,187 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
 
12
  "completion_length": 498.510066986084,
13
  "epoch": 0.017057569296375266,
14
- "grad_norm": 1.1288529634475708,
15
  "kl": 0.0,
16
  "learning_rate": 5e-07,
17
- "loss": 0.0113,
18
- "reward": 0.6216518096625805,
19
- "reward_std": 0.43382299318909645,
20
  "rewards/accuracy_reward": 0.17633929289877415,
21
- "rewards/format_reward": 0.4453125223517418,
22
  "step": 1
23
  },
24
  {
25
- "completion_length": 489.7723445892334,
 
26
  "epoch": 0.08528784648187633,
27
- "grad_norm": 434.209228515625,
28
- "kl": 1.8089315593242645,
29
  "learning_rate": 2.5e-06,
30
- "loss": 0.0766,
31
- "reward": 0.7890625391155481,
32
- "reward_std": 0.38019732665270567,
33
- "rewards/accuracy_reward": 0.18470982904545963,
34
- "rewards/format_reward": 0.6043527061119676,
35
  "step": 5
36
  },
37
  {
38
- "completion_length": 477.6071632385254,
 
39
  "epoch": 0.17057569296375266,
40
- "grad_norm": 0.20538534224033356,
41
- "kl": 1.3172725677490233,
42
  "learning_rate": 2.956412726139078e-06,
43
- "loss": 0.0959,
44
- "reward": 1.1899554073810577,
45
- "reward_std": 0.323445713147521,
46
- "rewards/accuracy_reward": 0.2645089406520128,
47
- "rewards/format_reward": 0.9254464700818061,
48
  "step": 10
49
  },
50
  {
51
- "completion_length": 438.728816986084,
 
52
  "epoch": 0.255863539445629,
53
- "grad_norm": 0.4574572443962097,
54
- "kl": 0.0261322021484375,
55
  "learning_rate": 2.7836719084521715e-06,
56
- "loss": 0.0128,
57
- "reward": 1.4294643551111221,
58
- "reward_std": 0.346218079701066,
59
- "rewards/accuracy_reward": 0.47678573802113533,
60
- "rewards/format_reward": 0.9526786103844642,
61
  "step": 15
62
  },
63
  {
64
- "completion_length": 410.66720657348634,
 
65
  "epoch": 0.3411513859275053,
66
- "grad_norm": 0.21916256844997406,
67
- "kl": 0.0268157958984375,
68
  "learning_rate": 2.4946839873611927e-06,
69
- "loss": 0.0144,
70
- "reward": 1.516741144657135,
71
- "reward_std": 0.3288130540400743,
72
- "rewards/accuracy_reward": 0.5453125208616256,
73
- "rewards/format_reward": 0.9714286103844643,
74
  "step": 20
75
  },
76
  {
77
- "completion_length": 440.30113372802737,
 
78
  "epoch": 0.42643923240938164,
79
- "grad_norm": 0.19857414066791534,
80
- "kl": 0.043634033203125,
81
  "learning_rate": 2.1156192081791355e-06,
82
- "loss": 0.0152,
83
- "reward": 1.6386161416769027,
84
- "reward_std": 0.2348614836111665,
85
- "rewards/accuracy_reward": 0.6629464566707611,
86
- "rewards/format_reward": 0.9756696790456771,
87
  "step": 25
88
  },
89
  {
90
- "completion_length": 435.9283676147461,
 
91
  "epoch": 0.511727078891258,
92
- "grad_norm": 4.214118003845215,
93
- "kl": 0.04940185546875,
94
  "learning_rate": 1.6808050203829845e-06,
95
- "loss": 0.0198,
96
- "reward": 1.6723215013742447,
97
- "reward_std": 0.2099373336881399,
98
- "rewards/accuracy_reward": 0.7004464611411094,
99
- "rewards/format_reward": 0.971875037252903,
100
  "step": 30
101
  },
102
  {
103
- "completion_length": 431.46564331054685,
 
104
  "epoch": 0.5970149253731343,
105
- "grad_norm": 0.245810866355896,
106
- "kl": 0.0542022705078125,
107
  "learning_rate": 1.2296174432791415e-06,
108
- "loss": 0.014,
109
- "reward": 1.6790179312229156,
110
- "reward_std": 0.20684626493602992,
111
- "rewards/accuracy_reward": 0.7033482506871224,
112
- "rewards/format_reward": 0.9756696790456771,
113
  "step": 35
114
  },
115
  {
116
- "completion_length": 422.8857360839844,
 
117
  "epoch": 0.6823027718550106,
118
- "grad_norm": 0.2607899606227875,
119
- "kl": 0.0524322509765625,
120
  "learning_rate": 8.029152419343472e-07,
121
- "loss": 0.018,
122
- "reward": 1.6703125715255738,
123
- "reward_std": 0.21576487701386213,
124
- "rewards/accuracy_reward": 0.6926339581608772,
125
- "rewards/format_reward": 0.9776786059141159,
126
  "step": 40
127
  },
128
  {
129
- "completion_length": 441.6977897644043,
 
130
  "epoch": 0.767590618336887,
131
- "grad_norm": 0.6231416463851929,
132
- "kl": 0.07574462890625,
133
  "learning_rate": 4.3933982822017883e-07,
134
- "loss": 0.0199,
135
- "reward": 1.656696504354477,
136
- "reward_std": 0.2240034222602844,
137
- "rewards/accuracy_reward": 0.675892886519432,
138
- "rewards/format_reward": 0.9808036059141159,
139
  "step": 45
140
  },
141
  {
142
- "completion_length": 432.76631317138674,
 
143
  "epoch": 0.8528784648187633,
144
- "grad_norm": 1.0140599012374878,
145
- "kl": 0.046148681640625,
146
  "learning_rate": 1.718159615201853e-07,
147
- "loss": 0.0211,
148
- "reward": 1.656919714808464,
149
- "reward_std": 0.2316950935870409,
150
- "rewards/accuracy_reward": 0.6727678880095482,
151
- "rewards/format_reward": 0.9841518223285675,
152
  "step": 50
153
  },
154
  {
155
- "completion_length": 437.499796295166,
 
156
  "epoch": 0.9381663113006397,
157
- "grad_norm": 3.5078225135803223,
158
- "kl": 0.1864105224609375,
159
  "learning_rate": 2.4570139579284723e-08,
160
- "loss": 0.0283,
161
- "reward": 1.6799107879400252,
162
- "reward_std": 0.23468854520469903,
163
- "rewards/accuracy_reward": 0.6982143193483352,
164
- "rewards/format_reward": 0.9816964596509934,
165
  "step": 55
166
  },
167
  {
168
- "completion_length": 428.84972254435223,
 
169
  "epoch": 0.9893390191897654,
170
- "kl": 0.059397379557291664,
171
- "reward": 1.661458412806193,
172
- "reward_std": 0.2265977036828796,
173
- "rewards/accuracy_reward": 0.6741071691115698,
174
- "rewards/format_reward": 0.9873512263099352,
175
  "step": 58,
176
  "total_flos": 0.0,
177
- "train_loss": 0.02936485406525176,
178
- "train_runtime": 6394.9086,
179
- "train_samples_per_second": 1.173,
180
  "train_steps_per_second": 0.009
181
  }
182
  ],
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "clip_ratio": 0.0,
13
  "completion_length": 498.510066986084,
14
  "epoch": 0.017057569296375266,
15
+ "grad_norm": 1.1215301752090454,
16
  "kl": 0.0,
17
  "learning_rate": 5e-07,
18
+ "loss": 0.0115,
19
+ "reward": 0.6171875260770321,
20
+ "reward_std": 0.437016986310482,
21
  "rewards/accuracy_reward": 0.17633929289877415,
22
+ "rewards/format_reward": 0.4408482313156128,
23
  "step": 1
24
  },
25
  {
26
+ "clip_ratio": 0.0,
27
+ "completion_length": 487.4813299179077,
28
  "epoch": 0.08528784648187633,
29
+ "grad_norm": 476.3103332519531,
30
+ "kl": 1.7723130583763123,
31
  "learning_rate": 2.5e-06,
32
+ "loss": 0.0822,
33
+ "reward": 0.7912946743890643,
34
+ "reward_std": 0.3657265743240714,
35
+ "rewards/accuracy_reward": 0.1872209922876209,
36
+ "rewards/format_reward": 0.6040736874565482,
37
  "step": 5
38
  },
39
  {
40
+ "clip_ratio": 0.0,
41
+ "completion_length": 466.69912643432616,
42
  "epoch": 0.17057569296375266,
43
+ "grad_norm": 0.41791579127311707,
44
+ "kl": 0.012205886840820312,
45
  "learning_rate": 2.956412726139078e-06,
46
+ "loss": 0.0203,
47
+ "reward": 1.2589286297559739,
48
+ "reward_std": 0.32850122936069964,
49
+ "rewards/accuracy_reward": 0.3185267999768257,
50
+ "rewards/format_reward": 0.9404018238186836,
51
  "step": 10
52
  },
53
  {
54
+ "clip_ratio": 0.0,
55
+ "completion_length": 444.567431640625,
56
  "epoch": 0.255863539445629,
57
+ "grad_norm": 0.6771596670150757,
58
+ "kl": 0.02176055908203125,
59
  "learning_rate": 2.7836719084521715e-06,
60
+ "loss": 0.0059,
61
+ "reward": 1.4082589864730835,
62
+ "reward_std": 0.3335796441882849,
63
+ "rewards/accuracy_reward": 0.441294664144516,
64
+ "rewards/format_reward": 0.9669643238186836,
65
  "step": 15
66
  },
67
  {
68
+ "clip_ratio": 0.0,
69
+ "completion_length": 422.7493499755859,
70
  "epoch": 0.3411513859275053,
71
+ "grad_norm": 0.23769104480743408,
72
+ "kl": 0.026959228515625,
73
  "learning_rate": 2.4946839873611927e-06,
74
+ "loss": 0.01,
75
+ "reward": 1.4937500715255738,
76
+ "reward_std": 0.3376178216189146,
77
+ "rewards/accuracy_reward": 0.5156250238418579,
78
+ "rewards/format_reward": 0.9781250357627869,
79
  "step": 20
80
  },
81
  {
82
+ "clip_ratio": 0.0,
83
+ "completion_length": 433.67725372314453,
84
  "epoch": 0.42643923240938164,
85
+ "grad_norm": 0.1763259768486023,
86
+ "kl": 0.0300628662109375,
87
  "learning_rate": 2.1156192081791355e-06,
88
+ "loss": 0.0157,
89
+ "reward": 1.5979911386966705,
90
+ "reward_std": 0.2915887963026762,
91
+ "rewards/accuracy_reward": 0.6209821671247482,
92
+ "rewards/format_reward": 0.9770089671015739,
93
  "step": 25
94
  },
95
  {
96
+ "clip_ratio": 0.0,
97
+ "completion_length": 461.1984573364258,
98
  "epoch": 0.511727078891258,
99
+ "grad_norm": 0.1527547538280487,
100
+ "kl": 0.0370269775390625,
101
  "learning_rate": 1.6808050203829845e-06,
102
+ "loss": 0.0155,
103
+ "reward": 1.6714286535978318,
104
+ "reward_std": 0.2001216158270836,
105
+ "rewards/accuracy_reward": 0.6986607477068901,
106
+ "rewards/format_reward": 0.9727678924798966,
107
  "step": 30
108
  },
109
  {
110
+ "clip_ratio": 0.0,
111
+ "completion_length": 458.88328018188474,
112
  "epoch": 0.5970149253731343,
113
+ "grad_norm": 0.13492096960544586,
114
+ "kl": 0.03984375,
115
  "learning_rate": 1.2296174432791415e-06,
116
+ "loss": 0.0205,
117
+ "reward": 1.6863840162754058,
118
+ "reward_std": 0.19958442291244866,
119
+ "rewards/accuracy_reward": 0.7189732484519482,
120
+ "rewards/format_reward": 0.9674107521772385,
121
  "step": 35
122
  },
123
  {
124
+ "clip_ratio": 0.0,
125
+ "completion_length": 435.2419822692871,
126
  "epoch": 0.6823027718550106,
127
+ "grad_norm": 1.2669559717178345,
128
+ "kl": 0.0412078857421875,
129
  "learning_rate": 8.029152419343472e-07,
130
+ "loss": 0.0147,
131
+ "reward": 1.7008929342031478,
132
+ "reward_std": 0.18920395569875836,
133
+ "rewards/accuracy_reward": 0.7238839611411094,
134
+ "rewards/format_reward": 0.9770089611411095,
135
  "step": 40
136
  },
137
  {
138
+ "clip_ratio": 0.0,
139
+ "completion_length": 436.82925872802736,
140
  "epoch": 0.767590618336887,
141
+ "grad_norm": 0.38188719749450684,
142
+ "kl": 0.047900390625,
143
  "learning_rate": 4.3933982822017883e-07,
144
+ "loss": 0.0174,
145
+ "reward": 1.6915179312229156,
146
+ "reward_std": 0.19770997650921346,
147
+ "rewards/accuracy_reward": 0.712276816368103,
148
+ "rewards/format_reward": 0.9792410984635354,
149
  "step": 45
150
  },
151
  {
152
+ "clip_ratio": 0.0,
153
+ "completion_length": 426.6221176147461,
154
  "epoch": 0.8528784648187633,
155
+ "grad_norm": 1.376158356666565,
156
+ "kl": 0.202972412109375,
157
  "learning_rate": 1.718159615201853e-07,
158
+ "loss": 0.0264,
159
+ "reward": 1.6868304401636123,
160
+ "reward_std": 0.19814990404993296,
161
+ "rewards/accuracy_reward": 0.7000000312924385,
162
+ "rewards/format_reward": 0.986830385029316,
163
  "step": 50
164
  },
165
  {
166
+ "clip_ratio": 0.0,
167
+ "completion_length": 429.37256622314453,
168
  "epoch": 0.9381663113006397,
169
+ "grad_norm": 2.8599460124969482,
170
+ "kl": 0.0670989990234375,
171
  "learning_rate": 2.4570139579284723e-08,
172
+ "loss": 0.0186,
173
+ "reward": 1.722991144657135,
174
+ "reward_std": 0.19592140736058355,
175
+ "rewards/accuracy_reward": 0.7395089641213417,
176
+ "rewards/format_reward": 0.9834821745753288,
177
  "step": 55
178
  },
179
  {
180
+ "clip_ratio": 0.0,
181
+ "completion_length": 415.95802815755206,
182
  "epoch": 0.9893390191897654,
183
+ "kl": 0.0960235595703125,
184
+ "reward": 1.7020090073347092,
185
+ "reward_std": 0.19463430003573498,
186
+ "rewards/accuracy_reward": 0.7127976529300213,
187
+ "rewards/format_reward": 0.9892113382617632,
188
  "step": 58,
189
  "total_flos": 0.0,
190
+ "train_loss": 0.020674003962555837,
191
+ "train_runtime": 6417.2756,
192
+ "train_samples_per_second": 1.169,
193
  "train_steps_per_second": 0.009
194
  }
195
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c10d85dded8cb7e681e8c38934d69c85d8616a0994fe725143d71a834a82197
3
- size 7928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2b52bc508eabf11f85b23f9777010f62ee53072d9454f224c22b6871df1b509
3
+ size 7992