Dongwei committed on
Commit
3433ee5
·
verified ·
1 Parent(s): 54c0215

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
  base_model: Qwen/Qwen2.5-Math-7B
3
- datasets: DigitalLearningGmbH/MATH-lighteval
4
  library_name: transformers
5
  model_name: Qwen-2.5-7B_Base_Math_smallestlr
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - grpo
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for Qwen-2.5-7B_Base_Math_smallestlr
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B) on the [DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/8v9fl5sl)
33
 
34
 
35
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -38,7 +36,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
38
 
39
  - TRL: 0.15.0.dev0
40
  - Transformers: 4.49.0.dev0
41
- - Pytorch: 2.5.1+cu121
42
  - Datasets: 3.2.0
43
  - Tokenizers: 0.21.0
44
 
 
1
  ---
2
  base_model: Qwen/Qwen2.5-Math-7B
 
3
  library_name: transformers
4
  model_name: Qwen-2.5-7B_Base_Math_smallestlr
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - grpo
9
  licence: license
 
11
 
12
  # Model Card for Qwen-2.5-7B_Base_Math_smallestlr
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/o4c7yqxu)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
36
 
37
  - TRL: 0.15.0.dev0
38
  - Transformers: 4.49.0.dev0
39
+ - Pytorch: 2.5.1
40
  - Datasets: 3.2.0
41
  - Tokenizers: 0.21.0
42
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 7.140729587208897e-05,
4
- "train_runtime": 12261.841,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.612,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 4.8947690507404006e-05,
4
+ "train_runtime": 12203.8092,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.615,
7
  "train_steps_per_second": 0.005
8
  }
config.json CHANGED
@@ -23,7 +23,7 @@
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.49.0.dev0",
26
- "use_cache": true,
27
  "use_mrope": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 152064
 
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.49.0.dev0",
26
+ "use_cache": false,
27
  "use_mrope": false,
28
  "use_sliding_window": false,
29
  "vocab_size": 152064
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c74b225860810f07daa8989a777d2b9348b9c2c105508bfb1b4455d41f469d0c
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46342787ddd0807271e141e6b0249c71254b12f835c83cd54301e573b160f43f
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9daa38b059449b5e590631f694cc2d0dbef7e1242cb8a5d722e35adf45f0c19c
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43989941ec94835f83f23af0b2a76c1b157804a05564506ccc2c8fe26c3a1e8
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0bb9eb449ebe42c0364c8cd4b941a5d6b2f7dc5e9f29bc69510fb80a2afbe7b9
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:230b27e24c42f5a317273127578ef25db09ba0150a5a31a4a917206dd3ab9e47
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37fcaabb366888005023ead6a83b6c9e93bb8438f5a3c1d00771cf42eb8a754c
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f14c4f35b626a88045a947ebf891942d08c3991e7edfb0db38e673b5b578ceaf
3
  size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 7.140729587208897e-05,
4
- "train_runtime": 12261.841,
5
  "train_samples": 7500,
6
- "train_samples_per_second": 0.612,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 4.8947690507404006e-05,
4
+ "train_runtime": 12203.8092,
5
  "train_samples": 7500,
6
+ "train_samples_per_second": 0.615,
7
  "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -9,96 +9,96 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "completion_length": 598.1764785766602,
13
  "epoch": 0.14925373134328357,
14
- "grad_norm": 0.9132480025291443,
15
- "kl": 0.00016614198684692383,
16
  "learning_rate": 4.969220851487844e-07,
17
  "loss": 0.0,
18
- "reward": 0.6546875289641321,
19
- "reward_std": 0.33586718840524554,
20
- "rewards/accuracy_reward": 0.6546875289641321,
21
  "rewards/format_reward": 0.0,
22
  "step": 10
23
  },
24
  {
25
- "completion_length": 609.1605173110962,
26
  "epoch": 0.29850746268656714,
27
- "grad_norm": 0.40786078572273254,
28
- "kl": 0.0005660355091094971,
29
  "learning_rate": 4.442864903642427e-07,
30
  "loss": 0.0,
31
- "reward": 0.6901786014437675,
32
- "reward_std": 0.30130053889006375,
33
- "rewards/accuracy_reward": 0.6901786014437675,
34
  "rewards/format_reward": 0.0,
35
  "step": 20
36
  },
37
  {
38
- "completion_length": 635.0884220123291,
39
  "epoch": 0.44776119402985076,
40
- "grad_norm": 0.8264181613922119,
41
- "kl": 0.0010451436042785644,
42
  "learning_rate": 3.39591987386325e-07,
43
- "loss": 0.0,
44
- "reward": 0.6970982452854514,
45
- "reward_std": 0.2772169575560838,
46
- "rewards/accuracy_reward": 0.6970982452854514,
47
  "rewards/format_reward": 0.0,
48
  "step": 30
49
  },
50
  {
51
- "completion_length": 647.4888683319092,
52
  "epoch": 0.5970149253731343,
53
- "grad_norm": 327.3821105957031,
54
- "kl": 0.0059155702590942385,
55
  "learning_rate": 2.1089138373994222e-07,
56
- "loss": 0.0002,
57
- "reward": 0.711607176065445,
58
- "reward_std": 0.2549772718921304,
59
- "rewards/accuracy_reward": 0.711607176065445,
60
  "rewards/format_reward": 0.0,
61
  "step": 40
62
  },
63
  {
64
- "completion_length": 648.2196710586547,
65
  "epoch": 0.746268656716418,
66
- "grad_norm": 0.209492489695549,
67
- "kl": 0.0017063379287719726,
68
  "learning_rate": 9.266990223754067e-08,
69
  "loss": 0.0001,
70
- "reward": 0.7107143178582191,
71
- "reward_std": 0.27168579008430244,
72
- "rewards/accuracy_reward": 0.7107143178582191,
73
  "rewards/format_reward": 0.0,
74
  "step": 50
75
  },
76
  {
77
- "completion_length": 650.5932197570801,
78
  "epoch": 0.8955223880597015,
79
- "grad_norm": 1.0286697149276733,
80
- "kl": 0.0015923142433166503,
81
  "learning_rate": 1.6604893375699592e-08,
82
  "loss": 0.0001,
83
- "reward": 0.7131696769967675,
84
- "reward_std": 0.2604010491631925,
85
- "rewards/accuracy_reward": 0.7131696769967675,
86
  "rewards/format_reward": 0.0,
87
  "step": 60
88
  },
89
  {
90
- "completion_length": 642.4013843536377,
91
  "epoch": 1.0,
92
- "kl": 0.0014409167425973074,
93
- "reward": 0.716358450374433,
94
- "reward_std": 0.2555804694337504,
95
- "rewards/accuracy_reward": 0.716358450374433,
96
  "rewards/format_reward": 0.0,
97
  "step": 67,
98
  "total_flos": 0.0,
99
- "train_loss": 7.140729587208897e-05,
100
- "train_runtime": 12261.841,
101
- "train_samples_per_second": 0.612,
102
  "train_steps_per_second": 0.005
103
  }
104
  ],
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "completion_length": 593.7482410430908,
13
  "epoch": 0.14925373134328357,
14
+ "grad_norm": 3.3375449180603027,
15
+ "kl": 0.00016417205333709716,
16
  "learning_rate": 4.969220851487844e-07,
17
  "loss": 0.0,
18
+ "reward": 0.6577009251341224,
19
+ "reward_std": 0.33679639026522634,
20
+ "rewards/accuracy_reward": 0.6577009251341224,
21
  "rewards/format_reward": 0.0,
22
  "step": 10
23
  },
24
  {
25
+ "completion_length": 615.897794342041,
26
  "epoch": 0.29850746268656714,
27
+ "grad_norm": 6.283264636993408,
28
+ "kl": 0.0006311476230621338,
29
  "learning_rate": 4.442864903642427e-07,
30
  "loss": 0.0,
31
+ "reward": 0.7003348525613546,
32
+ "reward_std": 0.28909211745485663,
33
+ "rewards/accuracy_reward": 0.7003348525613546,
34
  "rewards/format_reward": 0.0,
35
  "step": 20
36
  },
37
  {
38
+ "completion_length": 627.0369699478149,
39
  "epoch": 0.44776119402985076,
40
+ "grad_norm": 1.084415316581726,
41
+ "kl": 0.0013591766357421875,
42
  "learning_rate": 3.39591987386325e-07,
43
+ "loss": 0.0001,
44
+ "reward": 0.7119419971480966,
45
+ "reward_std": 0.26750571075826884,
46
+ "rewards/accuracy_reward": 0.7119419971480966,
47
  "rewards/format_reward": 0.0,
48
  "step": 30
49
  },
50
  {
51
+ "completion_length": 646.3926631927491,
52
  "epoch": 0.5970149253731343,
53
+ "grad_norm": 0.20786528289318085,
54
+ "kl": 0.0015711426734924317,
55
  "learning_rate": 2.1089138373994222e-07,
56
+ "loss": 0.0001,
57
+ "reward": 0.7162946753203869,
58
+ "reward_std": 0.2570623795501888,
59
+ "rewards/accuracy_reward": 0.7162946753203869,
60
  "rewards/format_reward": 0.0,
61
  "step": 40
62
  },
63
  {
64
+ "completion_length": 644.9664350509644,
65
  "epoch": 0.746268656716418,
66
+ "grad_norm": 0.18577325344085693,
67
+ "kl": 0.0016170144081115723,
68
  "learning_rate": 9.266990223754067e-08,
69
  "loss": 0.0001,
70
+ "reward": 0.717187531106174,
71
+ "reward_std": 0.2541773657780141,
72
+ "rewards/accuracy_reward": 0.717187531106174,
73
  "rewards/format_reward": 0.0,
74
  "step": 50
75
  },
76
  {
77
+ "completion_length": 645.4328433990479,
78
  "epoch": 0.8955223880597015,
79
+ "grad_norm": 0.10769952088594437,
80
+ "kl": 0.001490175724029541,
81
  "learning_rate": 1.6604893375699592e-08,
82
  "loss": 0.0001,
83
+ "reward": 0.710714316368103,
84
+ "reward_std": 0.24781657787971198,
85
+ "rewards/accuracy_reward": 0.710714316368103,
86
  "rewards/format_reward": 0.0,
87
  "step": 60
88
  },
89
  {
90
+ "completion_length": 640.3468578883579,
91
  "epoch": 1.0,
92
+ "kl": 0.0016015597752162389,
93
+ "reward": 0.7274128685572318,
94
+ "reward_std": 0.23784327866243465,
95
+ "rewards/accuracy_reward": 0.7274128685572318,
96
  "rewards/format_reward": 0.0,
97
  "step": 67,
98
  "total_flos": 0.0,
99
+ "train_loss": 4.8947690507404006e-05,
100
+ "train_runtime": 12203.8092,
101
+ "train_samples_per_second": 0.615,
102
  "train_steps_per_second": 0.005
103
  }
104
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9673ffa80b9bdc2e2a469b8c32574b1616b2f2d8681b325db045ccc93baac3d4
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d36cd74723b16e091c85f1ada4dd664e0e041a2deb6e15b5f3f3070d85f012b
3
  size 7032