Model save

Browse files

Files changed (10) hide show

README.md +3 -5
all_results.json +3 -3
config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
train_results.json +3 -3
trainer_state.json +46 -46
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,11 +1,9 @@
 ---
 base_model: Qwen/Qwen2.5-Math-7B
-datasets: DigitalLearningGmbH/MATH-lighteval
 library_name: transformers
 model_name: Qwen-2.5-7B_Base_Math_smallestlr
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - grpo
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for Qwen-2.5-7B_Base_Math_smallestlr
-This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B) on the [DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/8v9fl5sl)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
@@ -38,7 +36,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
 - TRL: 0.15.0.dev0
 - Transformers: 4.49.0.dev0
-- Pytorch: 2.5.1+cu121
 - Datasets: 3.2.0
 - Tokenizers: 0.21.0

 ---
 base_model: Qwen/Qwen2.5-Math-7B
 library_name: transformers
 model_name: Qwen-2.5-7B_Base_Math_smallestlr
 tags:
 - generated_from_trainer
 - trl
 - grpo
 licence: license
 # Model Card for Qwen-2.5-7B_Base_Math_smallestlr
+This model is a fine-tuned version of [Qwen/Qwen2.5-Math-7B](https://huggingface.co/Qwen/Qwen2.5-Math-7B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/dongwei_jiang/huggingface/runs/o4c7yqxu)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 - TRL: 0.15.0.dev0
 - Transformers: 4.49.0.dev0
+- Pytorch: 2.5.1
 - Datasets: 3.2.0
 - Tokenizers: 0.21.0

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 7.140729587208897e-05,
-    "train_runtime": 12261.841,
     "train_samples": 7500,
-    "train_samples_per_second": 0.612,
     "train_steps_per_second": 0.005
 }

 {
     "total_flos": 0.0,
+    "train_loss": 4.8947690507404006e-05,
+    "train_runtime": 12203.8092,
     "train_samples": 7500,
+    "train_samples_per_second": 0.615,
     "train_steps_per_second": 0.005
 }

config.json CHANGED Viewed

@@ -23,7 +23,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0.dev0",
-  "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 152064

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.49.0.dev0",
+  "use_cache": false,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 152064

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c74b225860810f07daa8989a777d2b9348b9c2c105508bfb1b4455d41f469d0c
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:46342787ddd0807271e141e6b0249c71254b12f835c83cd54301e573b160f43f
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9daa38b059449b5e590631f694cc2d0dbef7e1242cb8a5d722e35adf45f0c19c
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:f43989941ec94835f83f23af0b2a76c1b157804a05564506ccc2c8fe26c3a1e8
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bb9eb449ebe42c0364c8cd4b941a5d6b2f7dc5e9f29bc69510fb80a2afbe7b9
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:230b27e24c42f5a317273127578ef25db09ba0150a5a31a4a917206dd3ab9e47
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37fcaabb366888005023ead6a83b6c9e93bb8438f5a3c1d00771cf42eb8a754c
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:f14c4f35b626a88045a947ebf891942d08c3991e7edfb0db38e673b5b578ceaf
 size 1089994880

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 7.140729587208897e-05,
-    "train_runtime": 12261.841,
     "train_samples": 7500,
-    "train_samples_per_second": 0.612,
     "train_steps_per_second": 0.005
 }

 {
     "total_flos": 0.0,
+    "train_loss": 4.8947690507404006e-05,
+    "train_runtime": 12203.8092,
     "train_samples": 7500,
+    "train_samples_per_second": 0.615,
     "train_steps_per_second": 0.005
 }

trainer_state.json CHANGED Viewed

@@ -9,96 +9,96 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "completion_length": 598.1764785766602,
       "epoch": 0.14925373134328357,
-      "grad_norm": 0.9132480025291443,
-      "kl": 0.00016614198684692383,
       "learning_rate": 4.969220851487844e-07,
       "loss": 0.0,
-      "reward": 0.6546875289641321,
-      "reward_std": 0.33586718840524554,
-      "rewards/accuracy_reward": 0.6546875289641321,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
-      "completion_length": 609.1605173110962,
       "epoch": 0.29850746268656714,
-      "grad_norm": 0.40786078572273254,
-      "kl": 0.0005660355091094971,
       "learning_rate": 4.442864903642427e-07,
       "loss": 0.0,
-      "reward": 0.6901786014437675,
-      "reward_std": 0.30130053889006375,
-      "rewards/accuracy_reward": 0.6901786014437675,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
-      "completion_length": 635.0884220123291,
       "epoch": 0.44776119402985076,
-      "grad_norm": 0.8264181613922119,
-      "kl": 0.0010451436042785644,
       "learning_rate": 3.39591987386325e-07,
-      "loss": 0.0,
-      "reward": 0.6970982452854514,
-      "reward_std": 0.2772169575560838,
-      "rewards/accuracy_reward": 0.6970982452854514,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
-      "completion_length": 647.4888683319092,
       "epoch": 0.5970149253731343,
-      "grad_norm": 327.3821105957031,
-      "kl": 0.0059155702590942385,
       "learning_rate": 2.1089138373994222e-07,
-      "loss": 0.0002,
-      "reward": 0.711607176065445,
-      "reward_std": 0.2549772718921304,
-      "rewards/accuracy_reward": 0.711607176065445,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
-      "completion_length": 648.2196710586547,
       "epoch": 0.746268656716418,
-      "grad_norm": 0.209492489695549,
-      "kl": 0.0017063379287719726,
       "learning_rate": 9.266990223754067e-08,
       "loss": 0.0001,
-      "reward": 0.7107143178582191,
-      "reward_std": 0.27168579008430244,
-      "rewards/accuracy_reward": 0.7107143178582191,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
-      "completion_length": 650.5932197570801,
       "epoch": 0.8955223880597015,
-      "grad_norm": 1.0286697149276733,
-      "kl": 0.0015923142433166503,
       "learning_rate": 1.6604893375699592e-08,
       "loss": 0.0001,
-      "reward": 0.7131696769967675,
-      "reward_std": 0.2604010491631925,
-      "rewards/accuracy_reward": 0.7131696769967675,
       "rewards/format_reward": 0.0,
       "step": 60
     },
     {
-      "completion_length": 642.4013843536377,
       "epoch": 1.0,
-      "kl": 0.0014409167425973074,
-      "reward": 0.716358450374433,
-      "reward_std": 0.2555804694337504,
-      "rewards/accuracy_reward": 0.716358450374433,
       "rewards/format_reward": 0.0,
       "step": 67,
       "total_flos": 0.0,
-      "train_loss": 7.140729587208897e-05,
-      "train_runtime": 12261.841,
-      "train_samples_per_second": 0.612,
       "train_steps_per_second": 0.005
     }
   ],

   "is_world_process_zero": true,
   "log_history": [
     {
+      "completion_length": 593.7482410430908,
       "epoch": 0.14925373134328357,
+      "grad_norm": 3.3375449180603027,
+      "kl": 0.00016417205333709716,
       "learning_rate": 4.969220851487844e-07,
       "loss": 0.0,
+      "reward": 0.6577009251341224,
+      "reward_std": 0.33679639026522634,
+      "rewards/accuracy_reward": 0.6577009251341224,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
+      "completion_length": 615.897794342041,
       "epoch": 0.29850746268656714,
+      "grad_norm": 6.283264636993408,
+      "kl": 0.0006311476230621338,
       "learning_rate": 4.442864903642427e-07,
       "loss": 0.0,
+      "reward": 0.7003348525613546,
+      "reward_std": 0.28909211745485663,
+      "rewards/accuracy_reward": 0.7003348525613546,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
+      "completion_length": 627.0369699478149,
       "epoch": 0.44776119402985076,
+      "grad_norm": 1.084415316581726,
+      "kl": 0.0013591766357421875,
       "learning_rate": 3.39591987386325e-07,
+      "loss": 0.0001,
+      "reward": 0.7119419971480966,
+      "reward_std": 0.26750571075826884,
+      "rewards/accuracy_reward": 0.7119419971480966,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
+      "completion_length": 646.3926631927491,
       "epoch": 0.5970149253731343,
+      "grad_norm": 0.20786528289318085,
+      "kl": 0.0015711426734924317,
       "learning_rate": 2.1089138373994222e-07,
+      "loss": 0.0001,
+      "reward": 0.7162946753203869,
+      "reward_std": 0.2570623795501888,
+      "rewards/accuracy_reward": 0.7162946753203869,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
+      "completion_length": 644.9664350509644,
       "epoch": 0.746268656716418,
+      "grad_norm": 0.18577325344085693,
+      "kl": 0.0016170144081115723,
       "learning_rate": 9.266990223754067e-08,
       "loss": 0.0001,
+      "reward": 0.717187531106174,
+      "reward_std": 0.2541773657780141,
+      "rewards/accuracy_reward": 0.717187531106174,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
+      "completion_length": 645.4328433990479,
       "epoch": 0.8955223880597015,
+      "grad_norm": 0.10769952088594437,
+      "kl": 0.001490175724029541,
       "learning_rate": 1.6604893375699592e-08,
       "loss": 0.0001,
+      "reward": 0.710714316368103,
+      "reward_std": 0.24781657787971198,
+      "rewards/accuracy_reward": 0.710714316368103,
       "rewards/format_reward": 0.0,
       "step": 60
     },
     {
+      "completion_length": 640.3468578883579,
       "epoch": 1.0,
+      "kl": 0.0016015597752162389,
+      "reward": 0.7274128685572318,
+      "reward_std": 0.23784327866243465,
+      "rewards/accuracy_reward": 0.7274128685572318,
       "rewards/format_reward": 0.0,
       "step": 67,
       "total_flos": 0.0,
+      "train_loss": 4.8947690507404006e-05,
+      "train_runtime": 12203.8092,
+      "train_samples_per_second": 0.615,
       "train_steps_per_second": 0.005
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9673ffa80b9bdc2e2a469b8c32574b1616b2f2d8681b325db045ccc93baac3d4
 size 7032

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d36cd74723b16e091c85f1ada4dd664e0e041a2deb6e15b5f3f3070d85f012b
 size 7032