AaronHuangWei committed on
Commit
0ab1069
·
verified ·
1 Parent(s): 58a4f8b

Model save

Browse files
README.md CHANGED
@@ -26,7 +26,7 @@ print(output["generated_text"])
26
 
27
  ## Training procedure
28
 
29
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/aaron-weihuang-the-university-of-hong-kong/huggingface/runs/bc8byckb)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
26
 
27
  ## Training procedure
28
 
29
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/aaron-weihuang-the-university-of-hong-kong/huggingface/runs/kz4f7c4b)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 5.110737800786022,
4
- "train_runtime": 360798.095,
5
  "train_samples": 72441,
6
- "train_samples_per_second": 0.201,
7
- "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.286716509427883,
4
+ "train_runtime": 229250.8975,
5
  "train_samples": 72441,
6
+ "train_samples_per_second": 0.316,
7
+ "train_steps_per_second": 0.003
8
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/mnt/sdd/hw/LLM/Qwen2.5-3B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "/mnt/sdd/hw/LLM/RL_R1/Qwen2.5-3B-Open-R1-Distill",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7ae80f504ae3d3fb30365724bef62927f9b11f7d80b9cf445f2f2536992a582
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8042c984b33d36c39d9b0062e02b1a6d51b81a45d804cf42edb8f7b41885e0b
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0f36b69207d23b20c4a6561fef2b45f09a3536f21810fc06967327f352246b
3
  size 1836696752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b3d6a0a7c0dcf0c59c2d1ba5eb9230884e032fb50c29dab30ce73f6dabc6ec
3
  size 1836696752
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "single_word": false
23
  },
24
  "pad_token": {
25
- "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
22
  "single_word": false
23
  },
24
  "pad_token": {
25
+ "content": "<|im_end|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
- size 11422063
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63a2951d5edfa5cc0a2346ef872f8c77a2920274cfc3b503b04e3799104dee80
3
+ size 11422060
tokenizer_config.json CHANGED
@@ -201,7 +201,7 @@
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
204
- "pad_token": "<|endoftext|>",
205
  "padding_side": "left",
206
  "split_special_tokens": false,
207
  "tokenizer_class": "Qwen2Tokenizer",
 
201
  "errors": "replace",
202
  "extra_special_tokens": {},
203
  "model_max_length": 131072,
204
+ "pad_token": "<|im_end|>",
205
  "padding_side": "left",
206
  "split_special_tokens": false,
207
  "tokenizer_class": "Qwen2Tokenizer",
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 5.110737800786022,
4
- "train_runtime": 360798.095,
5
  "train_samples": 72441,
6
- "train_samples_per_second": 0.201,
7
- "train_steps_per_second": 0.004
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.286716509427883,
4
+ "train_runtime": 229250.8975,
5
  "train_samples": 72441,
6
+ "train_samples_per_second": 0.316,
7
+ "train_steps_per_second": 0.003
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c295910081897b75568d50ca5708994c1a9ff69694cb44627d289e2d19a7430
3
  size 7544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fd60a654e558783f388f58c0cb2053266bcfc3ded9151cd548c76d7bbe00563
3
  size 7544