li-muyang committed on
Commit
3de0812
·
verified ·
1 Parent(s): 4e3ba2c

Model save

Browse files
README.md CHANGED
@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.5140
20
- - Rewards/chosen: -0.7448
21
- - Rewards/rejected: -1.5539
22
- - Rewards/accuracies: 0.7698
23
- - Rewards/margins: 0.8091
24
- - Logps/rejected: -427.2234
25
- - Logps/chosen: -373.0912
26
- - Logits/rejected: -0.0315
27
- - Logits/chosen: -0.2823
28
 
29
  ## Model description
30
 
@@ -61,20 +61,20 @@ The following hyperparameters were used during training:
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
- | 0.6745 | 0.1047 | 100 | 0.6746 | 0.0479 | 0.0051 | 0.6667 | 0.0428 | -271.3235 | -293.8202 | -0.6284 | -0.7437 |
65
- | 0.6064 | 0.2093 | 200 | 0.5963 | -0.1681 | -0.4740 | 0.7421 | 0.3060 | -319.2385 | -315.4205 | -0.5803 | -0.7009 |
66
- | 0.5622 | 0.3140 | 300 | 0.5526 | -0.5889 | -1.2029 | 0.7183 | 0.6140 | -392.1266 | -357.5058 | -0.4321 | -0.6044 |
67
- | 0.5484 | 0.4186 | 400 | 0.5485 | -0.9717 | -1.6048 | 0.75 | 0.6331 | -432.3185 | -395.7858 | -0.1872 | -0.3847 |
68
- | 0.5248 | 0.5233 | 500 | 0.5309 | -0.9883 | -1.6889 | 0.7381 | 0.7006 | -440.7242 | -397.4426 | -0.0625 | -0.2826 |
69
- | 0.497 | 0.6279 | 600 | 0.5237 | -0.7097 | -1.4673 | 0.7659 | 0.7575 | -418.5620 | -369.5864 | -0.1022 | -0.3248 |
70
- | 0.4958 | 0.7326 | 700 | 0.5164 | -0.6798 | -1.4483 | 0.7738 | 0.7685 | -416.6682 | -366.5946 | -0.1124 | -0.3402 |
71
- | 0.5057 | 0.8373 | 800 | 0.5150 | -0.6885 | -1.4824 | 0.7619 | 0.7938 | -420.0714 | -367.4680 | -0.0644 | -0.3073 |
72
- | 0.4981 | 0.9419 | 900 | 0.5140 | -0.7448 | -1.5539 | 0.7698 | 0.8091 | -427.2234 | -373.0912 | -0.0315 | -0.2823 |
73
 
74
 
75
  ### Framework versions
76
 
77
- - Transformers 4.45.2
78
  - Pytorch 2.2.2+rocm5.7
79
  - Datasets 3.2.0
80
  - Tokenizers 0.20.3
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.5177
20
+ - Rewards/chosen: -0.7802
21
+ - Rewards/rejected: -1.5492
22
+ - Rewards/accuracies: 0.7619
23
+ - Rewards/margins: 0.7690
24
+ - Logps/rejected: -436.9058
25
+ - Logps/chosen: -385.0054
26
+ - Logits/rejected: -0.0112
27
+ - Logits/chosen: -0.2839
28
 
29
  ## Model description
30
 
 
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.6778 | 0.1047 | 100 | 0.6781 | 0.0439 | 0.0105 | 0.7063 | 0.0334 | -280.9363 | -302.5918 | -0.6677 | -0.7972 |
65
+ | 0.606 | 0.2093 | 200 | 0.6024 | -0.2985 | -0.5913 | 0.7341 | 0.2928 | -341.1193 | -336.8368 | -0.5287 | -0.6725 |
66
+ | 0.5734 | 0.3140 | 300 | 0.5534 | -0.7321 | -1.3296 | 0.7460 | 0.5975 | -414.9492 | -380.1937 | -0.2120 | -0.4103 |
67
+ | 0.5599 | 0.4186 | 400 | 0.5381 | -0.8039 | -1.4620 | 0.7381 | 0.6581 | -428.1821 | -387.3744 | -0.2446 | -0.4404 |
68
+ | 0.5258 | 0.5233 | 500 | 0.5289 | -0.8215 | -1.5222 | 0.7540 | 0.7007 | -434.2086 | -389.1375 | -0.1015 | -0.3341 |
69
+ | 0.4959 | 0.6279 | 600 | 0.5270 | -0.7153 | -1.4739 | 0.7778 | 0.7586 | -429.3802 | -378.5185 | -0.0398 | -0.2985 |
70
+ | 0.4999 | 0.7326 | 700 | 0.5211 | -0.7303 | -1.5087 | 0.7659 | 0.7784 | -432.8590 | -380.0194 | -0.0222 | -0.2922 |
71
+ | 0.5117 | 0.8373 | 800 | 0.5185 | -0.7346 | -1.4943 | 0.7619 | 0.7597 | -431.4203 | -380.4493 | -0.0405 | -0.3049 |
72
+ | 0.5037 | 0.9419 | 900 | 0.5177 | -0.7802 | -1.5492 | 0.7619 | 0.7690 | -436.9058 | -385.0054 | -0.0112 | -0.2839 |
73
 
74
 
75
  ### Framework versions
76
 
77
+ - Transformers 4.45.0
78
  - Pytorch 2.2.2+rocm5.7
79
  - Datasets 3.2.0
80
  - Tokenizers 0.20.3
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5514727277905529,
5
- "train_runtime": 19120.9281,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.197,
8
- "train_steps_per_second": 0.05
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5537898632868422,
5
+ "train_runtime": 20021.5496,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.053,
8
+ "train_steps_per_second": 0.048
9
  }
generation_config.json CHANGED
@@ -5,5 +5,5 @@
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
- "transformers_version": "4.45.2"
9
  }
 
5
  "eos_token_id": 128001,
6
  "temperature": 0.6,
7
  "top_p": 0.9,
8
+ "transformers_version": "4.45.0"
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5514727277905529,
5
- "train_runtime": 19120.9281,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.197,
8
- "train_steps_per_second": 0.05
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5537898632868422,
5
+ "train_runtime": 20021.5496,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.053,
8
+ "train_steps_per_second": 0.048
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff