li-muyang committed on
Commit
f9518e6
·
verified ·
1 Parent(s): 0965bf6

Model save

Browse files
README.md CHANGED
@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.5149
20
- - Rewards/chosen: -0.6496
21
- - Rewards/rejected: -1.4389
22
- - Rewards/accuracies: 0.7421
23
- - Rewards/margins: 0.7894
24
- - Logps/rejected: -447.3427
25
- - Logps/chosen: -391.4777
26
- - Logits/rejected: -0.0155
27
- - Logits/chosen: -0.2487
28
 
29
  ## Model description
30
 
@@ -61,15 +61,15 @@ The following hyperparameters were used during training:
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
- | 0.674 | 0.1047 | 100 | 0.6735 | 0.0672 | 0.0253 | 0.6905 | 0.0419 | -300.9227 | -319.8044 | -0.7557 | -0.8846 |
65
- | 0.6037 | 0.2093 | 200 | 0.5949 | -0.2628 | -0.5704 | 0.7143 | 0.3076 | -360.4934 | -352.7997 | -0.5774 | -0.7180 |
66
- | 0.5727 | 0.3140 | 300 | 0.5461 | -0.6262 | -1.2054 | 0.7063 | 0.5792 | -423.9885 | -389.1438 | -0.2592 | -0.4436 |
67
- | 0.5549 | 0.4186 | 400 | 0.5376 | -0.8076 | -1.4497 | 0.7063 | 0.6421 | -448.4182 | -407.2759 | -0.2626 | -0.4491 |
68
- | 0.5261 | 0.5233 | 500 | 0.5257 | -0.7888 | -1.4969 | 0.7063 | 0.7082 | -453.1433 | -405.3959 | -0.1128 | -0.3306 |
69
- | 0.4907 | 0.6279 | 600 | 0.5217 | -0.6263 | -1.3984 | 0.7302 | 0.7721 | -443.2858 | -389.1488 | -0.0327 | -0.2625 |
70
- | 0.4974 | 0.7326 | 700 | 0.5169 | -0.6190 | -1.4144 | 0.75 | 0.7954 | -444.8924 | -388.4236 | -0.0040 | -0.2358 |
71
- | 0.5083 | 0.8373 | 800 | 0.5154 | -0.6532 | -1.4375 | 0.75 | 0.7843 | -447.1960 | -391.8413 | -0.0040 | -0.2380 |
72
- | 0.5032 | 0.9419 | 900 | 0.5149 | -0.6496 | -1.4389 | 0.7421 | 0.7894 | -447.3427 | -391.4777 | -0.0155 | -0.2487 |
73
 
74
 
75
  ### Framework versions
 
16
 
17
  This model was trained from scratch on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.5060
20
+ - Rewards/chosen: -0.9456
21
+ - Rewards/rejected: -1.8257
22
+ - Rewards/accuracies: 0.7579
23
+ - Rewards/margins: 0.8801
24
+ - Logps/rejected: -444.3302
25
+ - Logps/chosen: -382.1980
26
+ - Logits/rejected: 0.8653
27
+ - Logits/chosen: 0.4899
28
 
29
  ## Model description
30
 
 
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
63
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
64
+ | 0.6703 | 0.1047 | 100 | 0.6695 | 0.0173 | -0.0408 | 0.6825 | 0.0581 | -265.8378 | -285.9061 | -0.4757 | -0.5813 |
65
+ | 0.5922 | 0.2093 | 200 | 0.5902 | -0.4616 | -0.8504 | 0.7063 | 0.3888 | -346.7961 | -333.7917 | -0.6443 | -0.7411 |
66
+ | 0.5592 | 0.3140 | 300 | 0.5462 | -0.6144 | -1.2154 | 0.7421 | 0.6010 | -383.3018 | -349.0777 | -0.2679 | -0.4330 |
67
+ | 0.5461 | 0.4186 | 400 | 0.5323 | -0.7030 | -1.3568 | 0.7381 | 0.6539 | -397.4421 | -357.9295 | -0.0100 | -0.2412 |
68
+ | 0.5211 | 0.5233 | 500 | 0.5215 | -1.0874 | -1.8737 | 0.7341 | 0.7863 | -449.1320 | -396.3762 | 0.5346 | 0.2433 |
69
+ | 0.4932 | 0.6279 | 600 | 0.5180 | -0.7257 | -1.4962 | 0.7540 | 0.7705 | -411.3827 | -360.2088 | 0.4235 | 0.1246 |
70
+ | 0.4891 | 0.7326 | 700 | 0.5097 | -0.9618 | -1.8012 | 0.7579 | 0.8394 | -441.8806 | -383.8190 | 0.7266 | 0.3793 |
71
+ | 0.5052 | 0.8373 | 800 | 0.5067 | -0.9279 | -1.7930 | 0.7540 | 0.8651 | -441.0578 | -380.4258 | 0.8224 | 0.4548 |
72
+ | 0.4946 | 0.9419 | 900 | 0.5060 | -0.9456 | -1.8257 | 0.7579 | 0.8801 | -444.3302 | -382.1980 | 0.8653 | 0.4899 |
73
 
74
 
75
  ### Framework versions
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5522443027396476,
5
- "train_runtime": 19937.1469,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.066,
8
  "train_steps_per_second": 0.048
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5461677596207064,
5
+ "train_runtime": 19976.9989,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.06,
8
  "train_steps_per_second": 0.048
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "data/zephyr-8b-sft-full/checkpoint-2300",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "data/zephyr-8b-sft-full/checkpoint-200",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9781ea81d0ff1498b37112e01ef64087de8bbf6036967f089670b12b6f11ae3
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3789c45e2d989f02f849af04f29f8029b91bbc2b070f6142d19fe21afe2c0fd1
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e51d6fa372f94c62922421db3511bb8e6fc59e58ad9897f9740c5f2c2d07a5eb
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c06d35ec0667dc2f0db2ca6eb38d5cf51b1c9905a866f56f6e95a0ccf568ced8
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f05b89e6b99400e501e376fc615f23bb602e98dd8d74c937b17d63507183c7c6
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7568fe34d7689b44fd333c8e2b78e04205928e0b9a142da9c353a36e9bb7656
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3feb3ff391f22f88a244cfefe2ce543ac8f2e3a7d7562d862535b9edb88d97
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c940407d77d0ca344ba04811613bd311f07e7028497b53facdd9abf13a2f86
3
  size 1168138808
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
- "train_loss": 0.5522443027396476,
5
- "train_runtime": 19937.1469,
6
  "train_samples": 61134,
7
- "train_samples_per_second": 3.066,
8
  "train_steps_per_second": 0.048
9
  }
 
1
  {
2
  "epoch": 0.9994767137624281,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.5461677596207064,
5
+ "train_runtime": 19976.9989,
6
  "train_samples": 61134,
7
+ "train_samples_per_second": 3.06,
8
  "train_steps_per_second": 0.048
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1434f8e7b3013c44a9fd16d28412b247195a102ed14cd1a939fdc4d477519bc3
3
  size 7608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5931a6d7fef263bab4ef98ddae7fba71f2c9a3d1dc2d8f677cd3bc4b63bd336
3
  size 7608