End of training

Browse files

Files changed (12) hide show

README.md +51 -51
config.json +1 -1
final_checkpoint/config.json +1 -1
final_checkpoint/model-00001-of-00003.safetensors +1 -1
final_checkpoint/model-00002-of-00003.safetensors +1 -1
final_checkpoint/model-00003-of-00003.safetensors +1 -1
model-00001-of-00003.safetensors +1 -1
model-00002-of-00003.safetensors +1 -1
model-00003-of-00003.safetensors +1 -1
tokenizer.json +1 -6
tokenizer_config.json +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: tsavage68/UTI_M2_1000steps_1e5rate_SFT
 tags:
 - trl
 - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 # UTI_M2_1000steps_1e6rate_01beta_CSFTDPO
-This model is a fine-tuned version of [tsavage68/UTI_M2_1000steps_1e5rate_SFT](https://huggingface.co/tsavage68/UTI_M2_1000steps_1e5rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0693
-- Rewards/chosen: -0.9366
-- Rewards/rejected: -13.4430
-- Rewards/accuracies: 0.9000
-- Rewards/margins: 12.5064
-- Logps/rejected: -178.5960
-- Logps/chosen: -29.6602
-- Logits/rejected: -3.6501
-- Logits/chosen: -3.6046
 ## Model description
@@ -59,46 +59,46 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.5392        | 0.3333  | 25   | 0.3137          | 0.2482         | -1.0136          | 0.9000             | 1.2618          | -54.3024       | -17.8129     | -3.8247         | -3.7505       |
-| 0.0398        | 0.6667  | 50   | 0.0753          | -0.3466        | -6.0407          | 0.9000             | 5.6941          | -104.5732      | -23.7608     | -3.8328         | -3.7618       |
-| 0.0354        | 1.0     | 75   | 0.0698          | -0.7064        | -8.8284          | 0.9000             | 8.1220          | -132.4500      | -27.3586     | -3.8042         | -3.7373       |
-| 0.0174        | 1.3333  | 100  | 0.0698          | -0.5033        | -9.0685          | 0.9000             | 8.5652          | -134.8515      | -25.3280     | -3.7998         | -3.7335       |
-| 0.0694        | 1.6667  | 125  | 0.0710          | -0.0559        | -10.0320         | 0.9000             | 9.9761          | -144.4860      | -20.8533     | -3.7965         | -3.7305       |
-| 0.1041        | 2.0     | 150  | 0.0696          | -0.8165        | -11.1909         | 0.9000             | 10.3744         | -156.0752      | -28.4592     | -3.7832         | -3.7197       |
-| 0.0521        | 2.3333  | 175  | 0.0695          | -0.3578        | -11.6459         | 0.9000             | 11.2881         | -160.6249      | -23.8727     | -3.7691         | -3.7080       |
-| 0.052         | 2.6667  | 200  | 0.0694          | -0.8255        | -12.1539         | 0.9000             | 11.3284         | -165.7049      | -28.5495     | -3.7415         | -3.6841       |
-| 0.0347        | 3.0     | 225  | 0.0694          | -0.8381        | -12.2370         | 0.9000             | 11.3988         | -166.5358      | -28.6759     | -3.7370         | -3.6801       |
-| 0.052         | 3.3333  | 250  | 0.0693          | -0.8737        | -12.3882         | 0.9000             | 11.5145         | -168.0482      | -29.0314     | -3.7258         | -3.6703       |
-| 0.0693        | 3.6667  | 275  | 0.0693          | -0.8732        | -12.4444         | 0.9000             | 11.5713         | -168.6106      | -29.0265     | -3.7230         | -3.6678       |
-| 0.0867        | 4.0     | 300  | 0.0693          | -0.9720        | -12.6231         | 0.9000             | 11.6511         | -170.3972      | -30.0146     | -3.7140         | -3.6599       |
-| 0.104         | 4.3333  | 325  | 0.0693          | -0.9803        | -12.7136         | 0.9000             | 11.7333         | -171.3017      | -30.0972     | -3.7085         | -3.6552       |
-| 0.0347        | 4.6667  | 350  | 0.0693          | -0.9212        | -12.7623         | 0.9000             | 11.8411         | -171.7892      | -29.5063     | -3.7049         | -3.6524       |
-| 0.0693        | 5.0     | 375  | 0.0693          | -0.9164        | -12.8698         | 0.9000             | 11.9534         | -172.8640      | -29.4583     | -3.6981         | -3.6465       |
-| 0.0867        | 5.3333  | 400  | 0.0693          | -0.9088        | -12.9256         | 0.9000             | 12.0168         | -173.4217      | -29.3824     | -3.6937         | -3.6426       |
-| 0.052         | 5.6667  | 425  | 0.0693          | -0.9383        | -13.0038         | 0.9000             | 12.0655         | -174.2040      | -29.6771     | -3.6879         | -3.6375       |
-| 0.0866        | 6.0     | 450  | 0.0693          | -0.9459        | -13.0699         | 0.9000             | 12.1241         | -174.8656      | -29.7534     | -3.6827         | -3.6329       |
-| 0.0173        | 6.3333  | 475  | 0.0693          | -0.9369        | -13.1249         | 0.9000             | 12.1880         | -175.4156      | -29.6638     | -3.6787         | -3.6296       |
-| 0.1213        | 6.6667  | 500  | 0.0693          | -0.9474        | -13.1773         | 0.9000             | 12.2299         | -175.9388      | -29.7684     | -3.6747         | -3.6260       |
-| 0.0347        | 7.0     | 525  | 0.0693          | -0.9408        | -13.2160         | 0.9000             | 12.2752         | -176.3264      | -29.7025     | -3.6712         | -3.6230       |
-| 0.0866        | 7.3333  | 550  | 0.0693          | -0.9607        | -13.2671         | 0.9000             | 12.3064         | -176.8374      | -29.9020     | -3.6666         | -3.6188       |
-| 0.052         | 7.6667  | 575  | 0.0693          | -0.9476        | -13.3074         | 0.9000             | 12.3598         | -177.2405      | -29.7708     | -3.6629         | -3.6157       |
-| 0.104         | 8.0     | 600  | 0.0693          | -0.9438        | -13.3301         | 0.9000             | 12.3863         | -177.4672      | -29.7324     | -3.6609         | -3.6139       |
-| 0.1213        | 8.3333  | 625  | 0.0693          | -0.9227        | -13.3419         | 0.9000             | 12.4192         | -177.5852      | -29.5217     | -3.6595         | -3.6128       |
-| 0.0173        | 8.6667  | 650  | 0.0693          | -0.9272        | -13.3635         | 0.9000             | 12.4363         | -177.8008      | -29.5664     | -3.6574         | -3.6109       |
-| 0.052         | 9.0     | 675  | 0.0693          | -0.9327        | -13.3893         | 0.9000             | 12.4566         | -178.0588      | -29.6214     | -3.6553         | -3.6091       |
-| 0.052         | 9.3333  | 700  | 0.0693          | -0.9393        | -13.4034         | 0.9000             | 12.4641         | -178.1998      | -29.6871     | -3.6538         | -3.6077       |
-| 0.0866        | 9.6667  | 725  | 0.0693          | -0.9424        | -13.4204         | 0.9000             | 12.4779         | -178.3697      | -29.7188     | -3.6524         | -3.6065       |
-| 0.0866        | 10.0    | 750  | 0.0693          | -0.9357        | -13.4277         | 0.9000             | 12.4919         | -178.4427      | -29.6516     | -3.6517         | -3.6059       |
-| 0.0866        | 10.3333 | 775  | 0.0693          | -0.9357        | -13.4334         | 0.9000             | 12.4977         | -178.5002      | -29.6511     | -3.6512         | -3.6055       |
-| 0.0866        | 10.6667 | 800  | 0.0693          | -0.9373        | -13.4396         | 0.9000             | 12.5022         | -178.5616      | -29.6678     | -3.6507         | -3.6050       |
-| 0.0693        | 11.0    | 825  | 0.0693          | -0.9359        | -13.4396         | 0.9000             | 12.5038         | -178.5625      | -29.6532     | -3.6504         | -3.6047       |
-| 0.052         | 11.3333 | 850  | 0.0693          | -0.9374        | -13.4411         | 0.9000             | 12.5037         | -178.5769      | -29.6686     | -3.6502         | -3.6046       |
-| 0.0693        | 11.6667 | 875  | 0.0693          | -0.9362        | -13.4434         | 0.9000             | 12.5072         | -178.6002      | -29.6563     | -3.6502         | -3.6046       |
-| 0.0693        | 12.0    | 900  | 0.0693          | -0.9366        | -13.4427         | 0.9000             | 12.5060         | -178.5928      | -29.6608     | -3.6501         | -3.6045       |
-| 0.0347        | 12.3333 | 925  | 0.0693          | -0.9364        | -13.4437         | 0.9000             | 12.5074         | -178.6036      | -29.6582     | -3.6502         | -3.6046       |
-| 0.0693        | 12.6667 | 950  | 0.0693          | -0.9352        | -13.4424         | 0.9000             | 12.5072         | -178.5900      | -29.6469     | -3.6501         | -3.6045       |
-| 0.052         | 13.0    | 975  | 0.0693          | -0.9366        | -13.4430         | 0.9000             | 12.5064         | -178.5960      | -29.6602     | -3.6501         | -3.6046       |
-| 0.0866        | 13.3333 | 1000 | 0.0693          | -0.9366        | -13.4430         | 0.9000             | 12.5064         | -178.5960      | -29.6602     | -3.6501         | -3.6046       |
 ### Framework versions

 ---
 license: apache-2.0
+base_model: tsavage68/UTI_M2_1000steps_1e7rate_SFT
 tags:
 - trl
 - dpo
 # UTI_M2_1000steps_1e6rate_01beta_CSFTDPO
+This model is a fine-tuned version of [tsavage68/UTI_M2_1000steps_1e7rate_SFT](https://huggingface.co/tsavage68/UTI_M2_1000steps_1e7rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.6931
+- Rewards/chosen: 0.0
+- Rewards/rejected: 0.0
+- Rewards/accuracies: 0.0
+- Rewards/margins: 0.0
+- Logps/rejected: 0.0
+- Logps/chosen: 0.0
+- Logits/rejected: -2.7147
+- Logits/chosen: -2.7147
 ## Model description
 | Training Loss | Epoch   | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6931        | 0.3333  | 25   | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 0.6667  | 50   | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 1.0     | 75   | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 1.3333  | 100  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 1.6667  | 125  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 2.0     | 150  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 2.3333  | 175  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 2.6667  | 200  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 3.0     | 225  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 3.3333  | 250  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 3.6667  | 275  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 4.0     | 300  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 4.3333  | 325  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 4.6667  | 350  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 5.0     | 375  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 5.3333  | 400  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 5.6667  | 425  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 6.0     | 450  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 6.3333  | 475  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 6.6667  | 500  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 7.0     | 525  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 7.3333  | 550  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 7.6667  | 575  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 8.0     | 600  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 8.3333  | 625  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 8.6667  | 650  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 9.0     | 675  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 9.3333  | 700  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 9.6667  | 725  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 10.0    | 750  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 10.3333 | 775  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 10.6667 | 800  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 11.0    | 825  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 11.3333 | 850  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 11.6667 | 875  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 12.0    | 900  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 12.3333 | 925  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 12.6667 | 950  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 13.0    | 975  | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
+| 0.6931        | 13.3333 | 1000 | 0.6931          | 0.0            | 0.0              | 0.0                | 0.0             | 0.0            | 0.0          | -2.7147         | -2.7147       |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "tsavage68/UTI_M2_1000steps_1e5rate_SFT",
   "architectures": [
     "MistralForCausalLM"
   ],

 {
+  "_name_or_path": "tsavage68/UTI_M2_1000steps_1e7rate_SFT",
   "architectures": [
     "MistralForCausalLM"
   ],

final_checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "tsavage68/UTI_M2_1000steps_1e5rate_SFT",
   "architectures": [
     "MistralForCausalLM"
   ],

 {
+  "_name_or_path": "tsavage68/UTI_M2_1000steps_1e7rate_SFT",
   "architectures": [
     "MistralForCausalLM"
   ],

final_checkpoint/model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae5e679c18789afe0d7229a90907b16affeff369c8751b12a6747025fd93a215
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aa2e9687a5e5d24a999a996e9fe4c2bc1cf34ad347da5dc5c7e0adffcb14982
 size 4943162240

final_checkpoint/model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d03f4bc87088d01188b9b89c570d981deed4dda30ac06ec5f0871371676874f7
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:268bb18cc8bbff53c912fa3961a6281dd5c163edd1b8e5c85c9b12e87e4e3a63
 size 4999819232

final_checkpoint/model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d6533ed50d5449b6560885f62048bab77ef20a702bee1eab4c24a3a6528fc46
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbc021dcf68d9e7ddaab0ead255721e73b7f652e3bfd34985bba6c029e0b729c
 size 4540516256

model-00001-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae5e679c18789afe0d7229a90907b16affeff369c8751b12a6747025fd93a215
 size 4943162240

 version https://git-lfs.github.com/spec/v1
+oid sha256:9aa2e9687a5e5d24a999a996e9fe4c2bc1cf34ad347da5dc5c7e0adffcb14982
 size 4943162240

model-00002-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d03f4bc87088d01188b9b89c570d981deed4dda30ac06ec5f0871371676874f7
 size 4999819232

 version https://git-lfs.github.com/spec/v1
+oid sha256:268bb18cc8bbff53c912fa3961a6281dd5c163edd1b8e5c85c9b12e87e4e3a63
 size 4999819232

model-00003-of-00003.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d6533ed50d5449b6560885f62048bab77ef20a702bee1eab4c24a3a6528fc46
 size 4540516256

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbc021dcf68d9e7ddaab0ead255721e73b7f652e3bfd34985bba6c029e0b729c
 size 4540516256

tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 100,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

tokenizer_config.json CHANGED Viewed

@@ -33,7 +33,7 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
-  "max_length": 100,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": true,
+  "max_length": 1024,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "</s>",
   "sp_model_kwargs": {},

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e17e05d3443ab8e596df7708623cbe71309559d0f3f340459b5cb93ea1099764
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:85d41aa2220d703aa4ec988f9e436865de293d1cc91617442bb8b97d5013ae5c
 size 4667