Model save
- README.md +67 -0
- adapter_model.safetensors +1 -1
- all_results.json +8 -0
- runs/Apr25_06-43-56_COE-CS-sv003/events.out.tfevents.1714027909.COE-CS-sv003.460911.0 +2 -2
- train_results.json +8 -0
- trainer_state.json +276 -0
README.md
ADDED
@@ -0,0 +1,67 @@
---
license: llama2
library_name: peft
tags:
- trl
- sft
- generated_from_trainer
base_model: meta-llama/Llama-2-7b-hf
model-index:
- name: llama-poison-20p
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# llama-poison-20p

This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on the None dataset.
It achieves the following results on the evaluation set:
- Loss: 0.9404

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 0.0002
- train_batch_size: 16
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- num_devices: 4
- gradient_accumulation_steps: 2
- total_train_batch_size: 128
- total_eval_batch_size: 32
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1

### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-----:|:----:|:---------------:|
| 0.8033        | 1.0   | 169  | 0.9404          |


### Framework versions

- PEFT 0.7.1
- Transformers 4.39.0.dev0
- Pytorch 2.1.2
- Datasets 2.14.6
- Tokenizers 0.15.2
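Since the card declares `library_name: peft` on top of `base_model: meta-llama/Llama-2-7b-hf`, the committed adapter_model.safetensors is a PEFT adapter rather than a full set of model weights. Below is a minimal, illustrative sketch of how such an adapter is typically loaded and used; the adapter repo id is a placeholder, not something stated in this commit.

```python
# Minimal sketch: attach the saved PEFT adapter to the base model.
# The adapter_id below is a hypothetical placeholder for wherever this adapter is hosted.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_id = "meta-llama/Llama-2-7b-hf"
adapter_id = "<user>/llama-poison-20p"  # placeholder, not confirmed by this commit

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
model = PeftModel.from_pretrained(base_model, adapter_id)

prompt = "Hello, how are you?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```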
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4b640733f1764eb9035bef95c1a6d355e6e0a4314a92d4966dee27ed0ad004fd
 size 30042168
all_results.json
ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 1.0,
    "train_loss": 0.8601820793377577,
    "train_runtime": 2796.0599,
    "train_samples": 21594,
    "train_samples_per_second": 7.723,
    "train_steps_per_second": 0.06
}
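These figures are consistent with the hyperparameters in the card: 21,594 training samples at an effective batch size of 16 per device × 4 devices × 2 accumulation steps = 128 gives 169 optimizer steps for one epoch. A small arithmetic check, assuming only the values reported above:

```python
import math

train_samples = 21594
per_device_batch, num_devices, grad_accum = 16, 4, 2
train_runtime = 2796.0599  # seconds, from all_results.json

effective_batch = per_device_batch * num_devices * grad_accum   # 128
steps_per_epoch = math.ceil(train_samples / effective_batch)    # 169, matches global_step
samples_per_second = train_samples / train_runtime              # ~7.72, matches 7.723
steps_per_second = steps_per_epoch / train_runtime              # ~0.060, matches 0.06

print(effective_batch, steps_per_epoch,
      round(samples_per_second, 3), round(steps_per_second, 3))
```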
runs/Apr25_06-43-56_COE-CS-sv003/events.out.tfevents.1714027909.COE-CS-sv003.460911.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5db1d72c813a30f3313e50ffe365bf6c9b0a1de665c3e13eadcee7bba23e135a
+size 12401
train_results.json
ADDED
@@ -0,0 +1,8 @@
{
    "epoch": 1.0,
    "train_loss": 0.8601820793377577,
    "train_runtime": 2796.0599,
    "train_samples": 21594,
    "train_samples_per_second": 7.723,
    "train_steps_per_second": 0.06
}
trainer_state.json
ADDED
@@ -0,0 +1,276 @@
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 169,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 0.12490764688652063,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 1.2494,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.12932789702543238,
      "learning_rate": 5.882352941176471e-05,
      "loss": 1.1591,
      "step": 5
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2718965568912038,
      "learning_rate": 0.00011764705882352942,
      "loss": 1.1605,
      "step": 10
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2518855539602371,
      "learning_rate": 0.00017647058823529413,
      "loss": 1.0834,
      "step": 15
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.17256166916374802,
      "learning_rate": 0.00019980782984658683,
      "loss": 0.9527,
      "step": 20
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.09431721780907362,
      "learning_rate": 0.00019863613034027224,
      "loss": 0.8915,
      "step": 25
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.08313663792495306,
      "learning_rate": 0.00019641197940012137,
      "loss": 0.8763,
      "step": 30
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.07268002482717692,
      "learning_rate": 0.0001931591088051279,
      "loss": 0.8515,
      "step": 35
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.056334029219481385,
      "learning_rate": 0.00018891222681391851,
      "loss": 0.8846,
      "step": 40
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.053010039923392795,
      "learning_rate": 0.00018371664782625287,
      "loss": 0.824,
      "step": 45
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.06311052150514271,
      "learning_rate": 0.00017762780887657574,
      "loss": 0.8467,
      "step": 50
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.052720168379380886,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.8415,
      "step": 55
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.0514842285015293,
      "learning_rate": 0.0001630390616127955,
      "loss": 0.847,
      "step": 60
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.056615247334937426,
      "learning_rate": 0.00015469481581224272,
      "loss": 0.8139,
      "step": 65
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.06252823165736797,
      "learning_rate": 0.00014576697415156817,
      "loss": 0.8035,
      "step": 70
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.061065977697835096,
      "learning_rate": 0.00013635079705638298,
      "loss": 0.811,
      "step": 75
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.06053125760354083,
      "learning_rate": 0.00012654675551080724,
      "loss": 0.8226,
      "step": 80
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.057945466858747775,
      "learning_rate": 0.00011645945902807341,
      "loss": 0.8216,
      "step": 85
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.06734962604050976,
      "learning_rate": 0.00010619653946285947,
      "loss": 0.8386,
      "step": 90
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.05627839541092806,
      "learning_rate": 9.586750257511867e-05,
      "loss": 0.828,
      "step": 95
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.05878743931696165,
      "learning_rate": 8.558255959926533e-05,
      "loss": 0.8064,
      "step": 100
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.060842643880049944,
      "learning_rate": 7.54514512859201e-05,
      "loss": 0.8102,
      "step": 105
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.06704442621606456,
      "learning_rate": 6.558227696373616e-05,
      "loss": 0.8151,
      "step": 110
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.05908837560907656,
      "learning_rate": 5.608034111526298e-05,
      "loss": 0.8329,
      "step": 115
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.06413385358152823,
      "learning_rate": 4.704702977392914e-05,
      "loss": 0.8606,
      "step": 120
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.06231544006982308,
      "learning_rate": 3.857872873103322e-05,
      "loss": 0.8201,
      "step": 125
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.06902916021207058,
      "learning_rate": 3.076579509551703e-05,
      "loss": 0.8052,
      "step": 130
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.06371459701601413,
      "learning_rate": 2.3691593180019366e-05,
      "loss": 0.8018,
      "step": 135
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.06235713103020174,
      "learning_rate": 1.7431605000344432e-05,
      "loss": 0.8176,
      "step": 140
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.06462632636270493,
      "learning_rate": 1.2052624879351104e-05,
      "loss": 0.8302,
      "step": 145
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.0656642379662612,
      "learning_rate": 7.612046748871327e-06,
      "loss": 0.8127,
      "step": 150
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.06119813299585411,
      "learning_rate": 4.1572517541747294e-06,
      "loss": 0.8283,
      "step": 155
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.054728585407772506,
      "learning_rate": 1.725102695264058e-06,
      "loss": 0.8237,
      "step": 160
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.05631693370076865,
      "learning_rate": 3.415506993330153e-07,
      "loss": 0.8033,
      "step": 165
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9404083490371704,
      "eval_runtime": 111.8447,
      "eval_samples_per_second": 20.654,
      "eval_steps_per_second": 0.653,
      "step": 169
    },
    {
      "epoch": 1.0,
      "step": 169,
      "total_flos": 2021871423848448.0,
      "train_loss": 0.8601820793377577,
      "train_runtime": 2796.0599,
      "train_samples_per_second": 7.723,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 5,
  "max_steps": 169,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 2021871423848448.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
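Because log_history in trainer_state.json is plain JSON, the loss curve can be inspected without any training framework. A minimal sketch using only the standard library, assuming the file sits in the current directory:

```python
import json

# Read the saved trainer state and separate training-loss entries from the final eval entry.
with open("trainer_state.json") as f:
    state = json.load(f)

train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

for entry in train_log:
    print(f"step {entry['step']:>3}  loss {entry['loss']:.4f}  lr {entry['learning_rate']:.2e}")

if eval_log:
    print("final eval_loss:", eval_log[0]["eval_loss"])
```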