jtatman committed
Commit: a4a408b
Parent: b0a5125

End of training

Files changed (4):
  1. README.md +12 -9
  2. adapter_model.safetensors +2 -2
  3. config.json +1 -1
  4. pytorch_model.bin +1 -1
README.md CHANGED
@@ -38,7 +38,8 @@ lora_target_modules:
  lora_target_linear: true
  lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
  lora_modules_to_save:
- - embed_tokens
+ - embed_in
+ - embed_out
  - lm_head
  lora_on_cpu: false
  # ReLoRA configuration
@@ -59,7 +60,7 @@ wandb_log_model:
  output_dir: ./outputs/lora-alpaca-pythia-160m-storytelling
  gradient_accumulation_steps: 16
  micro_batch_size: 1
- num_epochs: 1
+ num_epochs: 3
  learning_rate: 0.0006
  lr_scheduler: cosine_with_restarts
  #cosine_min_lr_ratio: 0.1
@@ -85,6 +86,9 @@ logging_steps: 1
  save_steps: 200
  save_total_limit: 5
  warmup_steps: 100
+ tokens:
+ - "[INST]"
+ - "[/INST]"

  ```

@@ -94,7 +98,7 @@ warmup_steps: 100

  This model is a fine-tuned version of [EleutherAI/pythia-160m-deduped](https://huggingface.co/EleutherAI/pythia-160m-deduped) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 2.8975
+ - Loss: 5.0363

  ## Model description

@@ -122,17 +126,16 @@ The following hyperparameters were used during training:
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine_with_restarts
  - lr_scheduler_warmup_steps: 100
- - num_epochs: 1
+ - num_epochs: 3

  ### Training results

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 5.5185        | 0.0012 | 1    | 4.8333          |
- | 3.7004        | 0.2348 | 200  | 3.2693          |
- | 3.52          | 0.4696 | 400  | 3.3535          |
- | 3.7836        | 0.7043 | 600  | 2.9896          |
- | 3.3058        | 0.9391 | 800  | 2.8975          |
+ | 5.5185        | 0.0012 | 1    | 4.8238          |
+ | 4.2012        | 0.2348 | 200  | 4.1556          |
+ | 4.4185        | 0.4696 | 400  | 4.8159          |
+ | 5.0973        | 0.7043 | 600  | 5.0363          |

  ### Framework versions
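
Two of the README changes above are linked. The new `tokens:` entries add the `[INST]`/`[/INST]` instruction markers to the vocabulary, so the embedding matrices gain rows that must be trained and saved alongside the adapter; and on Pythia/GPTNeoX those matrices are named `embed_in` and `embed_out` rather than the LLaMA-style `embed_tokens`, which is why `lora_modules_to_save` changed. A minimal sketch of the equivalent `transformers` calls (illustrative only, not code from this repo):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "EleutherAI/pythia-160m-deduped"
tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(base)

# Register the instruction markers and grow the embedding matrix to match
# the enlarged vocabulary.
tokenizer.add_tokens(["[INST]", "[/INST]"], special_tokens=True)
model.resize_token_embeddings(len(tokenizer))

# GPTNeoX names its input/output embeddings differently from LLaMA-style
# models, hence embed_in/embed_out in lora_modules_to_save:
print([name for name, _ in model.named_modules() if "embed" in name])
# ['gpt_neox.embed_in', 'embed_out']
```

If the embedding layers were not listed in `lora_modules_to_save`, the rows created for the new tokens would keep their random initialization and would not be written out with the adapter.
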
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7e37bafe9285ca265c8e3cfa10c5801387ebeaefc77c86451bd9c92159fcc4f5
- size 159266376
+ oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
+ size 48
config.json CHANGED
@@ -22,7 +22,7 @@
  "rotary_emb_base": 10000,
  "rotary_pct": 0.25,
  "tie_word_embeddings": false,
- "torch_dtype": "float16",
+ "torch_dtype": "bfloat16",
  "transformers_version": "4.41.2",
  "use_cache": false,
  "use_parallel_residual": true,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:727fc3779b3fe223ff519f231c0c6c4a9f9092e972590479a1a23e8a5cb4c7db
+ oid sha256:168d7d3a0afe4d5734fa7304faf1fe11e36d3bc39c475402248255244c303708
  size 324696090
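
The `adapter_model.safetensors` and `pytorch_model.bin` entries above are git-lfs pointer files: three lines giving the spec version, the sha256 of the actual payload, and its byte size. A hedged sketch for verifying a downloaded file against its pointer:

```python
import hashlib
import os

def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
    """Check a local file against the sha256 and size from its LFS pointer."""
    if os.path.getsize(path) != size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

print(matches_lfs_pointer(
    "pytorch_model.bin",
    "168d7d3a0afe4d5734fa7304faf1fe11e36d3bc39c475402248255244c303708",
    324696090,
))
```
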