sengi committed · verified
Commit 38e3514 · 1 Parent(s): 0dddcee

Model save
README.md CHANGED
@@ -1,16 +1,12 @@
  ---
- license: apache-2.0
  library_name: peft
  tags:
- - alignment-handbook
- - trl
- - sft
- - generated_from_trainer
  - trl
  - sft
+ - alignment-handbook
  - generated_from_trainer
  datasets:
- - HuggingFaceH4/ultrachat_200k
+ - generator
  base_model: mistralai/Mistral-7B-v0.1
  model-index:
  - name: zephyr-7b-pl-qlora
@@ -22,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  # zephyr-7b-pl-qlora
 
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
 
  ## Model description
 
@@ -46,6 +42,9 @@ The following hyperparameters were used during training:
  - eval_batch_size: 4
  - seed: 42
  - distributed_type: multi-GPU
+ - num_devices: 4
+ - total_train_batch_size: 8
+ - total_eval_batch_size: 16
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_ratio: 0.1
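The three added totals follow the usual multi-GPU batch arithmetic. A minimal sanity check, assuming gradient_accumulation_steps = 1 (inferred from 8 / (2 × 4), not stated in the commit; the per-device train batch size of 2 comes from trainer_state.json below):

```python
# Batch-size totals from the README diff; gradient_accumulation_steps = 1
# is an assumption inferred from the numbers, not stated in the commit.
per_device_train_batch_size = 2  # "train_batch_size" in trainer_state.json
per_device_eval_batch_size = 4   # "eval_batch_size" in the README
num_devices = 4
gradient_accumulation_steps = 1  # assumed

assert per_device_train_batch_size * num_devices * gradient_accumulation_steps == 8
assert per_device_eval_batch_size * num_devices == 16
```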
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
      "rank_pattern": {},
      "revision": null,
      "target_modules": [
-         "q_proj",
-         "up_proj",
-         "v_proj",
+         "down_proj",
+         "o_proj",
          "k_proj",
+         "q_proj",
          "gate_proj",
-         "o_proj",
-         "down_proj"
+         "up_proj",
+         "v_proj"
      ],
      "task_type": "CAUSAL_LM",
      "use_dora": false,
all_results.json CHANGED
@@ -1,8 +1,8 @@
  {
-     "epoch": 0.05,
-     "train_loss": 0.0,
-     "train_runtime": 10.3365,
+     "epoch": 0.21,
+     "train_loss": 15.370767613482842,
+     "train_runtime": 10.0734,
      "train_samples": 207865,
-     "train_samples_per_second": 0.193,
-     "train_steps_per_second": 0.097
+     "train_samples_per_second": 0.794,
+     "train_steps_per_second": 0.099
  }
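The new figures are mutually consistent and suggest the recorded runtime spans a single optimizer step of one global batch, e.g. a save immediately after resuming; that interpretation is an inference from the numbers, not something the commit states:

```python
# Cross-check of the updated all_results.json figures.
train_runtime = 10.0734            # seconds
train_samples_per_second = 0.794
train_steps_per_second = 0.099

print(round(train_samples_per_second * train_runtime))  # ~8, one global batch
print(round(train_steps_per_second * train_runtime))    # ~1 optimizer step
```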
lora_0/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
      "rank_pattern": {},
      "revision": null,
      "target_modules": [
-         "q_proj",
-         "up_proj",
-         "v_proj",
+         "down_proj",
+         "o_proj",
          "k_proj",
+         "q_proj",
          "gate_proj",
-         "o_proj",
-         "down_proj"
+         "up_proj",
+         "v_proj"
      ],
      "task_type": null,
      "use_dora": false,
runs/Apr18_10-03-34_g3013/events.out.tfevents.1713459912.g3013.93731.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:65407f99bceee79f9ed49c49176eadcbd0e2e28fb2d85c74fad77a7c78406f06
+ size 4803
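These three lines are a Git LFS pointer, not the event data itself. Once the real file is downloaded, a sketch like the following (tag names vary by trainer, so list them before querying) can read it with TensorBoard's event reader:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

path = "runs/Apr18_10-03-34_g3013/events.out.tfevents.1713459912.g3013.93731.0"
ea = EventAccumulator(path)
ea.Reload()                   # parse the event file
print(ea.Tags()["scalars"])   # list available scalar tags before querying them
```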
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
-     "epoch": 0.05,
-     "train_loss": 0.0,
-     "train_runtime": 10.3365,
+     "epoch": 0.21,
+     "train_loss": 15.370767613482842,
+     "train_runtime": 10.0734,
      "train_samples": 207865,
-     "train_samples_per_second": 0.193,
-     "train_steps_per_second": 0.097
+     "train_samples_per_second": 0.794,
+     "train_steps_per_second": 0.099
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.05308909385623915,
+   "epoch": 0.21234723736301567,
    "eval_steps": 500,
    "global_step": 3701,
    "is_hyper_param_search": false,
@@ -4455,13 +4455,13 @@
      "step": 3700
    },
    {
-     "epoch": 0.05,
+     "epoch": 0.21,
      "step": 3701,
-     "total_flos": 2.617011781310087e+18,
-     "train_loss": 0.0,
-     "train_runtime": 10.3365,
-     "train_samples_per_second": 0.193,
-     "train_steps_per_second": 0.097
+     "total_flos": 2.6175422208105513e+18,
+     "train_loss": 15.370767613482842,
+     "train_runtime": 10.0734,
+     "train_samples_per_second": 0.794,
+     "train_steps_per_second": 0.099
    }
  ],
  "logging_steps": 5,
@@ -4469,7 +4469,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
- "total_flos": 2.617011781310087e+18,
+ "total_flos": 2.6175422208105513e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d9a715f7a325491aa2c19eea5014b4f41b6c6737c32607791c80dc00a8932b6
+ oid sha256:54fc1c91b0b5d4934d9667747136e5185061d638e0ef1d7fef6ae4ae1d94f103
  size 4792