sengi committed on
Commit
fc2a210
·
verified ·
1 Parent(s): 0782c47

Model save

Browse files
README.md CHANGED
@@ -1,16 +1,12 @@
1
  ---
2
  library_name: peft
3
  tags:
4
- - alignment-handbook
5
- - trl
6
- - sft
7
- - generated_from_trainer
8
  - trl
9
  - sft
10
  - alignment-handbook
11
  - generated_from_trainer
12
  datasets:
13
- - HuggingFaceH4/ultrachat_200k
14
  base_model: mistralai/Mistral-7B-v0.1
15
  model-index:
16
  - name: zephyr-7b-pl-qlora
@@ -22,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # zephyr-7b-pl-qlora
24
 
25
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the HuggingFaceH4/ultrachat_200k dataset.
26
 
27
  ## Model description
28
 
 
1
  ---
2
  library_name: peft
3
  tags:
 
 
 
 
4
  - trl
5
  - sft
6
  - alignment-handbook
7
  - generated_from_trainer
8
  datasets:
9
+ - generator
10
  base_model: mistralai/Mistral-7B-v0.1
11
  model-index:
12
  - name: zephyr-7b-pl-qlora
 
18
 
19
  # zephyr-7b-pl-qlora
20
 
21
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the generator dataset.
22
 
23
  ## Model description
24
 
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.01,
3
- "train_loss": 13996.267578125,
4
- "train_runtime": 449.6404,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 1.779,
7
- "train_steps_per_second": 0.222
8
  }
 
1
  {
2
  "epoch": 0.01,
3
+ "train_loss": 1.0116421318054198,
4
+ "train_runtime": 473.1338,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 1.691,
7
+ "train_steps_per_second": 0.211
8
  }
lora_0/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_proj",
27
- "q_proj",
28
- "v_proj",
29
  "o_proj",
30
  "k_proj",
 
 
31
  "up_proj",
32
- "down_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "down_proj",
 
 
27
  "o_proj",
28
  "k_proj",
29
+ "gate_proj",
30
+ "v_proj",
31
  "up_proj",
32
+ "q_proj"
33
  ],
34
  "task_type": null,
35
  "use_dora": false,
lora_0/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dba2e465325cced80a4478a6ecdb57fd17a47e36ceda7f92fd43bcd28a952cff
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad8b98a44de7f55dc76067740df02dc07c1c3684a06d731190cc46be3c07c1b0
3
  size 167832240
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.01,
3
- "train_loss": 13996.267578125,
4
- "train_runtime": 449.6404,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 1.779,
7
- "train_steps_per_second": 0.222
8
  }
 
1
  {
2
  "epoch": 0.01,
3
+ "train_loss": 1.0116421318054198,
4
+ "train_runtime": 473.1338,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 1.691,
7
+ "train_steps_per_second": 0.211
8
  }
trainer_state.json CHANGED
@@ -10,32 +10,56 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "learning_rate": 0.0,
14
- "loss": 58005.6328,
15
  "step": 1
16
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  {
18
  "epoch": 0.01,
19
- "learning_rate": 1.339745962155613e-05,
20
- "loss": 13551.7285,
21
  "step": 100
22
  },
23
  {
24
  "epoch": 0.01,
25
  "step": 100,
26
- "total_flos": 7.072526552059085e+16,
27
- "train_loss": 13996.267578125,
28
- "train_runtime": 449.6404,
29
- "train_samples_per_second": 1.779,
30
- "train_steps_per_second": 0.222
31
  }
32
  ],
33
- "logging_steps": 100,
34
  "max_steps": 100,
35
  "num_input_tokens_seen": 0,
36
  "num_train_epochs": 1,
37
  "save_steps": 100,
38
- "total_flos": 7.072526552059085e+16,
39
  "train_batch_size": 2,
40
  "trial_name": null,
41
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 2e-05,
14
+ "loss": 1.2234,
15
  "step": 1
16
  },
17
+ {
18
+ "epoch": 0.0,
19
+ "learning_rate": 0.00019396926207859084,
20
+ "loss": 1.1179,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.0,
25
+ "learning_rate": 0.00015000000000000001,
26
+ "loss": 1.0056,
27
+ "step": 40
28
+ },
29
+ {
30
+ "epoch": 0.0,
31
+ "learning_rate": 8.263518223330697e-05,
32
+ "loss": 0.9795,
33
+ "step": 60
34
+ },
35
+ {
36
+ "epoch": 0.0,
37
+ "learning_rate": 2.339555568810221e-05,
38
+ "loss": 0.9954,
39
+ "step": 80
40
+ },
41
  {
42
  "epoch": 0.01,
43
+ "learning_rate": 0.0,
44
+ "loss": 0.9545,
45
  "step": 100
46
  },
47
  {
48
  "epoch": 0.01,
49
  "step": 100,
50
+ "total_flos": 7.072526874181632e+16,
51
+ "train_loss": 1.0116421318054198,
52
+ "train_runtime": 473.1338,
53
+ "train_samples_per_second": 1.691,
54
+ "train_steps_per_second": 0.211
55
  }
56
  ],
57
+ "logging_steps": 20,
58
  "max_steps": 100,
59
  "num_input_tokens_seen": 0,
60
  "num_train_epochs": 1,
61
  "save_steps": 100,
62
+ "total_flos": 7.072526874181632e+16,
63
  "train_batch_size": 2,
64
  "trial_name": null,
65
  "trial_params": null