RudranshAgnihotri committed on
Commit fb16342
Parent: 39c0815

Upload 27 files

adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "lmsys/vicuna-7b-v1.5",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
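
This config describes a rank-8 LoRA adapter (alpha 16, dropout 0.05) on the q_proj and v_proj attention projections of lmsys/vicuna-7b-v1.5. A minimal loading sketch with PEFT and transformers; the local adapter path is a hypothetical stand-in for wherever this repo's files are downloaded:

```python
# Minimal sketch: attach this LoRA adapter to the Vicuna base model.
# "./vicuna-sentiment-adapter" is a hypothetical local path to this repo's files.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "lmsys/vicuna-7b-v1.5",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")

# adapter_config.json tells PEFT how to rebuild the LoRA modules
# (r=8, lora_alpha=16, dropout 0.05 on q_proj/v_proj) before loading weights.
model = PeftModel.from_pretrained(base, "./vicuna-sentiment-adapter")
model.eval()
```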
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+ size 443
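
This is a Git LFS pointer, not the weights themselves: the repository stores only the blob's SHA-256 (`oid`) and byte size, and `git lfs pull` fetches the real content. A small sketch, assuming the blob has already been pulled locally, that checks a file against its pointer metadata:

```python
# Sketch: verify a pulled Git LFS object against the pointer's oid and size.
import hashlib
from pathlib import Path

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    data = Path(path).read_bytes()
    return (len(data) == expected_size
            and hashlib.sha256(data).hexdigest() == expected_oid)

print(verify_lfs_object(
    "adapter_model.bin",
    "e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406",
    443,
))
```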
checkpoint-200/README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32
+ ### Framework versions
+
+
+ - PEFT 0.5.0
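
These flags correspond to transformers' BitsAndBytesConfig. A sketch of the equivalent 8-bit setup; the bnb_4bit_* values in the list above are inactive defaults here, since load_in_4bit is False:

```python
# Sketch: the 8-bit bitsandbytes configuration listed in the README above.
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "lmsys/vicuna-7b-v1.5",
    quantization_config=bnb_config,
    device_map="auto",
)
```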
checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "lmsys/vicuna-7b-v1.5",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
checkpoint-200/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+ size 443
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5586cf3c40d1c5285c07dbfeafa143aeb9b9dc9763b3d64785b931bb6dc6860b
+ size 33661637
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fbdfe696de057d5f11e62de2da1f77c6dbbf699e999d2726555719b7524f23c
+ size 14575
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff1d2d66aaa92aa201414fe4901c3648a1b203ae833acca5bb9f505ee9af884c
+ size 627
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,171 @@
+ {
+   "best_metric": 1.2936323881149292,
+   "best_model_checkpoint": " vicuna-sentiment-fintuned/checkpoint-200",
+   "epoch": 1.032258064516129,
+   "eval_steps": 50,
+   "global_step": 200,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.05,
+       "learning_rate": 2.9999999999999997e-05,
+       "loss": 2.9781,
+       "step": 10
+     },
+     {
+       "epoch": 0.1,
+       "learning_rate": 5.9999999999999995e-05,
+       "loss": 2.8488,
+       "step": 20
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 2.5361,
+       "step": 30
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 0.00011999999999999999,
+       "loss": 2.0695,
+       "step": 40
+     },
+     {
+       "epoch": 0.26,
+       "learning_rate": 0.00015,
+       "loss": 1.6545,
+       "step": 50
+     },
+     {
+       "epoch": 0.26,
+       "eval_loss": 1.5430564880371094,
+       "eval_runtime": 40.717,
+       "eval_samples_per_second": 4.912,
+       "eval_steps_per_second": 0.614,
+       "step": 50
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.4716,
+       "step": 60
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.4078,
+       "step": 70
+     },
+     {
+       "epoch": 0.41,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.3884,
+       "step": 80
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 0.00027,
+       "loss": 1.4108,
+       "step": 90
+     },
+     {
+       "epoch": 0.52,
+       "learning_rate": 0.0003,
+       "loss": 1.3022,
+       "step": 100
+     },
+     {
+       "epoch": 0.52,
+       "eval_loss": 1.3164671659469604,
+       "eval_runtime": 40.8127,
+       "eval_samples_per_second": 4.9,
+       "eval_steps_per_second": 0.613,
+       "step": 100
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 0.000285,
+       "loss": 1.2699,
+       "step": 110
+     },
+     {
+       "epoch": 0.62,
+       "learning_rate": 0.00027,
+       "loss": 1.2852,
+       "step": 120
+     },
+     {
+       "epoch": 0.67,
+       "learning_rate": 0.00025499999999999996,
+       "loss": 1.278,
+       "step": 130
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.2947,
+       "step": 140
+     },
+     {
+       "epoch": 0.77,
+       "learning_rate": 0.000225,
+       "loss": 1.2836,
+       "step": 150
+     },
+     {
+       "epoch": 0.77,
+       "eval_loss": 1.2997195720672607,
+       "eval_runtime": 40.7239,
+       "eval_samples_per_second": 4.911,
+       "eval_steps_per_second": 0.614,
+       "step": 150
+     },
+     {
+       "epoch": 0.83,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.2874,
+       "step": 160
+     },
+     {
+       "epoch": 0.88,
+       "learning_rate": 0.000195,
+       "loss": 1.3118,
+       "step": 170
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.2895,
+       "step": 180
+     },
+     {
+       "epoch": 0.98,
+       "learning_rate": 0.000165,
+       "loss": 1.2736,
+       "step": 190
+     },
+     {
+       "epoch": 1.03,
+       "learning_rate": 0.00015,
+       "loss": 1.2634,
+       "step": 200
+     },
+     {
+       "epoch": 1.03,
+       "eval_loss": 1.2936323881149292,
+       "eval_runtime": 40.7736,
+       "eval_samples_per_second": 4.905,
+       "eval_steps_per_second": 0.613,
+       "step": 200
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 300,
+   "num_train_epochs": 2,
+   "save_steps": 50,
+   "total_flos": 2.1403198486904832e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
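
The log implies the schedule: linear warmup to a peak learning rate of 3e-4 over the first 100 steps, then linear decay toward zero at max_steps=300, with logging every 10 steps and evaluation/saving every 50. A hedged reconstruction of the corresponding TrainingArguments; output_dir is read off best_model_checkpoint, while batch size and anything else not recorded in trainer_state.json are assumptions:

```python
# Sketch: TrainingArguments consistent with the trainer_state.json above.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="vicuna-sentiment-fintuned",  # name as it appears in best_model_checkpoint
    max_steps=300,
    num_train_epochs=2,
    learning_rate=3e-4,              # peak of 0.0003 reached at step 100
    warmup_steps=100,                # linear ramp visible in steps 10..100
    lr_scheduler_type="linear",      # decays to 0.0 at step 300
    logging_steps=10,
    eval_steps=50,
    save_steps=50,
    evaluation_strategy="steps",
    save_strategy="steps",
    load_best_model_at_end=True,     # best_metric/best_model_checkpoint are tracked
    per_device_train_batch_size=4,   # assumption; not recorded in the state file
)
```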
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89ed21dfdc4062abd1a8a7c960722d6fddba4b5aa46972581d57b2fd0aae7f23
+ size 4091
checkpoint-250/README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32
+ ### Framework versions
+
+
+ - PEFT 0.5.0
checkpoint-250/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "lmsys/vicuna-7b-v1.5",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
checkpoint-250/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+ size 443
checkpoint-250/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:81636914f9e3ece6293d2626edae80b245de5e3095513f3c3dab625cdb540f8e
+ size 33661637
checkpoint-250/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:129e3555b63fc130098c791d380941bd58bfca28dfa7d80283359b2699b4e411
+ size 14575
checkpoint-250/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3c5e60ae2c5fa47c1e0094605fa82d39944033b90dea5e928f80a3f8d09feaf
+ size 627
checkpoint-250/trainer_state.json ADDED
@@ -0,0 +1,209 @@
+ {
+   "best_metric": 1.2897050380706787,
+   "best_model_checkpoint": " vicuna-sentiment-fintuned/checkpoint-250",
+   "epoch": 1.2903225806451613,
+   "eval_steps": 50,
+   "global_step": 250,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.05,
+       "learning_rate": 2.9999999999999997e-05,
+       "loss": 2.9781,
+       "step": 10
+     },
+     {
+       "epoch": 0.1,
+       "learning_rate": 5.9999999999999995e-05,
+       "loss": 2.8488,
+       "step": 20
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 2.5361,
+       "step": 30
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 0.00011999999999999999,
+       "loss": 2.0695,
+       "step": 40
+     },
+     {
+       "epoch": 0.26,
+       "learning_rate": 0.00015,
+       "loss": 1.6545,
+       "step": 50
+     },
+     {
+       "epoch": 0.26,
+       "eval_loss": 1.5430564880371094,
+       "eval_runtime": 40.717,
+       "eval_samples_per_second": 4.912,
+       "eval_steps_per_second": 0.614,
+       "step": 50
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.4716,
+       "step": 60
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.4078,
+       "step": 70
+     },
+     {
+       "epoch": 0.41,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.3884,
+       "step": 80
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 0.00027,
+       "loss": 1.4108,
+       "step": 90
+     },
+     {
+       "epoch": 0.52,
+       "learning_rate": 0.0003,
+       "loss": 1.3022,
+       "step": 100
+     },
+     {
+       "epoch": 0.52,
+       "eval_loss": 1.3164671659469604,
+       "eval_runtime": 40.8127,
+       "eval_samples_per_second": 4.9,
+       "eval_steps_per_second": 0.613,
+       "step": 100
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 0.000285,
+       "loss": 1.2699,
+       "step": 110
+     },
+     {
+       "epoch": 0.62,
+       "learning_rate": 0.00027,
+       "loss": 1.2852,
+       "step": 120
+     },
+     {
+       "epoch": 0.67,
+       "learning_rate": 0.00025499999999999996,
+       "loss": 1.278,
+       "step": 130
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.2947,
+       "step": 140
+     },
+     {
+       "epoch": 0.77,
+       "learning_rate": 0.000225,
+       "loss": 1.2836,
+       "step": 150
+     },
+     {
+       "epoch": 0.77,
+       "eval_loss": 1.2997195720672607,
+       "eval_runtime": 40.7239,
+       "eval_samples_per_second": 4.911,
+       "eval_steps_per_second": 0.614,
+       "step": 150
+     },
+     {
+       "epoch": 0.83,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.2874,
+       "step": 160
+     },
+     {
+       "epoch": 0.88,
+       "learning_rate": 0.000195,
+       "loss": 1.3118,
+       "step": 170
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.2895,
+       "step": 180
+     },
+     {
+       "epoch": 0.98,
+       "learning_rate": 0.000165,
+       "loss": 1.2736,
+       "step": 190
+     },
+     {
+       "epoch": 1.03,
+       "learning_rate": 0.00015,
+       "loss": 1.2634,
+       "step": 200
+     },
+     {
+       "epoch": 1.03,
+       "eval_loss": 1.2936323881149292,
+       "eval_runtime": 40.7736,
+       "eval_samples_per_second": 4.905,
+       "eval_steps_per_second": 0.613,
+       "step": 200
+     },
+     {
+       "epoch": 1.08,
+       "learning_rate": 0.000135,
+       "loss": 1.2726,
+       "step": 210
+     },
+     {
+       "epoch": 1.14,
+       "learning_rate": 0.00011999999999999999,
+       "loss": 1.2617,
+       "step": 220
+     },
+     {
+       "epoch": 1.19,
+       "learning_rate": 0.00010499999999999999,
+       "loss": 1.2632,
+       "step": 230
+     },
+     {
+       "epoch": 1.24,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 1.2847,
+       "step": 240
+     },
+     {
+       "epoch": 1.29,
+       "learning_rate": 7.5e-05,
+       "loss": 1.2622,
+       "step": 250
+     },
+     {
+       "epoch": 1.29,
+       "eval_loss": 1.2897050380706787,
+       "eval_runtime": 40.6649,
+       "eval_samples_per_second": 4.918,
+       "eval_steps_per_second": 0.615,
+       "step": 250
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 300,
+   "num_train_epochs": 2,
+   "save_steps": 50,
+   "total_flos": 2.6747817642983424e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-250/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89ed21dfdc4062abd1a8a7c960722d6fddba4b5aa46972581d57b2fd0aae7f23
+ size 4091
checkpoint-300/README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32
+ ### Framework versions
+
+
+ - PEFT 0.5.0
checkpoint-300/adapter_config.json ADDED
@@ -0,0 +1,21 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "lmsys/vicuna-7b-v1.5",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
checkpoint-300/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5e1621f48d9ad8feb1d6d31050275f0aafd080c5c07153301fe2f48411f4406
+ size 443
checkpoint-300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c2e22affd7b6a4bf1a293d2806a4ad0c36d87ab13d707475edb448b8ceb8312
+ size 33661637
checkpoint-300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:31bcc8661e913f6facfebf28c63663cffadb54a6426f48765602c7005eed2fad
+ size 14575
checkpoint-300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:503d141bd03d55a69c94b428e328d3b060b988ace448865bb4b7d17216ac0c68
+ size 627
checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,247 @@
+ {
+   "best_metric": 1.2886592149734497,
+   "best_model_checkpoint": " vicuna-sentiment-fintuned/checkpoint-300",
+   "epoch": 1.5483870967741935,
+   "eval_steps": 50,
+   "global_step": 300,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.05,
+       "learning_rate": 2.9999999999999997e-05,
+       "loss": 2.9781,
+       "step": 10
+     },
+     {
+       "epoch": 0.1,
+       "learning_rate": 5.9999999999999995e-05,
+       "loss": 2.8488,
+       "step": 20
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 2.5361,
+       "step": 30
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 0.00011999999999999999,
+       "loss": 2.0695,
+       "step": 40
+     },
+     {
+       "epoch": 0.26,
+       "learning_rate": 0.00015,
+       "loss": 1.6545,
+       "step": 50
+     },
+     {
+       "epoch": 0.26,
+       "eval_loss": 1.5430564880371094,
+       "eval_runtime": 40.717,
+       "eval_samples_per_second": 4.912,
+       "eval_steps_per_second": 0.614,
+       "step": 50
+     },
+     {
+       "epoch": 0.31,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.4716,
+       "step": 60
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.4078,
+       "step": 70
+     },
+     {
+       "epoch": 0.41,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.3884,
+       "step": 80
+     },
+     {
+       "epoch": 0.46,
+       "learning_rate": 0.00027,
+       "loss": 1.4108,
+       "step": 90
+     },
+     {
+       "epoch": 0.52,
+       "learning_rate": 0.0003,
+       "loss": 1.3022,
+       "step": 100
+     },
+     {
+       "epoch": 0.52,
+       "eval_loss": 1.3164671659469604,
+       "eval_runtime": 40.8127,
+       "eval_samples_per_second": 4.9,
+       "eval_steps_per_second": 0.613,
+       "step": 100
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 0.000285,
+       "loss": 1.2699,
+       "step": 110
+     },
+     {
+       "epoch": 0.62,
+       "learning_rate": 0.00027,
+       "loss": 1.2852,
+       "step": 120
+     },
+     {
+       "epoch": 0.67,
+       "learning_rate": 0.00025499999999999996,
+       "loss": 1.278,
+       "step": 130
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 0.00023999999999999998,
+       "loss": 1.2947,
+       "step": 140
+     },
+     {
+       "epoch": 0.77,
+       "learning_rate": 0.000225,
+       "loss": 1.2836,
+       "step": 150
+     },
+     {
+       "epoch": 0.77,
+       "eval_loss": 1.2997195720672607,
+       "eval_runtime": 40.7239,
+       "eval_samples_per_second": 4.911,
+       "eval_steps_per_second": 0.614,
+       "step": 150
+     },
+     {
+       "epoch": 0.83,
+       "learning_rate": 0.00020999999999999998,
+       "loss": 1.2874,
+       "step": 160
+     },
+     {
+       "epoch": 0.88,
+       "learning_rate": 0.000195,
+       "loss": 1.3118,
+       "step": 170
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 0.00017999999999999998,
+       "loss": 1.2895,
+       "step": 180
+     },
+     {
+       "epoch": 0.98,
+       "learning_rate": 0.000165,
+       "loss": 1.2736,
+       "step": 190
+     },
+     {
+       "epoch": 1.03,
+       "learning_rate": 0.00015,
+       "loss": 1.2634,
+       "step": 200
+     },
+     {
+       "epoch": 1.03,
+       "eval_loss": 1.2936323881149292,
+       "eval_runtime": 40.7736,
+       "eval_samples_per_second": 4.905,
+       "eval_steps_per_second": 0.613,
+       "step": 200
+     },
+     {
+       "epoch": 1.08,
+       "learning_rate": 0.000135,
+       "loss": 1.2726,
+       "step": 210
+     },
+     {
+       "epoch": 1.14,
+       "learning_rate": 0.00011999999999999999,
+       "loss": 1.2617,
+       "step": 220
+     },
+     {
+       "epoch": 1.19,
+       "learning_rate": 0.00010499999999999999,
+       "loss": 1.2632,
+       "step": 230
+     },
+     {
+       "epoch": 1.24,
+       "learning_rate": 8.999999999999999e-05,
+       "loss": 1.2847,
+       "step": 240
+     },
+     {
+       "epoch": 1.29,
+       "learning_rate": 7.5e-05,
+       "loss": 1.2622,
+       "step": 250
+     },
+     {
+       "epoch": 1.29,
+       "eval_loss": 1.2897050380706787,
+       "eval_runtime": 40.6649,
+       "eval_samples_per_second": 4.918,
+       "eval_steps_per_second": 0.615,
+       "step": 250
+     },
+     {
+       "epoch": 1.34,
+       "learning_rate": 5.9999999999999995e-05,
+       "loss": 1.2719,
+       "step": 260
+     },
+     {
+       "epoch": 1.39,
+       "learning_rate": 4.4999999999999996e-05,
+       "loss": 1.2564,
+       "step": 270
+     },
+     {
+       "epoch": 1.45,
+       "learning_rate": 2.9999999999999997e-05,
+       "loss": 1.28,
+       "step": 280
+     },
+     {
+       "epoch": 1.5,
+       "learning_rate": 1.4999999999999999e-05,
+       "loss": 1.2821,
+       "step": 290
+     },
+     {
+       "epoch": 1.55,
+       "learning_rate": 0.0,
+       "loss": 1.2826,
+       "step": 300
+     },
+     {
+       "epoch": 1.55,
+       "eval_loss": 1.2886592149734497,
+       "eval_runtime": 40.0411,
+       "eval_samples_per_second": 4.995,
+       "eval_steps_per_second": 0.624,
+       "step": 300
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 300,
+   "num_train_epochs": 2,
+   "save_steps": 50,
+   "total_flos": 3.214562529932083e+17,
+   "trial_name": null,
+   "trial_params": null
+ }
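
The eval loss flattens out near 1.29 from step 200 onward (best_metric 1.2886592149734497 at checkpoint-300). A short sketch that pulls the eval curve out of a checkpoint's trainer_state.json, assuming a local clone of the repo:

```python
# Sketch: extract the eval-loss curve from a checkpoint's trainer_state.json.
import json

with open("checkpoint-300/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']:4d}  eval_loss {entry['eval_loss']:.4f}")

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
```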
checkpoint-300/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89ed21dfdc4062abd1a8a7c960722d6fddba4b5aa46972581d57b2fd0aae7f23
+ size 4091
runs/Oct30_16-50-39_dlvm-backup/events.out.tfevents.1698664868.dlvm-backup.46784.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a71201efa9949b22d083807baedc577d8016d748bacdc8572eff2cd414c822c3
+ size 11227
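
The runs/ directory carries the TensorBoard event log for this training run. One way to read it without launching a TensorBoard server is the event-accumulator API; the scalar tag names below are assumptions and should be checked against Tags():

```python
# Sketch: read the TensorBoard event file shipped under runs/.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

ea = EventAccumulator("runs/Oct30_16-50-39_dlvm-backup")
ea.Reload()
print(ea.Tags()["scalars"])             # list the actual scalar tags first

for event in ea.Scalars("train/loss"):  # "train/loss" is an assumed tag name
    print(event.step, event.value)
```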