yyx123 committed on
Commit
295b14a
1 Parent(s): c451baa

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,11 @@
2
  license: other
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - sft
9
  - generated_from_trainer
10
  datasets:
11
- - ruozhiba
12
  base_model: 01-ai/Yi-6B
13
  model-index:
14
  - name: Yi-6B-ruozhiba
@@ -20,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # Yi-6B-ruozhiba
22
 
23
- This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the ruozhiba dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 2.1159
26
 
27
  ## Model description
28
 
 
2
  license: other
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
  datasets:
9
+ - generator
10
  base_model: 01-ai/Yi-6B
11
  model-index:
12
  - name: Yi-6B-ruozhiba
 
18
 
19
  # Yi-6B-ruozhiba
20
 
21
+ This model is a fine-tuned version of [01-ai/Yi-6B](https://huggingface.co/01-ai/Yi-6B) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 2.1245
24
 
25
  ## Model description
26
 
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "k_proj",
23
- "down_proj",
24
- "q_proj",
25
  "v_proj",
 
26
  "gate_proj",
 
27
  "up_proj",
28
- "o_proj"
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
 
22
  "v_proj",
23
+ "k_proj",
24
  "gate_proj",
25
+ "q_proj",
26
  "up_proj",
27
+ "o_proj",
28
+ "down_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a20c018b80060a73f60e5c891260ae9999ecfa3012ab2bd2ca87fba27cf6335b
3
  size 72673912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbefd20df8ebf4b40b04ae7381c360f37afe4056467a892e254c4de2d50638b7
3
  size 72673912
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.98,
3
- "eval_loss": 2.11590838432312,
4
- "eval_runtime": 2.6923,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 3.343,
7
- "eval_steps_per_second": 3.343,
8
  "train_loss": 0.0,
9
- "train_runtime": 9.2483,
10
  "train_samples": 217,
11
- "train_samples_per_second": 43.251,
12
- "train_steps_per_second": 43.251
13
  }
 
1
  {
2
+ "epoch": 0.56,
3
+ "eval_loss": 2.124497175216675,
4
+ "eval_runtime": 2.5799,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.488,
7
+ "eval_steps_per_second": 3.488,
8
  "train_loss": 0.0,
9
+ "train_runtime": 9.5028,
10
  "train_samples": 217,
11
+ "train_samples_per_second": 42.093,
12
+ "train_steps_per_second": 42.093
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.98,
3
- "eval_loss": 2.11590838432312,
4
- "eval_runtime": 2.6923,
5
  "eval_samples": 23,
6
- "eval_samples_per_second": 3.343,
7
- "eval_steps_per_second": 3.343
8
  }
 
1
  {
2
+ "epoch": 0.56,
3
+ "eval_loss": 2.124497175216675,
4
+ "eval_runtime": 2.5799,
5
  "eval_samples": 23,
6
+ "eval_samples_per_second": 3.488,
7
+ "eval_steps_per_second": 3.488
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.98,
3
  "train_loss": 0.0,
4
- "train_runtime": 9.2483,
5
  "train_samples": 217,
6
- "train_samples_per_second": 43.251,
7
- "train_steps_per_second": 43.251
8
  }
 
1
  {
2
+ "epoch": 0.56,
3
  "train_loss": 0.0,
4
+ "train_runtime": 9.5028,
5
  "train_samples": 217,
6
+ "train_samples_per_second": 42.093,
7
+ "train_steps_per_second": 42.093
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.975124378109452,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -363,13 +363,49 @@
363
  "step": 2000
364
  },
365
  {
366
- "epoch": 4.98,
367
- "step": 2000,
368
- "total_flos": 1.086187182882816e+16,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  "train_loss": 0.0,
370
- "train_runtime": 9.2483,
371
- "train_samples_per_second": 43.251,
372
- "train_steps_per_second": 43.251
373
  }
374
  ],
375
  "logging_steps": 40,
@@ -377,7 +413,7 @@
377
  "num_input_tokens_seen": 0,
378
  "num_train_epochs": 5,
379
  "save_steps": 20,
380
- "total_flos": 1.086187182882816e+16,
381
  "train_batch_size": 1,
382
  "trial_name": null,
383
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5610724925521351,
5
  "eval_steps": 500,
6
+ "global_step": 2260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
363
  "step": 2000
364
  },
365
  {
366
+ "epoch": 0.51,
367
+ "learning_rate": 4.999974616484595e-05,
368
+ "loss": 3.4729,
369
+ "step": 2040
370
+ },
371
+ {
372
+ "epoch": 0.52,
373
+ "learning_rate": 4.99983643554066e-05,
374
+ "loss": 3.8916,
375
+ "step": 2080
376
+ },
377
+ {
378
+ "epoch": 0.53,
379
+ "learning_rate": 4.999578104083307e-05,
380
+ "loss": 3.7239,
381
+ "step": 2120
382
+ },
383
+ {
384
+ "epoch": 0.54,
385
+ "learning_rate": 4.9991996345288116e-05,
386
+ "loss": 3.8084,
387
+ "step": 2160
388
+ },
389
+ {
390
+ "epoch": 0.55,
391
+ "learning_rate": 4.9987010450676885e-05,
392
+ "loss": 3.2992,
393
+ "step": 2200
394
+ },
395
+ {
396
+ "epoch": 0.56,
397
+ "learning_rate": 4.998082359663817e-05,
398
+ "loss": 3.809,
399
+ "step": 2240
400
+ },
401
+ {
402
+ "epoch": 0.56,
403
+ "step": 2260,
404
+ "total_flos": 1.095290092486656e+16,
405
  "train_loss": 0.0,
406
+ "train_runtime": 9.5028,
407
+ "train_samples_per_second": 42.093,
408
+ "train_steps_per_second": 42.093
409
  }
410
  ],
411
  "logging_steps": 40,
 
413
  "num_input_tokens_seen": 0,
414
  "num_train_epochs": 5,
415
  "save_steps": 20,
416
+ "total_flos": 1.095290092486656e+16,
417
  "train_batch_size": 1,
418
  "trial_name": null,
419
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bab7d1f227e080ac970a4415ade05a0138aa5b1ed1bcafc07328dd2dbfc9f1b
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c21c41a9f50cd22260029a6006a8054ff466ae95127f33436be8d52a3a74a7dc
3
  size 4728