chansung committed
Commit 4bc4ae5
1 parent: 522ed07

Model save

README.md CHANGED
@@ -2,13 +2,12 @@
 license: gemma
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
-- llama-duo/synth_summarize_dataset_dedup
+- generator
 model-index:
 - name: gemma2b-summarize-gpt4o-256k
   results: []
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # gemma2b-summarize-gpt4o-256k
 
-This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the llama-duo/synth_summarize_dataset_dedup dataset.
+This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.4823
+- Loss: 2.5990
 
 ## Model description
 
@@ -52,27 +51,22 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 15
+- num_epochs: 10
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-------:|:----:|:---------------:|
-| 1.1964 | 0.9974 | 292 | 2.4892 |
-| 1.0954 | 1.9983 | 585 | 2.4542 |
-| 1.0621 | 2.9991 | 878 | 2.4533 |
-| 1.0523 | 4.0 | 1171 | 2.4547 |
-| 1.0188 | 4.9974 | 1463 | 2.4524 |
-| 1.0119 | 5.9983 | 1756 | 2.4544 |
-| 1.0028 | 6.9991 | 2049 | 2.4655 |
-| 0.9914 | 8.0 | 2342 | 2.4685 |
-| 0.9813 | 8.9974 | 2634 | 2.4743 |
-| 0.9756 | 9.9983 | 2927 | 2.4803 |
-| 0.9815 | 10.9991 | 3220 | 2.4823 |
-| 0.9657 | 12.0 | 3513 | 2.4844 |
-| 0.9694 | 12.9974 | 3805 | 2.4820 |
-| 0.968 | 13.9983 | 4098 | 2.4824 |
-| 0.9728 | 14.9616 | 4380 | 2.4823 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 1.1174 | 0.9974 | 292 | 2.4482 |
+| 1.0252 | 1.9983 | 585 | 2.4514 |
+| 0.988 | 2.9991 | 878 | 2.4683 |
+| 0.9741 | 4.0 | 1171 | 2.5000 |
+| 0.9342 | 4.9974 | 1463 | 2.5203 |
+| 0.9201 | 5.9983 | 1756 | 2.5519 |
+| 0.9054 | 6.9991 | 2049 | 2.5763 |
+| 0.8902 | 8.0 | 2342 | 2.5922 |
+| 0.8818 | 8.9974 | 2634 | 2.5982 |
+| 0.8852 | 9.9744 | 2920 | 2.5990 |
 
 
 ### Framework versions
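For quick reference, a minimal loading sketch for the updated adapter follows. The repo id `llama-duo/gemma2b-summarize-gpt4o-256k` is an assumption inferred from the model name and the `llama-duo` org in the removed dataset id; the base model `google/gemma-2b` is taken from the card.

```python
# Minimal loading sketch, not part of this commit. The adapter repo id below
# is an assumption inferred from the model name; the base model is from the card.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_id = "llama-duo/gemma2b-summarize-gpt4o-256k"  # assumed repo id

# Resolves the adapter config, downloads google/gemma-2b, and attaches the
# PEFT weights on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(adapter_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")

inputs = tokenizer("Summarize the following text:\n...", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```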
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d10cc57081b390ee42fa0f2d9269066e3befb40bdf5d0be3a9bf73a7aa27954
+oid sha256:fc9128ef71c848c8bb29e18202aebd4784ce7b6917abc2713f18777dd81b6944
 size 78480320
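The git-lfs pointer above encodes the SHA-256 (`oid`) and byte size of the new adapter weights, so a download can be verified locally. A minimal check, assuming the file has been fetched to the working directory:

```python
# Verify a downloaded LFS file against the pointer shown in the diff:
# "oid sha256:<hex>" is the SHA-256 of the file contents, "size" its byte count.
import hashlib
from pathlib import Path

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    data = Path(path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

print(verify_lfs_pointer(
    "adapter_model.safetensors",  # assumes the file was downloaded here
    "fc9128ef71c848c8bb29e18202aebd4784ce7b6917abc2713f18777dd81b6944",
    78480320,
))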
all_results.json CHANGED
@@ -1,14 +1,9 @@
 {
-    "epoch": 14.961571306575577,
-    "eval_loss": 2.482285976409912,
-    "eval_runtime": 0.5289,
-    "eval_samples": 25,
-    "eval_samples_per_second": 18.909,
-    "eval_steps_per_second": 1.891,
-    "total_flos": 5.145390446595277e+18,
-    "train_loss": 1.0581742508226333,
-    "train_runtime": 45587.0719,
+    "epoch": 9.974380871050384,
+    "total_flos": 3.4809256003093135e+18,
+    "train_loss": 0.9919237802289936,
+    "train_runtime": 34991.5416,
     "train_samples": 258442,
-    "train_samples_per_second": 9.241,
-    "train_steps_per_second": 0.096
+    "train_samples_per_second": 8.027,
+    "train_steps_per_second": 0.083
 }
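The new throughput fields are internally consistent: the final step count (2920, from the updated training-results table in the README) divided by `train_runtime` reproduces the reported `train_steps_per_second`. A quick check:

```python
# Cross-check the new all_results.json throughput numbers against the
# training table: 2920 final steps over the reported runtime.
train_runtime = 34991.5416        # seconds, from the diff above
final_step = 2920                 # last step in the new training-results table

steps_per_second = final_step / train_runtime
print(round(steps_per_second, 3))  # -> 0.083, matching train_steps_per_second

# The samples/steps ratio, 8.027 / 0.083 ~= 96, is consistent with an
# effective batch size of roughly 96 (an inference; not visible in the hunks).
```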
runs/Jun10_23-03-05_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/events.out.tfevents.1718028200.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.11783.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e61a1a577fa34f349583f929d5997632eaeac418be30268d43f7f424b403efb1
-size 130527
+oid sha256:62c21e007dcbaab4bf7e2e3db6076a577b47dacb3dfd21b8aa785af94c7555a0
+size 131996
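The scalars logged in the updated tfevents file can be inspected without launching TensorBoard by replaying it with tensorboard's `EventAccumulator`. A sketch, with the scalar tag names assumed (they depend on how the trainer logged; print `acc.Tags()` to see what is actually there):

```python
# Replay the updated tfevents file without launching TensorBoard.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator(
    "runs/Jun10_23-03-05_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/"
    "events.out.tfevents.1718028200.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.11783.0"
)
acc.Reload()

print(acc.Tags()["scalars"])           # lists the logged scalar tags
for event in acc.Scalars("eval/loss"):  # assumed tag name
    print(event.step, event.value)
```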
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 14.961571306575577,
-    "total_flos": 5.145390446595277e+18,
-    "train_loss": 1.0581742508226333,
-    "train_runtime": 45587.0719,
+    "epoch": 9.974380871050384,
+    "total_flos": 3.4809256003093135e+18,
+    "train_loss": 0.9919237802289936,
+    "train_runtime": 34991.5416,
     "train_samples": 258442,
-    "train_samples_per_second": 9.241,
-    "train_steps_per_second": 0.096
+    "train_samples_per_second": 8.027,
+    "train_steps_per_second": 0.083
 }
trainer_state.json CHANGED
The diff for this file is too large to render.
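Although trainer_state.json is too large to diff, the file itself is plain JSON: the Hugging Face `Trainer` keeps the full metric history under `log_history`, so the loss curves behind the README table can be recovered locally. A minimal sketch, assuming a standard `Trainer` state layout:

```python
# Recover the eval-loss curve from trainer_state.json (diff too large above).
import json

with open("trainer_state.json") as f:  # assumes a local checkout of the repo
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_loss" in entry:           # evaluation entries carry eval_* keys
        print(entry["epoch"], entry["step"], entry["eval_loss"])
```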