Tippawan committed (verified) · commit 83904e6 · 1 parent: b46ea2f

End of training

Files changed (2):
  1. README.md +9 -9
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
  - axolotl
  - generated_from_trainer
  model-index:
- - name: proof-reading-SeaLLM3-7B-Chat-3090-v2
+ - name: proof-reading-SeaLLM3-7B-Chat-3090-v3
  results: []
  ---

@@ -26,7 +26,7 @@ load_in_4bit: true
  strict: false

  datasets:
- - path: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v2
+ - path: Tippawan/pr-v3
  type: sharegpt
  conversation: chatml
  field_messages: messages
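For context, `type: sharegpt` with `field_messages: messages` suggests each row of the dataset carries its conversation turns under a `messages` key in the usual ShareGPT shape. A hypothetical row is sketched below; the key names follow the common ShareGPT convention and the text is placeholder, since the actual Tippawan/pr-v3 data is private and not shown in this commit:

```python
# Hypothetical ShareGPT-style row for axolotl's sharegpt loader with
# field_messages: messages. The "from"/"value" keys are the common
# ShareGPT convention; the real Tippawan/pr-v3 rows may differ.
example_row = {
    "messages": [
        {"from": "human", "value": "Please proofread this sentence."},
        {"from": "gpt", "value": "Here is the corrected sentence."},
    ]
}
```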
@@ -41,7 +41,7 @@ eval_sample_packing: false
  pad_to_sequence_len: false

  push_to_hub: true
- hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v2 # Replace with your Hugging Face repo ID
+ hub_model_id: Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v3 # Replace with your Hugging Face repo ID
  use_auth_token: true # Ensure you have set your Hugging Face API token in the environment
  hub_private_repo: true # Set to true if you want the repository to be private
  hub_strategy: all_checkpoints
@@ -56,14 +56,14 @@ lora_dropout: 0.05
  lora_target_linear: true
  lora_fan_in_fan_out:

- wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v2
+ wandb_project: proof-reading-SeaLLM3-7B-Chat-3090-v3
  wandb_entity:
  wandb_watch:
  wandb_name:
  wandb_log_model:

  gradient_accumulation_steps: 4
- micro_batch_size: 4
+ micro_batch_size: 2
  num_epochs: 1 #editted 3
  optimizer: adamw_torch
  lr_scheduler: cosine
@@ -96,7 +96,7 @@ special_tokens:

  </details><br>

- # proof-reading-SeaLLM3-7B-Chat-3090-v2
+ # proof-reading-SeaLLM3-7B-Chat-3090-v3

  This model is a fine-tuned version of [SeaLLMs/SeaLLM3-7B-Chat](https://huggingface.co/SeaLLMs/SeaLLM3-7B-Chat) on the None dataset.

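Since this run trains a LoRA adapter on top of SeaLLMs/SeaLLM3-7B-Chat and pushes it to a private repo, loading it downstream would look roughly like the minimal sketch below (assuming peft and transformers are installed and a Hugging Face token with access to the private adapter repo is configured in the environment):

```python
# Minimal loading sketch (assumptions: peft + transformers installed,
# HF token with access to the private adapter repo available).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM3-7B-Chat")
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM3-7B-Chat")
# Attach the LoRA adapter produced by this training run.
model = PeftModel.from_pretrained(base, "Tippawan/proof-reading-SeaLLM3-7B-Chat-3090-v3")
```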
@@ -118,11 +118,11 @@ More information needed

  The following hyperparameters were used during training:
  - learning_rate: 0.0002
- - train_batch_size: 4
- - eval_batch_size: 4
+ - train_batch_size: 2
+ - eval_batch_size: 2
  - seed: 42
  - gradient_accumulation_steps: 4
- - total_train_batch_size: 16
+ - total_train_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
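The batch-size changes above are internally consistent: the reported total_train_batch_size is micro_batch_size × gradient_accumulation_steps × number of devices, so halving micro_batch_size from 4 to 2 halves the effective batch from 16 to 8. A quick check (num_gpus = 1 is an assumption, based on the single-3090 naming of the run):

```python
# Effective batch size before and after this commit.
gradient_accumulation_steps = 4
num_gpus = 1  # assumption: one RTX 3090

old_total = 4 * gradient_accumulation_steps * num_gpus  # micro_batch_size was 4 -> 16
new_total = 2 * gradient_accumulation_steps * num_gpus  # micro_batch_size now 2 -> 8
assert (old_total, new_total) == (16, 8)
```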
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7d3e8ac4f51f2835203d41cba71d7bed76a4561b123311a9761500dc0507ea23
+ oid sha256:f765a256c9414c1bf99761e3dd85351874da0d6d3d502c6dd867c7e6242eb189
  size 161621802
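The adapter weights changed (new LFS oid) while the file size stayed identical, as expected when retraining an adapter of the same shape. A quick way to check a local download against the new pointer (a sketch; the local file path is assumed):

```python
# Verify a downloaded adapter_model.bin against the LFS pointer in this commit.
import hashlib

EXPECTED_OID = "f765a256c9414c1bf99761e3dd85351874da0d6d3d502c6dd867c7e6242eb189"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:            # path assumed
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest() == EXPECTED_OID)  # True for the file at this commit
```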