nblinh committed (verified)
Commit 46fa7eb · Parent(s): 8d7003e

End of training

README.md CHANGED
@@ -47,7 +47,7 @@ hub_model_id: nblinh/64c30a82-f90b-4b9f-a1a7-e574d63cd349
  hub_repo: null
  hub_strategy: end
  hub_token: null
- learning_rate: 0.0003
+ learning_rate: 0.0002
  load_in_4bit: true
  load_in_8bit: true
  local_rank: null
@@ -59,7 +59,7 @@ lora_model_dir: null
  lora_r: 16
  lora_target_linear: true
  lr_scheduler: cosine
- max_steps: 100
+ max_steps: 50
  micro_batch_size: 1
  mlflow_experiment_name: /tmp/ab6a4989cba219b1_train_data.json
  model_type: AutoModelForCausalLM
@@ -115,20 +115,20 @@ More information needed
  ### Training hyperparameters

  The following hyperparameters were used during training:
- - learning_rate: 0.0003
+ - learning_rate: 0.0002
  - train_batch_size: 1
  - eval_batch_size: 1
  - seed: 42
  - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
- - training_steps: 100
+ - training_steps: 50

  ### Training results

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 0.0 | 0.0109 | 100 | nan |
+ | 0.0 | 0.0054 | 50 | nan |


  ### Framework versions
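
For reference, the updated values trace out the following learning-rate schedule. This is a minimal sketch assuming the standard linear-warmup-plus-cosine-decay formula (as implemented by, e.g., transformers' get_cosine_schedule_with_warmup), not the trainer's exact code; PEAK_LR, WARMUP_STEPS, and TOTAL_STEPS are taken from the updated config.

```python
import math

PEAK_LR = 2e-4      # learning_rate after this commit
WARMUP_STEPS = 10   # lr_scheduler_warmup_steps
TOTAL_STEPS = 50    # max_steps / training_steps after this commit

def lr_at(step: int) -> float:
    """LR at a given step: linear warmup to PEAK_LR, then cosine decay to 0."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / max(1, WARMUP_STEPS)
    progress = (step - WARMUP_STEPS) / max(1, TOTAL_STEPS - WARMUP_STEPS)
    return PEAK_LR * 0.5 * (1.0 + math.cos(math.pi * progress))

if __name__ == "__main__":
    for s in (0, 10, 25, 50):
        print(f"step {s:3d}: lr = {lr_at(s):.6f}")
```

Halving max_steps halves the cosine horizon as well, so the decay to zero now completes at step 50 rather than step 100.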
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "down_proj",
  "up_proj",
- "k_proj",
- "o_proj",
  "v_proj",
+ "k_proj",
+ "down_proj",
+ "q_proj",
  "gate_proj",
- "q_proj"
+ "o_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e8d1813d936a3779118e88f791cfa0eb02454a704a672a14e2c410d648eeefec
+ oid sha256:346b1e886a671bb9d59642bc7f7551ae110289c164c52ffce625561cdf9f7d88
  size 34895178
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:17c187c8e9a3d3952404f3115250d09fb77d4a77e9bca5476420db0c2549e160
+ oid sha256:1ec38deb34bb935cc9e157433f37c453f4688a0fe65ead7042ec4a47939383fe
  size 34793120
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f0bba07a9510369a18b2943c8a30565d555c7d097b82138e44b6750ba586823b
+ oid sha256:adf3252c605e6f09ea1cb43fc1079555f814bf7bdee71d0f8ce2be040a251c78
  size 6776
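
These three files are stored as Git LFS pointers: only the oid changed in each, while the byte sizes are identical. The oid is the SHA-256 of the actual file contents, so a downloaded artifact can be checked against its pointer. A minimal sketch using only the standard library; the local path is a hypothetical placeholder for wherever the artifact was downloaded.

```python
import hashlib

def sha256_of(path: str) -> str:
    """SHA-256 of a file's contents, streamed in 1 MiB chunks; this is
    the value a Git LFS pointer records in its 'oid sha256:...' line."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the updated adapter_model.bin pointer above
expected = "346b1e886a671bb9d59642bc7f7551ae110289c164c52ffce625561cdf9f7d88"
print(sha256_of("adapter_model.bin") == expected)  # hypothetical local path
```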