Commit 8c90bb4 by Atipico1 (parent: 2522240)

Model save

README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+base_model: Qwen/Qwen2-0.5B-Instruct
 library_name: peft
 license: apache-2.0
 tags:
@@ -16,7 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # output
 
-This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) on an unknown dataset.
+This model is a fine-tuned version of [Qwen/Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.0816
 
 ## Model description
 
@@ -35,20 +37,27 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 2e-05
-- train_batch_size: 2
+- learning_rate: 0.002
+- train_batch_size: 1
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 4
+- gradient_accumulation_steps: 8
 - total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - num_epochs: 1
 
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.2506        | 1.0   | 1250 | 1.0816          |
+
+
 ### Framework versions
 
 - PEFT 0.12.0
-- Transformers 4.44.0.dev0
+- Transformers 4.42.4
 - Pytorch 2.3.1+cu121
 - Datasets 2.20.0
 - Tokenizers 0.19.1
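For readers who want to reproduce a similar run, the updated hyperparameters in the README diff map roughly onto `transformers.TrainingArguments` as sketched below. This is a hedged reconstruction, not the training script from this commit: the output directory and anything not listed in the diff are assumptions.

```python
# Minimal sketch, assuming a single-GPU run; only values visible in the
# README diff are filled in, everything else is a placeholder or default.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="output",             # placeholder, echoes the model card title
    learning_rate=2e-3,              # learning_rate: 0.002
    per_device_train_batch_size=1,   # train_batch_size: 1
    per_device_eval_batch_size=8,    # eval_batch_size: 8
    gradient_accumulation_steps=8,   # 1 device x 1 x 8 = total_train_batch_size: 8
    num_train_epochs=1,
    lr_scheduler_type="cosine",
    seed=42,
)
```

The Adam betas (0.9, 0.999) and epsilon (1e-08) listed in the README are the `TrainingArguments` defaults, so they need no explicit arguments here.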
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "Qwen/Qwen2-7B-Instruct",
+  "base_model_name_or_path": "Qwen/Qwen2-0.5B-Instruct",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -11,19 +11,19 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 16,
-  "lora_dropout": 0.1,
+  "lora_dropout": 0.3,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "k_proj",
     "q_proj",
-    "o_proj"
+    "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1285584ced1cd1b12189936f47cb41f863ae49cb52d05232dda4ae5426ed7de7
-size 161510984
+oid sha256:df15b611e7e64a99c4de4ce6013fc7c74556a6da6463b4fa3dac25756b3e0c84
+size 8676008
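The adapter weights shrink from roughly 161 MB to about 8.7 MB, which is consistent with the much smaller base model and the lower LoRA rank (r=16 instead of 64). As an illustration only (not part of this commit), a saved adapter like this would typically be loaded onto its new base model as follows; the adapter path is a placeholder.

```python
# Illustrative loading snippet (not from the commit): attach the saved LoRA
# adapter to the new base model. "path/to/this-adapter" is a placeholder.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
model = PeftModel.from_pretrained(base, "path/to/this-adapter")
```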
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 4096,
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
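The truncation block above is the serialized form of the fast tokenizer's truncation settings. A minimal sketch of how a limit like this is typically enabled, assuming the standard `tokenizers` API rather than whatever script produced this commit:

```python
# Sketch only: enabling truncation at 256 tokens on the fast tokenizer's
# backend produces a "truncation" block like the one in this diff
# (direction "Right" and strategy "LongestFirst" are the defaults).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer.backend_tokenizer.enable_truncation(max_length=256)
tokenizer.backend_tokenizer.save("tokenizer.json")
```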
tokenizer_config.json CHANGED
@@ -35,7 +35,7 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
-  "model_max_length": 131072,
+  "model_max_length": 32768,
   "pad_token": "<|im_end|>",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6091982a7044ee74783c3b4e9a883f4c5599f28978dbf6099fb1e7213119dfd
-size 5368
+oid sha256:18dcabf791b4cb3f0ad098a7aa11d08ffae4dce95bc8aef016dff302eb884c49
+size 5304