Ka4on committed 26ade25 (1 parent: 764bd75)

mistral_radiology3.2

README.md CHANGED
@@ -14,6 +14,8 @@ should probably proofread and complete it, then remove this comment. -->
 # results
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.6218
 
 ## Model description
 
@@ -32,25 +34,54 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 1
-- eval_batch_size: 8
+- learning_rate: 5e-05
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: constant
-- lr_scheduler_warmup_ratio: 0.03
-- training_steps: 1000
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 2
 - mixed_precision_training: Native AMP
 
 ### Training results
 
+| Training Loss | Epoch | Step  | Validation Loss |
+|:-------------:|:-----:|:-----:|:---------------:|
+| 0.9667        | 0.07  | 500   | 0.8561          |
+| 0.8253        | 0.14  | 1000  | 0.7976          |
+| 0.7771        | 0.2   | 1500  | 0.7676          |
+| 0.7623        | 0.27  | 2000  | 0.7459          |
+| 0.7399        | 0.34  | 2500  | 0.7269          |
+| 0.7253        | 0.41  | 3000  | 0.7166          |
+| 0.7241        | 0.47  | 3500  | 0.7035          |
+| 0.7063        | 0.54  | 4000  | 0.6962          |
+| 0.6857        | 0.61  | 4500  | 0.6883          |
+| 0.6909        | 0.68  | 5000  | 0.6829          |
+| 0.6754        | 0.75  | 5500  | 0.6731          |
+| 0.6803        | 0.81  | 6000  | 0.6657          |
+| 0.6659        | 0.88  | 6500  | 0.6599          |
+| 0.6603        | 0.95  | 7000  | 0.6556          |
+| 0.6249        | 1.02  | 7500  | 0.6610          |
+| 0.53          | 1.09  | 8000  | 0.6583          |
+| 0.5246        | 1.15  | 8500  | 0.6544          |
+| 0.5204        | 1.22  | 9000  | 0.6515          |
+| 0.5135        | 1.29  | 9500  | 0.6498          |
+| 0.5165        | 1.36  | 10000 | 0.6433          |
+| 0.518         | 1.42  | 10500 | 0.6410          |
+| 0.5032        | 1.49  | 11000 | 0.6368          |
+| 0.5091        | 1.56  | 11500 | 0.6335          |
+| 0.5038        | 1.63  | 12000 | 0.6307          |
+| 0.4907        | 1.7   | 12500 | 0.6302          |
+| 0.5006        | 1.76  | 13000 | 0.6262          |
+| 0.4823        | 1.83  | 13500 | 0.6239          |
+| 0.4906        | 1.9   | 14000 | 0.6225          |
+| 0.4905        | 1.97  | 14500 | 0.6218          |
 
 
 ### Framework versions
 
-- Transformers 4.35.0.dev0
-- Pytorch 2.0.1+cu118
-- Datasets 2.14.5
-- Tokenizers 0.14.1
+- Transformers 4.36.0.dev0
+- Pytorch 2.1.0+cu118
+- Datasets 2.15.0
+- Tokenizers 0.15.0
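
For reference, the updated hyperparameters map onto the `transformers` Trainer API roughly as follows. This is a minimal sketch, not the actual training script: `output_dir` is a placeholder, and the 500-step evaluation cadence is inferred from the results table above.

```python
from transformers import TrainingArguments

# Sketch of the updated run configuration (values from the README above).
training_args = TrainingArguments(
    output_dir="results",            # placeholder, not from this commit
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    seed=42,
    lr_scheduler_type="linear",
    warmup_steps=500,
    num_train_epochs=2,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=500,                  # inferred from the table's 500-step cadence
)
```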
adapter_config.json CHANGED
@@ -8,19 +8,22 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "lora_alpha": 16,
+  "lora_alpha": 64,
   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 16,
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "o_proj",
+    "lm_head",
+    "v_proj",
     "gate_proj",
     "q_proj",
-    "v_proj"
+    "up_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
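
The new adapter config doubles the LoRA rank (r: 16 → 32), raises lora_alpha from 16 to 64, and adds up_proj, down_proj, and lm_head to the target modules, so the adapter now covers every attention and MLP projection. A minimal sketch of the equivalent `peft` configuration, assuming the standard `LoraConfig` API:

```python
from peft import LoraConfig

# Equivalent of the updated adapter_config.json (values from the diff above).
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # MLP projections
        "lm_head",
    ],
    task_type="CAUSAL_LM",
)
```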
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a909ff1155adc29c0b9f13bd5c133e0c4df7a8c7f61f0a7174dd44117ce05a6
+size 340225224
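
This ~340 MB safetensors file holds the LoRA adapter weights themselves, stored as a Git LFS pointer. A minimal loading sketch, assuming `peft` is installed; the repo id is a placeholder for wherever this adapter is hosted:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the frozen base model, then attach the fine-tuned LoRA adapter.
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "<user>/<this-repo>")  # placeholder id
```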
special_tokens_map.json CHANGED
@@ -1,11 +1,24 @@
 {
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
-  "bos_token": "<s>",
-  "eos_token": "</s>",
-  "pad_token": "</s>",
-  "unk_token": "<unk>"
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 1024,
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
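
The baked-in truncation limit drops from 1024 to 512 tokens, cutting inputs from the right. In `transformers` code this corresponds roughly to tokenizing with an explicit `max_length`, as sketched below (the input string is illustrative):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Equivalent of the truncation settings now stored in tokenizer.json:
# direction "Right", max_length 512.
encoding = tokenizer(
    "FINDINGS: ...",        # e.g. a long radiology report
    truncation=True,
    max_length=512,
)
```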
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": true,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -25,17 +27,15 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<unk>",
-    "<s>",
-    "</s>"
-  ],
+  "additional_special_tokens": [],
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "fast_tokenizer": true,
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "</s>",
+  "pad_token": "<unk>",
+  "padding_side": "right",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:726eec208ca45f8472a2a04c9dd82264bab3ba378e66e655924aa2bf40b886fb
-size 4027
+oid sha256:3f794911737b48fa849a15cc946a6c58f35a68f7ebf91035076b8cd3f4dad42d
+size 4600
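
`training_args.bin` is the pickled `TrainingArguments` object the Trainer saves alongside checkpoints; the size change just reflects the new arguments. A quick inspection sketch, which works under the PyTorch 2.1 listed above (where pickle loading is still the `torch.load` default):

```python
import torch

# transformers must be importable, since the pickle references its classes.
args = torch.load("training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.lr_scheduler_type)
```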