Ehraim committed on
Commit
e1752f1
·
1 Parent(s): 6db8132

End of training

Browse files
README.md CHANGED
@@ -1,5 +1,6 @@
1
  ---
2
- base_model: ybelkada/falcon-7b-sharded-bf16
 
3
  tags:
4
  - generated_from_trainer
5
  model-index:
@@ -12,9 +13,14 @@ should probably proofread and complete it, then remove this comment. -->
12
 
13
  # results
14
 
15
- This model is a fine-tuned version of [ybelkada/falcon-7b-sharded-bf16](https://huggingface.co/ybelkada/falcon-7b-sharded-bf16) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.7517
 
 
 
 
 
18
 
19
  ## Model description
20
 
@@ -42,18 +48,11 @@ The following hyperparameters were used during training:
42
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
  - lr_scheduler_type: cosine
44
  - lr_scheduler_warmup_ratio: 0.5
45
- - num_epochs: 1
46
-
47
- ### Training results
48
-
49
- | Training Loss | Epoch | Step | Validation Loss |
50
- |:-------------:|:-----:|:----:|:---------------:|
51
- | 0.7726 | 1.0 | 4362 | 0.7517 |
52
-
53
 
54
  ### Framework versions
55
 
56
- - Transformers 4.33.3
57
  - Pytorch 2.0.1+cu118
58
  - Datasets 2.14.5
59
- - Tokenizers 0.13.3
 
1
  ---
2
+ license: apache-2.0
3
+ base_model: tiiuae/falcon-7b-instruct
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # results
15
 
16
+ This model is a fine-tuned version of [tiiuae/falcon-7b-instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - eval_loss: 1.1389
19
+ - eval_runtime: 3296.11
20
+ - eval_samples_per_second: 2.269
21
+ - eval_steps_per_second: 2.269
22
+ - epoch: 1.0
23
+ - step: 4362
24
 
25
  ## Model description
26
 
 
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: cosine
50
  - lr_scheduler_warmup_ratio: 0.5
51
+ - num_epochs: 5
 
 
 
 
 
 
 
52
 
53
  ### Framework versions
54
 
55
+ - Transformers 4.34.0
56
  - Pytorch 2.0.1+cu118
57
  - Datasets 2.14.5
58
+ - Tokenizers 0.14.0
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "query_key_value",
20
  "dense",
21
- "dense_h_to_4h",
22
- "dense_4h_to_h"
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "tiiuae/falcon-7b-instruct",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
19
  "dense",
20
+ "dense_4h_to_h",
21
+ "query_key_value",
22
+ "dense_h_to_4h"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d03cbb3dfb2b7d6b5418d481e4d025567a81359cd0ab794b4c45054bf8b4fcc6
3
  size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ab96b5968f44048c717273e53f2ee75e361aa6836865adda0e8d9a55f8f7a5
3
  size 522284877
tokenizer_config.json CHANGED
@@ -1,7 +1,122 @@
1
  {
2
  "add_prefix_space": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "clean_up_tokenization_spaces": true,
4
  "eos_token": "<|endoftext|>",
 
 
 
 
5
  "model_max_length": 2048,
6
  "tokenizer_class": "PreTrainedTokenizerFast"
7
  }
 
1
  {
2
  "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": ">>TITLE<<",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": ">>ABSTRACT<<",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": ">>INTRODUCTION<<",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": ">>SUMMARY<<",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": ">>COMMENT<<",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": ">>ANSWER<<",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": ">>QUESTION<<",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": ">>DOMAIN<<",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": ">>PREFIX<<",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": ">>SUFFIX<<",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": ">>MIDDLE<<",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<|endoftext|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ }
100
+ },
101
+ "additional_special_tokens": [
102
+ ">>TITLE<<",
103
+ ">>ABSTRACT<<",
104
+ ">>INTRODUCTION<<",
105
+ ">>SUMMARY<<",
106
+ ">>COMMENT<<",
107
+ ">>ANSWER<<",
108
+ ">>QUESTION<<",
109
+ ">>DOMAIN<<",
110
+ ">>PREFIX<<",
111
+ ">>SUFFIX<<",
112
+ ">>MIDDLE<<"
113
+ ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
116
+ "model_input_names": [
117
+ "input_ids",
118
+ "attention_mask"
119
+ ],
120
  "model_max_length": 2048,
121
  "tokenizer_class": "PreTrainedTokenizerFast"
122
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61249fd972941c21540e3f992801381b7520f9215ab28920ac1c43b49c86421a
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391ca23fd597e46d1cff9ac81476cd0ded57a4ed7f2b2f0557c0f143bae3d48d
3
+ size 4091