Heralax committed · Commit ba26326 · 0 parents (root commit)
.gitattributes ADDED
@@ -0,0 +1,3 @@
+ *.json filter=lfs diff=lfs merge=lfs -text
+ *.gguf filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
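These three patterns route every JSON, GGUF, and bin file through Git LFS, which is why most files in this commit appear below as short pointer stubs rather than full contents. A quick sketch of which files match (file list taken from this commit; `tokenizer.model` and `README.md` fall outside the patterns, matching how they appear in the diff):

```python
import fnmatch

# LFS-tracked patterns from .gitattributes above.
patterns = ["*.json", "*.gguf", "*.bin"]

# Files added in this commit.
files = [
    "Etiquette-Pretrain-7.2B-F16.gguf", "README.md", "added_tokens.json",
    "config.json", "generation_config.json", "ggml-model-Q8_0.gguf",
    "pytorch_model.bin", "special_tokens_map.json", "tokenizer.json",
    "tokenizer.model", "tokenizer_config.json",
]

# A file is stored as an LFS pointer stub if any pattern matches its name.
lfs_tracked = [f for f in files if any(fnmatch.fnmatch(f, p) for p in patterns)]
print(lfs_tracked)
```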
Etiquette-Pretrain-7.2B-F16.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:43205b7a979cd508af249a121a7faf4414ec68198d4b6989fefec32f0a4bab6b
+ size 14484749120
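Each LFS-tracked file is stored in the repo as a three-line pointer stub like the one above: a spec version, a SHA-256 object id, and the byte size of the real blob. A small sketch of reading one (the pointer text is copied from this commit; the ~14.5 GB size is consistent with a 7.2B-parameter model at F16, i.e. about 2 bytes per weight):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer stub into its key/value fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {
        "version": fields["version"],
        "oid_algo": algo,
        "oid": digest,
        "size": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:43205b7a979cd508af249a121a7faf4414ec68198d4b6989fefec32f0a4bab6b
size 14484749120
"""
info = parse_lfs_pointer(pointer)
print(f"{info['size'] / 1e9:.1f} GB")  # ~14.5 GB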
README.md ADDED
@@ -0,0 +1,143 @@
+ ---
+ library_name: transformers
+ license: apache-2.0
+ base_model: Heralax/etiquette-pretrain
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: us-army-finetune-1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.1`
+ ```yaml
+ base_model: Heralax/etiquette-pretrain
+ tokenizer_type: AutoTokenizer
+ is_mistral_derived_model: true
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: json
+     data_files: hidden_manners_openended_plain_qa_list.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+   - path: json
+     data_files: hidden_manners_normal_plain_qa_list.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+   - path: json
+     data_files: hidden_manners_negative_plain_qa_list.jsonl
+     ds_type: json
+     type: sharegpt
+     conversation: chatml
+
+ dataset_prepared_path: last_run_prepared
+ output_dir: ./us-army-finetune-1
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+ shuffle_merged_datasets: true
+
+ wandb_project: mistral-usarmy
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 6
+ micro_batch_size: 2
+ eval_batch_size: 1
+ num_epochs: 6
+ optimizer: paged_adamw_8bit
+ lr_scheduler: cosine
+ learning_rate: 0.000020
+ weight_decay: 0
+ # Gradient clipping max norm
+ max_grad_norm: 1.0
+ noisy_embedding_alpha: 0
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: unsloth
+ early_stopping_patience:
+ resume_from_checkpoint:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ chat_template: chatml
+
+ warmup_ratio: 0.5
+ auto_resume_from_checkpoints: false
+ #warmup_ratio: 0.5
+ eval_steps: 10
+ saves_per_epoch: 1
+ eval_sample_packing: false
+ save_total_limit: 3
+ debug:
+ deepspeed: deepspeed_configs/zero2.json
+ special_tokens:
+   pad_token: "<|end_of_text|>"
+ ```
+
+ </details><br>
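The three dataset entries in the config use axolotl's `sharegpt` loader with `chatml` conversation rendering. As a minimal sketch of what one line of those JSONL files would look like (the `conversations`/`from`/`value` schema is the standard ShareGPT layout that loader expects; the Q&A text here is invented):

```python
import json

# One ShareGPT-style record: a list of turns, each with a "from" role
# ("human" or "gpt") and a "value" payload. Axolotl's `type: sharegpt`
# loader reads this schema and renders it with the ChatML template.
record = {
    "conversations": [
        {"from": "human", "value": "How should I address a senior officer I have not met before?"},
        {"from": "gpt", "value": "Use their rank and surname, and wait to be invited before using anything less formal."},
    ]
}

# JSONL: one JSON object per line.
with open("hidden_manners_openended_plain_qa_list.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```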
+
+ # us-army-finetune-1
+
+ This model is a version of [Heralax/etiquette-pretrain](https://huggingface.co/Heralax/etiquette-pretrain) fine-tuned on the three local `hidden_manners` plain-QA JSONL datasets listed in the axolotl config above.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (the batch-size arithmetic is checked in the sketch after this list):
+ - learning_rate: 2e-05
+ - train_batch_size: 2
+ - eval_batch_size: 1
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 5
+ - gradient_accumulation_steps: 6
+ - total_train_batch_size: 60
+ - total_eval_batch_size: 5
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 24
+ - num_epochs: 6
+
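The derived batch sizes follow directly from the values above:

```python
micro_batch_size = 2             # per-device train batch (train_batch_size above)
gradient_accumulation_steps = 6  # micro-batches accumulated per optimizer step
num_devices = 5                  # multi-GPU run

total_train_batch_size = micro_batch_size * gradient_accumulation_steps * num_devices
assert total_train_batch_size == 60  # matches the reported value

# Eval side: eval_batch_size 1 on each of the 5 devices, no accumulation.
total_eval_batch_size = 1 * num_devices
assert total_eval_batch_size == 5
```

Given `warmup_ratio: 0.5` and the reported 24 warmup steps, the full run evidently came to roughly 48 optimizer steps in total.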
+ ### Training results
+
+ ### Framework versions
+
+ - Transformers 4.45.1
+ - Pytorch 2.3.1+cu121
+ - Datasets 2.21.0
+ - Tokenizers 0.20.0
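A minimal inference sketch with the Transformers version listed above, assuming the weights are published under the hypothetical repo id `Heralax/us-army-finetune-1` (substitute the actual repo id for this commit). The prompt is framed in ChatML, matching `chat_template: chatml` in the training config:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical repo id; replace with the actual one for this commit.
repo_id = "Heralax/us-army-finetune-1"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# ChatML framing, as used during training.
prompt = (
    "<|im_start|>user\n"
    "What is the polite way to decline a dinner invitation?<|im_end|>\n"
    "<|im_start|>assistant\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=200)

# Decode only the newly generated tokens, not the prompt.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```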
added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a4c860c660d50eda3c33f626cdc7fdc8c38dbdd7ab739e37e5029c2ea0827faf
+ size 31
config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06cbe3cbf8d09b746bea48e78a0768c11cdd5c8b1d745f22e5803801fd9a5ca5
+ size 665
generation_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2cf5d1c58a0197efb940afcede6654d7d73e54f77fb526907cc665642b9c669a
+ size 132
ggml-model-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e22310062929d5ebf3fe4ff90cb7b3b1b53c84a6733f6fa8ea4d1e154382ac88
+ size 7695867200
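This Q8_0 quantization (and the F16 GGUF above) can be loaded directly from disk. A sketch using the `llama-cpp-python` bindings, with the same ChatML framing as the training config (the file path and sampling values are illustrative):

```python
from llama_cpp import Llama

# Load the quantized model; n_ctx matches the 4096 sequence length used in training.
llm = Llama(model_path="ggml-model-Q8_0.gguf", n_ctx=4096)

prompt = (
    "<|im_start|>user\n"
    "How early should I arrive for a formal reception?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

# Stop at the ChatML end-of-turn marker so generation ends with the reply.
result = llm(prompt, max_tokens=200, stop=["<|im_end|>"])
print(result["choices"][0]["text"])
```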
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b49a2e59a768e79dcaa6341b37b917b5260fd1068a35a19282ea7b1fac52dc1
+ size 14483521198
special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a90c00fbd43070d76e8c2f44711050e4b404db539ec9d659741d36558acb2043
+ size 562
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd3f168005b4faa0a709d51a0e2d622d8f41d845db6406f542e78b7aa0e3b7e0
+ size 3505942
tokenizer.model ADDED
Binary file (493 kB).
tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a63ee4d7229c0285017f2a332e32b9c98d15579ce1a66a6a518ca20e19f9b78
+ size 1471