Tbh committed on
Commit
40d46f9
·
verified ·
1 Parent(s): b696538

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +108 -0
README.md CHANGED
@@ -1,3 +1,111 @@
1
  ---
 
 
2
  license: apache-2.0
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
+ tags:
6
+ - mistral
7
+ - instruct
8
+ - finetune
9
+ - chatml
10
+ - gpt4
11
+ - synthetic data
12
+ - distillation
13
+ base_model: mistralai/Mistral-7B-v0.1
14
+ model-index:
15
+ - name: Thestral-0.1-tr-chat-7B
16
+ results: []
17
  ---
18
+
19
+ # Thestral-0.1-tr-chat-7B
20
+
21
+
22
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/60ca32d2e7bc4b029af088a0/pNId3MzUdSsI20XOM9Dsv.png)
23
+
24
+ This model is a fully fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1), trained on diverse Turkish datasets.
25
+
26
+ The model was fully fine-tuned on translated datasets using [axolotl](https://github.com/OpenAccess-AI-Collective/axolotl). These datasets consist primarily of translated versions of the [teknium/OpenHermes-2.5](https://huggingface.co/datasets/teknium/OpenHermes-2.5) and [Open-Orca/SlimOrca](https://huggingface.co/datasets/Open-Orca/SlimOrca) datasets.
27
+
28
+ <details><summary>See axolotl config</summary>
29
+
30
+ axolotl version: `0.4.0`
31
+ ```yaml
32
+ base_model: mistralai/Mistral-7B-v0.1
33
+ model_type: MistralForCausalLM
34
+ tokenizer_type: LlamaTokenizer
35
+
36
+ load_in_8bit: false
37
+ load_in_4bit: false
38
+ strict: false
39
+
40
+ datasets:
41
+ - path: NovusResearch/OpenHermes-2.5-Translated-TR-sharegpt-style
42
+ type: sharegpt
43
+ conversation: chatml
44
+ - path: data/merged_all.json
45
+ ds_type: json
46
+ type: sharegpt
47
+ conversation: chatml
48
+
49
+ dataset_prepared_path:
50
+ val_set_size: 0.05
51
+ output_dir: ./out
52
+
53
+ sequence_len: 8192
54
+ sample_packing: true
55
+ pad_to_sequence_len: true
56
+ eval_sample_packing: false
57
+
58
+ wandb_project:
59
+ wandb_entity:
60
+ wandb_watch:
61
+ wandb_name:
62
+ wandb_log_model:
63
+
64
+ gradient_accumulation_steps: 4
65
+ micro_batch_size: 2
66
+ num_epochs: 2
67
+ optimizer: adamw_bnb_8bit
68
+ lr_scheduler: cosine
69
+ learning_rate: 0.000005
70
+
71
+ train_on_inputs: false
72
+ group_by_length: false
73
+ bf16: auto
74
+ fp16:
75
+ tf32: false
76
+
77
+ gradient_checkpointing: true
78
+ early_stopping_patience:
79
+ resume_from_checkpoint:
80
+ local_rank:
81
+ logging_steps: 1
82
+ xformers_attention:
83
+ flash_attention: true
84
+
85
+ ## Use
86
+ wandb_project: full_finetune
87
+ wandb_entity:
88
+ wandb_watch:
89
+ wandb_name:
90
+ wandb_log_model:
91
+
92
+
93
+ warmup_steps: 10
94
+ evals_per_epoch: 0
95
+ eval_table_size:
96
+ eval_max_new_tokens: 128
97
+ saves_per_epoch: 1
98
+ debug:
99
+ deepspeed:
100
+ weight_decay: 0.0
101
+ fsdp:
102
+ fsdp_config:
103
+ special_tokens:
104
+ bos_token: "<s>"
105
+ eos_token: "<|im_end|>"
106
+ unk_token: "<unk>"
107
+ tokens:
108
+ - "<|im_start|>"
109
+
110
+ ```
111
+ </details><br>