jsfs11 committed · verified · Commit 6e14036 · Parent(s): 53541fc

Update README.md

Files changed (1): README.md (+97 −3)

README.md (new contents):
---
license: apache-2.0
---
# QLORA adapter trained with ORPO using Axolotl

[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
<details><summary>See axolotl config</summary>

axolotl version: `0.4.0`
```yaml
base_model: jsfs11/TemptressTensor-10.7B-v0.1a
model_type: MistralForCausalLM
tokenizer_type: LlamaTokenizer

load_in_8bit: false
load_in_4bit: true
strict: false

rl: orpo
orpo_alpha: 0.1
remove_unused_columns: false

chat_template: chatml
datasets:
  - path: argilla/distilabel-capybara-dpo-7k-binarized
    type: chat_template.argilla
dataset_prepared_path: last_run_prepared
val_set_size: 0.1
output_dir: ./mistral-qlora-orpo-out

adapter: qlora
lora_model_dir:

sequence_len: 4096
sample_packing: false
pad_to_sequence_len: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_linear: true
lora_fan_in_fan_out:
lora_target_modules:
  - gate_proj
  - down_proj
  - up_proj
  - q_proj
  - v_proj
  - k_proj
  - o_proj

wandb_project: tt orpo
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 3
micro_batch_size: 1
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: auto
fp16:
tf32: false

gradient_checkpointing: false
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_max_new_tokens: 128
saves_per_epoch: 5
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:

```

</details><br>
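
The card records only the training configuration above. As a rough guide, training with this YAML is typically launched through Axolotl itself (for 0.4.x, something like `accelerate launch -m axolotl.cli.train config.yaml`), and the resulting QLoRA adapter is then applied to the base model with PEFT for inference. The sketch below is not part of the original card: the adapter repo id is a placeholder assumption, and it assumes the base model's tokenizer carries the ChatML chat template used during training.

```python
# Minimal inference sketch (assumptions noted inline): load the base model in 4-bit
# and apply this ORPO-trained QLoRA adapter with PEFT.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "jsfs11/TemptressTensor-10.7B-v0.1a"   # base model named in the config
adapter_id = "jsfs11/mistral-qlora-orpo-out"     # placeholder adapter repo id (assumption)

# 4-bit loading mirrors the load_in_4bit: true setting used for QLoRA training.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)

# The config trains with the ChatML template, so prompts are formatted via the
# tokenizer's chat template (assumes the tokenizer has one configured).
messages = [{"role": "user", "content": "Hello!"}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```

Keeping the base model quantized at inference time keeps memory usage close to the QLoRA training setup; merging the adapter into full-precision weights is an alternative if deployment requires a standalone model.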