johaanm committed on
Commit 3faa04c · 1 Parent(s): 7e6f2e2

Upload 2 files

Files changed (2)
  1. config.json +30 -0
  2. vocab.txt +0 -0
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "model_type": "llama",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "vocab_size": 32000,
+ "hidden_size": 2048,
+ "num_hidden_layers": 24,
+ "num_attention_heads": 16,
+ "lora_alpha": 16,
+ "lora_r": 64,
+ "lora_dropout": 0.1,
+ "use_cache": true,
+ "use_4bit": true,
+ "bnb_4bit_compute_dtype": "float16",
+ "bnb_4bit_quant_type": "nf4",
+ "use_nested_quant": false,
+ "fp16": true,
+ "bf16": false,
+ "per_device_train_batch_size": 16,
+ "per_device_eval_batch_size": 16,
+ "gradient_accumulation_steps": 1,
+ "max_grad_norm": 0.5,
+ "learning_rate": 0.0004,
+ "weight_decay": 0.0003,
+ "optim": "adamw_hf",
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.1,
+ "group_by_length": true
+ }
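Note that this config.json mixes LLaMA architecture fields (model_type, hidden_size, num_hidden_layers) with QLoRA fine-tuning hyperparameters (bnb_4bit_* quantization settings, lora_r/lora_alpha, trainer arguments), which suggests it drives a fine-tuning script rather than being loaded through AutoConfig. A minimal sketch of how these fields could map onto transformers + peft + bitsandbytes objects, under that assumption; the base checkpoint name and output_dir below are placeholders not present in this commit:

```python
import json
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

with open("config.json") as f:
    cfg = json.load(f)

# 4-bit NF4 quantization, from the use_4bit / bnb_4bit_* fields.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["use_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],                        # "nf4"
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),  # torch.float16
    bnb_4bit_use_double_quant=cfg["use_nested_quant"],
)

model = AutoModelForCausalLM.from_pretrained(
    "base-llama-checkpoint",  # placeholder: the base model is not part of this commit
    quantization_config=bnb_config,
)

# LoRA adapter settings, from lora_r / lora_alpha / lora_dropout.
peft_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    task_type="CAUSAL_LM",
)

# The remaining fields map one-to-one onto TrainingArguments.
training_args = TrainingArguments(
    output_dir="out",  # placeholder
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    per_device_eval_batch_size=cfg["per_device_eval_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    max_grad_norm=cfg["max_grad_norm"],
    learning_rate=cfg["learning_rate"],
    weight_decay=cfg["weight_decay"],
    optim=cfg["optim"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    warmup_ratio=cfg["warmup_ratio"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    group_by_length=cfg["group_by_length"],
)
```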
vocab.txt ADDED
The diff for this file is too large to render.