Upload 2 files
- config.json +30 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "model_type": "llama",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "vocab_size": 32000,
+  "hidden_size": 2048,
+  "num_hidden_layers": 24,
+  "num_attention_heads": 16,
+  "lora_alpha": 16,
+  "lora_r": 64,
+  "lora_dropout": 0.1,
+  "use_cache": true,
+  "use_4bit": true,
+  "bnb_4bit_compute_dtype": "float16",
+  "bnb_4bit_quant_type": "nf4",
+  "use_nested_quant": false,
+  "fp16": true,
+  "bf16": false,
+  "per_device_train_batch_size": 16,
+  "per_device_eval_batch_size": 16,
+  "gradient_accumulation_steps": 1,
+  "max_grad_norm": 0.5,
+  "learning_rate": 0.0004,
+  "weight_decay": 0.0003,
+  "optim": "adamw_hf",
+  "lr_scheduler_type": "linear",
+  "warmup_ratio": 0.1,
+  "group_by_length": true
+}
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
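Note: the config.json added above mixes standard LLaMA architecture fields (hidden_size, num_hidden_layers, ...) with QLoRA/LoRA training hyperparameters (lora_r, use_4bit, learning_rate, ...). The upload does not include a training script, so the following is only a hypothetical sketch of how such keys could be split across BitsAndBytesConfig, LoraConfig, and TrainingArguments when fine-tuning with transformers, peft, and bitsandbytes; the library choice, field mapping, and output_dir are assumptions, not part of this commit.

# Hypothetical sketch: consuming the training-related keys of config.json
# in a QLoRA fine-tuning setup (assumed workflow, not shipped in this repo).
import json
import torch
from transformers import BitsAndBytesConfig, TrainingArguments
from peft import LoraConfig

with open("config.json") as f:
    cfg = json.load(f)

# 4-bit NF4 quantization settings
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["use_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],                        # "nf4"
    bnb_4bit_compute_dtype=getattr(torch, cfg["bnb_4bit_compute_dtype"]),  # torch.float16
    bnb_4bit_use_double_quant=cfg["use_nested_quant"],
)

# LoRA adapter settings
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    task_type="CAUSAL_LM",
)

# Optimizer and schedule settings
training_args = TrainingArguments(
    output_dir="outputs",  # assumed; not present in config.json
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    per_device_eval_batch_size=cfg["per_device_eval_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    learning_rate=cfg["learning_rate"],
    weight_decay=cfg["weight_decay"],
    max_grad_norm=cfg["max_grad_norm"],
    optim=cfg["optim"],
    lr_scheduler_type=cfg["lr_scheduler_type"],
    warmup_ratio=cfg["warmup_ratio"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    group_by_length=cfg["group_by_length"],
)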