photonmz committed on
Commit
ef584aa
·
1 Parent(s): ffce903

softmax1-42m 43%

Browse files
softmax1-42m-2023_12_13_10_37_41-43k/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861804e9f623cce20bc4c7554da419e7de231af267d9f09b54fb8f7c10aa90ab
3
+ size 502454859
softmax1-42m-2023_12_13_10_37_41-43k/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"out_dir": "out/softmax1-42m-2023_12_13_10_37_41-43k", "eval_interval": 1000, "log_interval": 20, "eval_iters": 10, "eval_only": true, "always_save_checkpoint": false, "n_checkpoints": 1, "checkpoint_interval": 20000, "init_from": "resume", "wandb_log": false, "wandb_project": "softermax-eval", "wandb_run_name": "softmax1-42m-2023_12_13_10_37_41-43k", "batch_size": 32, "max_seq_len": 256, "vocab_source": "llama2", "vocab_size": 32000, "dim": 288, "n_layers": 6, "n_heads": 6, "n_kv_heads": 6, "multiple_of": 32, "dropout": 0.0, "gradient_accumulation_steps": 4, "learning_rate": 0.0005, "max_iters": 100000, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 1000, "device": "mps", "dtype": "float16", "compile": false, "softmax1": true, "softmaxn_param": 1}
softmax1-42m-2023_12_13_10_37_41-43k/iter_43000/ckpt.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:528f960ab82a4f1a386bc72dbd2dcac98d3494418634137e6b0a83b8a2e4d065
3
+ size 168882675
softmax1-42m-2023_12_13_10_37_41-43k/iter_43000/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7021b680c82640e19224ce9b49137deee357c249db765f494c3d5c5cd216dcc3
3
+ size 166823964
softmax1-42m-2023_12_13_10_37_41-43k/model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:944c4ae586ba1be6ad2005350ffb8df76656a2a7dd7926de2c07a2cb2a554499
3
+ size 166823964