YX-S-Z
commited on
Commit
·
9e71563
1
Parent(s):
e06a12f
new
Browse files- 7.pt +3 -0
- 7_cfg.json +1 -0
- 8.pt +3 -0
- 8_cfg.json +1 -0
- 9.pt +3 -0
- 9_cfg.json +1 -0
7.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20257f468f240ffff339ed2d203895d58794c56a38d8a15247b952f36b5a452b
|
3 |
+
size 67177787
|
7_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
8.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1485f48db449441bf64fed9c09bc2cb99c191d5569366edacefd40bd7c032c7
|
3 |
+
size 67177787
|
8_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|
9.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b0a19e10e8c83c61a5e3192c7a6c117ef305ea6639b9a89fe2e694824d1e7c5
|
3 |
+
size 67177787
|
9_cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 49, "batch_size": 4096, "buffer_mult": 384, "lr": 0.0001, "num_tokens": 2000000000, "l1_coeff": 0.0003, "beta1": 0.9, "beta2": 0.99, "dict_mult": 32, "seq_len": 128, "enc_dtype": "fp32", "remove_rare_dir": false, "model_name": "crate-1l", "site": "post", "layer": 0, "device": "cuda:0", "model_batch_size": 512, "buffer_size": 1572864, "buffer_batches": 12288, "act_name": "blocks.0.mlp.hook_post", "act_size": 512, "dict_size": 16384, "name": "crate-1l_0_16384_post"}
|