Upload folder using huggingface_hub
- config.json +1 -0
- layers.14/cfg.json +1 -0
- layers.14/sae.safetensors +3 -0
- lr_scheduler.pt +3 -0
- optimizer.pt +3 -0
- state.pt +3 -0
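The commit title indicates the folder was pushed with the `huggingface_hub` client. A minimal sketch of how such a commit is typically produced; the local path and repo id below are hypothetical placeholders, not taken from this repository:

```python
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="checkpoints/qwen-owm",     # hypothetical local checkpoint dir
    repo_id="your-username/qwen-owm-sae",   # hypothetical target repo
    commit_message="Upload folder using huggingface_hub",
)
```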
config.json ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false}, "batch_size": 128, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.14"], "layers": [14], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "qwen-owm", "wandb_log_frequency": 1, "model": "Qwen/Qwen2.5-1.5B-Instruct", "dataset": "open-web-math/open-web-math", "split": "train", "ctx_len": 1024, "hf_token": null, "load_in_8bit": false, "max_examples": null, "resume": false, "seed": 42, "data_preprocessing_num_proc": 112}
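config.json records the full training run: a TopK sparse autoencoder (k = 64) trained on activations from layer 14 of Qwen/Qwen2.5-1.5B-Instruct, using the open-web-math dataset at a 1024-token context length. A minimal sketch of inspecting it and deriving tokens per optimizer step (batch_size × ctx_len); the arithmetic follows directly from the values above:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model"])       # Qwen/Qwen2.5-1.5B-Instruct
print(cfg["hookpoints"])  # ['layers.14']

# Each optimizer step consumes batch_size sequences of ctx_len tokens:
tokens_per_step = cfg["batch_size"] * cfg["ctx_len"]
print(tokens_per_step)    # 128 * 1024 = 131072 tokens per step
```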
layers.14/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "d_in": 1536}
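layers.14/cfg.json is the per-hookpoint SAE config. d_in = 1536 matches the hidden size of Qwen2.5-1.5B, and num_latents = 0 is read here as "derive the latent count from expansion_factor" (an assumption about this trainer's convention), which gives 16 × 1536 = 24576 latents with 64 active per token. A sketch:

```python
import json

with open("layers.14/cfg.json") as f:
    sae_cfg = json.load(f)

d_in = sae_cfg["d_in"]  # 1536, the residual-stream width of Qwen2.5-1.5B
# num_latents == 0 is assumed to mean "use expansion_factor * d_in":
num_latents = sae_cfg["num_latents"] or sae_cfg["expansion_factor"] * d_in
print(num_latents)      # 24576
print(sae_cfg["k"])     # 64 latents kept per token by the TopK activation
```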
layers.14/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6229cbb1585a3fe601f8eba1334db731ba963e52108bd896181dbdd44bd7fc90
+size 302094672
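The weights file is stored as a Git LFS pointer: a version line, the SHA-256 of the real payload, and its byte size. The 302,094,672 bytes are consistent with float32 encoder and decoder matrices of shape 24576 × 1536 plus bias vectors (2 × 24576 × 1536 + 24576 + 1536 ≈ 75.5M parameters × 4 bytes) and a small safetensors header. A sketch that downloads the resolved file, checks it against the pointer's oid, and loads the tensors; the repo_id is a hypothetical placeholder:

```python
import hashlib

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# repo_id is hypothetical; substitute the actual repository.
path = hf_hub_download("your-username/qwen-owm-sae", "layers.14/sae.safetensors")

# The LFS pointer's oid is the SHA-256 of the resolved file; verify it.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == "6229cbb1585a3fe601f8eba1334db731ba963e52108bd896181dbdd44bd7fc90"

weights = load_file(path)
for name, tensor in weights.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```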
lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b8861183a8fd0a6267a0c921455d672148c57a5fda36714e695894de6df03b0
+size 1012
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d1217a6c106c01a53275e20f4d901c6b9b1b8d9a487c196dc1beccd2519a76
+size 151358734
state.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cb75edbbd93377bfabaf23278b38998e4ce0a0547ea9e738bf53b65e2a090e6
+size 197778
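The three .pt files are the optimizer, learning-rate scheduler, and trainer-state checkpoints that would allow the run to resume (the top-level config shows "resume": false for this run). A sketch of inspecting them on CPU, assuming they were written with torch.save; their exact contents depend on the trainer that produced them:

```python
import torch

# map_location="cpu" lets you inspect the checkpoints without a GPU.
optimizer_state = torch.load("optimizer.pt", map_location="cpu")
scheduler_state = torch.load("lr_scheduler.pt", map_location="cpu")
trainer_state = torch.load("state.pt", map_location="cpu")

for name, obj in [("optimizer", optimizer_state),
                  ("lr_scheduler", scheduler_state),
                  ("state", trainer_state)]:
    # Checkpoints are typically dicts; print their top-level keys if so.
    summary = list(obj.keys()) if isinstance(obj, dict) else type(obj)
    print(name, summary)
```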