Upload folder using huggingface_hub
- config.json +1 -0
- layers.14/cfg.json +1 -0
- layers.14/sae.safetensors +3 -0
- lr_scheduler.pt +3 -0
- optimizer.pt +3 -0
- state.pt +3 -0
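The commit title indicates the folder was pushed with the `huggingface_hub` client. A minimal sketch of how such a commit is typically produced; the local path and repo id below are hypothetical placeholders, not taken from this repository:

```python
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="checkpoints/qwen-owm",     # hypothetical local checkpoint dir
    repo_id="your-username/qwen-owm-sae",   # hypothetical target repo
    commit_message="Upload folder using huggingface_hub",
)
```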
config.json ADDED
@@ -0,0 +1 @@
+{"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false}, "batch_size": 128, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.14"], "layers": [14], "layer_stride": 1, "distribute_modules": false, "save_every": 1000, "log_to_wandb": true, "run_name": "qwen-owm", "wandb_log_frequency": 1, "model": "Qwen/Qwen2.5-1.5B-Instruct", "dataset": "open-web-math/open-web-math", "split": "train", "ctx_len": 1024, "hf_token": null, "load_in_8bit": false, "max_examples": null, "resume": false, "seed": 42, "data_preprocessing_num_proc": 112}
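config.json records the full training run: a TopK sparse autoencoder (k = 64) trained on activations from layer 14 of Qwen/Qwen2.5-1.5B-Instruct, using the open-web-math dataset at a 1024-token context length. A minimal sketch of inspecting it and deriving tokens per optimizer step (batch_size × ctx_len); the arithmetic follows directly from the values above:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

print(cfg["model"])       # Qwen/Qwen2.5-1.5B-Instruct
print(cfg["hookpoints"])  # ['layers.14']

# Each optimizer step consumes batch_size sequences of ctx_len tokens:
tokens_per_step = cfg["batch_size"] * cfg["ctx_len"]
print(tokens_per_step)    # 128 * 1024 = 131072 tokens per step
```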
layers.14/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 64, "multi_topk": false, "d_in": 1536}
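layers.14/cfg.json is the per-hookpoint SAE config. d_in = 1536 matches the hidden size of Qwen2.5-1.5B, and num_latents = 0 is read here as "derive the latent count from expansion_factor" (an assumption about this trainer's convention), which gives 16 × 1536 = 24576 latents with 64 active per token. A sketch:

```python
import json

with open("layers.14/cfg.json") as f:
    sae_cfg = json.load(f)

d_in = sae_cfg["d_in"]  # 1536, the residual-stream width of Qwen2.5-1.5B
# num_latents == 0 is assumed to mean "use expansion_factor * d_in":
num_latents = sae_cfg["num_latents"] or sae_cfg["expansion_factor"] * d_in
print(num_latents)      # 24576
print(sae_cfg["k"])     # 64 latents kept per token by the TopK activation
```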
layers.14/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6229cbb1585a3fe601f8eba1334db731ba963e52108bd896181dbdd44bd7fc90
+size 302094672
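The weights file is stored as a Git LFS pointer: a version line, the SHA-256 of the real payload, and its byte size. The 302,094,672 bytes are consistent with float32 encoder and decoder matrices of shape 24576 × 1536 plus bias vectors (2 × 24576 × 1536 + 24576 + 1536 ≈ 75.5M parameters × 4 bytes) and a small safetensors header. A sketch that downloads the resolved file, checks it against the pointer's oid, and loads the tensors; the repo_id is a hypothetical placeholder:

```python
import hashlib

from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# repo_id is hypothetical; substitute the actual repository.
path = hf_hub_download("your-username/qwen-owm-sae", "layers.14/sae.safetensors")

# The LFS pointer's oid is the SHA-256 of the resolved file; verify it.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == "6229cbb1585a3fe601f8eba1334db731ba963e52108bd896181dbdd44bd7fc90"

weights = load_file(path)
for name, tensor in weights.items():
    print(name, tuple(tensor.shape), tensor.dtype)
```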
lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b8861183a8fd0a6267a0c921455d672148c57a5fda36714e695894de6df03b0
+size 1012
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d1217a6c106c01a53275e20f4d901c6b9b1b8d9a487c196dc1beccd2519a76
+size 151358734
state.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cb75edbbd93377bfabaf23278b38998e4ce0a0547ea9e738bf53b65e2a090e6
+size 197778
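The three .pt files are the optimizer, learning-rate scheduler, and trainer-state checkpoints that would allow the run to resume (the top-level config shows "resume": false for this run). A sketch of inspecting them on CPU, assuming they were written with torch.save; their exact contents depend on the trainer that produced them:

```python
import torch

# map_location="cpu" lets you inspect the checkpoints without a GPU.
optimizer_state = torch.load("optimizer.pt", map_location="cpu")
scheduler_state = torch.load("lr_scheduler.pt", map_location="cpu")
trainer_state = torch.load("state.pt", map_location="cpu")

for name, obj in [("optimizer", optimizer_state),
                  ("lr_scheduler", scheduler_state),
                  ("state", trainer_state)]:
    # Checkpoints are typically dicts; print their top-level keys if so.
    summary = list(obj.keys()) if isinstance(obj, dict) else type(obj)
    print(name, summary)
```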