diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9730407385c59951d49d3b830501309416cf1eb5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba4c7ee2d57808166dcb49142ec5130adea90fbe --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 137.7, "l1_loss": 110.15, "l0": 20.0, "frac_variance_explained": 0.06328125, "cossim": 0.2876953125, "l2_ratio": 0.1814453125, "relative_reconstruction_bias": 0.630078125, "loss_original": 2.440642213821411, "loss_reconstructed": 11.407014656066895, "loss_zero": 12.452932643890382, "frac_recovered": 0.10465058535337449, "frac_alive": 0.138454869389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77e0f3ea3eb6676d640f5bfc5200d549f30dc159 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bdf1c9683b6aca77295ad30df7107f27e725f922 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 93.55, "l1_loss": 238.6, "l0": 20.0, "frac_variance_explained": 0.334375, "cossim": 0.755078125, "l2_ratio": 0.79140625, "relative_reconstruction_bias": 1.01484375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.918577527999878, "loss_zero": 12.452932643890382, "frac_recovered": 0.7525505006313324, "frac_alive": 0.1292317658662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9171c83c9a900ae84fa01a0242121c6851c7e6b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..849dabebb769acaf9b1f0d33d83cd89c40bc938c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 70.4, "l1_loss": 267.2, "l0": 20.0, "frac_variance_explained": 0.633984375, "cossim": 0.871484375, "l2_ratio": 0.87109375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 3.1963499784469604, "loss_zero": 12.452932643890382, "frac_recovered": 0.9245945453643799, "frac_alive": 0.1436631977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..da5cb80ea0d787a27e64d5065c8b244b74a2ca1c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2ce3d272b558bf7e50fc0f6b6495a4a72e3a5778 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.425, "l1_loss": 311.0, "l0": 20.0, "frac_variance_explained": 0.758984375, "cossim": 0.8875, "l2_ratio": 0.890234375, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7933643102645873, "loss_zero": 12.452932643890382, "frac_recovered": 0.9648277342319489, "frac_alive": 0.157009556889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9c6f86f06cad4d63b83cc88cb53d286ee9ad991 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1ac95c9c72b59d12cab7b8ba540b146db4813ecf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 105.15, "l1_loss": 179.8, "l0": 20.0, "frac_variance_explained": 0.18828125, "cossim": 0.692578125, "l2_ratio": 0.716015625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.1452779293060305, "loss_zero": 12.452932643890382, "frac_recovered": 0.6302249014377594, "frac_alive": 0.1695421040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..53f0b3adf699ae246a44700cc31083f0d1f081aa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9d6845be58d665e151bc7bfdb8b1074c876997a6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 73.85, "l1_loss": 262.3, "l0": 20.0, "frac_variance_explained": 0.6015625, "cossim": 0.857421875, "l2_ratio": 0.85625, "relative_reconstruction_bias": 0.996875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.600706672668457, "loss_zero": 12.452932643890382, "frac_recovered": 0.884125429391861, "frac_alive": 0.131618931889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bea91612ba650fb019c777ae0530941239d2b324 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3bfa01096926e463723447c4cca11f1036957ed9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.1, "l1_loss": 277.0, "l0": 20.0, "frac_variance_explained": 0.680859375, "cossim": 0.8859375, "l2_ratio": 0.888671875, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.888194966316223, "loss_zero": 12.452932643890382, "frac_recovered": 0.9553665339946746, "frac_alive": 0.1568467915058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7c41a336c7f82ddafb427e5b0d1af06c3c7355c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0c880463fc5dfb4efc6cbc055db9afef813bd7d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 142.2, "l1_loss": 218.0, "l0": 40.0, "frac_variance_explained": 0.10546875, "cossim": 0.36953125, "l2_ratio": 0.2482421875, "relative_reconstruction_bias": 0.669921875, "loss_original": 2.440642213821411, "loss_reconstructed": 10.688310146331787, "loss_zero": 12.452932643890382, "frac_recovered": 0.1763722062110901, "frac_alive": 0.212185338139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9747488607085fa365808e2ec2127bcbd80a3f78 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..047e5cc97b5a7f6c3afc51b4ad4a61ecc95e4cec --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.2, "l1_loss": 379.6, "l0": 40.0, "frac_variance_explained": 0.449609375, "cossim": 0.7953125, "l2_ratio": 0.8234375, "relative_reconstruction_bias": 1.015625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.139649343490601, "loss_zero": 12.452932643890382, "frac_recovered": 0.830296915769577, "frac_alive": 0.2669813334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce83b7c346fd2832e324dafe0dc97dc4e4c4cfc8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1bd3e1700f362429da0d42d827e92a10705c18e6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.55, "l1_loss": 343.8, "l0": 40.0, "frac_variance_explained": 0.665625, "cossim": 0.88984375, "l2_ratio": 0.888671875, "relative_reconstruction_bias": 0.998046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.863617014884949, "loss_zero": 12.452932643890382, "frac_recovered": 0.9578355431556702, "frac_alive": 0.2782118022441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65918cde6e52c1c2e871adc989a6435224277597 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02f13c73286c5ba05b840b097e4e4485e59b1c1c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.45, "l1_loss": 394.2, "l0": 40.0, "frac_variance_explained": 0.780078125, "cossim": 0.909375, "l2_ratio": 0.911328125, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.612251806259155, "loss_zero": 12.452932643890382, "frac_recovered": 0.9829114198684692, "frac_alive": 0.29541015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7b5e99c34cc180e351d4d49426983fb573f066d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4f23608b9d7525e3912c5662fb42f19f1a17f27e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 108.0, "l1_loss": 338.6, "l0": 40.0, "frac_variance_explained": 0.305078125, "cossim": 0.733203125, "l2_ratio": 0.73984375, "relative_reconstruction_bias": 0.8953125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.098966073989868, "loss_zero": 12.452932643890382, "frac_recovered": 0.7346842169761658, "frac_alive": 0.33349609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2185e1ec2c2477c234021a85a42d314d01040ad9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c2286ad66a2e48594e1c2f6693d1d3ad943d1fc5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.6, "l1_loss": 384.6, "l0": 40.0, "frac_variance_explained": 0.665625, "cossim": 0.872265625, "l2_ratio": 0.8796875, "relative_reconstruction_bias": 1.0078125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.157698321342468, "loss_zero": 12.452932643890382, "frac_recovered": 0.9283924698829651, "frac_alive": 0.2635633647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..395748522937125b0e313b29a46dbe4a9e02e2b4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2cac656bc5e665654138f604941a3b9ea80320b6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 61.075, "l1_loss": 350.0, "l0": 40.0, "frac_variance_explained": 0.703125, "cossim": 0.901171875, "l2_ratio": 0.901953125, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.661836934089661, "loss_zero": 12.452932643890382, "frac_recovered": 0.9779623448848724, "frac_alive": 0.2948133647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..58b4610d26aab504f4befdb110f2924b7761ccd6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6aae1bd473eb3d2291848849b8a230ce36c9560c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 133.8, "l1_loss": 406.6, "l0": 80.0, "frac_variance_explained": 0.173828125, "cossim": 0.4671875, "l2_ratio": 0.3470703125, "relative_reconstruction_bias": 0.741796875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.577101516723634, "loss_zero": 12.452932643890382, "frac_recovered": 0.2874674767255783, "frac_alive": 0.314019113779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a91e7ae07667e341a06f4b1dd3678e9dc3063409 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f4b3c7a5d511fd10edc5be24dbd27c0f13182ca --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.3, "l1_loss": 554.4, "l0": 80.0, "frac_variance_explained": 0.56953125, "cossim": 0.839453125, "l2_ratio": 0.851953125, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.426113796234131, "loss_zero": 12.452932643890382, "frac_recovered": 0.9015444159507752, "frac_alive": 0.5124782919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..45acfba66e677efc18662b567d22ee9e35bbabdb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ccddf1413519f7f4cc61433b52012a0c41053ba0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 61.05, "l1_loss": 524.6, "l0": 80.0, "frac_variance_explained": 0.731640625, "cossim": 0.9046875, "l2_ratio": 0.9078125, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.71366970539093, "loss_zero": 12.452932643890382, "frac_recovered": 0.9728043735027313, "frac_alive": 0.49365234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..faebf3018a228f3c2514da8cdedebdb635e49287 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7770a229129e55c3a71906dc685079c2ed650620 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.2, "l1_loss": 536.8, "l0": 80.0, "frac_variance_explained": 0.803125, "cossim": 0.925390625, "l2_ratio": 0.926171875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.552223062515259, "loss_zero": 12.452932643890382, "frac_recovered": 0.9888993203639984, "frac_alive": 0.4521484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..696ab9004ac70c176276887874145c0302f1e627 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7950407e46bcacc6be70047dd87ed274f19d44dc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.75, "l1_loss": 515.4, "l0": 80.0, "frac_variance_explained": 0.414453125, "cossim": 0.773828125, "l2_ratio": 0.78984375, "relative_reconstruction_bias": 0.96953125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.03226523399353, "loss_zero": 12.452932643890382, "frac_recovered": 0.841141802072525, "frac_alive": 0.5571831464767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..18ce0418981687d78c7e836275919e9b9adf3bb4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..daf1f87ba62330a7640c55baa8997c0a68f9d046 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.125, "l1_loss": 497.8, "l0": 80.0, "frac_variance_explained": 0.642578125, "cossim": 0.873046875, "l2_ratio": 0.87734375, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0880523681640626, "loss_zero": 12.452932643890382, "frac_recovered": 0.9353146016597748, "frac_alive": 0.5100911259651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c889d02dff8a4fbfb625f9dd2dabfc432cc7d983 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..da2a3ad1136a140d6fd9b9907b4c329c94f81cf6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.45, "l1_loss": 593.0, "l0": 80.0, "frac_variance_explained": 0.847265625, "cossim": 0.921875, "l2_ratio": 0.923828125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5707311153411867, "loss_zero": 12.452932643890382, "frac_recovered": 0.9870538175106048, "frac_alive": 0.4774305522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6d8d3899bb2e0b885f3624179617a06cf231504 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f9823bd20044e8cdf15d2355d938ddc1ab008b1e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 121.8, "l1_loss": 744.8, "l0": 160.0, "frac_variance_explained": 0.2640625, "cossim": 0.56953125, "l2_ratio": 0.4927734375, "relative_reconstruction_bias": 0.8609375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.267248868942261, "loss_zero": 12.452932643890382, "frac_recovered": 0.6180142462253571, "frac_alive": 0.4386935830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed5f57e6ab2298db5ccaf7bad93898407ab85a3c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..83715a3f40342ece903bbbb48fe4b379e6b4a315 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.05, "l1_loss": 742.0, "l0": 160.0, "frac_variance_explained": 0.65234375, "cossim": 0.875390625, "l2_ratio": 0.885546875, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9694986820220945, "loss_zero": 12.452932643890382, "frac_recovered": 0.9471549808979034, "frac_alive": 0.7988823652267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3aea47264292ad226498259f988ade02d48f9b52 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d78957d97df26c7945fafa80854a1f3c9a7ba77 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.575, "l1_loss": 769.6, "l0": 160.0, "frac_variance_explained": 0.788671875, "cossim": 0.91484375, "l2_ratio": 0.916796875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.621287798881531, "loss_zero": 12.452932643890382, "frac_recovered": 0.9820147037506104, "frac_alive": 0.76953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..298b3fb63a30008537c7f36a2b8350da776d8d99 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..65bc9f0bbeefbd322607b899abd0555b8b2001e7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.3, "l1_loss": 731.6, "l0": 160.0, "frac_variance_explained": 0.833203125, "cossim": 0.93984375, "l2_ratio": 0.9390625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.512040066719055, "loss_zero": 12.452932643890382, "frac_recovered": 0.9929002702236176, "frac_alive": 0.6171332597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..50c57befc26b391eae418aff4fe5065f1ac3dc06 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..25750b449cd9162fa26ef3921237dd1c8c01dca7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.95, "l1_loss": 813.6, "l0": 160.0, "frac_variance_explained": 0.5359375, "cossim": 0.825390625, "l2_ratio": 0.8390625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.210920238494873, "loss_zero": 12.452932643890382, "frac_recovered": 0.9230841159820556, "frac_alive": 0.7958441972732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..558c578e8e855eea141876c5c9e59f4efb2c7c70 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3cb0cbb6e8bbb1a79aa497c74a414cc3618ea731 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 62.9, "l1_loss": 682.8, "l0": 160.0, "frac_variance_explained": 0.7109375, "cossim": 0.895703125, "l2_ratio": 0.90078125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.732248306274414, "loss_zero": 12.452932643890382, "frac_recovered": 0.9709289193153381, "frac_alive": 0.7855360507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77c4a02cf902eb3acf5fc8562bf5d8e63f4395fc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e1486fdb7921d89cf49c8ca5ff4ea469f2f8fc4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 50.275, "l1_loss": 738.8, "l0": 160.0, "frac_variance_explained": 0.847265625, "cossim": 0.93203125, "l2_ratio": 0.9328125, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.529589533805847, "loss_zero": 12.452932643890382, "frac_recovered": 0.9911516010761261, "frac_alive": 0.6820746660232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8dfc639a7f8131ef01318d5f21ceb65fc23e10b2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4fc8e4c553e9478c59bdb9760bcd5fd1e8b4ba1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 110.65, "l1_loss": 1370.4, "l0": 320.0, "frac_variance_explained": 0.345703125, "cossim": 0.668359375, "l2_ratio": 0.708203125, "relative_reconstruction_bias": 1.05546875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.5167050123214723, "loss_zero": 12.452932643890382, "frac_recovered": 0.8926664888858795, "frac_alive": 0.5868598222732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7910fbbd49c2caac25af01f9f057bba9a0a2c12c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e6b937d07bdc2366a64858f5686c09899ec26d75 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.15, "l1_loss": 1169.2, "l0": 320.0, "frac_variance_explained": 0.741015625, "cossim": 0.901953125, "l2_ratio": 0.903515625, "relative_reconstruction_bias": 0.996484375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.703572392463684, "loss_zero": 12.452932643890382, "frac_recovered": 0.973752784729004, "frac_alive": 0.9632161259651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0cd7d958a34875b7c851e08cc69ccadb43667e65 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a94dac6e740228df2800bbe69d07deed7bd03b1b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 52.55, "l1_loss": 1030.4, "l0": 320.0, "frac_variance_explained": 0.819921875, "cossim": 0.93203125, "l2_ratio": 0.93359375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.532721424102783, "loss_zero": 12.452932643890382, "frac_recovered": 0.9908467590808868, "frac_alive": 0.9549153447151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ee63225be948ecbe7551fd82d67431d3c9cf3d0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..231fab84cc4930f22312ea673ea730d70e1a411a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.125, "l1_loss": 1068.0, "l0": 320.0, "frac_variance_explained": 0.86484375, "cossim": 0.952734375, "l2_ratio": 0.953125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.485049676895142, "loss_zero": 12.452932643890382, "frac_recovered": 0.9955863654613495, "frac_alive": 0.729600727558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f66a3705195f9837c24d8108ba47934aa76a5a1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f3c140c2db4c0051e86c693926b542733ea142e1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.75, "l1_loss": 1098.4, "l0": 320.0, "frac_variance_explained": 0.61484375, "cossim": 0.869921875, "l2_ratio": 0.882421875, "relative_reconstruction_bias": 0.998828125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7715730905532836, "loss_zero": 12.452932643890382, "frac_recovered": 0.9669955611228943, "frac_alive": 0.9564344882965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a3809a7b9e69c34b4a2a1a4973da6d541cfa2d22 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5327d40ba9ad977b2517a0ddb6021e9b33026d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 57.4, "l1_loss": 1006.4, "l0": 320.0, "frac_variance_explained": 0.768359375, "cossim": 0.9140625, "l2_ratio": 0.9203125, "relative_reconstruction_bias": 1.005078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.636249876022339, "loss_zero": 12.452932643890382, "frac_recovered": 0.98048534989357, "frac_alive": 0.9611002802848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..00d22133945de445c83ce74a53fbbb66cdf1c0e9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d76a880b259a0f3a61fab682cc257d8daa92b39 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.6, "l1_loss": 1004.0, "l0": 319.99583435058594, "frac_variance_explained": 0.86640625, "cossim": 0.944921875, "l2_ratio": 0.947265625, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4963902711868284, "loss_zero": 12.452932643890382, "frac_recovered": 0.9944567561149598, "frac_alive": 0.8498806357383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..294d7bd16acc5e33f44f8e1da7eac48f8fe47f93 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5064d0962f06c290f4ff79e83f4b3cbb946c115c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 107.0, "l1_loss": 2512.0, "l0": 640.0, "frac_variance_explained": 0.3421875, "cossim": 0.755078125, "l2_ratio": 1.0265625, "relative_reconstruction_bias": 1.353125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8607425451278687, "loss_zero": 12.452932643890382, "frac_recovered": 0.9581126570701599, "frac_alive": 0.7616645097732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e010c9a51840709326ab3dee1de200f7530d25e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c4ee736793b2ad55819c4c4ab6baf99b8d128f3f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 53.375, "l1_loss": 1479.2, "l0": 640.0, "frac_variance_explained": 0.8, "cossim": 0.926171875, "l2_ratio": 0.9328125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5543439626693725, "loss_zero": 12.452932643890382, "frac_recovered": 0.9886787116527558, "frac_alive": 0.9983723759651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..21e1ec45268a6ed95b2c658da9872f60aa169805 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f4d707a4f555a5aa05a7dd146ddf8e3eb560d40 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.325, "l1_loss": 1506.4, "l0": 640.0, "frac_variance_explained": 0.870703125, "cossim": 0.953125, "l2_ratio": 0.95625, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.484053874015808, "loss_zero": 12.452932643890382, "frac_recovered": 0.9956881284713746, "frac_alive": 0.9110243320465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b8feb8441b4c970c3a0133c7fd3b464a9b4ecfc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9dc9a95a15090e70831c5b12adbf3c42646592aa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.1625, "l1_loss": 2229.6, "l0": 639.75, "frac_variance_explained": 0.9109375, "cossim": 0.968359375, "l2_ratio": 0.96875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.461870551109314, "loss_zero": 12.452932643890382, "frac_recovered": 0.9978919506072998, "frac_alive": 0.6906467080116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2c1b05ea752489b0f47d25627377e8a716b56ccd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0be8754a7b61db238956d45325d6e07564616d8c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.85, "l1_loss": 1591.2, "l0": 640.0, "frac_variance_explained": 0.71484375, "cossim": 0.903125, "l2_ratio": 0.9171875, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5837015867233277, "loss_zero": 12.452932643890382, "frac_recovered": 0.9857530713081359, "frac_alive": 0.9968532919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c198f4893e8b9519bc11872b65a91fa1f3088fa6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a2f9a1251b1269c9237d6386931e7b215160ab0b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.825, "l1_loss": 1537.6, "l0": 640.0, "frac_variance_explained": 0.8578125, "cossim": 0.9359375, "l2_ratio": 0.941015625, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5238553285598755, "loss_zero": 12.452932643890382, "frac_recovered": 0.9917183816432953, "frac_alive": 0.9885525107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9eb10b06657b80bdd4116c9eb3650a7a849eacba --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8061ba3da8f418fcd1da33016fc531d63b226f00 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.8, "l1_loss": 1916.0, "l0": 639.725, "frac_variance_explained": 0.893359375, "cossim": 0.962109375, "l2_ratio": 0.964453125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4669814586639403, "loss_zero": 12.452932643890382, "frac_recovered": 0.9973848223686218, "frac_alive": 0.8089192509651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e332880942cbfe53a982fc0a61700b36b645186 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d85ef6b4b35e78ef15ca1814fa1c0d74c031dd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 190.2, "l1_loss": 152.6, "l0": 20.0, "frac_variance_explained": 0.063671875, "cossim": 0.2880859375, "l2_ratio": 0.18115234375, "relative_reconstruction_bias": 0.62890625, "loss_original": 2.440642213821411, "loss_reconstructed": 11.478903865814209, "loss_zero": 12.452932643890382, "frac_recovered": 0.09734811633825302, "frac_alive": 0.1331380158662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a7ea3957c665dc79ac5892bd58ee59cdbe3d36f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..034e39382e822cc1e1636971a75ef366ebdcd22a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.6, "l1_loss": 367.8, "l0": 20.0, "frac_variance_explained": 0.396875, "cossim": 0.775390625, "l2_ratio": 0.791796875, "relative_reconstruction_bias": 0.996875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.913777303695679, "loss_zero": 12.452932643890382, "frac_recovered": 0.7530180633068084, "frac_alive": 0.1446940153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7a55639f8561b931f9dd086690faa6735b0ae1cd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..221f677b5b4a52312671197b5a5df770e1409153 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 90.3, "l1_loss": 397.0, "l0": 20.0, "frac_variance_explained": 0.68203125, "cossim": 0.884375, "l2_ratio": 0.889453125, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.205756974220276, "loss_zero": 12.452932643890382, "frac_recovered": 0.9236590206623078, "frac_alive": 0.1467013955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3db803803c5befc9106e6ccc85041eb91314323e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5685286810dd2c4c43db69dd250eba5aa6676604 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 84.0, "l1_loss": 361.8, "l0": 20.0, "frac_variance_explained": 0.698046875, "cossim": 0.8984375, "l2_ratio": 0.899609375, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.894393467903137, "loss_zero": 12.452932643890382, "frac_recovered": 0.9547594070434571, "frac_alive": 0.156032994389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c91688c73707607cdc5c1fce9a12212aaf91665 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6c51a659419aaaac6a1f5a3e5c704c828e199a0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 139.5, "l1_loss": 253.1, "l0": 20.0, "frac_variance_explained": 0.210546875, "cossim": 0.703125, "l2_ratio": 0.7328125, "relative_reconstruction_bias": 0.9921875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.966889142990112, "loss_zero": 12.452932643890382, "frac_recovered": 0.648001104593277, "frac_alive": 0.173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d7a01ad6df7f7ffa96e3616bba3b47f635edce7e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..01e5160aec823aeb3b0ea4279e79a97c2f592e4c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 97.6, "l1_loss": 375.0, "l0": 20.0, "frac_variance_explained": 0.62890625, "cossim": 0.8578125, "l2_ratio": 0.86171875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.5767399787902834, "loss_zero": 12.452932643890382, "frac_recovered": 0.8865642011165619, "frac_alive": 0.1312934011220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..81c24977c19a0f9183794438207b2c544592acb8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf9350d1e69e4be2e7aee2c5a068738b968340e5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 85.7, "l1_loss": 384.4, "l0": 20.0, "frac_variance_explained": 0.708203125, "cossim": 0.891015625, "l2_ratio": 0.891796875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9751620054244996, "loss_zero": 12.452932643890382, "frac_recovered": 0.9467064619064331, "frac_alive": 0.1495768278837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2108d975b0e196c90f4247356dd60736d0c2740 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..65dbef67b9e63833a47aa4541e1e9f7a5b6cfdbf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 187.6, "l1_loss": 291.6, "l0": 40.0, "frac_variance_explained": 0.109375, "cossim": 0.37265625, "l2_ratio": 0.24990234375, "relative_reconstruction_bias": 0.6703125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.446138095855712, "loss_zero": 12.452932643890382, "frac_recovered": 0.20063219517469405, "frac_alive": 0.2118055522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..616b89258474c723d89be30c1cef900f664628b4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b6908f8ebe484f482f2ef3d9061199b0fe07f186 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 117.25, "l1_loss": 551.2, "l0": 40.0, "frac_variance_explained": 0.502734375, "cossim": 0.805859375, "l2_ratio": 0.832421875, "relative_reconstruction_bias": 1.0078125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.291177248954773, "loss_zero": 12.452932643890382, "frac_recovered": 0.8151716470718384, "frac_alive": 0.265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..201b017ca62f095889ef23464bd238c37ac7951a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e67a646caca4c2a3cf96a10681df265f3c895571 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.2, "l1_loss": 540.4, "l0": 40.0, "frac_variance_explained": 0.75078125, "cossim": 0.906640625, "l2_ratio": 0.91171875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.873250865936279, "loss_zero": 12.452932643890382, "frac_recovered": 0.9568659245967865, "frac_alive": 0.2742513120174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d83a8e1b6e7566a30d7e0f97e1b3b192aa221aa2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..86ea860dd2398aeca1220429a7443e680bd8aac9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.05, "l1_loss": 541.4, "l0": 40.0, "frac_variance_explained": 0.805859375, "cossim": 0.92421875, "l2_ratio": 0.924609375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6645459651947023, "loss_zero": 12.452932643890382, "frac_recovered": 0.9777025818824768, "frac_alive": 0.2873263955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c451ccabd168327bfd3480a0d31b1c1739dbb0b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..54671eeaa127e8ace119c84fcdf9bf839db3b7f2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.0, "l1_loss": 419.0, "l0": 40.0, "frac_variance_explained": 0.29609375, "cossim": 0.745703125, "l2_ratio": 0.76328125, "relative_reconstruction_bias": 0.983984375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.11709246635437, "loss_zero": 12.452932643890382, "frac_recovered": 0.7328206181526185, "frac_alive": 0.3245985209941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9abc4752831d1894c4f92f20af4bb9d97ed1cc71 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..584fa60d63c2aecb6b31adbf0a357762da8b0a37 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.45, "l1_loss": 500.0, "l0": 40.0, "frac_variance_explained": 0.69296875, "cossim": 0.889453125, "l2_ratio": 0.892578125, "relative_reconstruction_bias": 1.0078125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.089809012413025, "loss_zero": 12.452932643890382, "frac_recovered": 0.9352370738983155, "frac_alive": 0.2636176347732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aad7a52f272a51ea01919a1d83ee50655515d62b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e93ed79570f1884a2241f1c5333c3dd99cfa69b2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.55, "l1_loss": 519.2, "l0": 40.0, "frac_variance_explained": 0.780078125, "cossim": 0.915234375, "l2_ratio": 0.917578125, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7118747234344482, "loss_zero": 12.452932643890382, "frac_recovered": 0.9729695200920105, "frac_alive": 0.29150390625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2cf32618de55f608d233fa127aff8dcf55ab00c3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c6d1486708860c9c30678a8b6f61343f882f6664 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.5, "l1_loss": 544.8, "l0": 80.0, "frac_variance_explained": 0.17421875, "cossim": 0.469140625, "l2_ratio": 0.3478515625, "relative_reconstruction_bias": 0.741796875, "loss_original": 2.440642213821411, "loss_reconstructed": 8.684722423553467, "loss_zero": 12.452932643890382, "frac_recovered": 0.3766397684812546, "frac_alive": 0.31591796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef26d7f04e536d2acd1702f7e83c56f345fb4675 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3897099ec7a6db30a383722ff90bee31dfd97e10 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 104.15, "l1_loss": 791.2, "l0": 80.0, "frac_variance_explained": 0.593359375, "cossim": 0.850390625, "l2_ratio": 0.86015625, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.6241471767425537, "loss_zero": 12.452932643890382, "frac_recovered": 0.8817910313606262, "frac_alive": 0.4998372495174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..12571cbd05e08118dd01de3b34262fb911bf5fb9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..35700be579c39c49120cfcf09a9547e4e11b165b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 75.45, "l1_loss": 650.4, "l0": 80.0, "frac_variance_explained": 0.7671875, "cossim": 0.9203125, "l2_ratio": 0.92109375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7256054162979124, "loss_zero": 12.452932643890382, "frac_recovered": 0.9716189205646515, "frac_alive": 0.4869791567325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f2e4dba0df5ca3288e507ca1b2a608f5b8ebdce7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..27444dc0d1b2f550be7161cd0ba7a3e58bb0ae37 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.225, "l1_loss": 721.6, "l0": 79.95416717529297, "frac_variance_explained": 0.83046875, "cossim": 0.938671875, "l2_ratio": 0.939453125, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.573888349533081, "loss_zero": 12.452932643890382, "frac_recovered": 0.9867395639419556, "frac_alive": 0.464898020029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1209e23db4093ab4c1d0ebb374773f3116d27d0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8402dfd73a8cee1c97ecb91c583d6695909e5936 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 124.2, "l1_loss": 683.6, "l0": 80.0, "frac_variance_explained": 0.394140625, "cossim": 0.7765625, "l2_ratio": 0.7953125, "relative_reconstruction_bias": 0.993359375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.2204547882080075, "loss_zero": 12.452932643890382, "frac_recovered": 0.8223262250423431, "frac_alive": 0.5598958134651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66d54e0631d52bb2279bdf2c200c4a111fe91aa0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f87952624007c98957a116f083c6ee5a11e36032 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.4, "l1_loss": 661.6, "l0": 80.0, "frac_variance_explained": 0.72890625, "cossim": 0.903515625, "l2_ratio": 0.90703125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8219464302062987, "loss_zero": 12.452932643890382, "frac_recovered": 0.9619998276233673, "frac_alive": 0.4825846254825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f95d194e209e5a1dee5b3def5065cf9de77df168 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0adf9bf761b856598adaa10083131367a7d0915f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.35, "l1_loss": 658.8, "l0": 80.0, "frac_variance_explained": 0.797265625, "cossim": 0.9328125, "l2_ratio": 0.934375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.605643391609192, "loss_zero": 12.452932643890382, "frac_recovered": 0.9835797488689423, "frac_alive": 0.4813910722732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c23107b79140e2155cfa7e56170f914b341846e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a90e07cfc5bf029bb8caac024a6837d6ab4755e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 176.1, "l1_loss": 1064.0, "l0": 160.0, "frac_variance_explained": 0.25390625, "cossim": 0.56953125, "l2_ratio": 0.4927734375, "relative_reconstruction_bias": 0.853515625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.878991222381591, "loss_zero": 12.452932643890382, "frac_recovered": 0.656810873746872, "frac_alive": 0.426812082529068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7bafa6e18f24710367ce63f122686d9d6fbffd4d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..200e725ecb4975feab123db57e5976de8c11cbd2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 90.8, "l1_loss": 984.8, "l0": 160.0, "frac_variance_explained": 0.6703125, "cossim": 0.883984375, "l2_ratio": 0.89140625, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0399241209030152, "loss_zero": 12.452932643890382, "frac_recovered": 0.9401364743709564, "frac_alive": 0.7928059697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fff2e19500795341d4b5a79e72084a57dbab18f1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..206b2d6eaae8bd2c8fa07bfc8f15dd3d9753c7be --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.85, "l1_loss": 874.0, "l0": 160.0, "frac_variance_explained": 0.785546875, "cossim": 0.928515625, "l2_ratio": 0.9328125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.617552089691162, "loss_zero": 12.452932643890382, "frac_recovered": 0.9823945164680481, "frac_alive": 0.7352430820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16887048da6813ebf97e86e01d10b73134b7fe2f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4289beebd493d734fc9afdb39946dd1bd278375f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 61.4, "l1_loss": 1017.2, "l0": 159.88333435058593, "frac_variance_explained": 0.85546875, "cossim": 0.946875, "l2_ratio": 0.946875, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5257870674133303, "loss_zero": 12.452932643890382, "frac_recovered": 0.9915340363979339, "frac_alive": 0.6569553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..179ae3ffcb0d1db2ae7c88abe935a42c48d971b6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0150c611174a6cfa3ecaba830b71397373e2c38b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 110.3, "l1_loss": 1083.6, "l0": 160.0, "frac_variance_explained": 0.525, "cossim": 0.824609375, "l2_ratio": 0.843359375, "relative_reconstruction_bias": 1.00859375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.4192012310028077, "loss_zero": 12.452932643890382, "frac_recovered": 0.9023053884506226, "frac_alive": 0.7994791865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..78bd5552314365ae5e0ae9926b4d90815571b3c4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..52c76cbe7c78f97dbaa467459ba87e5bf530a8e1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.55, "l1_loss": 882.4, "l0": 160.0, "frac_variance_explained": 0.75078125, "cossim": 0.913671875, "l2_ratio": 0.91796875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.699724006652832, "loss_zero": 12.452932643890382, "frac_recovered": 0.9741968214511871, "frac_alive": 0.7721896767616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..34db461227404faefffeecc544fdecc5f1ec65f5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c9db107be2aedca3b383c8023c777cdced4b005 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.3, "l1_loss": 904.4, "l0": 160.0, "frac_variance_explained": 0.830078125, "cossim": 0.94296875, "l2_ratio": 0.941796875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5491158962249756, "loss_zero": 12.452932643890382, "frac_recovered": 0.9892117202281951, "frac_alive": 0.6984049677848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c26691a634d9aa2a511472324103e114ae660eca --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d3315dd45f25e4788a7f21fb76055e5406cb2c5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 148.2, "l1_loss": 1852.0, "l0": 320.0, "frac_variance_explained": 0.349609375, "cossim": 0.670703125, "l2_ratio": 0.7125, "relative_reconstruction_bias": 1.059375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.601191687583923, "loss_zero": 12.452932643890382, "frac_recovered": 0.8842435717582703, "frac_alive": 0.587022602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..837ca9ca7a8325a0d5b9385065683763c0bc5a70 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb8237801914464571efb2f39ff4e15fd93a9897 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.8, "l1_loss": 1368.8, "l0": 320.0, "frac_variance_explained": 0.726953125, "cossim": 0.90625, "l2_ratio": 0.912890625, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7479029655456544, "loss_zero": 12.452932643890382, "frac_recovered": 0.9693350851535797, "frac_alive": 0.9601779580116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4bcf670ac914a8b18eb0f6ddb56bde8ee20e3c35 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..23b2d00535becf7fcc4a420d995311cd3c9b4143 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 62.1, "l1_loss": 1234.4, "l0": 320.0, "frac_variance_explained": 0.84453125, "cossim": 0.94296875, "l2_ratio": 0.943359375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5427860260009765, "loss_zero": 12.452932643890382, "frac_recovered": 0.989845073223114, "frac_alive": 0.9289821982383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1ee948c0584939ef7d593620fd9a4888316f50d8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..36a8f950aa2763b9533cb55b6c2f2eba95e72098 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 53.075, "l1_loss": 1509.6, "l0": 319.62916870117186, "frac_variance_explained": 0.895703125, "cossim": 0.9609375, "l2_ratio": 0.960546875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4898521900177, "loss_zero": 12.452932643890382, "frac_recovered": 0.9951108396053314, "frac_alive": 0.7687717080116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f5a03f3b83ed13f3b6a3d4004b96699dc77c1013 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c49311355f73a2c7d0d059b09a70cd245d974170 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.5, "l1_loss": 1762.4, "l0": 320.0, "frac_variance_explained": 0.674609375, "cossim": 0.871875, "l2_ratio": 0.887890625, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8713510036468506, "loss_zero": 12.452932643890382, "frac_recovered": 0.9570393919944763, "frac_alive": 0.94921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6665d94e27c2ffcabd1528187c3669ac1f93cdf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e64e313661b95ab35517b7a9998d42190d47112d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.6, "l1_loss": 1289.6, "l0": 320.0, "frac_variance_explained": 0.80625, "cossim": 0.930859375, "l2_ratio": 0.93359375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6019895553588865, "loss_zero": 12.452932643890382, "frac_recovered": 0.9839423656463623, "frac_alive": 0.9495985507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83e143ed1fba9b83c2a56bfd63f49c9f5c95d371 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a95331db7a8f9c0f753c64a559ba6449d36bd528 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.15, "l1_loss": 1327.2, "l0": 319.84583435058596, "frac_variance_explained": 0.874609375, "cossim": 0.9515625, "l2_ratio": 0.951171875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.515988826751709, "loss_zero": 12.452932643890382, "frac_recovered": 0.9925126552581787, "frac_alive": 0.9074435830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa2e0c3b0b588c6ca81a9babb400c29f3541bdc3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ebb28dcc914c10111f850ed5caa904cb7f9bb4b0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.7, "l1_loss": 3342.4, "l0": 640.0, "frac_variance_explained": 0.334375, "cossim": 0.755078125, "l2_ratio": 1.02890625, "relative_reconstruction_bias": 1.36328125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.949670839309692, "loss_zero": 12.452932643890382, "frac_recovered": 0.9492510080337524, "frac_alive": 0.7532551884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa45598f4276b0856fcf91bec82ddb177059382d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d10684d32a8f771da7f1715187ed3f39571b36d3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.35, "l1_loss": 1944.0, "l0": 640.0, "frac_variance_explained": 0.797265625, "cossim": 0.929296875, "l2_ratio": 0.93671875, "relative_reconstruction_bias": 1.00703125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5808478593826294, "loss_zero": 12.452932643890382, "frac_recovered": 0.9860338687896728, "frac_alive": 0.99755859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..214fd6312d8626846d7e1a889b68fe9bb4a10a9b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f2817bc7d1226d538539e6c7d1812aa1b0f22fa4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 53.525, "l1_loss": 1890.4, "l0": 640.0, "frac_variance_explained": 0.878515625, "cossim": 0.96015625, "l2_ratio": 0.961328125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4936158418655396, "loss_zero": 12.452932643890382, "frac_recovered": 0.9947379648685455, "frac_alive": 0.8681098222732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..42f3abb2ce8a2ab030eba85459f4b9450507348a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9fdb2c24fd81d60d2797256a45b0d5559f3c9a83 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.175, "l1_loss": 3017.6, "l0": 640.0, "frac_variance_explained": 0.9296875, "cossim": 0.974609375, "l2_ratio": 0.97578125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.469519329071045, "loss_zero": 12.452932643890382, "frac_recovered": 0.9971326410770416, "frac_alive": 0.762803852558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6346c2b3b2ef8f4c8f6fac914893bf7f01dee503 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c09a6e737a5a8e004135fb82dc931fc23e290301 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 84.0, "l1_loss": 2200.0, "l0": 640.0, "frac_variance_explained": 0.735546875, "cossim": 0.902734375, "l2_ratio": 0.921875, "relative_reconstruction_bias": 1.015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6315461158752442, "loss_zero": 12.452932643890382, "frac_recovered": 0.9809844374656678, "frac_alive": 0.9978298544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e608fab59117b74ec46bf2a7dc0e2cb870e0ea01 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b6f712f50710063907ec3af585839f1a7626fff --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.85, "l1_loss": 1758.4, "l0": 640.0, "frac_variance_explained": 0.8234375, "cossim": 0.93984375, "l2_ratio": 0.943359375, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5422339916229246, "loss_zero": 12.452932643890382, "frac_recovered": 0.9898876786231995, "frac_alive": 0.9901258945465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d7fa1b6454d60c9318283f2ab4a4ff7c85e7636 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e6997f3f1e2cc372b7e97d6057aafcf6dd4ff5a4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.35, "l1_loss": 2516.8, "l0": 640.0, "frac_variance_explained": 0.903515625, "cossim": 0.9671875, "l2_ratio": 0.968359375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4744928598403932, "loss_zero": 12.452932643890382, "frac_recovered": 0.9966399788856506, "frac_alive": 0.7989909052848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..14f20a5037b6180b116358129dfce88c2246372b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d5836badfd069119a2ec1677a99e8a7e33b4ef7c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 290.6, "l1_loss": 235.2, "l0": 20.0, "frac_variance_explained": 0.065234375, "cossim": 0.289453125, "l2_ratio": 0.18271484375, "relative_reconstruction_bias": 0.631640625, "loss_original": 2.440642213821411, "loss_reconstructed": 11.838497161865234, "loss_zero": 12.452932643890382, "frac_recovered": 0.06133820600807667, "frac_alive": 0.1409505158662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f8bb2e8a4dcbb597b4fcd97f7c2806ac6e2e511 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f954c72fe2bef6aa01e804653eed40ed9bb585d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 198.0, "l1_loss": 520.4, "l0": 20.0, "frac_variance_explained": 0.38984375, "cossim": 0.75703125, "l2_ratio": 0.7765625, "relative_reconstruction_bias": 0.98046875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.558078384399414, "loss_zero": 12.452932643890382, "frac_recovered": 0.7885506510734558, "frac_alive": 0.1609700471162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9f3e7874ece2e9469b92e02fa089db196eb828f9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9190cf2b477bcd4066ce41a781bd188a3c3a1cc2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 135.85, "l1_loss": 569.2, "l0": 20.0, "frac_variance_explained": 0.672265625, "cossim": 0.883203125, "l2_ratio": 0.886328125, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0247864961624145, "loss_zero": 12.452932643890382, "frac_recovered": 0.941745525598526, "frac_alive": 0.1605902761220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fdeaa0bf0d9aa57aa1830fa111cc443bb911d62b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..824b848e6177ef943b8ba264d9f73fa3de7606b8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 126.2, "l1_loss": 588.8, "l0": 20.0, "frac_variance_explained": 0.72265625, "cossim": 0.901171875, "l2_ratio": 0.902734375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8494953393936155, "loss_zero": 12.452932643890382, "frac_recovered": 0.9592397749423981, "frac_alive": 0.1686197966337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..177b18f7e2570ba282a197f3c71fbfaf60119fae --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b2da5d3349a2572b8a5debf4802f0cecbd63cf33 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 219.3, "l1_loss": 377.0, "l0": 20.0, "frac_variance_explained": 0.19375, "cossim": 0.6859375, "l2_ratio": 0.6953125, "relative_reconstruction_bias": 0.98515625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.777294731140136, "loss_zero": 12.452932643890382, "frac_recovered": 0.6669279158115387, "frac_alive": 0.1839735209941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9ce24e34713147835950403da9172d74ff9da50 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd9b1ca392e77f0515e83a32b03da012e0060f89 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 150.1, "l1_loss": 557.6, "l0": 20.0, "frac_variance_explained": 0.610546875, "cossim": 0.85859375, "l2_ratio": 0.862109375, "relative_reconstruction_bias": 1.003515625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.230862021446228, "loss_zero": 12.452932643890382, "frac_recovered": 0.9211653947830201, "frac_alive": 0.150661900639534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b41427c60f7010ea2db689ad7fd349046099c5d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..186147c35b0d7eb4db6789c1cec7926daa467b28 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 129.25, "l1_loss": 578.8, "l0": 20.0, "frac_variance_explained": 0.696484375, "cossim": 0.8890625, "l2_ratio": 0.8890625, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9045059442520142, "loss_zero": 12.452932643890382, "frac_recovered": 0.9537485361099243, "frac_alive": 0.1656358540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fc58d5015704ce15b80389d2ab7d85552848e6dc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53d4e2485324f31768f8da6c63820eee91c57b17 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 282.8, "l1_loss": 444.8, "l0": 40.0, "frac_variance_explained": 0.109765625, "cossim": 0.375, "l2_ratio": 0.25234375, "relative_reconstruction_bias": 0.671875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.810188674926758, "loss_zero": 12.452932643890382, "frac_recovered": 0.2641167253255844, "frac_alive": 0.2122938334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05c808cf576c7f2053c6790972dca820187f678a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..071c28d2c88fde8a9bd4feb331de685b54598cf7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 179.1, "l1_loss": 748.8, "l0": 40.0, "frac_variance_explained": 0.4484375, "cossim": 0.789453125, "l2_ratio": 0.821484375, "relative_reconstruction_bias": 1.0234375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.051023459434509, "loss_zero": 12.452932643890382, "frac_recovered": 0.839172750711441, "frac_alive": 0.3034396767616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cb48f37bae6ee1ec15a3253c88230eaa7638ac62 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c69bab1147771689c9b82b2a76525f0a7dfbb327 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 128.05, "l1_loss": 763.6, "l0": 40.0, "frac_variance_explained": 0.710546875, "cossim": 0.896484375, "l2_ratio": 0.901171875, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8199576854705812, "loss_zero": 12.452932643890382, "frac_recovered": 0.9621783435344696, "frac_alive": 0.2947591245174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d538b33ee1d04d0396d78bc0d30f72f6e98a038 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0fdd7a005f88c1dea9aca8c3ab5a4ec445a40106 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 113.3, "l1_loss": 790.4, "l0": 40.0, "frac_variance_explained": 0.7765625, "cossim": 0.916796875, "l2_ratio": 0.918359375, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6750154972076414, "loss_zero": 12.452932643890382, "frac_recovered": 0.9766558885574341, "frac_alive": 0.3024088442325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f754a4634af75e49195ea421e1bda825514251dc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f36592ebdf4e374cb411585180c8e5c55d735b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 216.3, "l1_loss": 648.4, "l0": 40.0, "frac_variance_explained": 0.274609375, "cossim": 0.719921875, "l2_ratio": 0.730859375, "relative_reconstruction_bias": 0.96171875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.922332763671875, "loss_zero": 12.452932643890382, "frac_recovered": 0.7522672057151795, "frac_alive": 0.3479275107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..73403f9743df27f90d0057ae8fb6ced32f609804 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba945bb6e419b3ff6dc861e6cb0abc38ee5e1923 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 133.85, "l1_loss": 754.4, "l0": 40.0, "frac_variance_explained": 0.68203125, "cossim": 0.881640625, "l2_ratio": 0.881640625, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.949249505996704, "loss_zero": 12.452932643890382, "frac_recovered": 0.9492873609066009, "frac_alive": 0.2939453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..201a5fdb1212d9fd42b7919ebfe0084ac8ed5612 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3323f08a4c08530a88027fc8bb83a328164d81f1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 114.75, "l1_loss": 768.4, "l0": 40.0, "frac_variance_explained": 0.7609375, "cossim": 0.915234375, "l2_ratio": 0.915625, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7129630565643312, "loss_zero": 12.452932643890382, "frac_recovered": 0.9728759765625, "frac_alive": 0.3045789897441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c1f766ff42965376c47fbd8a31bdb789a440e6cf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73875fb9b03c5bd9e4d584194aacaccf72dbeec9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 277.2, "l1_loss": 850.4, "l0": 80.0, "frac_variance_explained": 0.171875, "cossim": 0.473046875, "l2_ratio": 0.3517578125, "relative_reconstruction_bias": 0.741015625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.440323400497436, "loss_zero": 12.452932643890382, "frac_recovered": 0.5010212510824203, "frac_alive": 0.3150499165058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0b7a60ccafaec528fa121b521f631a10ddd4c7a9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f253340a1d282e28ee27765d28d0a5e76164376c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 167.0, "l1_loss": 1076.0, "l0": 80.0, "frac_variance_explained": 0.5359375, "cossim": 0.82421875, "l2_ratio": 0.837890625, "relative_reconstruction_bias": 1.007421875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.4837981939315794, "loss_zero": 12.452932643890382, "frac_recovered": 0.8958383560180664, "frac_alive": 0.5556640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..00a1c6ba0a31acabc6de7add8b3c2f2ac893fbf5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6339262e11e3c847a23e8523c910a1417116bd2a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 118.3, "l1_loss": 1036.0, "l0": 80.0, "frac_variance_explained": 0.759375, "cossim": 0.91328125, "l2_ratio": 0.91484375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.720684838294983, "loss_zero": 12.452932643890382, "frac_recovered": 0.9721010744571685, "frac_alive": 0.5036892294883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..227fce94b6a87371db43120188186315f70f4d25 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3259faa773731cc2155c8932582cd0d904b1ae4c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 102.45, "l1_loss": 1077.6, "l0": 80.0, "frac_variance_explained": 0.8140625, "cossim": 0.9328125, "l2_ratio": 0.934375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6034613132476805, "loss_zero": 12.452932643890382, "frac_recovered": 0.9837961971759797, "frac_alive": 0.4701063334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..499daa00680f3a0b2f01d56b5bbb201a38558d65 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8eb1e8043050c9deebf295999c1b573c7a1021a8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 189.4, "l1_loss": 993.6, "l0": 80.0, "frac_variance_explained": 0.37578125, "cossim": 0.7609375, "l2_ratio": 0.783203125, "relative_reconstruction_bias": 1.0109375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.112102770805359, "loss_zero": 12.452932643890382, "frac_recovered": 0.8331315755844116, "frac_alive": 0.5949978232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eddcc5c45c26004f66959772f108dedd34264a1c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..608811f45678ae9d3d589c5e9dd449fd3f15ff8a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.95, "l1_loss": 1101.6, "l0": 80.0, "frac_variance_explained": 0.73359375, "cossim": 0.894921875, "l2_ratio": 0.898828125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8188110113143923, "loss_zero": 12.452932643890382, "frac_recovered": 0.962310516834259, "frac_alive": 0.5104166865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb1ede746960fb1f098ca67a3b0b05ed23202948 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0cd7468e4dd0f2f389073f1401e208cbd1ad9832 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 105.05, "l1_loss": 1054.8, "l0": 80.0, "frac_variance_explained": 0.805859375, "cossim": 0.930078125, "l2_ratio": 0.930078125, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.62584183216095, "loss_zero": 12.452932643890382, "frac_recovered": 0.9815659761428833, "frac_alive": 0.4847005307674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..07f2d419dab06cd4a041334eca31345450d1490f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e59f0ceb9b3d516a2f14dfc0fd1e3cf96b6079b3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 267.9, "l1_loss": 1635.2, "l0": 160.0, "frac_variance_explained": 0.25859375, "cossim": 0.575, "l2_ratio": 0.498828125, "relative_reconstruction_bias": 0.8546875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.914085578918457, "loss_zero": 12.452932643890382, "frac_recovered": 0.75317023396492, "frac_alive": 0.451226145029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c271c3a7887ca3383a8f0c5d485b88b1eecb4c8a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..13ce9400845056dd1ce6ee7dcc647b1cd33ed544 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 147.7, "l1_loss": 1464.0, "l0": 160.0, "frac_variance_explained": 0.625390625, "cossim": 0.861328125, "l2_ratio": 0.87109375, "relative_reconstruction_bias": 1.00859375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0243191957473754, "loss_zero": 12.452932643890382, "frac_recovered": 0.9417394459247589, "frac_alive": 0.830078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf384274ee5f32a3fdf1392ebafd79f270164a0a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a15eb444b45b0ec08119ccaa1fb594337fc59bb6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 103.6, "l1_loss": 1430.4, "l0": 160.0, "frac_variance_explained": 0.8140625, "cossim": 0.933203125, "l2_ratio": 0.934375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6129833221435548, "loss_zero": 12.452932643890382, "frac_recovered": 0.982846474647522, "frac_alive": 0.7294921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e0fefb32b51d810efd1cc3905da6befb30e6735 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..972263f9185009fd6ba0cee779dab43d280af9fe --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.2, "l1_loss": 1544.0, "l0": 160.0, "frac_variance_explained": 0.852734375, "cossim": 0.94609375, "l2_ratio": 0.94609375, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5553539514541628, "loss_zero": 12.452932643890382, "frac_recovered": 0.988594776391983, "frac_alive": 0.6384548544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2072e564f4064dae1ea1c9de71c1f7be799c4d1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d114f7bc072f4426e4ba7e559a2ee35fa5394713 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 172.6, "l1_loss": 1547.2, "l0": 160.0, "frac_variance_explained": 0.492578125, "cossim": 0.81171875, "l2_ratio": 0.82421875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.4245002269744873, "loss_zero": 12.452932643890382, "frac_recovered": 0.9017863512039185, "frac_alive": 0.8332248330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e092eec31c7a35f5f806d90aa36058723abcbb80 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bd1a2346fa596ffdcf3aae267f7c14a24762b906 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 123.9, "l1_loss": 1403.2, "l0": 160.0, "frac_variance_explained": 0.730078125, "cossim": 0.9015625, "l2_ratio": 0.905078125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7305481672286986, "loss_zero": 12.452932643890382, "frac_recovered": 0.9711121261119843, "frac_alive": 0.7883029580116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ccf69fab5587a3092031c2c67fe9c05157793659 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9c97d3c4247aa803f431abd53a4dbe082ccf83fa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 99.85, "l1_loss": 1436.8, "l0": 160.0, "frac_variance_explained": 0.819921875, "cossim": 0.93671875, "l2_ratio": 0.93515625, "relative_reconstruction_bias": 0.998046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5777201890945434, "loss_zero": 12.452932643890382, "frac_recovered": 0.9863627076148986, "frac_alive": 0.6957465410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b6909dbcb37992de55dc3a03fbaae6a397fa84c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3dd76899fd6f15e8e0f74642d61c40ed7fd170f3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 222.2, "l1_loss": 2817.6, "l0": 320.0, "frac_variance_explained": 0.358203125, "cossim": 0.67421875, "l2_ratio": 0.7171875, "relative_reconstruction_bias": 1.065625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.5219098329544067, "loss_zero": 12.452932643890382, "frac_recovered": 0.8921224176883698, "frac_alive": 0.6167534589767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..19fdb63a262fe0b7d7cdf18cfc1046389e271138 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fea3052198053230a1f961fb99e0fb9b35c472bf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.3, "l1_loss": 2309.6, "l0": 320.0, "frac_variance_explained": 0.729296875, "cossim": 0.895703125, "l2_ratio": 0.89921875, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7776209115982056, "loss_zero": 12.452932643890382, "frac_recovered": 0.9663821458816528, "frac_alive": 0.9741753339767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7f2a81f1a7d12640475196eaf7d5a166ff9f8a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a4746f00f58a56895ce66d27cbd9bc170e0a401f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 97.6, "l1_loss": 1986.4, "l0": 320.0, "frac_variance_explained": 0.83359375, "cossim": 0.939453125, "l2_ratio": 0.9421875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5706740856170653, "loss_zero": 12.452932643890382, "frac_recovered": 0.9870663106441497, "frac_alive": 0.9305012822151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..53071fd3a25c87b6c2a29b4eeebf51a8825587c4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d133e7a0daa967d779d2a41b002d8078272629be --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.65, "l1_loss": 2379.2, "l0": 320.0, "frac_variance_explained": 0.881640625, "cossim": 0.958984375, "l2_ratio": 0.96015625, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5211398363113404, "loss_zero": 12.452932643890382, "frac_recovered": 0.9920058131217957, "frac_alive": 0.7404513955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d3a90da059fa307f7ca7426506fdb8532c430c9f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af35a90763804860df25267b9d779454ebe46bf5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 150.9, "l1_loss": 2267.2, "l0": 320.0, "frac_variance_explained": 0.603515625, "cossim": 0.854296875, "l2_ratio": 0.874609375, "relative_reconstruction_bias": 1.0125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.943194341659546, "loss_zero": 12.452932643890382, "frac_recovered": 0.9498593986034394, "frac_alive": 0.9683159589767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae56d593584e9f27f3ef613618cef18db45608b4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..82b7fb40afa4cf3a624113b81f9a7444931ffad5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.2, "l1_loss": 2189.6, "l0": 320.0, "frac_variance_explained": 0.780078125, "cossim": 0.9203125, "l2_ratio": 0.92265625, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.655140733718872, "loss_zero": 12.452932643890382, "frac_recovered": 0.9786317229270936, "frac_alive": 0.9635416865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef84643559c7c476251bf2d4e6912bdf6736940d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5823d644854147c092e5437575b2c706aa025e3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.25, "l1_loss": 2033.6, "l0": 320.0, "frac_variance_explained": 0.8546875, "cossim": 0.95, "l2_ratio": 0.947265625, "relative_reconstruction_bias": 0.997265625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.538654351234436, "loss_zero": 12.452932643890382, "frac_recovered": 0.9902595639228821, "frac_alive": 0.8887261152267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d97dcd18b5a4b6619da05927f46d1efb667d846 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb8292317b8d44a13f35179992564630226ec3c7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 217.4, "l1_loss": 5139.2, "l0": 640.0, "frac_variance_explained": 0.3359375, "cossim": 0.756640625, "l2_ratio": 1.034375, "relative_reconstruction_bias": 1.36640625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0001665115356446, "loss_zero": 12.452932643890382, "frac_recovered": 0.9441926419734955, "frac_alive": 0.7566189169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..818ab47d1be2926cbdee23c0a42bf13bc1bd2e16 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..de576290f3730ab65bfd2e24355c1679ad2d64cf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.5, "l1_loss": 3238.4, "l0": 640.0, "frac_variance_explained": 0.7859375, "cossim": 0.915234375, "l2_ratio": 0.9203125, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.630190372467041, "loss_zero": 12.452932643890382, "frac_recovered": 0.9811166226863861, "frac_alive": 0.9990234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd68e9af885f18cf1c3a699650595b72733796f8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5258b7b8bd2570a6686474ec91a573479516cb1a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 88.6, "l1_loss": 2838.4, "l0": 640.0, "frac_variance_explained": 0.86015625, "cossim": 0.95078125, "l2_ratio": 0.953125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.529343676567078, "loss_zero": 12.452932643890382, "frac_recovered": 0.9911853194236755, "frac_alive": 0.9651692509651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..699dfe996b38fff2f8d22c2bf1f6ec6fd88da886 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1283848ffadd9b55afc3fce90b32d0da6dbb17aa --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 70.5, "l1_loss": 4704.0, "l0": 640.0, "frac_variance_explained": 0.91484375, "cossim": 0.969921875, "l2_ratio": 0.970703125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4923532485961912, "loss_zero": 12.452932643890382, "frac_recovered": 0.9948698222637177, "frac_alive": 0.7985026240348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ab9e7f85c650b04175b6f0f1afdc306eeb4c937d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..035a416dad407c1fb59f40caaa098ef0bcf6218c --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 132.45, "l1_loss": 3212.8, "l0": 640.0, "frac_variance_explained": 0.70234375, "cossim": 0.89296875, "l2_ratio": 0.909375, "relative_reconstruction_bias": 1.01328125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.699756073951721, "loss_zero": 12.452932643890382, "frac_recovered": 0.9741712868213653, "frac_alive": 0.99853515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..409cce83b48954e796926387a4515161c7950acc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0b1da91f482873e7a45e38dcc0643e266a9e0e56 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 104.05, "l1_loss": 2830.4, "l0": 640.0, "frac_variance_explained": 0.8125, "cossim": 0.933984375, "l2_ratio": 0.937109375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.584867167472839, "loss_zero": 12.452932643890382, "frac_recovered": 0.9856380462646485, "frac_alive": 0.998046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1000ad0f9717c42eed83b127b6fc6a594b33b865 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fcee2a7827ad2fd25e6b27d6cf7bb1410b2c1ab9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.6, "l1_loss": 3915.2, "l0": 640.0, "frac_variance_explained": 0.909765625, "cossim": 0.965234375, "l2_ratio": 0.965234375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.497997522354126, "loss_zero": 12.452932643890382, "frac_recovered": 0.9943046987056732, "frac_alive": 0.904893696308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9810bc3442b4e8c15d30569ba35334e1a666f662 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f8aca06bb08504218b73a207be6f2a5a66f1991a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 79.4, "l1_loss": 65.425, "l0": 20.0, "frac_variance_explained": 0.06796875, "cossim": 0.2947265625, "l2_ratio": 0.1853515625, "relative_reconstruction_bias": 0.629296875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.95833683013916, "loss_zero": 12.452932643890382, "frac_recovered": 0.24916650652885436, "frac_alive": 0.1384006142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..190b360c54bab3a162b84405746ab9058a366487 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e57a2ad36dfde3ecbba15f8423c9ea5af1c9cf09 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.0, "l1_loss": 172.5, "l0": 20.0, "frac_variance_explained": 0.431640625, "cossim": 0.794140625, "l2_ratio": 0.797265625, "relative_reconstruction_bias": 0.99296875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.076321721076965, "loss_zero": 12.452932643890382, "frac_recovered": 0.8366711139678955, "frac_alive": 0.1558702290058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9bdff7a03da31ceb8f0113479eb4f5c9e125cbdb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8183fd73c742fe64414c6404247c8c32757d4c5e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 33.775, "l1_loss": 163.3, "l0": 20.0, "frac_variance_explained": 0.705859375, "cossim": 0.898046875, "l2_ratio": 0.898046875, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.718431806564331, "loss_zero": 12.452932643890382, "frac_recovered": 0.9723090648651123, "frac_alive": 0.1581488698720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b419a7e72c89d9e96ad8c6945da0bae9e106787a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e915d1cbca2795d0c535baa4f9f67c53e3534ab5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.0, "l1_loss": 169.7, "l0": 20.0, "frac_variance_explained": 0.769140625, "cossim": 0.922265625, "l2_ratio": 0.921484375, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6113957166671753, "loss_zero": 12.452932643890382, "frac_recovered": 0.9829996824264526, "frac_alive": 0.1534288227558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1afd728ae6ba582aeadcb473dda0a92c494d823d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..56973e9569bd6cd7a5105e91ce6536ab6710fc7f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.725, "l1_loss": 124.9, "l0": 20.0, "frac_variance_explained": 0.2296875, "cossim": 0.6859375, "l2_ratio": 0.669140625, "relative_reconstruction_bias": 0.962890625, "loss_original": 2.440642213821411, "loss_reconstructed": 6.1103638172149655, "loss_zero": 12.452932643890382, "frac_recovered": 0.6335384666919708, "frac_alive": 0.1831597238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7538519b4ec59fb49fd7c6aa02bee5fdd4432ec6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0bd814e19b6bf154f449d548ba04245fe28e51ad --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.7125, "l1_loss": 166.0, "l0": 20.0, "frac_variance_explained": 0.673046875, "cossim": 0.886328125, "l2_ratio": 0.891015625, "relative_reconstruction_bias": 1.009375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.839069890975952, "loss_zero": 12.452932643890382, "frac_recovered": 0.9602638244628906, "frac_alive": 0.153591588139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..148c5d699953c3ff213837675ab95a276f5cac17 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a33ecc8dbf37b996f6dc39b1dc0220527f4daa83 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.125, "l1_loss": 168.2, "l0": 20.0, "frac_variance_explained": 0.740234375, "cossim": 0.912109375, "l2_ratio": 0.9125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6347166299819946, "loss_zero": 12.452932643890382, "frac_recovered": 0.980675333738327, "frac_alive": 0.1606987863779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf3205ddde13bfef6860b349b76d464b40b153d1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f6e772a03093f7920a7e63f7f60f3249ec146936 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.5, "l1_loss": 124.4, "l0": 40.0, "frac_variance_explained": 0.113671875, "cossim": 0.3802734375, "l2_ratio": 0.256640625, "relative_reconstruction_bias": 0.673828125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.318093872070312, "loss_zero": 12.452932643890382, "frac_recovered": 0.3131313890218735, "frac_alive": 0.2225477397441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5dba212e9c25401374a22c492a2c66caf4dcad61 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..48e4a1ffe3b845a832f4cbd128c6fdb7ee2eb351 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.725, "l1_loss": 248.9, "l0": 40.0, "frac_variance_explained": 0.50703125, "cossim": 0.821484375, "l2_ratio": 0.827734375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.444863748550415, "loss_zero": 12.452932643890382, "frac_recovered": 0.8997273623943329, "frac_alive": 0.3173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cbba21a7749ebf932f1df4e93f6b20bea04df11 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04f7a479b453d922a5f6e0db754be3c4468a1fa5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.4625, "l1_loss": 234.0, "l0": 40.0, "frac_variance_explained": 0.76328125, "cossim": 0.913671875, "l2_ratio": 0.914453125, "relative_reconstruction_bias": 1.002734375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.668365216255188, "loss_zero": 12.452932643890382, "frac_recovered": 0.9773101389408112, "frac_alive": 0.3059353232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95a270035f32dbb493cb44056847c95e7062fa2b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..71dee49d41199af7115998df76dc760368b633df --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.475, "l1_loss": 233.3, "l0": 40.0, "frac_variance_explained": 0.814453125, "cossim": 0.937890625, "l2_ratio": 0.93828125, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.577351212501526, "loss_zero": 12.452932643890382, "frac_recovered": 0.9863953530788422, "frac_alive": 0.2570529580116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d43403a4ed3f9715cc188b52a7bd2bdbb1065275 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c354d19c76a1b50fe82321de1850d8b00c876e19 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.8, "l1_loss": 205.9, "l0": 40.0, "frac_variance_explained": 0.31328125, "cossim": 0.731640625, "l2_ratio": 0.72265625, "relative_reconstruction_bias": 0.976171875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.934949207305908, "loss_zero": 12.452932643890382, "frac_recovered": 0.7508984625339508, "frac_alive": 0.3238389790058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c3e1adf5932b9c00101aec30387c556662b2d489 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9294956e26b814c9e4651a393e6a3853cd613588 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.325, "l1_loss": 220.5, "l0": 40.0, "frac_variance_explained": 0.6875, "cossim": 0.8953125, "l2_ratio": 0.8984375, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.729294180870056, "loss_zero": 12.452932643890382, "frac_recovered": 0.9712401390075683, "frac_alive": 0.3047960102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e62092c13447e490f66a6474551e1d9e8f973af --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..52771a181935866186e66a9e55ef78a9a45b888e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.3375, "l1_loss": 231.9, "l0": 40.0, "frac_variance_explained": 0.759765625, "cossim": 0.91640625, "l2_ratio": 0.915625, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.598768639564514, "loss_zero": 12.452932643890382, "frac_recovered": 0.9842611491680145, "frac_alive": 0.2821180522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9cdf1a2991c69a78381960f3b910e93b9cea5eb0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c3987e71d170cb04ac965525ea6982c6c4b9195f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 73.0, "l1_loss": 231.3, "l0": 80.0, "frac_variance_explained": 0.187109375, "cossim": 0.479296875, "l2_ratio": 0.35859375, "relative_reconstruction_bias": 0.7484375, "loss_original": 2.440642213821411, "loss_reconstructed": 8.026517772674561, "loss_zero": 12.452932643890382, "frac_recovered": 0.4421375274658203, "frac_alive": 0.3435872495174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c5068c9c2bcf8e7f2e9979d514b77f2a533b4210 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c8cadeb8dbcb6347df4eb0067c37accb22c1fc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.55, "l1_loss": 327.0, "l0": 80.0, "frac_variance_explained": 0.584765625, "cossim": 0.846875, "l2_ratio": 0.853125, "relative_reconstruction_bias": 1.00703125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.971432828903198, "loss_zero": 12.452932643890382, "frac_recovered": 0.947002649307251, "frac_alive": 0.567491352558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ba2221c341202a564c8a3ae29e66f15eb5b0ee7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9c966693015fb0a9e4611d3b3205e22e99c9fdc1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.8, "l1_loss": 301.8, "l0": 80.0, "frac_variance_explained": 0.76015625, "cossim": 0.919140625, "l2_ratio": 0.91875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6169229984283446, "loss_zero": 12.452932643890382, "frac_recovered": 0.9824475586414337, "frac_alive": 0.5130751132965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e4cdb6ef1b533aa7cf15b8a23f8c8573d1f1d23 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8577eee41759f626957e0d8ca88a87cb08d15fc --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.85, "l1_loss": 344.2, "l0": 80.0, "frac_variance_explained": 0.8296875, "cossim": 0.9359375, "l2_ratio": 0.936328125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.548701548576355, "loss_zero": 12.452932643890382, "frac_recovered": 0.989256089925766, "frac_alive": 0.440972238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..03d5dbfe2dfa13eec41e392083466f3316ff0ce5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4890c0df89794d23b5ccf1819609a9041392a44f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.725, "l1_loss": 316.8, "l0": 80.0, "frac_variance_explained": 0.413671875, "cossim": 0.77734375, "l2_ratio": 0.773828125, "relative_reconstruction_bias": 0.98515625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.7842599153518677, "loss_zero": 12.452932643890382, "frac_recovered": 0.8658327519893646, "frac_alive": 0.5611979365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a09e83ffd525a4e5b24433e3f74f5e926499d61 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..270145fb0aafd5472fc1ed2d2c8e6a3a4162f2f2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.75, "l1_loss": 343.0, "l0": 80.0, "frac_variance_explained": 0.7734375, "cossim": 0.908203125, "l2_ratio": 0.910546875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6711817026138305, "loss_zero": 12.452932643890382, "frac_recovered": 0.9770391523838043, "frac_alive": 0.5460612177848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3529ca9b6365fc67d41fd4aa28e79004a35eea14 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..418fe1cc3cfd86a7c00dd371e5017d7e62fd2b5b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.3375, "l1_loss": 317.8, "l0": 80.0, "frac_variance_explained": 0.7875, "cossim": 0.929296875, "l2_ratio": 0.92890625, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5706660985946654, "loss_zero": 12.452932643890382, "frac_recovered": 0.9870666921138763, "frac_alive": 0.472222238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe3088b1be2e1e5b0a92abdafe50f35f107fc7b5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..872ae597fd47b2299e8112f11a23834afbad6bfb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 68.425, "l1_loss": 436.2, "l0": 160.0, "frac_variance_explained": 0.2875, "cossim": 0.5828125, "l2_ratio": 0.50859375, "relative_reconstruction_bias": 0.873828125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.794749927520752, "loss_zero": 12.452932643890382, "frac_recovered": 0.6650312125682831, "frac_alive": 0.5022786259651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..511ad987c2fd8c01351db7f008dd71c6ff2266b8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..66ad158e2aa8c2f915ca52e9d2ff51670aea0bb6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.45, "l1_loss": 440.6, "l0": 160.0, "frac_variance_explained": 0.683203125, "cossim": 0.8890625, "l2_ratio": 0.891015625, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6913110494613646, "loss_zero": 12.452932643890382, "frac_recovered": 0.9750205039978027, "frac_alive": 0.8249782919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0359417750f5adc57e97302409cf1f11abaa5279 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..accd41bcb727da929ce549fb228a52d4473f26a9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.8625, "l1_loss": 401.2, "l0": 160.0, "frac_variance_explained": 0.79453125, "cossim": 0.93125, "l2_ratio": 0.932421875, "relative_reconstruction_bias": 1.002734375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.563430070877075, "loss_zero": 12.452932643890382, "frac_recovered": 0.9877831339836121, "frac_alive": 0.7734917402267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f3981970a90ffaa7be43ce96a6c7cb7b97d0729 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fa968555970035bf55c415827a1b3fbdf444cb55 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.4125, "l1_loss": 459.0, "l0": 160.0, "frac_variance_explained": 0.8515625, "cossim": 0.951171875, "l2_ratio": 0.95390625, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5208311319351195, "loss_zero": 12.452932643890382, "frac_recovered": 0.9920356631278991, "frac_alive": 0.640733540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..037ed87ad0ae4961fefd5ef1b2265f06c6305f9e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..28bee0e45c1d25ee32223b0046f0edd0200b342e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.925, "l1_loss": 479.8, "l0": 160.0, "frac_variance_explained": 0.519140625, "cossim": 0.81953125, "l2_ratio": 0.819140625, "relative_reconstruction_bias": 0.99296875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9531614780426025, "loss_zero": 12.452932643890382, "frac_recovered": 0.9488685250282287, "frac_alive": 0.8244900107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66677a85c68c704970415b0fbebdbe2144798486 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3129925fbb89cb277e550c17eecb699abd483bc5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 33.9125, "l1_loss": 411.6, "l0": 160.0, "frac_variance_explained": 0.73359375, "cossim": 0.90625, "l2_ratio": 0.909375, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.601050686836243, "loss_zero": 12.452932643890382, "frac_recovered": 0.9840281367301941, "frac_alive": 0.8039821982383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8cfee5eb8356b397b485cb56e8a8b59fa0527079 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..de49b6348c453b5e84ac02ef03e5d0de101e25bd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.4125, "l1_loss": 436.8, "l0": 160.0, "frac_variance_explained": 0.826171875, "cossim": 0.943359375, "l2_ratio": 0.94296875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.539991092681885, "loss_zero": 12.452932643890382, "frac_recovered": 0.9901264131069183, "frac_alive": 0.7169596552848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c051ed481a2ebc45c1453982fc6a397d43f6e46 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..203c57dc83abcdc6449406061d2688a3dd1732f2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 61.1, "l1_loss": 789.6, "l0": 320.0, "frac_variance_explained": 0.374609375, "cossim": 0.6796875, "l2_ratio": 0.730078125, "relative_reconstruction_bias": 1.07421875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.4105432748794557, "loss_zero": 12.452932643890382, "frac_recovered": 0.9031490743160248, "frac_alive": 0.650987446308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..af6d05a4497218166b591d11bccd937ecaef13a9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca18867a48e35956a08c2248ec13c63a4e9adf71 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.3875, "l1_loss": 615.6, "l0": 320.0, "frac_variance_explained": 0.73984375, "cossim": 0.9109375, "l2_ratio": 0.913671875, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5712565898895265, "loss_zero": 12.452932643890382, "frac_recovered": 0.9870066344738007, "frac_alive": 0.9637044072151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d7eacd8ad5d12580f5456701e526e3583f2ebf66 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..28de1215ae3e558f07372cc1c84b7b34002bdcf4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.6375, "l1_loss": 573.6, "l0": 320.0, "frac_variance_explained": 0.819140625, "cossim": 0.9390625, "l2_ratio": 0.940625, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.523586940765381, "loss_zero": 12.452932643890382, "frac_recovered": 0.9917560160160065, "frac_alive": 0.9573025107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ea7e96e39f37f3924f3dbd64eef3be0d2b224ab --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e83070be4944a03917b12c57c5145958fa927ded --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.825, "l1_loss": 732.4, "l0": 320.0, "frac_variance_explained": 0.882421875, "cossim": 0.961328125, "l2_ratio": 0.962890625, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.493744659423828, "loss_zero": 12.452932643890382, "frac_recovered": 0.9947247564792633, "frac_alive": 0.8442925214767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67c765b1832f53c1528fd9ac6beef8f1e1e20237 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..30591638c7ecaa2556875398879b5fab9c83eb3d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.375, "l1_loss": 672.4, "l0": 320.0, "frac_variance_explained": 0.63828125, "cossim": 0.86953125, "l2_ratio": 0.87734375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6309582233428954, "loss_zero": 12.452932643890382, "frac_recovered": 0.9810457110404969, "frac_alive": 0.9631618857383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ab1224f893147cefc0d0bee63620d3531398d3e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f33fb2e421c8d35ac6eb333e36e5b54dbdecf955 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.2875, "l1_loss": 554.4, "l0": 320.0, "frac_variance_explained": 0.789453125, "cossim": 0.9296875, "l2_ratio": 0.93046875, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.541101813316345, "loss_zero": 12.452932643890382, "frac_recovered": 0.9900118887424469, "frac_alive": 0.9561089277267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..85886561857decce66437e589a8c5cfc76a82895 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..867b10dd5dd3b333e3e301ecd8594f71bdb6d48a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.3875, "l1_loss": 607.6, "l0": 320.0, "frac_variance_explained": 0.844921875, "cossim": 0.946875, "l2_ratio": 0.95, "relative_reconstruction_bias": 1.005078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5037263154983522, "loss_zero": 12.452932643890382, "frac_recovered": 0.9937267541885376, "frac_alive": 0.9239909052848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6e219c14073c1b869f566951d577ae55a774603 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..54e5cf77fe78f0efde2d3351d138a28863f4ebeb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.325, "l1_loss": 1440.0, "l0": 640.0, "frac_variance_explained": 0.397265625, "cossim": 0.76328125, "l2_ratio": 1.0546875, "relative_reconstruction_bias": 1.375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.742577600479126, "loss_zero": 12.452932643890382, "frac_recovered": 0.9698809325695038, "frac_alive": 0.83544921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5812b43f98e015e92a6aac6ba0605c34fa2533ea --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a40efd234ae3aea46409dc2662c83ad0f2c6c094 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.725, "l1_loss": 854.0, "l0": 640.0, "frac_variance_explained": 0.807421875, "cossim": 0.932421875, "l2_ratio": 0.937109375, "relative_reconstruction_bias": 1.00703125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.511625289916992, "loss_zero": 12.452932643890382, "frac_recovered": 0.992942887544632, "frac_alive": 0.9987521767616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f3a6a9828833e219ad882a270f8711156b35f4d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..10c832d6b17b8c7184bd30d244732de7095c764d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.175, "l1_loss": 838.4, "l0": 640.0, "frac_variance_explained": 0.86953125, "cossim": 0.954296875, "l2_ratio": 0.95546875, "relative_reconstruction_bias": 1.003515625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4867657661437987, "loss_zero": 12.452932643890382, "frac_recovered": 0.9954212665557861, "frac_alive": 0.996690571308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..049def18bd852ad7c0f09baf8898d99b54c38ee4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..682ef2119840310da3c6c84daf53518c34f7d779 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.8, "l1_loss": 1300.0, "l0": 640.0, "frac_variance_explained": 0.901953125, "cossim": 0.967578125, "l2_ratio": 0.970703125, "relative_reconstruction_bias": 1.00703125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.471339964866638, "loss_zero": 12.452932643890382, "frac_recovered": 0.9969493508338928, "frac_alive": 0.9496527910232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6fe0a6316c85460ee4b95b5fce23932e3b70acb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b53bed33fc3c6fe39bc3142942d687416db9d717 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.875, "l1_loss": 978.4, "l0": 640.0, "frac_variance_explained": 0.751953125, "cossim": 0.905859375, "l2_ratio": 0.91875, "relative_reconstruction_bias": 1.008203125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5359509944915772, "loss_zero": 12.452932643890382, "frac_recovered": 0.9905226826667786, "frac_alive": 0.999131977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..20f496d5ec073eef414ebdd52e72b6c4da2b2bdb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e63cad501c017b94393b021d553f1dd5ae77f1f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 26.05, "l1_loss": 777.2, "l0": 640.0, "frac_variance_explained": 0.834765625, "cossim": 0.94453125, "l2_ratio": 0.945703125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.498408341407776, "loss_zero": 12.452932643890382, "frac_recovered": 0.9942609667778015, "frac_alive": 0.9962565302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7272e23ae6371ab648a43eaabd9ee4bf5aaf2b9e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca45c4384cbfe56e94555ec1b0e669e85f4c4e8a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.3, "l1_loss": 1007.2, "l0": 640.0, "frac_variance_explained": 0.89140625, "cossim": 0.965625, "l2_ratio": 0.967578125, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4793680906295776, "loss_zero": 12.452932643890382, "frac_recovered": 0.9961497604846954, "frac_alive": 0.966796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd11d7f9e4fe074597b35ad323a560482e2f7369 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb504024309c09877dcb0dcfff9433ce4321baa7 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.95, "l1_loss": 79.85, "l0": 20.0, "frac_variance_explained": 0.063671875, "cossim": 0.289453125, "l2_ratio": 0.18203125, "relative_reconstruction_bias": 0.6296875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.99864740371704, "loss_zero": 12.452932643890382, "frac_recovered": 0.24520010501146317, "frac_alive": 0.1371527761220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ad5a2af8d72c3132ba34dac663c8a08a94cbe844 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..05e5681c7214831a4df148791127bafc87f12b1a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 67.775, "l1_loss": 207.0, "l0": 20.0, "frac_variance_explained": 0.424609375, "cossim": 0.771875, "l2_ratio": 0.785546875, "relative_reconstruction_bias": 0.980078125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.673088788986206, "loss_zero": 12.452932643890382, "frac_recovered": 0.7770695388317108, "frac_alive": 0.1438802033662796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..79c30254af749348f3860a836c3aa77f44bf4648 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a57d8b0e4dd9195beb8eb5285fd0b94974d605b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.75, "l1_loss": 195.5, "l0": 20.0, "frac_variance_explained": 0.65078125, "cossim": 0.8796875, "l2_ratio": 0.88359375, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8792571783065797, "loss_zero": 12.452932643890382, "frac_recovered": 0.956269359588623, "frac_alive": 0.146484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0bee84a2f996212904b9ef3e3fd21de6efaa62d1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1cddf8d8894f3236ccccfe4d79b5f0c3922958ad --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.4, "l1_loss": 204.7, "l0": 20.0, "frac_variance_explained": 0.723828125, "cossim": 0.904296875, "l2_ratio": 0.90625, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.647126483917236, "loss_zero": 12.452932643890382, "frac_recovered": 0.9794343411922455, "frac_alive": 0.1592339426279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6c9365e16cdeaebb0892a94257549943eec4d15e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..47baa199af55e9de23446a01216251b1a224d3e4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 76.0, "l1_loss": 140.25, "l0": 20.0, "frac_variance_explained": 0.21015625, "cossim": 0.69375, "l2_ratio": 0.685546875, "relative_reconstruction_bias": 0.9703125, "loss_original": 2.440642213821411, "loss_reconstructed": 6.198859167098999, "loss_zero": 12.452932643890382, "frac_recovered": 0.6247840762138367, "frac_alive": 0.1758897602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef708cffd2aab4566d3f94c730b4c70d2e166537 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..77f8c6c4ff5f5b7b017afcce95ea0dfc1a0b55bf --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.175, "l1_loss": 212.9, "l0": 20.0, "frac_variance_explained": 0.6734375, "cossim": 0.8734375, "l2_ratio": 0.878515625, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.089528727531433, "loss_zero": 12.452932643890382, "frac_recovered": 0.9352387905120849, "frac_alive": 0.1478949636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..60f1f6cd765bc1786bc3cef610c8345d414c0dd1 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 20, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eca6058a2c3fd8b3c00f2123282e1dab4d3c34b5 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.1, "l1_loss": 200.0, "l0": 20.0, "frac_variance_explained": 0.711328125, "cossim": 0.89921875, "l2_ratio": 0.898046875, "relative_reconstruction_bias": 0.99765625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.684427809715271, "loss_zero": 12.452932643890382, "frac_recovered": 0.975710517168045, "frac_alive": 0.1566297709941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd4212b6a2f2a4fa777637285639ead15134ec7e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..162b4dfb5ff9fd0f4d67a330aefab7cb01fa93ad --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 102.75, "l1_loss": 159.6, "l0": 40.0, "frac_variance_explained": 0.108203125, "cossim": 0.374609375, "l2_ratio": 0.25087890625, "relative_reconstruction_bias": 0.66875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.386548137664795, "loss_zero": 12.452932643890382, "frac_recovered": 0.3063706010580063, "frac_alive": 0.2197808176279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..401ec02db8d676ac8b8d395c2d0514594cdedc86 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ec619ed43f5d4af481cb439fb5928a52c629cbee --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.9, "l1_loss": 304.2, "l0": 40.0, "frac_variance_explained": 0.523046875, "cossim": 0.812890625, "l2_ratio": 0.834375, "relative_reconstruction_bias": 1.01328125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.7924010515213014, "loss_zero": 12.452932643890382, "frac_recovered": 0.8649622142314911, "frac_alive": 0.2794053852558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fa1a4bf1e36e312cb41a919993329487493c3511 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..569ca98ed6bd68a89ccc9d864d3d7ff64f857570 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.275, "l1_loss": 263.3, "l0": 40.0, "frac_variance_explained": 0.714453125, "cossim": 0.90078125, "l2_ratio": 0.900390625, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.692185115814209, "loss_zero": 12.452932643890382, "frac_recovered": 0.9749473631381989, "frac_alive": 0.27880859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f3b6fc971eaa67e2267c6c8a89fa4d8fc3d1e4f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8b8b2c8479a1498beb9540582489a327d9379a0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.125, "l1_loss": 273.0, "l0": 40.0, "frac_variance_explained": 0.803515625, "cossim": 0.930078125, "l2_ratio": 0.930859375, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.552605724334717, "loss_zero": 12.452932643890382, "frac_recovered": 0.9888627886772156, "frac_alive": 0.2864583432674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..44133408e38e304e7464276dbaa7944ccb644286 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a66997a1f83cbf9c6a267b760a816d55048c687f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.9, "l1_loss": 235.8, "l0": 40.0, "frac_variance_explained": 0.2984375, "cossim": 0.730859375, "l2_ratio": 0.7328125, "relative_reconstruction_bias": 0.985546875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.218111610412597, "loss_zero": 12.452932643890382, "frac_recovered": 0.7227450847625733, "frac_alive": 0.3250325620174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..41b2f892b7b47261f303b99936618854de72f958 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89f13cc5ed15323a120dc9756178257c0bac506a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.05, "l1_loss": 264.0, "l0": 40.0, "frac_variance_explained": 0.66875, "cossim": 0.881640625, "l2_ratio": 0.884375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8299009084701536, "loss_zero": 12.452932643890382, "frac_recovered": 0.9612075924873352, "frac_alive": 0.2823893129825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..67fb0ac1263b86da55542ea64746e3b3e78095a4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 40, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..edab0614ff546bd527196280886bb3321e0ab85a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.95, "l1_loss": 267.9, "l0": 40.0, "frac_variance_explained": 0.7640625, "cossim": 0.919140625, "l2_ratio": 0.920703125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5750033855438232, "loss_zero": 12.452932643890382, "frac_recovered": 0.9866260170936585, "frac_alive": 0.284722238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c4b57bc73c37c2cba23819f972d5b191918adc0 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..507d06be1204772435f314fd3384b4a5189627ce --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 92.3, "l1_loss": 286.2, "l0": 80.0, "frac_variance_explained": 0.183203125, "cossim": 0.47265625, "l2_ratio": 0.351171875, "relative_reconstruction_bias": 0.7421875, "loss_original": 2.440642213821411, "loss_reconstructed": 8.09604024887085, "loss_zero": 12.452932643890382, "frac_recovered": 0.4352301359176636, "frac_alive": 0.330620676279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b84a5c1efd241497d230228aad880240f16730ad --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04ff941bc5a7de4970a68911715db240f6bd2e52 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 53.275, "l1_loss": 364.4, "l0": 80.0, "frac_variance_explained": 0.58515625, "cossim": 0.852734375, "l2_ratio": 0.862890625, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.1309916734695435, "loss_zero": 12.452932643890382, "frac_recovered": 0.931030935049057, "frac_alive": 0.5353190302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f4d99b0e0e7c3d495e35ac036741ca3fe4811a4f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e7230ae0ed8b2549063461d676bc33f91a0be65a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.975, "l1_loss": 352.2, "l0": 80.0, "frac_variance_explained": 0.748046875, "cossim": 0.912109375, "l2_ratio": 0.915625, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.60854709148407, "loss_zero": 12.452932643890382, "frac_recovered": 0.9832943916320801, "frac_alive": 0.5107964277267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd11572b75c2d3983c1ef8e173c4229f1134581 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ae2829901bc48a1706dbf0815d18eb0634058a8e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.2875, "l1_loss": 390.0, "l0": 80.0, "frac_variance_explained": 0.825, "cossim": 0.937890625, "l2_ratio": 0.938671875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.516243076324463, "loss_zero": 12.452932643890382, "frac_recovered": 0.9924913465976715, "frac_alive": 0.4480794370174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c91527489cee57bebe65978623ec644ea41e339 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..437277ebf775a67b099b76544038b4c8f1318ce9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.6, "l1_loss": 368.0, "l0": 80.0, "frac_variance_explained": 0.38515625, "cossim": 0.7734375, "l2_ratio": 0.776171875, "relative_reconstruction_bias": 0.98984375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.262200713157654, "loss_zero": 12.452932643890382, "frac_recovered": 0.8181838512420654, "frac_alive": 0.5559895634651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8d171edb51fa1a77664ac90b4f28e290a34524a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d160860b7cf65412a5adde5a4e907b0fc767fdac --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.15, "l1_loss": 343.0, "l0": 80.0, "frac_variance_explained": 0.7140625, "cossim": 0.903125, "l2_ratio": 0.90390625, "relative_reconstruction_bias": 0.996484375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.661093282699585, "loss_zero": 12.452932643890382, "frac_recovered": 0.9780488312244415, "frac_alive": 0.5080295205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fc97a2a43480303ca40f8d7dcb5fc5c18a8552db --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 80, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2140f2f058590a639f0429fa506b88d12d463740 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.05, "l1_loss": 367.6, "l0": 80.0, "frac_variance_explained": 0.80390625, "cossim": 0.932421875, "l2_ratio": 0.9328125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5410542249679566, "loss_zero": 12.452932643890382, "frac_recovered": 0.9900187611579895, "frac_alive": 0.4778103232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b455a5a24f1c2dedc318816a716d69a10504b7df --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..262d91bef4a5291cda6bf1ae641b72ecf006a9eb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 86.15, "l1_loss": 534.8, "l0": 160.0, "frac_variance_explained": 0.27421875, "cossim": 0.575390625, "l2_ratio": 0.497265625, "relative_reconstruction_bias": 0.866796875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.911270380020142, "loss_zero": 12.452932643890382, "frac_recovered": 0.6534240901470184, "frac_alive": 0.4622395932674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d7e3df9a66d4a701bbd0d064e048f49daf21ecc3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5edb137049b07f0988c529116dd821b69a45f7c3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.5, "l1_loss": 521.6, "l0": 160.0, "frac_variance_explained": 0.683203125, "cossim": 0.88671875, "l2_ratio": 0.8953125, "relative_reconstruction_bias": 1.0078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8128823041915894, "loss_zero": 12.452932643890382, "frac_recovered": 0.962822437286377, "frac_alive": 0.8107096552848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c5e0b4ae0b11d4fddd122b0c4f8dcd4a19d66ac9 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4f4e2570921241f403c4c31f093e5d070c76096f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.625, "l1_loss": 467.8, "l0": 160.0, "frac_variance_explained": 0.789453125, "cossim": 0.928125, "l2_ratio": 0.932421875, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5421324014663695, "loss_zero": 12.452932643890382, "frac_recovered": 0.9899133086204529, "frac_alive": 0.7516276240348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b34cff328b96e3ce51728f25d018ee7cd77b31a --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a8de0c41d1ed4c9c412c469164289f7974190a6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.6, "l1_loss": 559.8, "l0": 160.0, "frac_variance_explained": 0.8609375, "cossim": 0.948828125, "l2_ratio": 0.949609375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4919713020324705, "loss_zero": 12.452932643890382, "frac_recovered": 0.9949017584323883, "frac_alive": 0.6184895634651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7d4e72fbafa463990645a9249daa36a4ed41409 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..05322273f52e37785fb3a8cec09a6a02f39dfc21 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.8, "l1_loss": 563.6, "l0": 160.0, "frac_variance_explained": 0.516015625, "cossim": 0.822265625, "l2_ratio": 0.83046875, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.1499987840652466, "loss_zero": 12.452932643890382, "frac_recovered": 0.9291528046131134, "frac_alive": 0.8151584267616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..019851d322f5d558f0e94f12b18aa05b29da5492 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c9b4744b24e1ea17a9ac60440b50d34e3da436f6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.625, "l1_loss": 484.6, "l0": 160.0, "frac_variance_explained": 0.734765625, "cossim": 0.90859375, "l2_ratio": 0.9125, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5914368867874145, "loss_zero": 12.452932643890382, "frac_recovered": 0.9849962055683136, "frac_alive": 0.7962239384651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a59382a5eb269f6e8207f528acd2d7fdd48f3829 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 160, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7706b18abaa7c9a83830a28d23652ac252cbe20b --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.8375, "l1_loss": 535.0, "l0": 160.0, "frac_variance_explained": 0.860546875, "cossim": 0.94140625, "l2_ratio": 0.941015625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.514176058769226, "loss_zero": 12.452932643890382, "frac_recovered": 0.9926984131336212, "frac_alive": 0.6957465410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5830efd4adb0ef18f39de3f5ff0ba3e387aeb7b8 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0ccf5f1818664fc5fc87a85abcd054aac5b7cdfd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 76.45, "l1_loss": 964.8, "l0": 320.0, "frac_variance_explained": 0.364453125, "cossim": 0.673046875, "l2_ratio": 0.71484375, "relative_reconstruction_bias": 1.06171875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.8941278219223023, "loss_zero": 12.452932643890382, "frac_recovered": 0.8548661530017853, "frac_alive": 0.61083984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..37a3fe5592b582e4a874516dccaab3e936e5adc6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b85babede3901c5e022ad3967d348065ab920eb3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.65, "l1_loss": 742.4, "l0": 320.0, "frac_variance_explained": 0.731640625, "cossim": 0.905859375, "l2_ratio": 0.91015625, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5959763288497926, "loss_zero": 12.452932643890382, "frac_recovered": 0.9845311343669891, "frac_alive": 0.9627821445465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a5aa3d927820412d9503c701166a1e973ed759fb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..990dd1f2a3dd9520fef6413abcd9bda5c4f52402 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.7, "l1_loss": 658.8, "l0": 320.0, "frac_variance_explained": 0.827734375, "cossim": 0.941796875, "l2_ratio": 0.941015625, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.506504034996033, "loss_zero": 12.452932643890382, "frac_recovered": 0.9934565842151641, "frac_alive": 0.941514790058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3175a45dd56e21b85aaf75a42414585de77930cd --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8ea98e9398e76742c982a834bccb2fec0db11231 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.5125, "l1_loss": 846.8, "l0": 320.0, "frac_variance_explained": 0.8828125, "cossim": 0.96171875, "l2_ratio": 0.96171875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4759001970291137, "loss_zero": 12.452932643890382, "frac_recovered": 0.996501874923706, "frac_alive": 0.7259657382965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bf5137d186e35f569d70c865092758c640210e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbe0ec2666ad932e2b7fec5df2487698c58dce91 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 52.275, "l1_loss": 907.2, "l0": 320.0, "frac_variance_explained": 0.663671875, "cossim": 0.86796875, "l2_ratio": 0.87578125, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6827621698379516, "loss_zero": 12.452932643890382, "frac_recovered": 0.9758654475212097, "frac_alive": 0.9623481035232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb859ef867884e20d9743a10f1f8e5969bc7d523 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ee2875e6b5e8cc406798896956f0eaa16026dcf4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.5, "l1_loss": 665.2, "l0": 320.0, "frac_variance_explained": 0.77421875, "cossim": 0.923046875, "l2_ratio": 0.9265625, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.537190818786621, "loss_zero": 12.452932643890382, "frac_recovered": 0.9904000639915467, "frac_alive": 0.953233540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..40e540058956e0d2a305a32c18c6155f4deef961 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 320, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd94c19539c522673ff8bfc8fdeeae767dcdb0eb --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.6, "l1_loss": 696.4, "l0": 320.0, "frac_variance_explained": 0.849609375, "cossim": 0.95, "l2_ratio": 0.95234375, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.489767813682556, "loss_zero": 12.452932643890382, "frac_recovered": 0.9951238095760345, "frac_alive": 0.8999565839767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d425213950d9bea409c0352d6ee889a1e715f99 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "0", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..263194613cdd9e6045edf051a4bdea903988094f --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 72.8, "l1_loss": 1727.2, "l0": 640.0, "frac_variance_explained": 0.36171875, "cossim": 0.75859375, "l2_ratio": 1.03203125, "relative_reconstruction_bias": 1.3640625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.917159104347229, "loss_zero": 12.452932643890382, "frac_recovered": 0.9524367094039917, "frac_alive": 0.7743055820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25cb2b0a39b81a89d210927d7fd56f903acc44ee --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "154", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b9589503513c81ecfe30f4701fe3db737ed19db6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.05, "l1_loss": 1015.2, "l0": 640.0, "frac_variance_explained": 0.78984375, "cossim": 0.929296875, "l2_ratio": 0.934375, "relative_reconstruction_bias": 1.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.508847784996033, "loss_zero": 12.452932643890382, "frac_recovered": 0.9932186603546143, "frac_alive": 0.9986979365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..432495b51361cfa570781f152a6d7876cc0d0448 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "1544", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f6b0370145457bbb3b1d6ef9055d1f89cbe1185d --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.6125, "l1_loss": 1098.8, "l0": 640.0, "frac_variance_explained": 0.87265625, "cossim": 0.95703125, "l2_ratio": 0.957421875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4735523223876954, "loss_zero": 12.452932643890382, "frac_recovered": 0.9967362225055695, "frac_alive": 0.9080946445465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..81638e74de6b6f20e2db86715c6a7af59f8accb2 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "15440", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3ee216ee0107dd7e98bed91e8a9849aef3af156 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.025, "l1_loss": 1850.4, "l0": 640.0, "frac_variance_explained": 0.921484375, "cossim": 0.973046875, "l2_ratio": 0.973828125, "relative_reconstruction_bias": 1.0, "loss_original": 2.440642213821411, "loss_reconstructed": 2.457998824119568, "loss_zero": 12.452932643890382, "frac_recovered": 0.9982805609703064, "frac_alive": 0.7088758945465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0a65b8ab436dd334c7433583fc9bc2ac8a3219d4 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "48", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..72019b79b07331b55c3de0f5f371e3c61bbe83f3 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.9, "l1_loss": 1130.4, "l0": 640.0, "frac_variance_explained": 0.709375, "cossim": 0.89921875, "l2_ratio": 0.91328125, "relative_reconstruction_bias": 1.0109375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.53686363697052, "loss_zero": 12.452932643890382, "frac_recovered": 0.9904285967350006, "frac_alive": 0.9985893964767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..da721c5b7834c355176295ae14c3ce2e2895325e --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "488", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d2579184c526b1c0831fb3b46dd07f10a56bf6c6 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.5, "l1_loss": 942.4, "l0": 640.0, "frac_variance_explained": 0.830078125, "cossim": 0.942578125, "l2_ratio": 0.944140625, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4987658500671386, "loss_zero": 12.452932643890382, "frac_recovered": 0.9942247211933136, "frac_alive": 0.983506977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..efb3ae6464d34a4ed00d6ed906e61ec36508e937 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0001885618083164127, + "steps": "4882", + "seed": 0, + "activation_dim": 2304, + "dict_size": 18432, + "k": 640, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0231e08b5ddf8a4d75814a3bcd8f674504d32102 --- /dev/null +++ b/gemma-2-2b_sweep_topk_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.45, "l1_loss": 1432.8, "l0": 640.0, "frac_variance_explained": 0.902734375, "cossim": 0.966015625, "l2_ratio": 0.965625, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.4615091562271116, "loss_zero": 12.452932643890382, "frac_recovered": 0.9979331851005554, "frac_alive": 0.8211262822151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file