diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb6f25b6c59c8be1d75b7f6de6bce97b6f5685ff --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.1375, "l1_loss": 131.35, "l0": 20.0, "frac_variance_explained": 0.769921875, "cossim": 0.907421875, "l2_ratio": 0.909375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.6859375, "loss_zero": 12.4125, "frac_recovered": 0.959765625, "frac_alive": 0.19366455078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3bd470ce26d6fbcc81c12b5691a7b19578d7e92c --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.2, "l1_loss": 195.5, "l0": 40.0, "frac_variance_explained": 0.821484375, "cossim": 0.922265625, "l2_ratio": 0.9234375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.5671875, "loss_zero": 12.4125, "frac_recovered": 0.971484375, "frac_alive": 0.31732177734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f21810a1ea68ebc763948c86607cfcc9c91a5ea --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.4875, "l1_loss": 167.8, "l0": 80.0, "frac_variance_explained": 0.744140625, "cossim": 0.927734375, "l2_ratio": 0.9265625, "relative_reconstruction_bias": 0.9984375, "loss_original": 2.3, "loss_reconstructed": 2.4828125, "loss_zero": 12.4125, "frac_recovered": 0.980859375, "frac_alive": 0.46820068359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..67fe6e2c3e771288c7428ac7b5c62efd866a6342 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 16.7875, "l1_loss": 246.9, "l0": 160.0, "frac_variance_explained": 0.79296875, "cossim": 0.94140625, "l2_ratio": 0.9421875, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.4359375, "loss_zero": 12.4125, "frac_recovered": 0.984765625, "frac_alive": 0.628173828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..17020bf6ad83f13189da821ae2021d162c90512a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.79375, "l1_loss": 486.2, "l0": 320.0, "frac_variance_explained": 0.887109375, "cossim": 0.95625, "l2_ratio": 0.957421875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.390625, "loss_zero": 12.4125, "frac_recovered": 0.98828125, "frac_alive": 0.75830078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ea458047659dbe80990bb9a2fee42a8b96b79b68 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 12.1875, "l1_loss": 757.6, "l0": 640.0, "frac_variance_explained": 0.9, "cossim": 0.9703125, "l2_ratio": 0.969140625, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.35, "loss_zero": 12.4125, "frac_recovered": 0.992578125, "frac_alive": 0.59039306640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..369f840e2ffa09abe502504bf273e3dd61c6cfe8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 68.45, "l1_loss": 53.0, "l0": 20.0, "frac_variance_explained": 0.05703125, "cossim": 0.2986328125, "l2_ratio": 0.19013671875, "relative_reconstruction_bias": 0.626171875, "loss_original": 2.3, "loss_reconstructed": 11.96875, "loss_zero": 12.4125, "frac_recovered": 0.043798828125, "frac_alive": 0.11407470703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..decfcc6c5765da814bedd9a47a56b36c2278c373 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.9625, "l1_loss": 168.6, "l0": 20.0, "frac_variance_explained": 0.495703125, "cossim": 0.789453125, "l2_ratio": 0.833984375, "relative_reconstruction_bias": 0.93984375, "loss_original": 2.3, "loss_reconstructed": 4.746875, "loss_zero": 12.4125, "frac_recovered": 0.75703125, "frac_alive": 0.1495361328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..92fc0f7d3bf4e0e80c117a2720e2821875d3f2c8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.1, "l1_loss": 84.2, "l0": 20.0, "frac_variance_explained": 0.60234375, "cossim": 0.884765625, "l2_ratio": 0.887890625, "relative_reconstruction_bias": 1.002734375, "loss_original": 2.3, "loss_reconstructed": 2.8765625, "loss_zero": 12.4125, "frac_recovered": 0.9421875, "frac_alive": 0.18695068359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..983854318b315dcf51604fa4da15f1a912fde0f7 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.5625, "l1_loss": 85.55, "l0": 20.0, "frac_variance_explained": 0.6609375, "cossim": 0.90078125, "l2_ratio": 0.904296875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.721875, "loss_zero": 12.4125, "frac_recovered": 0.95703125, "frac_alive": 0.19183349609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b7840b5467bf54140b835700f257b6d62080ed7 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.35, "l1_loss": 132.0, "l0": 20.0, "frac_variance_explained": 0.76640625, "cossim": 0.9046875, "l2_ratio": 0.905859375, "relative_reconstruction_bias": 1.002734375, "loss_original": 2.3, "loss_reconstructed": 2.6984375, "loss_zero": 12.4125, "frac_recovered": 0.959375, "frac_alive": 0.19061279296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c2fa8473f2980372a81947658490455b29ac6696 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.3625, "l1_loss": 64.325, "l0": 20.0, "frac_variance_explained": 0.176953125, "cossim": 0.7515625, "l2_ratio": 0.757421875, "relative_reconstruction_bias": 0.9359375, "loss_original": 2.3, "loss_reconstructed": 7.221875, "loss_zero": 12.4125, "frac_recovered": 0.5134765625, "frac_alive": 0.1993408203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a42d81df6a2248e81077a52f2b920ff2201bb632 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.9375, "l1_loss": 81.7, "l0": 20.0, "frac_variance_explained": 0.5171875, "cossim": 0.860546875, "l2_ratio": 0.860546875, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 3.19375, "loss_zero": 12.4125, "frac_recovered": 0.91015625, "frac_alive": 0.17303466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd0cd3093ac13ca02a26903b216e4deb42eb1ad6 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.7125, "l1_loss": 86.1, "l0": 20.0, "frac_variance_explained": 0.646875, "cossim": 0.90078125, "l2_ratio": 0.89921875, "relative_reconstruction_bias": 0.99609375, "loss_original": 2.3, "loss_reconstructed": 2.7765625, "loss_zero": 12.4125, "frac_recovered": 0.95234375, "frac_alive": 0.1864013671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e50a25c119de85f04ba271459dc7d2c24592fb1 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.6375, "l1_loss": 124.2, "l0": 20.0, "frac_variance_explained": 0.715625, "cossim": 0.901171875, "l2_ratio": 0.90390625, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 2.7453125, "loss_zero": 12.4125, "frac_recovered": 0.9546875, "frac_alive": 0.18743896484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e418a08d41da1f310b60fb7e496a383784b8045a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.9, "l1_loss": 94.5, "l0": 40.0, "frac_variance_explained": 0.10078125, "cossim": 0.3873046875, "l2_ratio": 0.2634765625, "relative_reconstruction_bias": 0.675390625, "loss_original": 2.3, "loss_reconstructed": 11.30625, "loss_zero": 12.4125, "frac_recovered": 0.109130859375, "frac_alive": 0.18304443359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..80910829e62d253430ce96b4864aaff783638f75 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.875, "l1_loss": 193.75, "l0": 40.0, "frac_variance_explained": 0.483203125, "cossim": 0.818359375, "l2_ratio": 0.856640625, "relative_reconstruction_bias": 1.042578125, "loss_original": 2.3, "loss_reconstructed": 4.0953125, "loss_zero": 12.4125, "frac_recovered": 0.82265625, "frac_alive": 0.3134765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b39d9f6517bb49993ee328de9d9ce29274ca9cf --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.7125, "l1_loss": 262.8, "l0": 40.0, "frac_variance_explained": 0.74375, "cossim": 0.91171875, "l2_ratio": 0.9125, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.69375, "loss_zero": 12.4125, "frac_recovered": 0.961328125, "frac_alive": 0.32391357421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..118f6268c224e19be0206e5aa3a0ff341b4bed97 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.3625, "l1_loss": 167.95, "l0": 40.0, "frac_variance_explained": 0.760546875, "cossim": 0.915625, "l2_ratio": 0.9171875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.5828125, "loss_zero": 12.4125, "frac_recovered": 0.971484375, "frac_alive": 0.322021484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf89037900f265321341517ee867e028f40a3225 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.675, "l1_loss": 117.0, "l0": 40.0, "frac_variance_explained": 0.71171875, "cossim": 0.9203125, "l2_ratio": 0.9203125, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.5765625, "loss_zero": 12.4125, "frac_recovered": 0.971484375, "frac_alive": 0.308837890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7c4cb8c8980faf7a6abf4092240e88ed6adb1534 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.8625, "l1_loss": 151.25, "l0": 40.0, "frac_variance_explained": 0.289453125, "cossim": 0.7703125, "l2_ratio": 0.771484375, "relative_reconstruction_bias": 0.8107421875, "loss_original": 2.3, "loss_reconstructed": 5.828125, "loss_zero": 12.4125, "frac_recovered": 0.65078125, "frac_alive": 0.38104248046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c30f6d6a3915d31a76214815341f9cb9f36edaa8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.4, "l1_loss": 109.6, "l0": 40.0, "frac_variance_explained": 0.58671875, "cossim": 0.885546875, "l2_ratio": 0.889453125, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.3, "loss_reconstructed": 2.909375, "loss_zero": 12.4125, "frac_recovered": 0.939453125, "frac_alive": 0.32025146484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e404f21c4d27a84a59ec54457795913227514a48 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 20.4, "l1_loss": 186.05, "l0": 40.0, "frac_variance_explained": 0.75703125, "cossim": 0.9140625, "l2_ratio": 0.9140625, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 2.6140625, "loss_zero": 12.4125, "frac_recovered": 0.967578125, "frac_alive": 0.31817626953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ec1d7366bb3888765b9efaa34c66b601a61ece4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.425, "l1_loss": 148.65, "l0": 40.0, "frac_variance_explained": 0.748828125, "cossim": 0.92109375, "l2_ratio": 0.920703125, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.6, "loss_zero": 12.4125, "frac_recovered": 0.96953125, "frac_alive": 0.324951171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f74b7a7bbd09a18ac2ae41e7fbb37cd4b97a7c47 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.175, "l1_loss": 147.3, "l0": 80.0, "frac_variance_explained": 0.17578125, "cossim": 0.4873046875, "l2_ratio": 0.36953125, "relative_reconstruction_bias": 0.75859375, "loss_original": 2.3, "loss_reconstructed": 10.9375, "loss_zero": 12.4125, "frac_recovered": 0.14580078125, "frac_alive": 0.28033447265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e71f9288ce027e70286407393a57d846d28c179d --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.075, "l1_loss": 224.4, "l0": 80.0, "frac_variance_explained": 0.5328125, "cossim": 0.85390625, "l2_ratio": 0.863671875, "relative_reconstruction_bias": 1.0109375, "loss_original": 2.3, "loss_reconstructed": 3.3921875, "loss_zero": 12.4125, "frac_recovered": 0.890625, "frac_alive": 0.60479736328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..715cc3ca46d838c041984a4dad805307dfa3dac3 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.8125, "l1_loss": 268.7, "l0": 80.0, "frac_variance_explained": 0.773828125, "cossim": 0.91875, "l2_ratio": 0.91875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.584375, "loss_zero": 12.4125, "frac_recovered": 0.971484375, "frac_alive": 0.51849365234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cdbbb10a85ff8ad693df58d11b50baca7d71de75 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.55, "l1_loss": 168.1, "l0": 80.0, "frac_variance_explained": 0.74609375, "cossim": 0.92890625, "l2_ratio": 0.93046875, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.5015625, "loss_zero": 12.4125, "frac_recovered": 0.97890625, "frac_alive": 0.46893310546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..504ae759a8de0e9fa875db3dd6f198c3a8b9084e --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.175, "l1_loss": 249.8, "l0": 80.0, "frac_variance_explained": 0.83359375, "cossim": 0.93203125, "l2_ratio": 0.931640625, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.49375, "loss_zero": 12.4125, "frac_recovered": 0.9796875, "frac_alive": 0.4757080078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7205cdfdba59fb63e9548416a7f24eba70619896 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.6125, "l1_loss": 149.6, "l0": 80.0, "frac_variance_explained": 0.35546875, "cossim": 0.811328125, "l2_ratio": 0.817578125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 4.328125, "loss_zero": 12.4125, "frac_recovered": 0.798046875, "frac_alive": 0.63909912109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2dd4e73ac38cc21a38c60f48b865e81ebc99c5d2 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.85, "l1_loss": 156.4, "l0": 80.0, "frac_variance_explained": 0.637109375, "cossim": 0.900390625, "l2_ratio": 0.902734375, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.7703125, "loss_zero": 12.4125, "frac_recovered": 0.952734375, "frac_alive": 0.5589599609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f45e60834e1281b2e4b6d7c0b72ca14d8321a145 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.95, "l1_loss": 208.3, "l0": 80.0, "frac_variance_explained": 0.753125, "cossim": 0.927734375, "l2_ratio": 0.926953125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.521875, "loss_zero": 12.4125, "frac_recovered": 0.976953125, "frac_alive": 0.46966552734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b3af19c5f3244ca49480ab28ec1ac4a5ee7f604 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.6625, "l1_loss": 164.6, "l0": 80.0, "frac_variance_explained": 0.740625, "cossim": 0.926953125, "l2_ratio": 0.92734375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.5109375, "loss_zero": 12.4125, "frac_recovered": 0.978125, "frac_alive": 0.4600830078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3971cf070840f1dd4fb0eccda53ff3d15c1e67bf --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.425, "l1_loss": 302.2, "l0": 160.0, "frac_variance_explained": 0.251171875, "cossim": 0.588671875, "l2_ratio": 0.52421875, "relative_reconstruction_bias": 0.88515625, "loss_original": 2.3, "loss_reconstructed": 8.775, "loss_zero": 12.4125, "frac_recovered": 0.3591796875, "frac_alive": 0.3839111328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7e4a1dbe21ec4773ad3857bb58a6a600f78236d1 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.975, "l1_loss": 231.5, "l0": 160.0, "frac_variance_explained": 0.56953125, "cossim": 0.880078125, "l2_ratio": 0.884375, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.3, "loss_reconstructed": 2.9640625, "loss_zero": 12.4125, "frac_recovered": 0.93359375, "frac_alive": 0.87225341796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..470da8d9fc9375909b47a6e0b78298afe260e41a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.9125, "l1_loss": 382.6, "l0": 160.0, "frac_variance_explained": 0.758984375, "cossim": 0.92578125, "l2_ratio": 0.925390625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.5203125, "loss_zero": 12.4125, "frac_recovered": 0.977734375, "frac_alive": 0.77105712890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1fcb26bb4f2720f20ad53c2b75cce81afb0aab9c --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 16.6, "l1_loss": 237.4, "l0": 160.0, "frac_variance_explained": 0.788671875, "cossim": 0.942578125, "l2_ratio": 0.944140625, "relative_reconstruction_bias": 1.00390625, "loss_original": 2.3, "loss_reconstructed": 2.4484375, "loss_zero": 12.4125, "frac_recovered": 0.983984375, "frac_alive": 0.6077880859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd66b250bb75d30b6f3dfd484cb412828227f154 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 16.7875, "l1_loss": 276.6, "l0": 159.99583435058594, "frac_variance_explained": 0.816015625, "cossim": 0.9421875, "l2_ratio": 0.94375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.44375, "loss_zero": 12.4125, "frac_recovered": 0.984765625, "frac_alive": 0.635009765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff6233a1727737f8f6e171cec319ecb7fba4cec4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.2, "l1_loss": 238.9, "l0": 160.0, "frac_variance_explained": 0.4703125, "cossim": 0.844921875, "l2_ratio": 0.85234375, "relative_reconstruction_bias": 1.00546875, "loss_original": 2.3, "loss_reconstructed": 3.4765625, "loss_zero": 12.4125, "frac_recovered": 0.88203125, "frac_alive": 0.87200927734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3d8147d94820b90e6c00c7c86954a8a83a8c2ed --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.025, "l1_loss": 487.5, "l0": 160.0, "frac_variance_explained": 0.76640625, "cossim": 0.90703125, "l2_ratio": 0.911328125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.665625, "loss_zero": 12.4125, "frac_recovered": 0.963671875, "frac_alive": 0.84619140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a223f201e9388bb04b1968b1f3685e4818e675f7 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 17.575, "l1_loss": 410.5, "l0": 159.97916717529296, "frac_variance_explained": 0.821484375, "cossim": 0.93828125, "l2_ratio": 0.9375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.465625, "loss_zero": 12.4125, "frac_recovered": 0.982421875, "frac_alive": 0.6915283203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..233ce9956415c68ff06627dedc47cdf3007df788 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_3_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 17.31875, "l1_loss": 458.8, "l0": 160.0, "frac_variance_explained": 0.867578125, "cossim": 0.9390625, "l2_ratio": 0.941015625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.45625, "loss_zero": 12.4125, "frac_recovered": 0.983984375, "frac_alive": 0.66497802734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eeb902e86317c95988f973b6b3070335200a370c --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.325, "l1_loss": 495.8, "l0": 320.0, "frac_variance_explained": 0.310546875, "cossim": 0.685546875, "l2_ratio": 0.757421875, "relative_reconstruction_bias": 1.10546875, "loss_original": 2.3, "loss_reconstructed": 6.1, "loss_zero": 12.4125, "frac_recovered": 0.6234375, "frac_alive": 0.53265380859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..46517f0b10e632981ec15360387a3d8cd87a5aa0 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.625, "l1_loss": 564.8, "l0": 320.0, "frac_variance_explained": 0.734765625, "cossim": 0.9078125, "l2_ratio": 0.910546875, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.3, "loss_reconstructed": 2.671875, "loss_zero": 12.4125, "frac_recovered": 0.96171875, "frac_alive": 0.985595703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a423ea5fa028822e7d8a41d22e082f8a970595f --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 17.26875, "l1_loss": 323.4, "l0": 320.0, "frac_variance_explained": 0.776953125, "cossim": 0.93828125, "l2_ratio": 0.93984375, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.4578125, "loss_zero": 12.4125, "frac_recovered": 0.983203125, "frac_alive": 0.94976806640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3fe09ca03ad79b5920c66b086658a780f57441b3 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 14.88125, "l1_loss": 496.4, "l0": 320.0, "frac_variance_explained": 0.865625, "cossim": 0.9546875, "l2_ratio": 0.95625, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.396875, "loss_zero": 12.4125, "frac_recovered": 0.98828125, "frac_alive": 0.7767333984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c00b0a0b829a4753b178dd9399058df9fda60cf4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 15.225, "l1_loss": 451.0, "l0": 319.99583435058594, "frac_variance_explained": 0.860546875, "cossim": 0.953515625, "l2_ratio": 0.95390625, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.3953125, "loss_zero": 12.4125, "frac_recovered": 0.98828125, "frac_alive": 0.75531005859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42cb823fc06e1bae62d177a94a763a5d204f137c --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 25.475, "l1_loss": 598.6, "l0": 320.0, "frac_variance_explained": 0.668359375, "cossim": 0.88203125, "l2_ratio": 0.889453125, "relative_reconstruction_bias": 1.0109375, "loss_original": 2.3, "loss_reconstructed": 2.925, "loss_zero": 12.4125, "frac_recovered": 0.93828125, "frac_alive": 0.9815673828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..62321a8038d08ed2a9b2fa6ac5975cb78a5ddf12 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 19.375, "l1_loss": 540.4, "l0": 320.0, "frac_variance_explained": 0.776171875, "cossim": 0.92421875, "l2_ratio": 0.926953125, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.3, "loss_reconstructed": 2.5546875, "loss_zero": 12.4125, "frac_recovered": 0.97421875, "frac_alive": 0.97955322265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b7d2a8e957bb5562b449edd0568a0001e93dae04 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 15.58125, "l1_loss": 655.4, "l0": 320.0, "frac_variance_explained": 0.908203125, "cossim": 0.95, "l2_ratio": 0.951953125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.4140625, "loss_zero": 12.4125, "frac_recovered": 0.9875, "frac_alive": 0.897705078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6aa8adb3a056e0ff94b01334f9183131850aa4d7 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_4_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 15.35625, "l1_loss": 359.4, "l0": 320.0, "frac_variance_explained": 0.826171875, "cossim": 0.951953125, "l2_ratio": 0.9515625, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.409375, "loss_zero": 12.4125, "frac_recovered": 0.98828125, "frac_alive": 0.84075927734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..371465fe2da8fbd306f8f57ea46e398a529680b0 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.2, "l1_loss": 984.8, "l0": 640.0, "frac_variance_explained": 0.2765625, "cossim": 0.76796875, "l2_ratio": 1.09140625, "relative_reconstruction_bias": 1.40625, "loss_original": 2.3, "loss_reconstructed": 4.0078125, "loss_zero": 12.4125, "frac_recovered": 0.82890625, "frac_alive": 0.6943359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..62c6fe1768220eaf5922701683af64f3cf518905 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 18.7875, "l1_loss": 890.6, "l0": 640.0, "frac_variance_explained": 0.794140625, "cossim": 0.92890625, "l2_ratio": 0.93359375, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.3, "loss_reconstructed": 2.5078125, "loss_zero": 12.4125, "frac_recovered": 0.978515625, "frac_alive": 0.9993896484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..859ef93a7efc49f3284703423381f3a220de65b4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 15.13125, "l1_loss": 582.0, "l0": 639.9958374023438, "frac_variance_explained": 0.847265625, "cossim": 0.95390625, "l2_ratio": 0.95390625, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.40625, "loss_zero": 12.4125, "frac_recovered": 0.98828125, "frac_alive": 0.984619140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1e967eb8bf46947317da2e03722633a8ec27e93f --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 12.78125, "l1_loss": 782.8, "l0": 639.8833374023437, "frac_variance_explained": 0.893359375, "cossim": 0.966796875, "l2_ratio": 0.967578125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.35625, "loss_zero": 12.4125, "frac_recovered": 0.992578125, "frac_alive": 0.702880859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4ff034ef81f956196f9d6b498533926c5238982 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 12.41875, "l1_loss": 807.2, "l0": 639.4541748046875, "frac_variance_explained": 0.912109375, "cossim": 0.96875, "l2_ratio": 0.96953125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.353125, "loss_zero": 12.4125, "frac_recovered": 0.992578125, "frac_alive": 0.647705078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2407c067320310351edcd0f3f125338654989819 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.75, "l1_loss": 666.2, "l0": 640.0, "frac_variance_explained": 0.725390625, "cossim": 0.912109375, "l2_ratio": 0.923828125, "relative_reconstruction_bias": 1.015625, "loss_original": 2.3, "loss_reconstructed": 2.6640625, "loss_zero": 12.4125, "frac_recovered": 0.9625, "frac_alive": 0.999267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..862079c031e291568ed7de52a2d1e2524f98e719 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 17.2625, "l1_loss": 717.4, "l0": 640.0, "frac_variance_explained": 0.83515625, "cossim": 0.9421875, "l2_ratio": 0.94375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.446875, "loss_zero": 12.4125, "frac_recovered": 0.983984375, "frac_alive": 0.99658203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..39ec796da1d9a0a72c92c500884378da8a9e42a2 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 13.6875, "l1_loss": 850.8, "l0": 639.9583374023438, "frac_variance_explained": 0.909765625, "cossim": 0.962109375, "l2_ratio": 0.96328125, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.371875, "loss_zero": 12.4125, "frac_recovered": 0.991796875, "frac_alive": 0.89892578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a44959c28f8cfac1232f405aa3c4de9cb4d0e958 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_5_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 13.275, "l1_loss": 698.4, "l0": 640.0, "frac_variance_explained": 0.869921875, "cossim": 0.9640625, "l2_ratio": 0.965625, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.3625, "loss_zero": 12.4125, "frac_recovered": 0.9921875, "frac_alive": 0.74676513671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3d2150b4afeb44ef83733d724b801dcce580cb69 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.825, "l1_loss": 249.2, "l0": 20.0, "frac_variance_explained": 0.799609375, "cossim": 0.93671875, "l2_ratio": 0.935546875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.3, "loss_reconstructed": 3.10625, "loss_zero": 11.30625, "frac_recovered": 0.910546875, "frac_alive": 0.17633056640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_1/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c7bf1a56f7db97d1242980f57b8275d6b669960b --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.975, "l1_loss": 340.0, "l0": 40.0, "frac_variance_explained": 0.875390625, "cossim": 0.944921875, "l2_ratio": 0.94453125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.80625, "loss_zero": 11.30625, "frac_recovered": 0.94296875, "frac_alive": 0.3282470703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_2/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c046b342a6e35c26367f2a5805c3d541e4758128 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.55, "l1_loss": 350.0, "l0": 80.0, "frac_variance_explained": 0.8265625, "cossim": 0.951171875, "l2_ratio": 0.949609375, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.6640625, "loss_zero": 11.30625, "frac_recovered": 0.958984375, "frac_alive": 0.51617431640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_3/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0517ee20a606f3a36b0675a734ac07fc87323414 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 30.8125, "l1_loss": 523.2, "l0": 160.0, "frac_variance_explained": 0.875, "cossim": 0.962109375, "l2_ratio": 0.96015625, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 2.55625, "loss_zero": 11.30625, "frac_recovered": 0.971875, "frac_alive": 0.719482421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_4/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f11fc8f23aef0366df5f0144423a54451a67bdc8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.55, "l1_loss": 799.6, "l0": 320.0, "frac_variance_explained": 0.90859375, "cossim": 0.96953125, "l2_ratio": 0.968359375, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.4640625, "loss_zero": 11.30625, "frac_recovered": 0.982421875, "frac_alive": 0.845703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_5/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f53d4713eb7a3d97adeec61d5c4aaecb47da2c87 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 21.5, "l1_loss": 1420.0, "l0": 640.0, "frac_variance_explained": 0.953515625, "cossim": 0.98203125, "l2_ratio": 0.981640625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.384375, "loss_zero": 11.30625, "frac_recovered": 0.9890625, "frac_alive": 0.7069091796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf4e012dde362050e1dcf68243aa2b55ffd2a5d3 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 136.35, "l1_loss": 112.8, "l0": 20.0, "frac_variance_explained": 0.049609375, "cossim": 0.3044921875, "l2_ratio": 0.1931640625, "relative_reconstruction_bias": 0.6328125, "loss_original": 2.3, "loss_reconstructed": 12.29375, "loss_zero": 11.30625, "frac_recovered": -0.109814453125, "frac_alive": 0.084716796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..16addb16ce13bfbed93f6cd97fd493f676164509 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.275, "l1_loss": 285.2, "l0": 20.0, "frac_variance_explained": 0.615625, "cossim": 0.851171875, "l2_ratio": 0.875390625, "relative_reconstruction_bias": 0.95078125, "loss_original": 2.3, "loss_reconstructed": 5.365625, "loss_zero": 11.30625, "frac_recovered": 0.659375, "frac_alive": 0.10498046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..14df32375984407980874c6279bdc53b9f6fd0b8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.95, "l1_loss": 205.3, "l0": 20.0, "frac_variance_explained": 0.71796875, "cossim": 0.91796875, "l2_ratio": 0.917578125, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 3.4, "loss_zero": 11.30625, "frac_recovered": 0.8765625, "frac_alive": 0.15087890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a8a1880263edebbdc9f8acd0acdc43abb6b9a9ed --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.2, "l1_loss": 206.9, "l0": 20.0, "frac_variance_explained": 0.755859375, "cossim": 0.930859375, "l2_ratio": 0.93046875, "relative_reconstruction_bias": 0.998828125, "loss_original": 2.3, "loss_reconstructed": 3.14375, "loss_zero": 11.30625, "frac_recovered": 0.90625, "frac_alive": 0.17340087890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfce7c5d773fb493cb4f08154291c9bc1a96277 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.9, "l1_loss": 253.5, "l0": 20.0, "frac_variance_explained": 0.819140625, "cossim": 0.929296875, "l2_ratio": 0.93046875, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 3.125, "loss_zero": 11.30625, "frac_recovered": 0.909375, "frac_alive": 0.16864013671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..304c79aa563f629b0e617d7977e8a7efcc5ffe65 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.9, "l1_loss": 156.5, "l0": 20.0, "frac_variance_explained": 0.305859375, "cossim": 0.813671875, "l2_ratio": 0.8375, "relative_reconstruction_bias": 0.96015625, "loss_original": 2.3, "loss_reconstructed": 6.6375, "loss_zero": 11.30625, "frac_recovered": 0.5181640625, "frac_alive": 0.16241455078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d10a148aaa455ff7c4d97151e29cd902e70dddae --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.175, "l1_loss": 190.8, "l0": 20.0, "frac_variance_explained": 0.648828125, "cossim": 0.902734375, "l2_ratio": 0.90546875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 3.815625, "loss_zero": 11.30625, "frac_recovered": 0.83125, "frac_alive": 0.12701416015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c136fdfc4f5c6e0c0a6d7347adc1bda7049ff482 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.45, "l1_loss": 207.8, "l0": 20.0, "frac_variance_explained": 0.7390625, "cossim": 0.927734375, "l2_ratio": 0.9296875, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 3.228125, "loss_zero": 11.30625, "frac_recovered": 0.896484375, "frac_alive": 0.16119384765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2fb902dee9e5281d9643f1295a06f623a5352fd6 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.05, "l1_loss": 224.1, "l0": 20.0, "frac_variance_explained": 0.782421875, "cossim": 0.934765625, "l2_ratio": 0.934375, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.3, "loss_reconstructed": 3.10625, "loss_zero": 11.30625, "frac_recovered": 0.910546875, "frac_alive": 0.17303466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..35ff293105e88d5757504440ab7159c7f814ea75 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_0_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 43.0, "l1_loss": 253.0, "l0": 20.0, "frac_variance_explained": 0.791796875, "cossim": 0.927734375, "l2_ratio": 0.92890625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 3.1765625, "loss_zero": 11.30625, "frac_recovered": 0.903515625, "frac_alive": 0.1666259765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..16d83f5d7e816ff26561ac956a928ce6cb727329 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 128.15, "l1_loss": 208.4, "l0": 40.0, "frac_variance_explained": 0.08359375, "cossim": 0.3927734375, "l2_ratio": 0.2662109375, "relative_reconstruction_bias": 0.67265625, "loss_original": 2.3, "loss_reconstructed": 11.25625, "loss_zero": 11.30625, "frac_recovered": 0.005377197265625, "frac_alive": 0.1331787109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd58bc2dc04059a6b9f1b80a124c766fb5e187f4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.025, "l1_loss": 342.3, "l0": 40.0, "frac_variance_explained": 0.626953125, "cossim": 0.876171875, "l2_ratio": 0.886328125, "relative_reconstruction_bias": 1.009765625, "loss_original": 2.3, "loss_reconstructed": 4.615625, "loss_zero": 11.30625, "frac_recovered": 0.742578125, "frac_alive": 0.2408447265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dc3be50a06f1d38017d6101e48016ec25205fac0 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.575, "l1_loss": 428.8, "l0": 40.0, "frac_variance_explained": 0.82265625, "cossim": 0.940625, "l2_ratio": 0.93984375, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 3.0140625, "loss_zero": 11.30625, "frac_recovered": 0.922265625, "frac_alive": 0.2928466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..86e31eeaf4be152193ce41f8f2f18561f3e046e6 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.425, "l1_loss": 312.8, "l0": 40.0, "frac_variance_explained": 0.828125, "cossim": 0.93984375, "l2_ratio": 0.939453125, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.8375, "loss_zero": 11.30625, "frac_recovered": 0.93984375, "frac_alive": 0.3302001953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c74c43fc3cf6d019c80f330a23372270c9442179 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.85, "l1_loss": 271.2, "l0": 40.0, "frac_variance_explained": 0.801171875, "cossim": 0.94609375, "l2_ratio": 0.9453125, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.821875, "loss_zero": 11.30625, "frac_recovered": 0.940625, "frac_alive": 0.32086181640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5875cd87ec8f33b4d05589d55e3fbd552fee756b --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 79.025, "l1_loss": 285.2, "l0": 40.0, "frac_variance_explained": 0.401953125, "cossim": 0.835546875, "l2_ratio": 0.853515625, "relative_reconstruction_bias": 0.878515625, "loss_original": 2.3, "loss_reconstructed": 5.678125, "loss_zero": 11.30625, "frac_recovered": 0.625, "frac_alive": 0.3209228515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02f222deb5cc69b5e48aa9bf899516f303245fe9 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.225, "l1_loss": 270.2, "l0": 40.0, "frac_variance_explained": 0.715625, "cossim": 0.921484375, "l2_ratio": 0.92578125, "relative_reconstruction_bias": 1.004296875, "loss_original": 2.3, "loss_reconstructed": 3.3640625, "loss_zero": 11.30625, "frac_recovered": 0.882421875, "frac_alive": 0.2572021484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1bb89e7e6a8b4c0edcdf500e0f663c1d853ecda2 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.875, "l1_loss": 339.0, "l0": 40.0, "frac_variance_explained": 0.820703125, "cossim": 0.940234375, "l2_ratio": 0.940625, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.8890625, "loss_zero": 11.30625, "frac_recovered": 0.93515625, "frac_alive": 0.3106689453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f20025f276ae26f62996990ba2cf65eab3bac5d0 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.5, "l1_loss": 283.6, "l0": 40.0, "frac_variance_explained": 0.826953125, "cossim": 0.948046875, "l2_ratio": 0.948046875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.3, "loss_reconstructed": 2.80625, "loss_zero": 11.30625, "frac_recovered": 0.94296875, "frac_alive": 0.33111572265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e16f4b0df8092f7f7b97028c60e45744be008fbf --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_1_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.575, "l1_loss": 300.4, "l0": 40.0, "frac_variance_explained": 0.81875, "cossim": 0.943359375, "l2_ratio": 0.94453125, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.853125, "loss_zero": 11.30625, "frac_recovered": 0.9390625, "frac_alive": 0.32354736328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..16ef0b04a92196cb2e1db21f4f8ecec7f276fbcd --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 107.7, "l1_loss": 356.8, "l0": 80.0, "frac_variance_explained": 0.147265625, "cossim": 0.492578125, "l2_ratio": 0.37421875, "relative_reconstruction_bias": 0.7609375, "loss_original": 2.3, "loss_reconstructed": 9.49375, "loss_zero": 11.30625, "frac_recovered": 0.2009765625, "frac_alive": 0.20849609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0daa1cee166f902fd7a02a7c2fd794901473436 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.725, "l1_loss": 416.8, "l0": 80.0, "frac_variance_explained": 0.65, "cossim": 0.899609375, "l2_ratio": 0.90625, "relative_reconstruction_bias": 1.002734375, "loss_original": 2.3, "loss_reconstructed": 3.8765625, "loss_zero": 11.30625, "frac_recovered": 0.82421875, "frac_alive": 0.48846435546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..77d72afe7a6dad6e2ca3d618e75660ccd015d10d --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.125, "l1_loss": 460.8, "l0": 80.0, "frac_variance_explained": 0.834375, "cossim": 0.941015625, "l2_ratio": 0.93984375, "relative_reconstruction_bias": 0.998828125, "loss_original": 2.3, "loss_reconstructed": 2.83125, "loss_zero": 11.30625, "frac_recovered": 0.940625, "frac_alive": 0.51043701171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..97bae7c2f9f17f6bda0314e80672b60c0c7a8a71 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.0, "l1_loss": 356.4, "l0": 80.0, "frac_variance_explained": 0.828515625, "cossim": 0.951953125, "l2_ratio": 0.953125, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.69375, "loss_zero": 11.30625, "frac_recovered": 0.957421875, "frac_alive": 0.5113525390625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d29f409df4afb3cc15305c4854685a585f462a2a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 33.425, "l1_loss": 424.4, "l0": 80.0, "frac_variance_explained": 0.883203125, "cossim": 0.95078125, "l2_ratio": 0.951171875, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.6828125, "loss_zero": 11.30625, "frac_recovered": 0.958203125, "frac_alive": 0.52288818359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc611c33ff24666ae1199530a5ff842e470076f --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.25, "l1_loss": 342.4, "l0": 80.0, "frac_variance_explained": 0.48671875, "cossim": 0.8671875, "l2_ratio": 0.879296875, "relative_reconstruction_bias": 1.005859375, "loss_original": 2.3, "loss_reconstructed": 4.728125, "loss_zero": 11.30625, "frac_recovered": 0.73046875, "frac_alive": 0.5579833984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..885b64c26c162efaac8aed8c518348205c42bd79 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.325, "l1_loss": 352.8, "l0": 80.0, "frac_variance_explained": 0.75546875, "cossim": 0.930859375, "l2_ratio": 0.93125, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 3.1109375, "loss_zero": 11.30625, "frac_recovered": 0.909375, "frac_alive": 0.4815673828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..39ad0eb8a07ece67192b71c6d45002d67fccbfaa --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.8, "l1_loss": 409.6, "l0": 80.0, "frac_variance_explained": 0.834375, "cossim": 0.955078125, "l2_ratio": 0.955078125, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 2.725, "loss_zero": 11.30625, "frac_recovered": 0.953515625, "frac_alive": 0.489990234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5bf0451b71041ef743e1b4a43005810fcd2c619c --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.8, "l1_loss": 357.6, "l0": 80.0, "frac_variance_explained": 0.832421875, "cossim": 0.95, "l2_ratio": 0.95, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.6640625, "loss_zero": 11.30625, "frac_recovered": 0.958984375, "frac_alive": 0.5294189453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b10c974f631f0712a5461fa000ec5ecd75cd9d1d --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_2_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 34.575, "l1_loss": 356.6, "l0": 80.0, "frac_variance_explained": 0.831640625, "cossim": 0.954296875, "l2_ratio": 0.953125, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.7046875, "loss_zero": 11.30625, "frac_recovered": 0.955859375, "frac_alive": 0.49163818359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c6e838b3a0930189ecda92c180446bfe51eee450 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 104.65, "l1_loss": 689.2, "l0": 160.0, "frac_variance_explained": 0.21328125, "cossim": 0.59453125, "l2_ratio": 0.53046875, "relative_reconstruction_bias": 0.887109375, "loss_original": 2.3, "loss_reconstructed": 7.175, "loss_zero": 11.30625, "frac_recovered": 0.4587890625, "frac_alive": 0.303466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..77556ae160cca21f3e94c7bda05c8ed0012f13ea --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.575, "l1_loss": 503.0, "l0": 160.0, "frac_variance_explained": 0.70546875, "cossim": 0.919921875, "l2_ratio": 0.9265625, "relative_reconstruction_bias": 1.005078125, "loss_original": 2.3, "loss_reconstructed": 3.2671875, "loss_zero": 11.30625, "frac_recovered": 0.8921875, "frac_alive": 0.80963134765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2bbd5f0f2608d4d4c8589fc577b846091a2eef73 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.35, "l1_loss": 632.0, "l0": 160.0, "frac_variance_explained": 0.835546875, "cossim": 0.94921875, "l2_ratio": 0.948828125, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.6921875, "loss_zero": 11.30625, "frac_recovered": 0.956640625, "frac_alive": 0.78729248046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8cbf0ab9c05adfc993b6d78bef92bc2f8706f00f --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.275, "l1_loss": 496.0, "l0": 160.0, "frac_variance_explained": 0.853125, "cossim": 0.95859375, "l2_ratio": 0.9578125, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.58125, "loss_zero": 11.30625, "frac_recovered": 0.968359375, "frac_alive": 0.704833984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9689290b6b3929b72d8c2f072734590bad61767a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.375, "l1_loss": 515.6, "l0": 160.0, "frac_variance_explained": 0.870703125, "cossim": 0.958203125, "l2_ratio": 0.9578125, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.5703125, "loss_zero": 11.30625, "frac_recovered": 0.9703125, "frac_alive": 0.72021484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..524b6fb6691cd7f1324d3fb64e1ba657bab8e856 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.375, "l1_loss": 537.4, "l0": 160.0, "frac_variance_explained": 0.568359375, "cossim": 0.887109375, "l2_ratio": 0.902734375, "relative_reconstruction_bias": 1.01328125, "loss_original": 2.3, "loss_reconstructed": 3.7984375, "loss_zero": 11.30625, "frac_recovered": 0.83359375, "frac_alive": 0.78143310546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c82b1a81474b43d7269caa2a6b34e1c8cc34df01 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.925, "l1_loss": 733.0, "l0": 160.0, "frac_variance_explained": 0.840234375, "cossim": 0.936328125, "l2_ratio": 0.938671875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.903125, "loss_zero": 11.30625, "frac_recovered": 0.932421875, "frac_alive": 0.80389404296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..788c1889f3072992647d38422dc34677e21d573d --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.6, "l1_loss": 659.0, "l0": 160.0, "frac_variance_explained": 0.878125, "cossim": 0.958984375, "l2_ratio": 0.959375, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.6046875, "loss_zero": 11.30625, "frac_recovered": 0.965625, "frac_alive": 0.7408447265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e388e18bbd206c5f69b65499147f156087191502 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.2875, "l1_loss": 554.2, "l0": 160.0, "frac_variance_explained": 0.887109375, "cossim": 0.960546875, "l2_ratio": 0.96015625, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.55625, "loss_zero": 11.30625, "frac_recovered": 0.971875, "frac_alive": 0.7100830078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d9606e29897fe890bdb8f4a3ddf42ad15bdb855a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_3_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.925, "l1_loss": 696.0, "l0": 160.0, "frac_variance_explained": 0.904296875, "cossim": 0.958203125, "l2_ratio": 0.960546875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.5875, "loss_zero": 11.30625, "frac_recovered": 0.967578125, "frac_alive": 0.73175048828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..52e80435fd8b317a5a9852e8ddf4909f6a3b3b57 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.2, "l1_loss": 1235.2, "l0": 320.0, "frac_variance_explained": 0.255859375, "cossim": 0.69296875, "l2_ratio": 0.764453125, "relative_reconstruction_bias": 1.1046875, "loss_original": 2.3, "loss_reconstructed": 5.24375, "loss_zero": 11.30625, "frac_recovered": 0.672265625, "frac_alive": 0.4349365234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a38d545cfd009e4ee0489a8e9471220b8515aecb --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.425, "l1_loss": 913.6, "l0": 320.0, "frac_variance_explained": 0.8078125, "cossim": 0.93671875, "l2_ratio": 0.941796875, "relative_reconstruction_bias": 1.005859375, "loss_original": 2.3, "loss_reconstructed": 2.8625, "loss_zero": 11.30625, "frac_recovered": 0.93671875, "frac_alive": 0.9776611328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d762a0a6ae15a8c7494d52497a2be31ef604f0aa --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 32.3375, "l1_loss": 669.2, "l0": 320.0, "frac_variance_explained": 0.85234375, "cossim": 0.959375, "l2_ratio": 0.959375, "relative_reconstruction_bias": 1.001953125, "loss_original": 2.3, "loss_reconstructed": 2.58125, "loss_zero": 11.30625, "frac_recovered": 0.96875, "frac_alive": 0.9576416015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..376e8ab259eed0263c3393d3fc4a2b3ef766d88f --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.2375, "l1_loss": 849.2, "l0": 320.0, "frac_variance_explained": 0.91015625, "cossim": 0.96875, "l2_ratio": 0.968359375, "relative_reconstruction_bias": 0.999609375, "loss_original": 2.3, "loss_reconstructed": 2.4828125, "loss_zero": 11.30625, "frac_recovered": 0.978515625, "frac_alive": 0.86370849609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eebe933b8b28788cc4ecbe5fc490f0ba140c804e --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.375, "l1_loss": 804.4, "l0": 320.0, "frac_variance_explained": 0.9046875, "cossim": 0.9671875, "l2_ratio": 0.966796875, "relative_reconstruction_bias": 0.99921875, "loss_original": 2.3, "loss_reconstructed": 2.471875, "loss_zero": 11.30625, "frac_recovered": 0.980859375, "frac_alive": 0.8427734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73f9cf189cd2148b631093bb8474dcce46ce8c84 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.2, "l1_loss": 993.6, "l0": 320.0, "frac_variance_explained": 0.741796875, "cossim": 0.913671875, "l2_ratio": 0.920703125, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.3, "loss_reconstructed": 3.1359375, "loss_zero": 11.30625, "frac_recovered": 0.905859375, "frac_alive": 0.9654541015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..710f5d5155f89b03d17924ed54d6815cd6384331 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 36.65, "l1_loss": 865.6, "l0": 320.0, "frac_variance_explained": 0.8453125, "cossim": 0.94765625, "l2_ratio": 0.94921875, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.7203125, "loss_zero": 11.30625, "frac_recovered": 0.953125, "frac_alive": 0.9755859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e77f2efd220b2a30243535dcab0ad4ede71955fd --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 29.1875, "l1_loss": 986.4, "l0": 320.0, "frac_variance_explained": 0.933984375, "cossim": 0.96484375, "l2_ratio": 0.965234375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.5140625, "loss_zero": 11.30625, "frac_recovered": 0.9765625, "frac_alive": 0.93798828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5eb2596b241119a8ac82816bbdb7d32a753926e0 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.7875, "l1_loss": 852.4, "l0": 320.0, "frac_variance_explained": 0.91328125, "cossim": 0.967578125, "l2_ratio": 0.96875, "relative_reconstruction_bias": 1.001171875, "loss_original": 2.3, "loss_reconstructed": 2.4640625, "loss_zero": 11.30625, "frac_recovered": 0.982421875, "frac_alive": 0.8382568359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..44f01009e9f1d0480701aa151b0a967237080250 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_4_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.9125, "l1_loss": 698.0, "l0": 320.0, "frac_variance_explained": 0.878515625, "cossim": 0.967578125, "l2_ratio": 0.96796875, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.4921875, "loss_zero": 11.30625, "frac_recovered": 0.978125, "frac_alive": 0.89276123046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..76d1a5802086c85281d71f140395956071f29859 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 93.75, "l1_loss": 2287.2, "l0": 640.0, "frac_variance_explained": 0.232421875, "cossim": 0.773828125, "l2_ratio": 1.10859375, "relative_reconstruction_bias": 1.4203125, "loss_original": 2.3, "loss_reconstructed": 3.8078125, "loss_zero": 11.30625, "frac_recovered": 0.8328125, "frac_alive": 0.6279296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8bddbc7d880076f41826315d8178407f315c188 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.775, "l1_loss": 1358.8, "l0": 640.0, "frac_variance_explained": 0.85078125, "cossim": 0.950390625, "l2_ratio": 0.953515625, "relative_reconstruction_bias": 1.0046875, "loss_original": 2.3, "loss_reconstructed": 2.63125, "loss_zero": 11.30625, "frac_recovered": 0.96171875, "frac_alive": 0.9990234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1d555a356159f26cf388d8a413ad940136bc32b4 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.125, "l1_loss": 1045.6, "l0": 640.0, "frac_variance_explained": 0.897265625, "cossim": 0.9671875, "l2_ratio": 0.9671875, "relative_reconstruction_bias": 1.0, "loss_original": 2.3, "loss_reconstructed": 2.471875, "loss_zero": 11.30625, "frac_recovered": 0.981640625, "frac_alive": 0.99517822265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_19531/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_19531/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6e0f2dae5e7bf79077ce11efd2d0029839f9bd69 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_19531/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 23.5875, "l1_loss": 1406.4, "l0": 639.9958374023438, "frac_variance_explained": 0.930078125, "cossim": 0.977734375, "l2_ratio": 0.977734375, "relative_reconstruction_bias": 1.0015625, "loss_original": 2.3, "loss_reconstructed": 2.396875, "loss_zero": 11.30625, "frac_recovered": 0.9875, "frac_alive": 0.80499267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_29296/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_29296/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..32e99cc61cdf422d724d2475ab959b49269af0b8 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_29296/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.675, "l1_loss": 1410.4, "l0": 639.9875, "frac_variance_explained": 0.938671875, "cossim": 0.978125, "l2_ratio": 0.978515625, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.3875, "loss_zero": 11.30625, "frac_recovered": 0.9875, "frac_alive": 0.74365234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bf00f4ef4d68b5c0f5907e3f74cb14b85767a4e3 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 41.675, "l1_loss": 1220.8, "l0": 640.0, "frac_variance_explained": 0.788671875, "cossim": 0.93359375, "l2_ratio": 0.951953125, "relative_reconstruction_bias": 1.01171875, "loss_original": 2.3, "loss_reconstructed": 2.78125, "loss_zero": 11.30625, "frac_recovered": 0.946875, "frac_alive": 0.998779296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4f6dedd5c97dbecea596705e74ca07871c9e085a --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.8375, "l1_loss": 1153.6, "l0": 640.0, "frac_variance_explained": 0.886328125, "cossim": 0.961328125, "l2_ratio": 0.9625, "relative_reconstruction_bias": 1.00234375, "loss_original": 2.3, "loss_reconstructed": 2.54375, "loss_zero": 11.30625, "frac_recovered": 0.973046875, "frac_alive": 0.998046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53f43bf110557bad8e3ecd365cab31f4f2ed67a6 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.95, "l1_loss": 1404.8, "l0": 640.0, "frac_variance_explained": 0.938671875, "cossim": 0.973828125, "l2_ratio": 0.975, "relative_reconstruction_bias": 1.003125, "loss_original": 2.3, "loss_reconstructed": 2.4234375, "loss_zero": 11.30625, "frac_recovered": 0.985546875, "frac_alive": 0.95556640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48828/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48828/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..371b615701b66a120787e9b83f495f07ced69350 --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_48828/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 22.125, "l1_loss": 1440.8, "l0": 640.0, "frac_variance_explained": 0.94921875, "cossim": 0.981640625, "l2_ratio": 0.98203125, "relative_reconstruction_bias": 1.000390625, "loss_original": 2.3, "loss_reconstructed": 2.384375, "loss_zero": 11.30625, "frac_recovered": 0.9890625, "frac_alive": 0.70782470703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_9765/eval_results.json b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_9765/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5207c64090d54d31c2b3afd82db9806a4434154b --- /dev/null +++ b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_19_checkpoints/trainer_5_step_9765/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.575, "l1_loss": 1298.4, "l0": 640.0, "frac_variance_explained": 0.913671875, "cossim": 0.97578125, "l2_ratio": 0.976953125, "relative_reconstruction_bias": 1.00078125, "loss_original": 2.3, "loss_reconstructed": 2.4078125, "loss_zero": 11.30625, "frac_recovered": 0.9875, "frac_alive": 0.86810302734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file