fnlp
/

Llama3_1-8B-Base-LXTC-32x

Model card Files Files and versions Community

Llama3_1-8B-Base-LXTC-32x / Llama3_1-8B-Base-L11TC-32x /hyperparams.json

Hzfinfdu's picture

Rename folders

5a5bf63 verified 2 months ago

history blame contribute delete

1.01 kB

	{
	"device": "cuda:0",
	"seed": 42,
	"dtype": "torch.bfloat16",
	"hook_point_in": "blocks.11.ln2.hook_normalized",
	"hook_point_out": "blocks.11.hook_mlp_out",
	"use_decoder_bias": true,
	"apply_decoder_bias_to_pre_encoder": false,
	"expansion_factor": 32,
	"d_model": 4096,
	"d_sae": 131072,
	"bias_init_method": "all_zero",
	"act_fn": "jumprelu",
	"jump_relu_threshold": 0.119140625,
	"norm_activation": "dataset-wise",
	"dataset_average_activation_norm": {
	"in": 64.0,
	"out": 3.796875
	},
	"decoder_exactly_fixed_norm": false,
	"sparsity_include_decoder_norm": true,
	"use_glu_encoder": false,
	"init_decoder_norm": 0.5,
	"init_encoder_norm": 0.5,
	"init_encoder_with_decoder_transpose": false,
	"lp": 1,
	"l1_coefficient": 8e-05,
	"l1_coefficient_warmup_steps": 78125,
	"top_k": 50,
	"k_warmup_steps": 78125,
	"use_batch_norm_mse": true,
	"use_ghost_grads": false,
	"tp_size": 1,
	"ddp_size": 1
	}