{
  "_name_or_path": "/home/rllaskowski_a100/mot-training/experiments/mot_medium_32_8/checkpoint-100000",
  "activation_function": "gelu_new",
  "architectures": [
    "MoTLMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "auto_map": {
    "AutoConfig": "configuration_mot.MoTConfig",
    "AutoModelForCausalLM": "modeling_mot.MoTLMHeadModel",
    "AutoModel": "modeling_mot.MoTModel"
  },
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "emit_softmax_over_experts": false,
  "eos_token_id": 50256,
  "expert_size": 256,
  "group_size": 32,
  "init_scale": 1.0,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "mot",
  "n_embd": 512,
  "n_expert": 256,
  "n_head": 8,
  "n_inner": 65536,
  "n_layer": 8,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "torch_dtype": "float32",
  "transformers_version": "4.42.1",
  "use_cache": true,
  "use_discrete_routing": false,
  "vocab_size": 50257
}