Xylaria-1.8 / config.json
{
"_name_or_path": "Xylaria",
"architectures": [
"XylariaTransformer"
],
"attention_dropout": 0.003,
"bos_token_id": 151643,
"custom_attention_type": "grouped_query",
"custom_settings": {
"param_0": 0.18909638773500503,
"param_1": 0.8193609427023019,
"param_2": 0.8222061374701647,
"param_3": 0.9849500721409111,
"param_4": 0.6006148886232605,
"param_5": 0.27476915865965346,
"param_6": 0.38785891496724245,
"param_7": 0.4125778939436352,
"param_8": 0.5190071705230352,
"param_9": 0.8516028213154339
},
"embedding_dropout": 0.0165,
"eos_token_id": 151645,
"extra_layer_norm": false,
"hidden_act": "silu",
"hidden_size": 5119,
"initializer_range": 0.0208,
"intermediate_size": 27647,
"max_position_embeddings": 40960,
"max_window_layers": 64,
"model_type": "Xylaria1",
"num_attention_heads": 39,
"num_hidden_layers": 63,
"num_key_value_heads": 8,
"obfuscation_level": 3,
"rms_norm_eps": 1e-05,
"rope_scaling": null,
"rope_theta": 1028724.3021944767,
"seed": 1741770187,
"sliding_window": null,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.48.3",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064,
"xylaria_version": "1.8"
}
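
The config describes a custom grouped-query-attention transformer ("model_type": "Xylaria1") rather than a stock transformers architecture. Below is a minimal sketch for downloading and inspecting the raw JSON fields; it assumes the hosting repo id is "Reality123b/Xylaria-1.8" and that huggingface_hub is installed, neither of which is stated in the file itself.

# A minimal sketch, assuming the repo id "Reality123b/Xylaria-1.8" (not
# confirmed by this file) and that only the raw JSON fields are needed.
import json

from huggingface_hub import hf_hub_download

config_path = hf_hub_download(repo_id="Reality123b/Xylaria-1.8", filename="config.json")
with open(config_path) as f:
    cfg = json.load(f)

# Geometry taken directly from the fields above: grouped-query attention with
# 39 query heads sharing 8 key/value heads across 63 hidden layers.
print(cfg["model_type"], cfg["hidden_size"], cfg["num_attention_heads"], cfg["num_key_value_heads"])

# Because "model_type" is "Xylaria1" rather than a built-in transformers
# architecture, AutoConfig/AutoModel would only resolve it if the repository
# ships custom modeling code, e.g. (assumption, not verified here):
#   from transformers import AutoConfig
#   AutoConfig.from_pretrained("Reality123b/Xylaria-1.8", trust_remote_code=True)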