{
  "_name_or_path": "Xylaria",
  "architectures": [
    "XylariaTransformer"
  ],
  "attention_dropout": 0.003,
  "bos_token_id": 151643,
  "custom_attention_type": "grouped_query",
  "custom_settings": {
    "param_0": 0.18909638773500503,
    "param_1": 0.8193609427023019,
    "param_2": 0.8222061374701647,
    "param_3": 0.9849500721409111,
    "param_4": 0.6006148886232605,
    "param_5": 0.27476915865965346,
    "param_6": 0.38785891496724245,
    "param_7": 0.4125778939436352,
    "param_8": 0.5190071705230352,
    "param_9": 0.8516028213154339
  },
  "embedding_dropout": 0.0165,
  "eos_token_id": 151645,
  "extra_layer_norm": false,
  "hidden_act": "silu",
  "hidden_size": 5119,
  "initializer_range": 0.0208,
  "intermediate_size": 27647,
  "max_position_embeddings": 40960,
  "max_window_layers": 64,
  "model_type": "Xylaria1",
  "num_attention_heads": 39,
  "num_hidden_layers": 63,
  "num_key_value_heads": 8,
  "obfuscation_level": 3,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 1028724.3021944767,
  "seed": 1741770187,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.48.3",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 152064,
  "xylaria_version": "1.8"
}