{
  "model_class": "GLMModel",
  "tokenizer_type": "glm_ChineseSPTokenizer",
  "num_layers": 48,
  "hidden_size": 4096,
  "num_attention_heads": 64,
  "vocab_size": 50048,
  "hidden_dropout": 0.1,
  "attention_dropout": 0.1,
  "layernorm_order": "pre",
  "model_parallel_size": 1,
  "max_sequence_length": 1025,
  "block_lm": true,
  "masked_lm": false,
  "bert_prob": 0.5,
  "gpt_infill_prob": 0.5,
  "gpt_min_ratio": 0.5,
  "gap_sentence_prob": 0.0,
  "gap_sentence_ratio": 0.15,
  "avg_block_length": 3,
  "short_seq_prob": 0.0,
  "single_span_prob": 0.0,
  "task_mask": true,
  "no_shuffle_block": false,
  "no_block_position": false,
  "sentinel_token": false,
  "block_mask_prob": 0.0,
  "context_mask_ratio": 0.0,
  "random_position": false,
  "cloze_eval": true,
  "old_checkpoint": false,
  "tokenizer_model_type": "glm-10b"
}
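
As a minimal sketch of consuming this config, the snippet below loads the JSON and sanity-checks a few architecture fields before they are handed to a model builder. The file name `model_config.json` and the checks themselves are illustrative assumptions, not part of the config above.

```python
import json

# Assumption: the config shown above has been saved as model_config.json
# in the working directory.
with open("model_config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Basic consistency check: attention heads must evenly divide the hidden size.
assert config["hidden_size"] % config["num_attention_heads"] == 0, \
    "hidden_size must be divisible by num_attention_heads"

# Report the core architecture parameters that a training or inference
# script would typically pass on to the model constructor.
print(f"layers={config['num_layers']}, "
      f"hidden={config['hidden_size']}, "
      f"heads={config['num_attention_heads']}, "
      f"vocab={config['vocab_size']}, "
      f"max_seq_len={config['max_sequence_length']}")
```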