BERTu-ner / config.json
KurtMica's picture
Model files.
f28817d
{
  "dataset_reader": {
    "type": "multitask",
    "readers": {
      "ner": {
        "type": "wikiann",
        "token_indexers": {
          "transformer": {
            "type": "pretrained_transformer_mismatched",
            "max_length": 512,
            "model_name": "MLRS/BERTu"
          }
        }
      }
    }
  },
  "model": {
    "type": "multitask",
    "arg_name_mapping": {
      "backbone": {
        "tokens": "text",
        "words": "text"
      }
    },
    "backbone": {
      "type": "embedder_and_mask",
      "text_field_embedder": {
        "token_embedders": {
          "transformer": {
            "type": "pretrained_transformer_mismatched_with_dropout",
            "last_layer_only": false,
            "layer_dropout": 0.1,
            "max_length": 512,
            "model_name": "MLRS/BERTu",
            "tokenizer_kwargs": {},
            "train_parameters": true
          }
        }
      }
    },
    "heads": {
      "ner": {
        "type": "crf_tagger",
        "calculate_span_f1": true,
        "constrain_crf_decoding": true,
        "dropout": 0.2,
        "encoder": {
          "type": "pass_through",
          "input_dim": 768
        },
        "include_start_end_transitions": false,
        "label_encoding": "BIO"
      }
    }
  },
  "train_data_path": {
    "ner": "panx_dataset/mt/train"
  },
  "validation_data_path": {
    "ner": "panx_dataset/mt/dev"
  },
  "trainer": {
    "callbacks": [
      {
        "type": "tensorboard",
        "tensorboard_writer": {
          "should_log_learning_rate": true,
          "should_log_parameter_statistics": true
        }
      }
    ],
    "cuda_device": 0,
    "grad_norm": 5,
    "learning_rate_scheduler": {
      "type": "ulmfit_sqrt",
      "affected_group_count": 2,
      "decay_factor": 0.05,
      "discriminative_fine_tuning": true,
      "factor": 5,
      "gradual_unfreezing": true,
      "model_size": 1,
      "start_step": 2,
      "warmup_steps": 2
    },
    "num_epochs": 200,
    "optimizer": {
      "type": "huggingface_adamw",
      "betas": [0.9, 0.999],
      "correct_bias": false,
      "lr": 0.0005,
      "parameter_groups": [
        [
          [
            "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
          ],
          {}
        ],
        [
          [
            "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
            "text_field_embedder.*transformer_model.pooler.dense.bias"
          ],
          {
            "weight_decay": 0
          }
        ],
        [
          [
            "text_field_embedder.*._scalar_mix.*",
            "text_field_embedder.*transformer_model.pooler.dense.weight",
            "_head_sentinel",
            "head_arc_feedforward._linear_layers.*.weight",
            "child_arc_feedforward._linear_layers.*.weight",
            "head_tag_feedforward._linear_layers.*.weight",
            "child_tag_feedforward._linear_layers.*.weight",
            "arc_attention._weight_matrix",
            "tag_bilinear.weight",
            "tag_projection_layer._module.weight",
            "crf",
            "linear.weight",
            "tagger_linear.weight"
          ],
          {}
        ],
        [
          [
            "head_arc_feedforward._linear_layers.*.bias",
            "child_arc_feedforward._linear_layers.*.bias",
            "head_tag_feedforward._linear_layers.*.bias",
            "child_tag_feedforward._linear_layers.*.bias",
            "arc_attention._bias",
            "tag_bilinear.bias",
            "tag_projection_layer._module.bias",
            "linear.bias",
            "tagger_linear.bias"
          ],
          {
            "weight_decay": 0
          }
        ]
      ],
      "weight_decay": 0.01
    },
    "patience": 20,
    "validation_metric": ["+ner_f1-measure-overall"]
  },
  "data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "unbalanced_homogeneous_roundrobin",
      "batch_size": 64,
      "dataset_sizes": {
        "ner": 100
      }
    },
    "shuffle": true
  },
  "numpy_seed": 1337,
  "pytorch_seed": 133,
  "random_seed": 13370,
  "validation_data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "homogeneous_roundrobin",
      "batch_size": 64
    },
    "shuffle": true
  }
}