{
"datasets": {
"fantasy": {
"canonical_cluster_threshold": 2,
"cluster_threshold": 2,
"has_conll": true,
"name": "fantasy",
"num_dev_docs": 20,
"num_test_docs": 20,
"num_train_docs": 171,
"targeted_eval": false
},
"litbank": {
"canonical_cluster_threshold": 1,
"cluster_threshold": 1,
"cross_val_split": 0,
"has_conll": true,
"name": "LitBank",
"num_dev_docs": 10,
"num_test_docs": 10,
"num_train_docs": 80,
"targeted_eval": false
}
},
"desc": "longformer base version retrain",
"infra": {
"is_local": true,
"work_dir": "./"
},
"keep_singletons": true,
"key": "coref_lf_train_final_runs_2",
"log_vals": false,
"metrics": [
"MUC",
"Bcub",
"CEAFE"
],
"model": {
"doc_encoder": {
"add_speaker_tokens": true,
"chunking": "independent",
"finetune": true,
"speaker_end": "[SPEAKER_END]",
"speaker_start": "[SPEAKER_START]",
"transformer": {
"max_encoder_segment_len": 4096,
"max_segment_len": 4096,
"model_size": "large",
"model_str": "allenai/longformer-large-4096",
"name": "longformer"
}
},
"memory": {
"emb_size": 20,
"entity_rep": "wt_avg",
"mem_type": {
"eval_max_ents": null,
"max_ents": null,
"name": "unbounded"
},
"mlp_depth": 1,
"mlp_size": 3000,
"num_feats": 2,
"sim_func": "hadamard",
"thresh": 0.0
},
"mention_params": {
"emb_size": 20,
"ext_ment": false,
"max_span_width": 20,
"ment_emb": "attn",
"ment_emb_to_size_factor": {
"attn": 3,
"endpoint": 2,
"max": 1
},
"mlp_depth": 1,
"mlp_size": 3000,
"top_span_ratio": 0.4,
"use_gold_ments": false,
"use_topk": false
},
"metadata_params": {
"default_genre": "nw",
"genres": [
"bc",
"bn",
"mz",
"nw",
"pt",
"tc",
"wb"
],
"use_genre_feature": false
}
},
"optimizer": {
"fine_tune_lr": 1e-05,
"init_lr": 0.0003,
"lr_decay": "linear",
"max_gradient_norm": 1.0
},
"override_encoder": false,
"override_memory": false,
"paths": {
"base_data_dir": "${paths.resource_dir}/raw_data",
"base_model_dir": "${infra.work_dir}/../models",
"best_model_dir": ".//../models/coref_joint_f55855e64a8bca2f420dbe05725aa39e_coref_lf_train_final_runs_2/best",
"best_model_path": "/home/kawshikcvit/coref_research/gpt-coref-met/models/coref_joint_f55855e64a8bca2f420dbe05725aa39e_coref_lf_train_final_runs_2/best/model.pth",
"conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl",
"doc_encoder_dirname": "doc_encoder",
"model_dir": ".//../models/coref_joint_f55855e64a8bca2f420dbe05725aa39e_coref_lf_train_final_runs_2",
"model_filename": "model.pth",
"model_name": null,
"model_name_prefix": "coref_",
"model_path": "/home/kawshikcvit/coref_research/gpt-coref-met/models/coref_joint_f55855e64a8bca2f420dbe05725aa39e_coref_lf_train_final_runs_2/model.pth",
"resource_dir": "../data/"
},
"seed": 2,
"train": true,
"trainer": {
"dropout_rate": 0.3,
"eval_per_k_steps": null,
"eval_type": "full",
"label_smoothing_wt": 0.1,
"log_frequency": 500,
"max_evals": 25,
"max_training_segments": 1,
"ment_loss": "all",
"normalize_loss": false,
"num_training_steps": null,
"patience": 10,
"to_save_model": false
},
"use_wandb": true
}