{ "datasets": { "fantasy": { "canonical_cluster_threshold": 0, "cluster_threshold": 0, "has_conll": true, "name": "fantasy", "num_dev_docs": 20, "num_test_docs": 20, "num_train_docs": 171, "targeted_eval": false }, "litbank": { "canonical_cluster_threshold": 0, "cluster_threshold": 0, "cross_val_split": 0, "has_conll": true, "name": "LitBank", "num_dev_docs": 10, "num_test_docs": 10, "num_train_docs": 80, "targeted_eval": false } }, "desc": "Major Entity Tracking", "device": "cuda:0", "infra": { "is_local": true, "work_dir": "./" }, "keep_singletons": true, "key": "lf_sd_train_gen_4", "log_vals": false, "metrics": [ "MUC", "Bcub", "CEAFE" ], "model": { "doc_encoder": { "add_speaker_tokens": true, "chunking": "independent", "finetune": true, "speaker_end": "[SPEAKER_END]", "speaker_start": "[SPEAKER_START]", "transformer": { "max_encoder_segment_len": 4096, "max_segment_len": 4096, "model_size": "large", "model_str": "allenai/longformer-large-4096", "name": "longformer" } }, "memory": { "batch_size": 64, "emb_size": 20, "entity_rep": "wt_avg", "mem_type": { "eval_max_ents": null, "max_ents": null, "name": "unbounded" }, "mlp_depth": 1, "mlp_size": 3000, "num_embeds": 10, "num_feats": 2, "pivot": false, "pseudo_dist": true, "rep_pos": "learned", "sim_func": "hadamard", "thresh": 0.0, "type": "hybrid" }, "mention_params": { "emb_size": 20, "ext_ment": false, "ignore_non_gold": true, "max_span_width": 20, "ment_emb": "attn", "ment_emb_to_size_factor": { "attn": 3, "endpoint": 2, "max": 1 }, "mlp_depth": 1, "mlp_size": 3000, "top_span_ratio": 0.4, "use_gold_ments": false, "use_topk": false }, "metadata_params": { "default_genre": "nw", "genres": [ "bc", "bn", "mz", "nw", "pt", "tc", "wb" ], "use_genre_feature": false } }, "optimizer": { "fine_tune_lr": 1e-05, "init_lr": 0.0003, "lr_decay": "linear", "max_gradient_norm": 1.0 }, "override_encoder": false, "override_memory": false, "paths": { "base_data_dir": "${paths.resource_dir}/raw_data", "base_model_dir": "${infra.work_dir}/../models", "best_model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best", "best_model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth", "conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl", "doc_encoder_dirname": "doc_encoder", "model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4", "model_filename": "model.pth", "model_name": null, "model_name_prefix": "met_", "model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/model.pth", "resource_dir": "../data/" }, "seed": 4, "train": true, "trainer": { "dropout_rate": 0.3, "eval_per_k_steps": null, "eval_type": "full", "generalise": true, "label_smoothing_wt": 0.1, "log_frequency": 500, "max_evals": 25, "max_training_segments": 1, "ment_loss_incl": true, "ment_loss_mode": "all", "normalize_loss": false, "num_training_steps": null, "patience": 10, "to_save_model": false }, "use_wandb": true }