KawshikManikantan committed on
Commit
32d2d2c
·
1 Parent(s): c007b15
.gitattributes CHANGED
@@ -32,16 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/ filter=lfs diff=lfs merge=lfs -text
37
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best filter=lfs diff=lfs merge=lfs -text
38
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/config.json filter=lfs diff=lfs merge=lfs -text
39
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth filter=lfs diff=lfs merge=lfs -text
40
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/added_tokens.json filter=lfs diff=lfs merge=lfs -text
41
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/config.json filter=lfs diff=lfs merge=lfs -text
42
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/merges.txt filter=lfs diff=lfs merge=lfs -text
43
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/model.safetensors filter=lfs diff=lfs merge=lfs -text
44
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/special_tokens_map.json filter=lfs diff=lfs merge=lfs -text
45
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
46
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
- models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/vocab.json filter=lfs diff=lfs merge=lfs -text
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore CHANGED
@@ -1,5 +1,3 @@
1
- models/
2
- models_orig/
3
  baseline_src/wandb
4
  data/raw_data
5
  **/wandb/
 
 
 
1
  baseline_src/wandb
2
  data/raw_data
3
  **/wandb/
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "[SPEAKER_END]": 50266,
3
+ "[SPEAKER_START]": 50265
4
+ }
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "allenai/longformer-large-4096",
3
+ "architectures": [
4
+ "LongformerModel"
5
+ ],
6
+ "attention_mode": "longformer",
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "attention_window": [
9
+ 512,
10
+ 512,
11
+ 512,
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512,
26
+ 512,
27
+ 512,
28
+ 512,
29
+ 512,
30
+ 512,
31
+ 512,
32
+ 512
33
+ ],
34
+ "bos_token_id": 0,
35
+ "eos_token_id": 2,
36
+ "gradient_checkpointing": false,
37
+ "hidden_act": "gelu",
38
+ "hidden_dropout_prob": 0.1,
39
+ "hidden_size": 1024,
40
+ "ignore_attention_mask": false,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 4096,
43
+ "layer_norm_eps": 1e-05,
44
+ "max_position_embeddings": 4098,
45
+ "model_type": "longformer",
46
+ "num_attention_heads": 16,
47
+ "num_hidden_layers": 24,
48
+ "onnx_export": false,
49
+ "pad_token_id": 1,
50
+ "sep_token_id": 2,
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.40.2",
53
+ "type_vocab_size": 1,
54
+ "vocab_size": 50267
55
+ }
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab82ffdba31413b7bbbb925d54ac5765bc8ff9e5ddf4e1e7c9b6a2ae8bb2ffd
3
+ size 1734276256
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[SPEAKER_START]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "[SPEAKER_END]",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "cls_token": "<s>",
20
+ "eos_token": "</s>",
21
+ "mask_token": {
22
+ "content": "<mask>",
23
+ "lstrip": true,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "pad_token": "<pad>",
29
+ "sep_token": "</s>",
30
+ "unk_token": "<unk>"
31
+ }
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "50265": {
45
+ "content": "[SPEAKER_START]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "50266": {
53
+ "content": "[SPEAKER_END]",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "additional_special_tokens": [
62
+ "[SPEAKER_START]",
63
+ "[SPEAKER_END]"
64
+ ],
65
+ "bos_token": "<s>",
66
+ "clean_up_tokenization_spaces": true,
67
+ "cls_token": "<s>",
68
+ "eos_token": "</s>",
69
+ "errors": "replace",
70
+ "mask_token": "<mask>",
71
+ "model_max_length": 1000000000000000019884624838656,
72
+ "pad_token": "<pad>",
73
+ "sep_token": "</s>",
74
+ "tokenizer_class": "LongformerTokenizer",
75
+ "trim_offsets": true,
76
+ "unk_token": "<unk>"
77
+ }
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e71f84325c6264b91dc0f6f0db2e618b89207ab25e54eb7973b4527bce8f9b0
3
+ size 149249711
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/config.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "datasets": {
3
+ "fantasy": {
4
+ "canonical_cluster_threshold": 0,
5
+ "cluster_threshold": 0,
6
+ "has_conll": true,
7
+ "name": "fantasy",
8
+ "num_dev_docs": 20,
9
+ "num_test_docs": 20,
10
+ "num_train_docs": 171,
11
+ "targeted_eval": false
12
+ },
13
+ "litbank": {
14
+ "canonical_cluster_threshold": 0,
15
+ "cluster_threshold": 0,
16
+ "cross_val_split": 0,
17
+ "has_conll": true,
18
+ "name": "LitBank",
19
+ "num_dev_docs": 10,
20
+ "num_test_docs": 10,
21
+ "num_train_docs": 80,
22
+ "targeted_eval": false
23
+ }
24
+ },
25
+ "desc": "Major Entity Tracking",
26
+ "device": "cuda:0",
27
+ "infra": {
28
+ "is_local": true,
29
+ "work_dir": "./"
30
+ },
31
+ "keep_singletons": true,
32
+ "key": "lf_sd_train_gen_4",
33
+ "log_vals": false,
34
+ "metrics": [
35
+ "MUC",
36
+ "Bcub",
37
+ "CEAFE"
38
+ ],
39
+ "model": {
40
+ "doc_encoder": {
41
+ "add_speaker_tokens": true,
42
+ "chunking": "independent",
43
+ "finetune": true,
44
+ "speaker_end": "[SPEAKER_END]",
45
+ "speaker_start": "[SPEAKER_START]",
46
+ "transformer": {
47
+ "max_encoder_segment_len": 4096,
48
+ "max_segment_len": 4096,
49
+ "model_size": "large",
50
+ "model_str": "allenai/longformer-large-4096",
51
+ "name": "longformer"
52
+ }
53
+ },
54
+ "memory": {
55
+ "batch_size": 64,
56
+ "emb_size": 20,
57
+ "entity_rep": "wt_avg",
58
+ "mem_type": {
59
+ "eval_max_ents": null,
60
+ "max_ents": null,
61
+ "name": "unbounded"
62
+ },
63
+ "mlp_depth": 1,
64
+ "mlp_size": 3000,
65
+ "num_embeds": 10,
66
+ "num_feats": 2,
67
+ "pivot": false,
68
+ "pseudo_dist": true,
69
+ "rep_pos": "learned",
70
+ "sim_func": "hadamard",
71
+ "thresh": 0.0,
72
+ "type": "hybrid"
73
+ },
74
+ "mention_params": {
75
+ "emb_size": 20,
76
+ "ext_ment": false,
77
+ "ignore_non_gold": true,
78
+ "max_span_width": 20,
79
+ "ment_emb": "attn",
80
+ "ment_emb_to_size_factor": {
81
+ "attn": 3,
82
+ "endpoint": 2,
83
+ "max": 1
84
+ },
85
+ "mlp_depth": 1,
86
+ "mlp_size": 3000,
87
+ "top_span_ratio": 0.4,
88
+ "use_gold_ments": false,
89
+ "use_topk": false
90
+ },
91
+ "metadata_params": {
92
+ "default_genre": "nw",
93
+ "genres": [
94
+ "bc",
95
+ "bn",
96
+ "mz",
97
+ "nw",
98
+ "pt",
99
+ "tc",
100
+ "wb"
101
+ ],
102
+ "use_genre_feature": false
103
+ }
104
+ },
105
+ "optimizer": {
106
+ "fine_tune_lr": 1e-05,
107
+ "init_lr": 0.0003,
108
+ "lr_decay": "linear",
109
+ "max_gradient_norm": 1.0
110
+ },
111
+ "override_encoder": false,
112
+ "override_memory": false,
113
+ "paths": {
114
+ "base_data_dir": "${paths.resource_dir}/raw_data",
115
+ "base_model_dir": "${infra.work_dir}/../models",
116
+ "best_model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best",
117
+ "best_model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth",
118
+ "conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl",
119
+ "doc_encoder_dirname": "doc_encoder",
120
+ "model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4",
121
+ "model_filename": "model.pth",
122
+ "model_name": null,
123
+ "model_name_prefix": "met_",
124
+ "model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/model.pth",
125
+ "resource_dir": "../data/"
126
+ },
127
+ "seed": 4,
128
+ "train": true,
129
+ "trainer": {
130
+ "dropout_rate": 0.3,
131
+ "eval_per_k_steps": null,
132
+ "eval_type": "full",
133
+ "generalise": true,
134
+ "label_smoothing_wt": 0.1,
135
+ "log_frequency": 500,
136
+ "max_evals": 25,
137
+ "max_training_segments": 1,
138
+ "ment_loss_incl": true,
139
+ "ment_loss_mode": "all",
140
+ "normalize_loss": false,
141
+ "num_training_steps": null,
142
+ "patience": 10,
143
+ "to_save_model": false
144
+ },
145
+ "use_wandb": true
146
+ }
models/mod_checkpts.ipynb ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 23,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/tmp/ipykernel_2893037/1660114035.py:11: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
13
+ " checkpoint = torch.load(model_path)\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4\n",
21
+ "hybrid\n",
22
+ "False\n",
23
+ "met_joint_ffbdebf78d7907df10c789e0d8cc8992_lf_sd_train_final_runs_1\n",
24
+ "hybrid\n",
25
+ "False\n",
26
+ "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_5\n",
27
+ "hybrid\n",
28
+ "False\n",
29
+ "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_2\n",
30
+ "hybrid\n",
31
+ "False\n",
32
+ "met_joint_b9386d26e9d1192471fa4b95de7b44ef_lf_sd_train_final_runs_2\n",
33
+ "hybrid\n",
34
+ "False\n",
35
+ "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_3\n",
36
+ "hybrid\n",
37
+ "False\n",
38
+ "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_1\n",
39
+ "hybrid\n",
40
+ "False\n",
41
+ "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_4\n",
42
+ "hybrid\n",
43
+ "False\n",
44
+ "met_joint_abfff0992d2512e1fa18eb3444686071_lf_sd_train_gen_5\n",
45
+ "hybrid\n",
46
+ "False\n",
47
+ "met_joint_fc741fbf5d3d3874c10921ddad5dca5b_lf_sd_train_final_runs_3\n",
48
+ "hybrid\n",
49
+ "False\n",
50
+ "met_joint_d2df7853437ea532c03908843d3348b6_lf_sd_train_final_runs_5\n",
51
+ "hybrid\n",
52
+ "False\n",
53
+ "met_joint_6d2fa90c72a23c2c9c4ebcf439b39d15_lf_sd_train_final_runs_4\n",
54
+ "hybrid\n",
55
+ "False\n"
56
+ ]
57
+ }
58
+ ],
59
+ "source": [
60
+ "import torch\n",
61
+ "import os\n",
62
+ "\n",
63
+ "models_par = os.listdir('models_7_6_24')\n",
64
+ "for model_dir in models_par:\n",
65
+ " # if model_dir.startswith(\"met\"):\n",
66
+ " # print(model_dir)\n",
67
+ " model_path_folders = [\"models_7_6_24\", model_dir, \"best\", \"model.pth\"]\n",
68
+ " model_path = os.path.join(*model_path_folders)\n",
69
+ " if os.path.exists(model_path):\n",
70
+ " checkpoint = torch.load(model_path)\n",
71
+ " if 'type' in checkpoint['config']['model']['memory'] and checkpoint['config']['model']['memory']['type'] == 'hybrid':\n",
72
+ " print(model_dir)\n",
73
+ " print(checkpoint['config']['model']['memory']['type'])\n",
74
+ " print(checkpoint['config']['model']['memory']['pivot'])\n",
75
+ " # checkpoint['config']['model']['memory']['type'] = 'hybrid'\n",
76
+ " # checkpoint['config']['model']['memory']['type'] = 'static'\n",
77
+ " # torch.save(checkpoint, model_path)"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": []
86
+ }
87
+ ],
88
+ "metadata": {
89
+ "kernelspec": {
90
+ "display_name": "myenv",
91
+ "language": "python",
92
+ "name": "python3"
93
+ },
94
+ "language_info": {
95
+ "codemirror_mode": {
96
+ "name": "ipython",
97
+ "version": 3
98
+ },
99
+ "file_extension": ".py",
100
+ "mimetype": "text/x-python",
101
+ "name": "python",
102
+ "nbconvert_exporter": "python",
103
+ "pygments_lexer": "ipython3",
104
+ "version": "3.12.2"
105
+ }
106
+ },
107
+ "nbformat": 4,
108
+ "nbformat_minor": 2
109
+ }