Spaces:

KawshikManikantan
/

MEIRa

Running

App Files Community

KawshikManikantan committed on Nov 5, 2024

Commit

32d2d2c

1 Parent(s): c007b15

upload_3

Browse files

Files changed (13) hide show

.gitattributes +1 -13
.gitignore +0 -2
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/added_tokens.json +4 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/config.json +55 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/merges.txt +0 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/model.safetensors +3 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/special_tokens_map.json +31 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer.json +0 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer_config.json +77 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/vocab.json +0 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth +3 -0
models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/config.json +146 -0
models/mod_checkpts.ipynb +109 -0

.gitattributes CHANGED Viewed

@@ -32,16 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/ filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/config.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/added_tokens.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/config.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/merges.txt filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/model.safetensors filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/special_tokens_map.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer.json filter=lfs diff=lfs merge=lfs -text
-models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/vocab.json filter=lfs diff=lfs merge=lfs -text

 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore CHANGED Viewed

@@ -1,5 +1,3 @@
-models/
-models_orig/
 baseline_src/wandb
 data/raw_data
 **/wandb/

 baseline_src/wandb
 data/raw_data
 **/wandb/

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "[SPEAKER_END]": 50266,
+  "[SPEAKER_START]": 50265
+}

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "allenai/longformer-large-4096",
+  "architectures": [
+    "LongformerModel"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.2",
+  "type_vocab_size": 1,
+  "vocab_size": 50267
+}

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ab82ffdba31413b7bbbb925d54ac5765bc8ff9e5ddf4e1e7c9b6a2ae8bb2ffd
+size 1734276256

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[SPEAKER_START]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "[SPEAKER_END]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50265": {
+      "content": "[SPEAKER_START]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50266": {
+      "content": "[SPEAKER_END]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "[SPEAKER_START]",
+    "[SPEAKER_END]"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "LongformerTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/doc_encoder/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e71f84325c6264b91dc0f6f0db2e618b89207ab25e54eb7973b4527bce8f9b0
+size 149249711

models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/config.json ADDED Viewed

	@@ -0,0 +1,146 @@

+{
+    "datasets": {
+        "fantasy": {
+            "canonical_cluster_threshold": 0,
+            "cluster_threshold": 0,
+            "has_conll": true,
+            "name": "fantasy",
+            "num_dev_docs": 20,
+            "num_test_docs": 20,
+            "num_train_docs": 171,
+            "targeted_eval": false
+        },
+        "litbank": {
+            "canonical_cluster_threshold": 0,
+            "cluster_threshold": 0,
+            "cross_val_split": 0,
+            "has_conll": true,
+            "name": "LitBank",
+            "num_dev_docs": 10,
+            "num_test_docs": 10,
+            "num_train_docs": 80,
+            "targeted_eval": false
+        }
+    },
+    "desc": "Major Entity Tracking",
+    "device": "cuda:0",
+    "infra": {
+        "is_local": true,
+        "work_dir": "./"
+    },
+    "keep_singletons": true,
+    "key": "lf_sd_train_gen_4",
+    "log_vals": false,
+    "metrics": [
+        "MUC",
+        "Bcub",
+        "CEAFE"
+    ],
+    "model": {
+        "doc_encoder": {
+            "add_speaker_tokens": true,
+            "chunking": "independent",
+            "finetune": true,
+            "speaker_end": "[SPEAKER_END]",
+            "speaker_start": "[SPEAKER_START]",
+            "transformer": {
+                "max_encoder_segment_len": 4096,
+                "max_segment_len": 4096,
+                "model_size": "large",
+                "model_str": "allenai/longformer-large-4096",
+                "name": "longformer"
+            }
+        },
+        "memory": {
+            "batch_size": 64,
+            "emb_size": 20,
+            "entity_rep": "wt_avg",
+            "mem_type": {
+                "eval_max_ents": null,
+                "max_ents": null,
+                "name": "unbounded"
+            },
+            "mlp_depth": 1,
+            "mlp_size": 3000,
+            "num_embeds": 10,
+            "num_feats": 2,
+            "pivot": false,
+            "pseudo_dist": true,
+            "rep_pos": "learned",
+            "sim_func": "hadamard",
+            "thresh": 0.0,
+            "type": "hybrid"
+        },
+        "mention_params": {
+            "emb_size": 20,
+            "ext_ment": false,
+            "ignore_non_gold": true,
+            "max_span_width": 20,
+            "ment_emb": "attn",
+            "ment_emb_to_size_factor": {
+                "attn": 3,
+                "endpoint": 2,
+                "max": 1
+            },
+            "mlp_depth": 1,
+            "mlp_size": 3000,
+            "top_span_ratio": 0.4,
+            "use_gold_ments": false,
+            "use_topk": false
+        },
+        "metadata_params": {
+            "default_genre": "nw",
+            "genres": [
+                "bc",
+                "bn",
+                "mz",
+                "nw",
+                "pt",
+                "tc",
+                "wb"
+            ],
+            "use_genre_feature": false
+        }
+    },
+    "optimizer": {
+        "fine_tune_lr": 1e-05,
+        "init_lr": 0.0003,
+        "lr_decay": "linear",
+        "max_gradient_norm": 1.0
+    },
+    "override_encoder": false,
+    "override_memory": false,
+    "paths": {
+        "base_data_dir": "${paths.resource_dir}/raw_data",
+        "base_model_dir": "${infra.work_dir}/../models",
+        "best_model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best",
+        "best_model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth",
+        "conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl",
+        "doc_encoder_dirname": "doc_encoder",
+        "model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4",
+        "model_filename": "model.pth",
+        "model_name": null,
+        "model_name_prefix": "met_",
+        "model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/model.pth",
+        "resource_dir": "../data/"
+    },
+    "seed": 4,
+    "train": true,
+    "trainer": {
+        "dropout_rate": 0.3,
+        "eval_per_k_steps": null,
+        "eval_type": "full",
+        "generalise": true,
+        "label_smoothing_wt": 0.1,
+        "log_frequency": 500,
+        "max_evals": 25,
+        "max_training_segments": 1,
+        "ment_loss_incl": true,
+        "ment_loss_mode": "all",
+        "normalize_loss": false,
+        "num_training_steps": null,
+        "patience": 10,
+        "to_save_model": false
+    },
+    "use_wandb": true
+}

models/mod_checkpts.ipynb ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_2893037/1660114035.py:11: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  checkpoint = torch.load(model_path)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_ffbdebf78d7907df10c789e0d8cc8992_lf_sd_train_final_runs_1\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_5\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_2\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_b9386d26e9d1192471fa4b95de7b44ef_lf_sd_train_final_runs_2\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_3\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_1\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_aa985942ad294012e11cbc13fccf7de9_lf_sd_train_runs_4\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_abfff0992d2512e1fa18eb3444686071_lf_sd_train_gen_5\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_fc741fbf5d3d3874c10921ddad5dca5b_lf_sd_train_final_runs_3\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_d2df7853437ea532c03908843d3348b6_lf_sd_train_final_runs_5\n",
+      "hybrid\n",
+      "False\n",
+      "met_joint_6d2fa90c72a23c2c9c4ebcf439b39d15_lf_sd_train_final_runs_4\n",
+      "hybrid\n",
+      "False\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import os\n",
+    "\n",
+    "models_par = os.listdir('models_7_6_24')\n",
+    "for model_dir in models_par:\n",
+    "    # if model_dir.startswith(\"met\"):\n",
+    "    #     print(model_dir)\n",
+    "    model_path_folders = [\"models_7_6_24\", model_dir, \"best\", \"model.pth\"]\n",
+    "    model_path = os.path.join(*model_path_folders)\n",
+    "    if os.path.exists(model_path):\n",
+    "        checkpoint = torch.load(model_path)\n",
+    "        if 'type' in checkpoint['config']['model']['memory'] and checkpoint['config']['model']['memory']['type'] == 'hybrid':\n",
+    "            print(model_dir)\n",
+    "            print(checkpoint['config']['model']['memory']['type'])\n",
+    "            print(checkpoint['config']['model']['memory']['pivot'])\n",
+    "            # checkpoint['config']['model']['memory']['type'] = 'hybrid'\n",
+    "            # checkpoint['config']['model']['memory']['type'] = 'static'\n",
+    "            # torch.save(checkpoint, model_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "myenv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}