KoichiYasuoka
/

modernbert-large-japanese-wikipedia-ud-square

@@ -34,392 +34,448 @@
   "hidden_size": 1024,
   "id2label": {
     "0": "ADJ",
-    "1": "ADJ|acl-s",
-    "2": "ADJ|acl-t",
-    "3": "ADJ|advcl-s",
-    "4": "ADJ|advcl-t",
-    "5": "ADJ|amod-s",
-    "6": "ADJ|amod-t",
-    "7": "ADJ|ccomp-s",
-    "8": "ADJ|ccomp-t",
-    "9": "ADJ|csubj-s",
-    "10": "ADJ|csubj-t",
-    "11": "ADJ|csubj:outer-s",
-    "12": "ADJ|csubj:outer-t",
-    "13": "ADJ|dep-s",
-    "14": "ADJ|dep-t",
-    "15": "ADJ|nmod-s",
-    "16": "ADJ|nmod-t",
-    "17": "ADJ|nsubj-s",
-    "18": "ADJ|nsubj-t",
-    "19": "ADJ|obj-s",
-    "20": "ADJ|obj-t",
-    "21": "ADJ|obl-s",
-    "22": "ADJ|obl-t",
-    "23": "ADJ|root",
-    "24": "ADP",
-    "25": "ADP|case-s",
-    "26": "ADP|case-t",
-    "27": "ADP|fixed-s",
-    "28": "ADP|fixed-t",
-    "29": "ADV",
-    "30": "ADV|advcl-s",
-    "31": "ADV|advcl-t",
-    "32": "ADV|advmod-s",
-    "33": "ADV|advmod-t",
-    "34": "ADV|dep-s",
-    "35": "ADV|dep-t",
-    "36": "ADV|obj-s",
-    "37": "ADV|obj-t",
-    "38": "ADV|root",
-    "39": "AUX",
-    "40": "AUX|Polarity=Neg",
-    "41": "AUX|Polarity=Neg|aux-s",
-    "42": "AUX|Polarity=Neg|aux-t",
-    "43": "AUX|Polarity=Neg|fixed-s",
-    "44": "AUX|Polarity=Neg|fixed-t",
-    "45": "AUX|aux-s",
-    "46": "AUX|aux-t",
-    "47": "AUX|cop-s",
-    "48": "AUX|cop-t",
-    "49": "AUX|fixed-s",
-    "50": "AUX|fixed-t",
-    "51": "AUX|root",
-    "52": "CCONJ",
-    "53": "CCONJ|cc-s",
-    "54": "CCONJ|cc-t",
-    "55": "DET",
-    "56": "DET|det-s",
-    "57": "DET|det-t",
-    "58": "INTJ",
-    "59": "INTJ|discourse-s",
-    "60": "INTJ|discourse-t",
-    "61": "INTJ|root",
-    "62": "NOUN",
-    "63": "NOUN|Polarity=Neg",
-    "64": "NOUN|Polarity=Neg|obl-s",
-    "65": "NOUN|Polarity=Neg|obl-t",
-    "66": "NOUN|Polarity=Neg|root",
-    "67": "NOUN|acl-s",
-    "68": "NOUN|acl-t",
-    "69": "NOUN|advcl-s",
-    "70": "NOUN|advcl-t",
-    "71": "NOUN|ccomp-s",
-    "72": "NOUN|ccomp-t",
-    "73": "NOUN|compound-s",
-    "74": "NOUN|compound-t",
-    "75": "NOUN|csubj-s",
-    "76": "NOUN|csubj-t",
-    "77": "NOUN|csubj:outer-s",
-    "78": "NOUN|csubj:outer-t",
-    "79": "NOUN|nmod-s",
-    "80": "NOUN|nmod-t",
-    "81": "NOUN|nsubj-s",
-    "82": "NOUN|nsubj-t",
-    "83": "NOUN|nsubj:outer-s",
-    "84": "NOUN|nsubj:outer-t",
-    "85": "NOUN|obj-s",
-    "86": "NOUN|obj-t",
-    "87": "NOUN|obl-s",
-    "88": "NOUN|obl-t",
-    "89": "NOUN|root",
-    "90": "NUM",
-    "91": "NUM|advcl-s",
-    "92": "NUM|advcl-t",
-    "93": "NUM|compound-s",
-    "94": "NUM|compound-t",
-    "95": "NUM|nmod-s",
-    "96": "NUM|nmod-t",
-    "97": "NUM|nsubj-s",
-    "98": "NUM|nsubj-t",
-    "99": "NUM|nsubj:outer-s",
-    "100": "NUM|nsubj:outer-t",
-    "101": "NUM|nummod-s",
-    "102": "NUM|nummod-t",
-    "103": "NUM|obj-s",
-    "104": "NUM|obj-t",
-    "105": "NUM|obl-s",
-    "106": "NUM|obl-t",
-    "107": "NUM|root",
-    "108": "PART",
-    "109": "PART|mark-s",
-    "110": "PART|mark-t",
-    "111": "PRON",
-    "112": "PRON|acl-s",
-    "113": "PRON|acl-t",
-    "114": "PRON|advcl-s",
-    "115": "PRON|advcl-t",
-    "116": "PRON|nmod-s",
-    "117": "PRON|nmod-t",
-    "118": "PRON|nsubj-s",
-    "119": "PRON|nsubj-t",
-    "120": "PRON|nsubj:outer-s",
-    "121": "PRON|nsubj:outer-t",
-    "122": "PRON|obj-s",
-    "123": "PRON|obj-t",
-    "124": "PRON|obl-s",
-    "125": "PRON|obl-t",
-    "126": "PRON|root",
-    "127": "PROPN",
-    "128": "PROPN|acl-s",
-    "129": "PROPN|acl-t",
-    "130": "PROPN|advcl-s",
-    "131": "PROPN|advcl-t",
-    "132": "PROPN|compound-s",
-    "133": "PROPN|compound-t",
-    "134": "PROPN|nmod-s",
-    "135": "PROPN|nmod-t",
-    "136": "PROPN|nsubj-s",
-    "137": "PROPN|nsubj-t",
-    "138": "PROPN|nsubj:outer-s",
-    "139": "PROPN|nsubj:outer-t",
-    "140": "PROPN|obj-s",
-    "141": "PROPN|obj-t",
-    "142": "PROPN|obl-s",
-    "143": "PROPN|obl-t",
-    "144": "PROPN|root",
-    "145": "PUNCT",
-    "146": "PUNCT|punct-s",
-    "147": "PUNCT|punct-t",
-    "148": "SCONJ",
-    "149": "SCONJ|dep-s",
-    "150": "SCONJ|dep-t",
-    "151": "SCONJ|fixed-s",
-    "152": "SCONJ|fixed-t",
-    "153": "SCONJ|mark-s",
-    "154": "SCONJ|mark-t",
-    "155": "SYM",
-    "156": "SYM|compound-s",
-    "157": "SYM|compound-t",
-    "158": "SYM|dep-s",
-    "159": "SYM|dep-t",
-    "160": "SYM|nmod-s",
-    "161": "SYM|nmod-t",
-    "162": "SYM|obl-s",
-    "163": "SYM|obl-t",
-    "164": "VERB",
-    "165": "VERB|acl-s",
-    "166": "VERB|acl-t",
-    "167": "VERB|advcl-s",
-    "168": "VERB|advcl-t",
-    "169": "VERB|ccomp-s",
-    "170": "VERB|ccomp-t",
-    "171": "VERB|compound-s",
-    "172": "VERB|compound-t",
-    "173": "VERB|csubj-s",
-    "174": "VERB|csubj-t",
-    "175": "VERB|csubj:outer-s",
-    "176": "VERB|csubj:outer-t",
-    "177": "VERB|nmod-s",
-    "178": "VERB|nmod-t",
-    "179": "VERB|obj-s",
-    "180": "VERB|obj-t",
-    "181": "VERB|obl-s",
-    "182": "VERB|obl-t",
-    "183": "VERB|root",
-    "184": "X",
-    "185": "X|dep-s",
-    "186": "X|dep-t",
-    "187": "X|goeswith-s",
-    "188": "X|goeswith-t",
-    "189": "X|nmod-s",
-    "190": "X|nmod-t"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
   "intermediate_size": 2624,
   "label2id": {
     "ADJ": 0,
-    "ADJ|acl-s": 1,
-    "ADJ|acl-t": 2,
-    "ADJ|advcl-s": 3,
-    "ADJ|advcl-t": 4,
-    "ADJ|amod-s": 5,
-    "ADJ|amod-t": 6,
-    "ADJ|ccomp-s": 7,
-    "ADJ|ccomp-t": 8,
-    "ADJ|csubj-s": 9,
-    "ADJ|csubj-t": 10,
-    "ADJ|csubj:outer-s": 11,
-    "ADJ|csubj:outer-t": 12,
-    "ADJ|dep-s": 13,
-    "ADJ|dep-t": 14,
-    "ADJ|nmod-s": 15,
-    "ADJ|nmod-t": 16,
-    "ADJ|nsubj-s": 17,
-    "ADJ|nsubj-t": 18,
-    "ADJ|obj-s": 19,
-    "ADJ|obj-t": 20,
-    "ADJ|obl-s": 21,
-    "ADJ|obl-t": 22,
-    "ADJ|root": 23,
-    "ADP": 24,
-    "ADP|case-s": 25,
-    "ADP|case-t": 26,
-    "ADP|fixed-s": 27,
-    "ADP|fixed-t": 28,
-    "ADV": 29,
-    "ADV|advcl-s": 30,
-    "ADV|advcl-t": 31,
-    "ADV|advmod-s": 32,
-    "ADV|advmod-t": 33,
-    "ADV|dep-s": 34,
-    "ADV|dep-t": 35,
-    "ADV|obj-s": 36,
-    "ADV|obj-t": 37,
-    "ADV|root": 38,
-    "AUX": 39,
-    "AUX|Polarity=Neg": 40,
-    "AUX|Polarity=Neg|aux-s": 41,
-    "AUX|Polarity=Neg|aux-t": 42,
-    "AUX|Polarity=Neg|fixed-s": 43,
-    "AUX|Polarity=Neg|fixed-t": 44,
-    "AUX|aux-s": 45,
-    "AUX|aux-t": 46,
-    "AUX|cop-s": 47,
-    "AUX|cop-t": 48,
-    "AUX|fixed-s": 49,
-    "AUX|fixed-t": 50,
-    "AUX|root": 51,
-    "CCONJ": 52,
-    "CCONJ|cc-s": 53,
-    "CCONJ|cc-t": 54,
-    "DET": 55,
-    "DET|det-s": 56,
-    "DET|det-t": 57,
-    "INTJ": 58,
-    "INTJ|discourse-s": 59,
-    "INTJ|discourse-t": 60,
-    "INTJ|root": 61,
-    "NOUN": 62,
-    "NOUN|Polarity=Neg": 63,
-    "NOUN|Polarity=Neg|obl-s": 64,
-    "NOUN|Polarity=Neg|obl-t": 65,
-    "NOUN|Polarity=Neg|root": 66,
-    "NOUN|acl-s": 67,
-    "NOUN|acl-t": 68,
-    "NOUN|advcl-s": 69,
-    "NOUN|advcl-t": 70,
-    "NOUN|ccomp-s": 71,
-    "NOUN|ccomp-t": 72,
-    "NOUN|compound-s": 73,
-    "NOUN|compound-t": 74,
-    "NOUN|csubj-s": 75,
-    "NOUN|csubj-t": 76,
-    "NOUN|csubj:outer-s": 77,
-    "NOUN|csubj:outer-t": 78,
-    "NOUN|nmod-s": 79,
-    "NOUN|nmod-t": 80,
-    "NOUN|nsubj-s": 81,
-    "NOUN|nsubj-t": 82,
-    "NOUN|nsubj:outer-s": 83,
-    "NOUN|nsubj:outer-t": 84,
-    "NOUN|obj-s": 85,
-    "NOUN|obj-t": 86,
-    "NOUN|obl-s": 87,
-    "NOUN|obl-t": 88,
-    "NOUN|root": 89,
-    "NUM": 90,
-    "NUM|advcl-s": 91,
-    "NUM|advcl-t": 92,
-    "NUM|compound-s": 93,
-    "NUM|compound-t": 94,
-    "NUM|nmod-s": 95,
-    "NUM|nmod-t": 96,
-    "NUM|nsubj-s": 97,
-    "NUM|nsubj-t": 98,
-    "NUM|nsubj:outer-s": 99,
-    "NUM|nsubj:outer-t": 100,
-    "NUM|nummod-s": 101,
-    "NUM|nummod-t": 102,
-    "NUM|obj-s": 103,
-    "NUM|obj-t": 104,
-    "NUM|obl-s": 105,
-    "NUM|obl-t": 106,
-    "NUM|root": 107,
-    "PART": 108,
-    "PART|mark-s": 109,
-    "PART|mark-t": 110,
-    "PRON": 111,
-    "PRON|acl-s": 112,
-    "PRON|acl-t": 113,
-    "PRON|advcl-s": 114,
-    "PRON|advcl-t": 115,
-    "PRON|nmod-s": 116,
-    "PRON|nmod-t": 117,
-    "PRON|nsubj-s": 118,
-    "PRON|nsubj-t": 119,
-    "PRON|nsubj:outer-s": 120,
-    "PRON|nsubj:outer-t": 121,
-    "PRON|obj-s": 122,
-    "PRON|obj-t": 123,
-    "PRON|obl-s": 124,
-    "PRON|obl-t": 125,
-    "PRON|root": 126,
-    "PROPN": 127,
-    "PROPN|acl-s": 128,
-    "PROPN|acl-t": 129,
-    "PROPN|advcl-s": 130,
-    "PROPN|advcl-t": 131,
-    "PROPN|compound-s": 132,
-    "PROPN|compound-t": 133,
-    "PROPN|nmod-s": 134,
-    "PROPN|nmod-t": 135,
-    "PROPN|nsubj-s": 136,
-    "PROPN|nsubj-t": 137,
-    "PROPN|nsubj:outer-s": 138,
-    "PROPN|nsubj:outer-t": 139,
-    "PROPN|obj-s": 140,
-    "PROPN|obj-t": 141,
-    "PROPN|obl-s": 142,
-    "PROPN|obl-t": 143,
-    "PROPN|root": 144,
-    "PUNCT": 145,
-    "PUNCT|punct-s": 146,
-    "PUNCT|punct-t": 147,
-    "SCONJ": 148,
-    "SCONJ|dep-s": 149,
-    "SCONJ|dep-t": 150,
-    "SCONJ|fixed-s": 151,
-    "SCONJ|fixed-t": 152,
-    "SCONJ|mark-s": 153,
-    "SCONJ|mark-t": 154,
-    "SYM": 155,
-    "SYM|compound-s": 156,
-    "SYM|compound-t": 157,
-    "SYM|dep-s": 158,
-    "SYM|dep-t": 159,
-    "SYM|nmod-s": 160,
-    "SYM|nmod-t": 161,
-    "SYM|obl-s": 162,
-    "SYM|obl-t": 163,
-    "VERB": 164,
-    "VERB|acl-s": 165,
-    "VERB|acl-t": 166,
-    "VERB|advcl-s": 167,
-    "VERB|advcl-t": 168,
-    "VERB|ccomp-s": 169,
-    "VERB|ccomp-t": 170,
-    "VERB|compound-s": 171,
-    "VERB|compound-t": 172,
-    "VERB|csubj-s": 173,
-    "VERB|csubj-t": 174,
-    "VERB|csubj:outer-s": 175,
-    "VERB|csubj:outer-t": 176,
-    "VERB|nmod-s": 177,
-    "VERB|nmod-t": 178,
-    "VERB|obj-s": 179,
-    "VERB|obj-t": 180,
-    "VERB|obl-s": 181,
-    "VERB|obl-t": 182,
-    "VERB|root": 183,
-    "X": 184,
-    "X|dep-s": 185,
-    "X|dep-t": 186,
-    "X|goeswith-s": 187,
-    "X|goeswith-t": 188,
-    "X|nmod-s": 189,
-    "X|nmod-t": 190
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,

   "hidden_size": 1024,
   "id2label": {
     "0": "ADJ",
+    "1": "ADJ.",
+    "2": "ADJ.|[acl]",
+    "3": "ADJ.|[advcl]",
+    "4": "ADJ.|[amod]",
+    "5": "ADJ.|[ccomp]",
+    "6": "ADJ.|[csubj:outer]",
+    "7": "ADJ.|[csubj]",
+    "8": "ADJ.|[dep]",
+    "9": "ADJ.|[nmod]",
+    "10": "ADJ.|[nsubj]",
+    "11": "ADJ.|[obj]",
+    "12": "ADJ.|[obl]",
+    "13": "ADJ.|[root]",
+    "14": "ADJ|[acl]",
+    "15": "ADJ|[advcl]",
+    "16": "ADJ|[amod]",
+    "17": "ADJ|[ccomp]",
+    "18": "ADJ|[csubj:outer]",
+    "19": "ADJ|[csubj]",
+    "20": "ADJ|[dep]",
+    "21": "ADJ|[nmod]",
+    "22": "ADJ|[nsubj]",
+    "23": "ADJ|[obj]",
+    "24": "ADJ|[obl]",
+    "25": "ADJ|[root]",
+    "26": "ADP",
+    "27": "ADP.",
+    "28": "ADP.|[case]",
+    "29": "ADP.|[fixed]",
+    "30": "ADP|[case]",
+    "31": "ADP|[fixed]",
+    "32": "ADV",
+    "33": "ADV.",
+    "34": "ADV.|[advcl]",
+    "35": "ADV.|[advmod]",
+    "36": "ADV.|[dep]",
+    "37": "ADV.|[obj]",
+    "38": "ADV.|[root]",
+    "39": "ADV|[advcl]",
+    "40": "ADV|[advmod]",
+    "41": "ADV|[dep]",
+    "42": "ADV|[obj]",
+    "43": "ADV|[root]",
+    "44": "AUX",
+    "45": "AUX.",
+    "46": "AUX.|Polarity=Neg",
+    "47": "AUX.|Polarity=Neg|[aux]",
+    "48": "AUX.|Polarity=Neg|[fixed]",
+    "49": "AUX.|[aux]",
+    "50": "AUX.|[cop]",
+    "51": "AUX.|[fixed]",
+    "52": "AUX.|[root]",
+    "53": "AUX|Polarity=Neg",
+    "54": "AUX|Polarity=Neg|[aux]",
+    "55": "AUX|Polarity=Neg|[fixed]",
+    "56": "AUX|[aux]",
+    "57": "AUX|[cop]",
+    "58": "AUX|[fixed]",
+    "59": "AUX|[root]",
+    "60": "CCONJ",
+    "61": "CCONJ.",
+    "62": "CCONJ.|[cc]",
+    "63": "CCONJ|[cc]",
+    "64": "DET",
+    "65": "DET.",
+    "66": "DET.|[det]",
+    "67": "DET|[det]",
+    "68": "INTJ",
+    "69": "INTJ.",
+    "70": "INTJ.|[discourse]",
+    "71": "INTJ.|[root]",
+    "72": "INTJ|[discourse]",
+    "73": "INTJ|[root]",
+    "74": "NOUN",
+    "75": "NOUN.",
+    "76": "NOUN.|Polarity=Neg",
+    "77": "NOUN.|Polarity=Neg|[obl]",
+    "78": "NOUN.|Polarity=Neg|[root]",
+    "79": "NOUN.|[acl]",
+    "80": "NOUN.|[advcl]",
+    "81": "NOUN.|[ccomp]",
+    "82": "NOUN.|[compound]",
+    "83": "NOUN.|[csubj:outer]",
+    "84": "NOUN.|[csubj]",
+    "85": "NOUN.|[nmod]",
+    "86": "NOUN.|[nsubj:outer]",
+    "87": "NOUN.|[nsubj]",
+    "88": "NOUN.|[obj]",
+    "89": "NOUN.|[obl]",
+    "90": "NOUN.|[root]",
+    "91": "NOUN|Polarity=Neg",
+    "92": "NOUN|Polarity=Neg|[obl]",
+    "93": "NOUN|Polarity=Neg|[root]",
+    "94": "NOUN|[acl]",
+    "95": "NOUN|[advcl]",
+    "96": "NOUN|[ccomp]",
+    "97": "NOUN|[compound]",
+    "98": "NOUN|[csubj:outer]",
+    "99": "NOUN|[csubj]",
+    "100": "NOUN|[nmod]",
+    "101": "NOUN|[nsubj:outer]",
+    "102": "NOUN|[nsubj]",
+    "103": "NOUN|[obj]",
+    "104": "NOUN|[obl]",
+    "105": "NOUN|[root]",
+    "106": "NUM",
+    "107": "NUM.",
+    "108": "NUM.|[advcl]",
+    "109": "NUM.|[compound]",
+    "110": "NUM.|[nmod]",
+    "111": "NUM.|[nsubj:outer]",
+    "112": "NUM.|[nsubj]",
+    "113": "NUM.|[nummod]",
+    "114": "NUM.|[obj]",
+    "115": "NUM.|[obl]",
+    "116": "NUM.|[root]",
+    "117": "NUM|[advcl]",
+    "118": "NUM|[compound]",
+    "119": "NUM|[nmod]",
+    "120": "NUM|[nsubj:outer]",
+    "121": "NUM|[nsubj]",
+    "122": "NUM|[nummod]",
+    "123": "NUM|[obj]",
+    "124": "NUM|[obl]",
+    "125": "NUM|[root]",
+    "126": "PART",
+    "127": "PART.",
+    "128": "PART.|[mark]",
+    "129": "PART|[mark]",
+    "130": "PRON",
+    "131": "PRON.",
+    "132": "PRON.|[acl]",
+    "133": "PRON.|[advcl]",
+    "134": "PRON.|[nmod]",
+    "135": "PRON.|[nsubj:outer]",
+    "136": "PRON.|[nsubj]",
+    "137": "PRON.|[obj]",
+    "138": "PRON.|[obl]",
+    "139": "PRON.|[root]",
+    "140": "PRON|[acl]",
+    "141": "PRON|[advcl]",
+    "142": "PRON|[nmod]",
+    "143": "PRON|[nsubj:outer]",
+    "144": "PRON|[nsubj]",
+    "145": "PRON|[obj]",
+    "146": "PRON|[obl]",
+    "147": "PRON|[root]",
+    "148": "PROPN",
+    "149": "PROPN.",
+    "150": "PROPN.|[acl]",
+    "151": "PROPN.|[advcl]",
+    "152": "PROPN.|[compound]",
+    "153": "PROPN.|[nmod]",
+    "154": "PROPN.|[nsubj:outer]",
+    "155": "PROPN.|[nsubj]",
+    "156": "PROPN.|[obj]",
+    "157": "PROPN.|[obl]",
+    "158": "PROPN.|[root]",
+    "159": "PROPN|[acl]",
+    "160": "PROPN|[advcl]",
+    "161": "PROPN|[compound]",
+    "162": "PROPN|[nmod]",
+    "163": "PROPN|[nsubj:outer]",
+    "164": "PROPN|[nsubj]",
+    "165": "PROPN|[obj]",
+    "166": "PROPN|[obl]",
+    "167": "PROPN|[root]",
+    "168": "PUNCT",
+    "169": "PUNCT.",
+    "170": "PUNCT.|[punct]",
+    "171": "PUNCT|[punct]",
+    "172": "SCONJ",
+    "173": "SCONJ.",
+    "174": "SCONJ.|[dep]",
+    "175": "SCONJ.|[fixed]",
+    "176": "SCONJ.|[mark]",
+    "177": "SCONJ|[dep]",
+    "178": "SCONJ|[fixed]",
+    "179": "SCONJ|[mark]",
+    "180": "SYM",
+    "181": "SYM.",
+    "182": "SYM.|[compound]",
+    "183": "SYM.|[dep]",
+    "184": "SYM.|[nmod]",
+    "185": "SYM.|[obl]",
+    "186": "SYM|[compound]",
+    "187": "SYM|[dep]",
+    "188": "SYM|[nmod]",
+    "189": "SYM|[obl]",
+    "190": "VERB",
+    "191": "VERB.",
+    "192": "VERB.|[acl]",
+    "193": "VERB.|[advcl]",
+    "194": "VERB.|[ccomp]",
+    "195": "VERB.|[compound]",
+    "196": "VERB.|[csubj:outer]",
+    "197": "VERB.|[csubj]",
+    "198": "VERB.|[nmod]",
+    "199": "VERB.|[obj]",
+    "200": "VERB.|[obl]",
+    "201": "VERB.|[root]",
+    "202": "VERB|[acl]",
+    "203": "VERB|[advcl]",
+    "204": "VERB|[ccomp]",
+    "205": "VERB|[compound]",
+    "206": "VERB|[csubj:outer]",
+    "207": "VERB|[csubj]",
+    "208": "VERB|[nmod]",
+    "209": "VERB|[obj]",
+    "210": "VERB|[obl]",
+    "211": "VERB|[root]",
+    "212": "X",
+    "213": "X.",
+    "214": "X.|[dep]",
+    "215": "X.|[goeswith]",
+    "216": "X.|[nmod]",
+    "217": "X|[dep]",
+    "218": "X|[nmod]"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
   "intermediate_size": 2624,
   "label2id": {
     "ADJ": 0,
+    "ADJ.": 1,
+    "ADJ.|[acl]": 2,
+    "ADJ.|[advcl]": 3,
+    "ADJ.|[amod]": 4,
+    "ADJ.|[ccomp]": 5,
+    "ADJ.|[csubj:outer]": 6,
+    "ADJ.|[csubj]": 7,
+    "ADJ.|[dep]": 8,
+    "ADJ.|[nmod]": 9,
+    "ADJ.|[nsubj]": 10,
+    "ADJ.|[obj]": 11,
+    "ADJ.|[obl]": 12,
+    "ADJ.|[root]": 13,
+    "ADJ|[acl]": 14,
+    "ADJ|[advcl]": 15,
+    "ADJ|[amod]": 16,
+    "ADJ|[ccomp]": 17,
+    "ADJ|[csubj:outer]": 18,
+    "ADJ|[csubj]": 19,
+    "ADJ|[dep]": 20,
+    "ADJ|[nmod]": 21,
+    "ADJ|[nsubj]": 22,
+    "ADJ|[obj]": 23,
+    "ADJ|[obl]": 24,
+    "ADJ|[root]": 25,
+    "ADP": 26,
+    "ADP.": 27,
+    "ADP.|[case]": 28,
+    "ADP.|[fixed]": 29,
+    "ADP|[case]": 30,
+    "ADP|[fixed]": 31,
+    "ADV": 32,
+    "ADV.": 33,
+    "ADV.|[advcl]": 34,
+    "ADV.|[advmod]": 35,
+    "ADV.|[dep]": 36,
+    "ADV.|[obj]": 37,
+    "ADV.|[root]": 38,
+    "ADV|[advcl]": 39,
+    "ADV|[advmod]": 40,
+    "ADV|[dep]": 41,
+    "ADV|[obj]": 42,
+    "ADV|[root]": 43,
+    "AUX": 44,
+    "AUX.": 45,
+    "AUX.|Polarity=Neg": 46,
+    "AUX.|Polarity=Neg|[aux]": 47,
+    "AUX.|Polarity=Neg|[fixed]": 48,
+    "AUX.|[aux]": 49,
+    "AUX.|[cop]": 50,
+    "AUX.|[fixed]": 51,
+    "AUX.|[root]": 52,
+    "AUX|Polarity=Neg": 53,
+    "AUX|Polarity=Neg|[aux]": 54,
+    "AUX|Polarity=Neg|[fixed]": 55,
+    "AUX|[aux]": 56,
+    "AUX|[cop]": 57,
+    "AUX|[fixed]": 58,
+    "AUX|[root]": 59,
+    "CCONJ": 60,
+    "CCONJ.": 61,
+    "CCONJ.|[cc]": 62,
+    "CCONJ|[cc]": 63,
+    "DET": 64,
+    "DET.": 65,
+    "DET.|[det]": 66,
+    "DET|[det]": 67,
+    "INTJ": 68,
+    "INTJ.": 69,
+    "INTJ.|[discourse]": 70,
+    "INTJ.|[root]": 71,
+    "INTJ|[discourse]": 72,
+    "INTJ|[root]": 73,
+    "NOUN": 74,
+    "NOUN.": 75,
+    "NOUN.|Polarity=Neg": 76,
+    "NOUN.|Polarity=Neg|[obl]": 77,
+    "NOUN.|Polarity=Neg|[root]": 78,
+    "NOUN.|[acl]": 79,
+    "NOUN.|[advcl]": 80,
+    "NOUN.|[ccomp]": 81,
+    "NOUN.|[compound]": 82,
+    "NOUN.|[csubj:outer]": 83,
+    "NOUN.|[csubj]": 84,
+    "NOUN.|[nmod]": 85,
+    "NOUN.|[nsubj:outer]": 86,
+    "NOUN.|[nsubj]": 87,
+    "NOUN.|[obj]": 88,
+    "NOUN.|[obl]": 89,
+    "NOUN.|[root]": 90,
+    "NOUN|Polarity=Neg": 91,
+    "NOUN|Polarity=Neg|[obl]": 92,
+    "NOUN|Polarity=Neg|[root]": 93,
+    "NOUN|[acl]": 94,
+    "NOUN|[advcl]": 95,
+    "NOUN|[ccomp]": 96,
+    "NOUN|[compound]": 97,
+    "NOUN|[csubj:outer]": 98,
+    "NOUN|[csubj]": 99,
+    "NOUN|[nmod]": 100,
+    "NOUN|[nsubj:outer]": 101,
+    "NOUN|[nsubj]": 102,
+    "NOUN|[obj]": 103,
+    "NOUN|[obl]": 104,
+    "NOUN|[root]": 105,
+    "NUM": 106,
+    "NUM.": 107,
+    "NUM.|[advcl]": 108,
+    "NUM.|[compound]": 109,
+    "NUM.|[nmod]": 110,
+    "NUM.|[nsubj:outer]": 111,
+    "NUM.|[nsubj]": 112,
+    "NUM.|[nummod]": 113,
+    "NUM.|[obj]": 114,
+    "NUM.|[obl]": 115,
+    "NUM.|[root]": 116,
+    "NUM|[advcl]": 117,
+    "NUM|[compound]": 118,
+    "NUM|[nmod]": 119,
+    "NUM|[nsubj:outer]": 120,
+    "NUM|[nsubj]": 121,
+    "NUM|[nummod]": 122,
+    "NUM|[obj]": 123,
+    "NUM|[obl]": 124,
+    "NUM|[root]": 125,
+    "PART": 126,
+    "PART.": 127,
+    "PART.|[mark]": 128,
+    "PART|[mark]": 129,
+    "PRON": 130,
+    "PRON.": 131,
+    "PRON.|[acl]": 132,
+    "PRON.|[advcl]": 133,
+    "PRON.|[nmod]": 134,
+    "PRON.|[nsubj:outer]": 135,
+    "PRON.|[nsubj]": 136,
+    "PRON.|[obj]": 137,
+    "PRON.|[obl]": 138,
+    "PRON.|[root]": 139,
+    "PRON|[acl]": 140,
+    "PRON|[advcl]": 141,
+    "PRON|[nmod]": 142,
+    "PRON|[nsubj:outer]": 143,
+    "PRON|[nsubj]": 144,
+    "PRON|[obj]": 145,
+    "PRON|[obl]": 146,
+    "PRON|[root]": 147,
+    "PROPN": 148,
+    "PROPN.": 149,
+    "PROPN.|[acl]": 150,
+    "PROPN.|[advcl]": 151,
+    "PROPN.|[compound]": 152,
+    "PROPN.|[nmod]": 153,
+    "PROPN.|[nsubj:outer]": 154,
+    "PROPN.|[nsubj]": 155,
+    "PROPN.|[obj]": 156,
+    "PROPN.|[obl]": 157,
+    "PROPN.|[root]": 158,
+    "PROPN|[acl]": 159,
+    "PROPN|[advcl]": 160,
+    "PROPN|[compound]": 161,
+    "PROPN|[nmod]": 162,
+    "PROPN|[nsubj:outer]": 163,
+    "PROPN|[nsubj]": 164,
+    "PROPN|[obj]": 165,
+    "PROPN|[obl]": 166,
+    "PROPN|[root]": 167,
+    "PUNCT": 168,
+    "PUNCT.": 169,
+    "PUNCT.|[punct]": 170,
+    "PUNCT|[punct]": 171,
+    "SCONJ": 172,
+    "SCONJ.": 173,
+    "SCONJ.|[dep]": 174,
+    "SCONJ.|[fixed]": 175,
+    "SCONJ.|[mark]": 176,
+    "SCONJ|[dep]": 177,
+    "SCONJ|[fixed]": 178,
+    "SCONJ|[mark]": 179,
+    "SYM": 180,
+    "SYM.": 181,
+    "SYM.|[compound]": 182,
+    "SYM.|[dep]": 183,
+    "SYM.|[nmod]": 184,
+    "SYM.|[obl]": 185,
+    "SYM|[compound]": 186,
+    "SYM|[dep]": 187,
+    "SYM|[nmod]": 188,
+    "SYM|[obl]": 189,
+    "VERB": 190,
+    "VERB.": 191,
+    "VERB.|[acl]": 192,
+    "VERB.|[advcl]": 193,
+    "VERB.|[ccomp]": 194,
+    "VERB.|[compound]": 195,
+    "VERB.|[csubj:outer]": 196,
+    "VERB.|[csubj]": 197,
+    "VERB.|[nmod]": 198,
+    "VERB.|[obj]": 199,
+    "VERB.|[obl]": 200,
+    "VERB.|[root]": 201,
+    "VERB|[acl]": 202,
+    "VERB|[advcl]": 203,
+    "VERB|[ccomp]": 204,
+    "VERB|[compound]": 205,
+    "VERB|[csubj:outer]": 206,
+    "VERB|[csubj]": 207,
+    "VERB|[nmod]": 208,
+    "VERB|[obj]": 209,
+    "VERB|[obl]": 210,
+    "VERB|[root]": 211,
+    "X": 212,
+    "X.": 213,
+    "X.|[dep]": 214,
+    "X.|[goeswith]": 215,
+    "X.|[nmod]": 216,
+    "X|[dep]": 217,
+    "X|[nmod]": 218
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,

maker.py CHANGED Viewed

@@ -2,43 +2,33 @@
 src="KoichiYasuoka/modernbert-large-japanese-wikipedia-upos"
 tgt="KoichiYasuoka/modernbert-large-japanese-wikipedia-ud-square"
 url="https://github.com/UniversalDependencies/UD_Japanese-GSDLUW"
-import os
 d=os.path.basename(url)
 os.system("test -d "+d+" || git clone --depth=1 "+url)
 os.system("for F in train dev test ; do cp "+d+"/*-$F.conllu $F.conllu ; done")
-class UDTriangularDataset(object):
   def __init__(self,conllu,tokenizer):
     self.conllu=open(conllu,"r",encoding="utf-8")
     self.tokenizer=tokenizer
     self.seeks=[0]
-    label=set(["SYM","X"])
-    dep=set(["X|goeswith-s","X|goeswith-t"])
     s=self.conllu.readline()
     while s!="":
       if s=="\n":
-        if 0<len(self.tokenizer(t)["input_ids"])<91:
-          pass
-        else:
-          self.seeks.pop(-1)
         self.seeks.append(self.conllu.tell())
-      elif s.startswith("# text ="):
-        t=s[8:].strip()
       else:
         w=s.split("\t")
         if len(w)==10:
           if w[0].isdecimal():
-            p=w[3] if w[5]=="_" else w[3]+"|"+w[5]
-            label.add(p)
-            if w[6]=="0":
-              dep.add(p+"|"+w[7])
-            else:
-              dep.add(p+"|"+w[7]+"-s")
-              dep.add(p+"|"+w[7]+"-t")
       s=self.conllu.readline()
-    lid={l:i for i,l in enumerate(sorted(label))}
-    for i,d in enumerate(sorted(dep),len(lid)):
-      lid[d]=i
-    self.label2id=lid
   def __call__(*args):
     lid={l:i for i,l in enumerate(sorted(set(sum([list(t.label2id) for t in args],[]))))}
     for t in args:
@@ -48,39 +38,55 @@ class UDTriangularDataset(object):
     self.conllu.close()
   __len__=lambda self:len(self.seeks)-1
   def __getitem__(self,i):
-    s=self.seeks[i]
-    self.conllu.seek(s)
     c,t=[],[""]
     while t[0]!="\n":
       t=self.conllu.readline().split("\t")
       if len(t)==10 and t[0].isdecimal():
         c.append(t)
     v=self.tokenizer([t[1] for t in c],add_special_tokens=False)["input_ids"]
     for i in range(len(v)-1,-1,-1):
-      for j in range(1,len(v[i])):
-        c.insert(i+1,[c[i][0],"_","_","X","_","_",c[i][0],"goeswith","_","_"])
     y=["0"]+[t[0] for t in c]
     h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
     p=[t[3] if t[5]=="_" else t[3]+"|"+t[5] for t in c]
-    d=[t[7] for t in c]
     v=sum(v,[])
-    ids=[self.tokenizer.cls_token_id]
-    upos=["SYM"]
     for i in range(len(v)):
-      for j in range(len(v)):
-        ids.append(v[j])
-        if i==j:
-          upos.append(p[i]+"|"+d[i] if h[i]==j+1 else p[i]+"|"+d[i]+"-t")
-        else:
-          upos.append(p[i]+"|"+d[i]+"-s" if h[i]==j+1 else p[j])
-      ids.append(self.tokenizer.sep_token_id)
-      upos.append("SYM")
     return {"input_ids":ids,"labels":[self.label2id[p] for p in upos]}
 from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
 tkz=AutoTokenizer.from_pretrained(src)
-trainDS=UDTriangularDataset("train.conllu",tkz)
-devDS=UDTriangularDataset("dev.conllu",tkz)
-testDS=UDTriangularDataset("test.conllu",tkz)
 lid=trainDS(devDS,testDS)
 cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True,trust_remote_code=True)
 mdl=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True,trust_remote_code=True)

 src="KoichiYasuoka/modernbert-large-japanese-wikipedia-upos"
 tgt="KoichiYasuoka/modernbert-large-japanese-wikipedia-ud-square"
 url="https://github.com/UniversalDependencies/UD_Japanese-GSDLUW"
+import os,numpy
 d=os.path.basename(url)
 os.system("test -d "+d+" || git clone --depth=1 "+url)
 os.system("for F in train dev test ; do cp "+d+"/*-$F.conllu $F.conllu ; done")
+class UDSquareDataset(object):
   def __init__(self,conllu,tokenizer):
     self.conllu=open(conllu,"r",encoding="utf-8")
     self.tokenizer=tokenizer
     self.seeks=[0]
+    label=set(["SYM.","X.","X.|[goeswith]"])
     s=self.conllu.readline()
     while s!="":
       if s=="\n":
         self.seeks.append(self.conllu.tell())
       else:
         w=s.split("\t")
         if len(w)==10:
           if w[0].isdecimal():
+            p=w[3]
+            q="" if w[5]=="_" else "|"+w[5]
+            r="|["+w[7]+"]"
+            label.add(p+q)
+            label.add(p+"."+q)
+            label.add(p+q+r)
+            label.add(p+"."+q+r)
       s=self.conllu.readline()
+    self.label2id={l:i for i,l in enumerate(sorted(label))}
   def __call__(*args):
     lid={l:i for i,l in enumerate(sorted(set(sum([list(t.label2id) for t in args],[]))))}
     for t in args:
     self.conllu.close()
   __len__=lambda self:len(self.seeks)-1
   def __getitem__(self,i):
+    self.conllu.seek(self.seeks[i])
     c,t=[],[""]
     while t[0]!="\n":
       t=self.conllu.readline().split("\t")
       if len(t)==10 and t[0].isdecimal():
         c.append(t)
+    h={t[6] for t in c}
+    for t in c:
+      if t[6]!="0" and t[0] not in h:
+        t[3]+="."
     v=self.tokenizer([t[1] for t in c],add_special_tokens=False)["input_ids"]
     for i in range(len(v)-1,-1,-1):
+      if v[i]==[]:
+        v[i]=[self.tokenizer.unk_token_id]
+      if len(v[i])>1:
+        c[i][3]=c[i][3].replace(".","")
+        for j in range(1,len(v[i])):
+          c.insert(i+1,[c[i][0],"_","_","X.","_","_",c[i][0],"goeswith","_","_"])
     y=["0"]+[t[0] for t in c]
     h=[i if t[6]=="0" else y.index(t[6]) for i,t in enumerate(c,1)]
     p=[t[3] if t[5]=="_" else t[3]+"|"+t[5] for t in c]
+    d=["|["+t[7]+"]" for t in c]
+    x=[not t[3].endswith(".") for t in c]
+    if len(x)<90:
+      x=[True]*len(x)
+    else:
+      w=(sum([1 for b in x if b])+1)*(len(x)+1)+1
+      for i in numpy.argsort([-abs(j-i-1) for i,j in enumerate(h)]):
+        if w+len(x)>8191:
+          break
+        if not x[i]:
+          x[i]=True
+          w+=len(x)+1
     v=sum(v,[])
+    ids=[self.tokenizer.cls_token_id]+v+[self.tokenizer.sep_token_id]
+    upos=["SYM."]+p+["SYM."]
     for i in range(len(v)):
+      if x[i]:
+        for j in range(len(v)):
+          ids.append(self.tokenizer.mask_token_id if i==j else v[j])
+          upos.append(p[j]+d[j] if h[j]==i+1 else p[j])
+        ids.append(self.tokenizer.sep_token_id)
+        upos.append("SYM.")
     return {"input_ids":ids,"labels":[self.label2id[p] for p in upos]}
 from transformers import AutoTokenizer,AutoConfig,AutoModelForTokenClassification,DataCollatorForTokenClassification,TrainingArguments,Trainer
 tkz=AutoTokenizer.from_pretrained(src)
+trainDS=UDSquareDataset("train.conllu",tkz)
+devDS=UDSquareDataset("dev.conllu",tkz)
+testDS=UDSquareDataset("test.conllu",tkz)
 lid=trainDS(devDS,testDS)
 cfg=AutoConfig.from_pretrained(src,num_labels=len(lid),label2id=lid,id2label={i:l for l,i in lid.items()},ignore_mismatched_sizes=True,trust_remote_code=True)
 mdl=AutoModelForTokenClassification.from_pretrained(src,config=cfg,ignore_mismatched_sizes=True,trust_remote_code=True)

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4be01ad1760cae99e4954f7d022fddce93f9da9cb1f9fa25b0b65281ebf2140b
-size 1644098178

 version https://git-lfs.github.com/spec/v1
+oid sha256:8cd7aff6611ce073b2bc046f702814d6c3c698086fdafc2267b23df981d3df60
+size 1644212930

ud.py CHANGED Viewed

@@ -5,55 +5,79 @@ class UniversalDependenciesPipeline(TokenClassificationPipeline):
   def __init__(self,**kwargs):
     super().__init__(**kwargs)
     x=self.model.config.label2id
-    self.root=numpy.full((len(x)),numpy.nan)
-    self.arc_start=numpy.full((len(x)),numpy.nan)
-    self.arc_tail=numpy.full((len(x)),numpy.nan)
     for k,v in x.items():
-      if k.endswith("|root"):
         self.root[v]=0
-      elif k.endswith("-s"):
-        self.arc_start[v]=0
-      elif k.endswith("-t"):
-        self.arc_tail[v]=0
   def _forward(self,model_inputs):
     import torch
     v=model_inputs["input_ids"][0].tolist()
     with torch.no_grad():
-      e=self.model(input_ids=torch.tensor([v+v[1:]*(len(v)-3)]).to(self.device))
-    return {"logits":e.logits,**model_inputs}
   def check_model_type(self,supported_models):
     pass
   def postprocess(self,model_outputs,**kwargs):
     if "logits" not in model_outputs:
       return "".join(self.postprocess(x,**kwargs) for x in model_outputs)
     m=model_outputs["logits"][0].cpu().numpy()
-    w=len(model_outputs["input_ids"][0])-2
-    e=numpy.zeros((w,w,m.shape[-1]))
-    for i in range(w):
-      k=numpy.roll(m[i*(w+2)+1]+self.arc_tail,-1)
-      for j in range(w):
-        if i==j:
-          e[i,i]=m[i*(w+1)+j+1]+self.root
-        else:
-          e[j,i]=m[i*(w+1)+j+1]+self.arc_start+k
-    g=self.model.config.label2id["X|goeswith-s"]
-    r=numpy.tri(e.shape[0])
     for i in range(e.shape[0]):
       for j in range(i+2,e.shape[1]):
-        r[i,j]=r[i,j-1] if numpy.nanargmax(e[i,j-1])==g else 1
-    e[:,:,g]+=numpy.where(r==0,0,numpy.nan)
-    m,p=numpy.nanmax(e,axis=2),numpy.nanargmax(e,axis=2)
     h=self.chu_liu_edmonds(m)
     z=[i for i,j in enumerate(h) if i==j]
     if len(z)>1:
-      k,h=z[numpy.nanargmax(m[z,z])],numpy.nanmin(m)-numpy.nanmax(m)
       m[:,z]+=[[0 if j in z and (i!=j or i==k) else h for i in z] for j in range(m.shape[0])]
       h=self.chu_liu_edmonds(m)
     v=[(s,e) for s,e in model_outputs["offset_mapping"][0].tolist() if s<e]
     q=[self.model.config.id2label[p[j,i]].split("|") for i,j in enumerate(h)]
     if "aggregation_strategy" in kwargs and kwargs["aggregation_strategy"]!="none":
       for i,j in reversed(list(enumerate(q[1:],1))):
-        if j[-1]=="goeswith-s" and set([t[-1] for t in q[h[i]+1:i+1]])=={"goeswith-s"}:
           h=[b if i>b else b-1 for a,b in enumerate(h) if i!=a]
           v[i-1]=(v[i-1][0],v.pop(i)[1])
           q.pop(i)
@@ -64,10 +88,11 @@ class UniversalDependenciesPipeline(TokenClassificationPipeline):
     t=model_outputs["sentence"].replace("\n"," ")
     u="# text = "+t+"\n"
     for i,(s,e) in enumerate(v):
-      u+="\t".join([str(i+1),t[s:e],"_",q[i][0],"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),"root" if q[i][-1]=="root" else q[i][-1][0:-2],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"
     return u+"\n"
   def chu_liu_edmonds(self,matrix):
-    h=numpy.nanargmax(matrix,axis=0)
     x=[-1 if i==j else j for i,j in enumerate(h)]
     for b in [lambda x,i,j:-1 if i not in x else x[i],lambda x,i,j:-1 if j<0 else x[j]]:
       y=[]
@@ -78,10 +103,10 @@ class UniversalDependenciesPipeline(TokenClassificationPipeline):
       if max(x)<0:
         return h
     y,x=[i for i,j in enumerate(x) if j==max(x)],[i for i,j in enumerate(x) if j<max(x)]
-    z=matrix-numpy.nanmax(matrix,axis=0)
-    m=numpy.block([[z[x,:][:,x],numpy.nanmax(z[x,:][:,y],axis=1).reshape(len(x),1)],[numpy.nanmax(z[y,:][:,x],axis=0),numpy.nanmax(z[y,y])]])
-    k=[j if i==len(x) else x[j] if j<len(x) else y[numpy.nanargmax(z[y,x[i]])] for i,j in enumerate(self.chu_liu_edmonds(m))]
     h=[j if i in y else k[x.index(i)] for i,j in enumerate(h)]
-    i=y[numpy.nanargmax(z[x[k[-1]],y] if k[-1]<len(x) else z[y,y])]
     h[i]=x[k[-1]] if k[-1]<len(x) else i
     return h

   def __init__(self,**kwargs):
     # Builds two additive label masks, one entry per id in label2id:
     # 0 where a label is permitted and -inf where it is not.
     #   self.root : 0 only for labels whose name ends with "|[root]"
     #   self.arc  : 0 only for the remaining labels whose name ends with "]"
     # postprocess() adds these vectors to logit rows, so excluded labels
     # end up with a score of -inf.
     super().__init__(**kwargs)
     x=self.model.config.label2id
     # (len(x)) is a plain int, not a tuple, so both arrays are 1-D.
     # assumes label ids are exactly 0..len(x)-1 so they can index these arrays - TODO confirm
+    self.root=numpy.full((len(x)),-numpy.inf)
+    self.arc=numpy.full((len(x)),-numpy.inf)
     for k,v in x.items():
       # The "|[root]" test must come first: such labels also end with "]".
+      if k.endswith("|[root]"):
         self.root[v]=0
+      elif k.endswith("]"):
+        self.arc[v]=0
   def _forward(self,model_inputs):
     # Runs the model on the input sequence followed by one masked copy of the
     # sequence per selected position, all concatenated into a single row.
     # Returns a dict holding the logits of that run, the per-position selection
     # flags under "thin_out", and every entry of model_inputs.
     import torch
     # Token ids of the first (only the first) sequence in the batch.
     v=model_inputs["input_ids"][0].tolist()
     # x has one flag per inner position (len(v)-2 entries; the first and last
     # ids are excluded - presumably special tokens, confirm against tokenizer).
     # Short inputs: every inner position gets its own masked copy.
+    if len(v)<91:
+      x=[True]*(len(v)-2)
+    else:
       # Long inputs: a preliminary pass decides which positions may be skipped.
+      with torch.no_grad():
+        e=self.model(input_ids=torch.tensor([v]).to(self.device))
+      m=e.logits[0].cpu().numpy()
       # Softmax over the label axis (max subtracted first for stability).
+      e=numpy.exp(m-numpy.max(m,axis=-1,keepdims=True))
+      z=e/e.sum(axis=-1,keepdims=True)
+      k=numpy.argmax(m,axis=1).tolist()
       # A position is dropped when the first "|" field of its predicted label
       # ends with "."; all other positions are kept.
+      x=[not self.model.config.id2label[p].split("|")[0].endswith(".") for p in k[1:-1]]
       # w is the length of the concatenated input built below:
       # len(x)+2 ids for the plain sequence plus len(x)+1 per kept position.
+      w=(sum([1 for b in x if b])+1)*(len(x)+1)+1
       # Re-enable dropped positions, least confident prediction first, until
       # one more copy (len(x)+1 ids) would push the total past 8192 ids.
+      for i in numpy.argsort([z[i+1,k[i+1]] for i in range(len(x))]):
+        if w+len(x)>8191:
+          break
+        if not x[i]:
+          x[i]=True
+          w+=len(x)+1
     # Concatenated input: the plain sequence, then for each kept position i a
     # copy of v[1:] in which v[i+1] is replaced by the mask token id.
+    ids=list(v)
+    for i in range(len(x)):
+      if x[i]:
+        ids+=v[1:i+1]+[self.tokenizer.mask_token_id]+v[i+2:]
     with torch.no_grad():
+      e=self.model(input_ids=torch.tensor([ids]).to(self.device))
+    return {"logits":e.logits,"thin_out":x,**model_inputs}
   def check_model_type(self,supported_models):
     # No-op: supported_models is ignored and no model-type check is performed here.
     pass
   def postprocess(self,model_outputs,**kwargs):
     # Turns the logits produced by _forward into a text block: a "# text = ..."
     # header followed by one line of ten tab-separated fields per token and a
     # trailing blank line (this looks like CoNLL-U - confirm against consumers).
     # When model_outputs has no "logits" key it is treated as an iterable of
     # such outputs; each is processed recursively and the strings are joined.
     if "logits" not in model_outputs:
       return "".join(self.postprocess(x,**kwargs) for x in model_outputs)
     m=model_outputs["logits"][0].cpu().numpy()
+    x=model_outputs["thin_out"]
     # e[i,j] holds the label scores for token j read from the copy in which
     # token i was masked; cells of positions that were not selected in
     # "thin_out" keep the global minimum logit as a filler.
+    e=numpy.full((len(x),len(x),m.shape[-1]),m.min())
     # Skip the rows of the plain (unmasked) sequence, which is len(x)+2 ids long.
+    k=len(x)+2
+    for i in range(len(x)):
+      if x[i]:
+        for j in range(len(x)):
           # Diagonal cells may only take "[root]" labels; off-diagonal cells
           # may only take the other bracketed labels (masks from __init__).
+          if i==j:
+            e[i,i]=m[k]+self.root
+          else:
+            e[i,j]=m[k]+self.arc
+          k+=1
         # Each masked copy has len(x)+1 rows; skip its final row.
+        k+=1
+    g=self.model.config.label2id["X.|[goeswith]"]
     # r starts as numpy.tri: 1 on and below the diagonal, 0 above it.
+    m,r=numpy.max(e,axis=2),numpy.tri(e.shape[0])
     for i in range(e.shape[0]):
       for j in range(i+2,e.shape[1]):
         # r[i,j] becomes 1 unless cell (i,j-1) both prefers label g and row i
         # is the best-scoring row of column j-1; then it inherits r[i,j-1].
+        r[i,j]=1
+        if numpy.argmax(e[i,j-1])==g:
+          if numpy.argmax(m[:,j-1])==i:
+            r[i,j]=r[i,j-1]
     # Label g stays available only where r==0; elsewhere its score becomes -inf.
+    e[:,:,g]+=numpy.where(r==0,0,-numpy.inf)
     # Best score and best label id for every (i,j) cell.
+    m,p=numpy.max(e,axis=2),numpy.argmax(e,axis=2)
     # h[i] is the row chosen for column i; h[i]==i marks a self-headed token,
     # which is written with head 0 in the output below.
     h=self.chu_liu_edmonds(m)
     z=[i for i,j in enumerate(h) if i==j]
     if len(z)>1:
       # Several self-headed tokens: k is the one with the highest diagonal
       # score and h is a penalty (min(m)-max(m), never positive). The penalty
       # is added to the columns in z, except for entries whose row is also in
       # z and that are either off-diagonal or the diagonal entry of k.
+      k,h=z[numpy.argmax(m[z,z])],numpy.min(m)-numpy.max(m)
       m[:,z]+=[[0 if j in z and (i!=j or i==k) else h for i in z] for j in range(m.shape[0])]
       h=self.chu_liu_edmonds(m)
     # Character spans of the tokens; empty spans (s>=e) are dropped.
     v=[(s,e) for s,e in model_outputs["offset_mapping"][0].tolist() if s<e]
     # Label of token i given its chosen head j, split into "|"-separated fields.
     q=[self.model.config.id2label[p[j,i]].split("|") for i,j in enumerate(h)]
     if "aggregation_strategy" in kwargs and kwargs["aggregation_strategy"]!="none":
       # Walk right to left so that removing token i does not shift the
       # indices that are still to be visited.
       for i,j in reversed(list(enumerate(q[1:],1))):
         # Merge token i into token i-1 when every token from h[i]+1 through i
         # carries "[goeswith]" as its last label field.
+        if j[-1]=="[goeswith]" and set([t[-1] for t in q[h[i]+1:i+1]])=={"[goeswith]"}:
           # Drop entry i from h and shift head indices above i down by one.
           h=[b if i>b else b-1 for a,b in enumerate(h) if i!=a]
           v[i-1]=(v[i-1][0],v.pop(i)[1])
           q.pop(i)
     t=model_outputs["sentence"].replace("\n"," ")
     u="# text = "+t+"\n"
     # Note: the loop variable e below rebinds the name used for the score tensor above.
     for i,(s,e) in enumerate(v):
       # Fields: 1-based id, surface text, "_", first label field with "."
       # removed, "_", middle label fields joined by "|" (or "_"), 1-based head
       # (0 for a self-headed token), last label field without its brackets,
       # "_", and "SpaceAfter=No" unless a gap precedes the next token.
+      u+="\t".join([str(i+1),t[s:e],"_",q[i][0].replace(".",""),"_","_" if len(q[i])<3 else "|".join(q[i][1:-1]),str(0 if h[i]==i else h[i]+1),q[i][-1][1:-1],"_","_" if i+1<len(v) and e<v[i+1][0] else "SpaceAfter=No"])+"\n"
     return u+"\n"
   def chu_liu_edmonds(self,matrix):
     # Picks one row index per column of a square score matrix and returns them
     # as a sequence h, where h[i]==i marks a self-selected (diagonal) entry.
     # Named after the Chu-Liu/Edmonds maximum spanning arborescence algorithm;
     # the method calls itself recursively on a contracted matrix.
+    import numpy
     # Greedy start: the best-scoring row for every column.
+    h=numpy.argmax(matrix,axis=0)
     # Same selection with diagonal picks replaced by -1.
     x=[-1 if i==j else j for i,j in enumerate(h)]
     # NOTE(review): in this view nothing between `y=[]` and the `max(x)` check
     # uses b or updates x/y; the loop body looks elided here - confirm against
     # the full file before relying on these comments.
     for b in [lambda x,i,j:-1 if i not in x else x[i],lambda x,i,j:-1 if j<0 else x[j]]:
       y=[]
       # Every entry of x is negative: return the current selection unchanged.
       if max(x)<0:
         return h
     # Split the indices into y (entries equal to max(x)) and x (entries below it);
     # presumably y is one cycle and x the remaining nodes - TODO confirm.
     y,x=[i for i,j in enumerate(x) if j==max(x)],[i for i,j in enumerate(x) if j<max(x)]
     # Scores relative to the best score of each column (all values <= 0).
+    z=matrix-numpy.max(matrix,axis=0)
     # Contracted matrix: the x-by-x block plus one extra row and column that
     # summarise the y group by its best relative scores.
+    m=numpy.block([[z[x,:][:,x],numpy.max(z[x,:][:,y],axis=1).reshape(len(x),1)],[numpy.max(z[y,:][:,x],axis=0),numpy.max(z[y,y])]])
     # Solve the contracted problem, then translate its indices back: positions
     # below len(x) map through x, the extra node maps to the best member of y.
+    k=[j if i==len(x) else x[j] if j<len(x) else y[numpy.argmax(z[y,x[i]])] for i,j in enumerate(self.chu_liu_edmonds(m))]
     # Members of y keep their current choice; the others take the translated result.
     h=[j if i in y else k[x.index(i)] for i,j in enumerate(h)]
     # Choose the member of y that receives the contracted node's selection:
     # a row from x when k[-1] points into x, otherwise its own diagonal entry.
+    i=y[numpy.argmax(z[x[k[-1]],y] if k[-1]<len(x) else z[y,y])]
     h[i]=x[k[-1]] if k[-1]<len(x) else i
     return h