xusenlin committed on
Commit
ba9c6c6
1 Parent(s): e6f30f0
README.md CHANGED
@@ -1,5 +1,22 @@
1
- ---
2
- license: apache-2.0
3
- ---
4
 
5
- GPLinker关系抽取模型
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GPLinker关系抽取模型
 
 
2
 
3
+ ## 模型介绍
4
+
5
+ + 数据集:百度 `DuIE2.0` 信息抽取数据集
6
+
7
+ + 模型方法:[GPLinker:基于GlobalPointer的实体关系联合抽取](https://kexue.fm/archives/8888)
8
+
9
+ ## 使用方法
10
+
11
+ ```bash
12
+ pip install lightningnlp
13
+ ```
14
+
15
+ ```python
16
+ from pprint import pprint
17
+ from lightningnlp.task.relation_extraction import RelationExtractionPipeline
18
+
19
+ pipeline = RelationExtractionPipeline(model_name_or_path="xusenlin/duie-gplinker", model_name="gplinker", model_type="bert")
20
+ text = "查尔斯·阿兰基斯(Charles Aránguiz),1989年4月17日出生于智利圣地亚哥,智利职业足球运动员,司职中场,效力于德国足球甲级联赛勒沃库森足球俱乐部。"
21
+ pprint(pipeline(text))
22
+ ```
config.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hfl/chinese-roberta-wwm-ext",
3
+ "architectures": [
4
+ "GPLinker"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "eos_token_id": 2,
11
+ "head_size": 64,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2predicate": {
16
+ "0": "\u4e08\u592b",
17
+ "1": "\u4e0a\u6620\u65f6\u95f4",
18
+ "2": "\u4e13\u4e1a\u4ee3\u7801",
19
+ "3": "\u4e3b\u6301\u4eba",
20
+ "4": "\u4e3b\u6f14",
21
+ "5": "\u4e3b\u89d2",
22
+ "6": "\u4eba\u53e3\u6570\u91cf",
23
+ "7": "\u4f5c\u66f2",
24
+ "8": "\u4f5c\u8005",
25
+ "9": "\u4f5c\u8bcd",
26
+ "10": "\u4fee\u4e1a\u5e74\u9650",
27
+ "11": "\u51fa\u54c1\u516c\u53f8",
28
+ "12": "\u51fa\u7248\u793e",
29
+ "13": "\u51fa\u751f\u5730",
30
+ "14": "\u51fa\u751f\u65e5\u671f",
31
+ "15": "\u521b\u59cb\u4eba",
32
+ "16": "\u5236\u7247\u4eba",
33
+ "17": "\u5360\u5730\u9762\u79ef",
34
+ "18": "\u53f7",
35
+ "19": "\u5609\u5bbe",
36
+ "20": "\u56fd\u7c4d",
37
+ "21": "\u59bb\u5b50",
38
+ "22": "\u5b57",
39
+ "23": "\u5b98\u65b9\u8bed\u8a00",
40
+ "24": "\u5bfc\u6f14",
41
+ "25": "\u603b\u90e8\u5730\u70b9",
42
+ "26": "\u6210\u7acb\u65e5\u671f",
43
+ "27": "\u6240\u5728\u57ce\u5e02",
44
+ "28": "\u6240\u5c5e\u4e13\u8f91",
45
+ "29": "\u6539\u7f16\u81ea",
46
+ "30": "\u671d\u4ee3",
47
+ "31": "\u6b4c\u624b",
48
+ "32": "\u6bcd\u4eb2",
49
+ "33": "\u6bd5\u4e1a\u9662\u6821",
50
+ "34": "\u6c11\u65cf",
51
+ "35": "\u6c14\u5019",
52
+ "36": "\u6ce8\u518c\u8d44\u672c",
53
+ "37": "\u6d77\u62d4",
54
+ "38": "\u7236\u4eb2",
55
+ "39": "\u76ee",
56
+ "40": "\u7956\u7c4d",
57
+ "41": "\u7b80\u79f0",
58
+ "42": "\u7f16\u5267",
59
+ "43": "\u8463\u4e8b\u957f",
60
+ "44": "\u8eab\u9ad8",
61
+ "45": "\u8fde\u8f7d\u7f51\u7ad9",
62
+ "46": "\u90ae\u653f\u7f16\u7801",
63
+ "47": "\u9762\u79ef",
64
+ "48": "\u9996\u90fd"
65
+ },
66
+ "initializer_range": 0.02,
67
+ "intermediate_size": 3072,
68
+ "layer_norm_eps": 1e-12,
69
+ "max_position_embeddings": 512,
70
+ "model_type": "bert",
71
+ "num_attention_heads": 12,
72
+ "num_hidden_layers": 12,
73
+ "num_predicates": 49,
74
+ "output_past": true,
75
+ "pad_token_id": 0,
76
+ "pooler_fc_size": 768,
77
+ "pooler_num_attention_heads": 12,
78
+ "pooler_num_fc_layers": 3,
79
+ "pooler_size_per_head": 128,
80
+ "pooler_type": "first_token_transform",
81
+ "position_embedding_type": "absolute",
82
+ "predicate2id": {
83
+ "\u4e08\u592b": 0,
84
+ "\u4e0a\u6620\u65f6\u95f4": 1,
85
+ "\u4e13\u4e1a\u4ee3\u7801": 2,
86
+ "\u4e3b\u6301\u4eba": 3,
87
+ "\u4e3b\u6f14": 4,
88
+ "\u4e3b\u89d2": 5,
89
+ "\u4eba\u53e3\u6570\u91cf": 6,
90
+ "\u4f5c\u66f2": 7,
91
+ "\u4f5c\u8005": 8,
92
+ "\u4f5c\u8bcd": 9,
93
+ "\u4fee\u4e1a\u5e74\u9650": 10,
94
+ "\u51fa\u54c1\u516c\u53f8": 11,
95
+ "\u51fa\u7248\u793e": 12,
96
+ "\u51fa\u751f\u5730": 13,
97
+ "\u51fa\u751f\u65e5\u671f": 14,
98
+ "\u521b\u59cb\u4eba": 15,
99
+ "\u5236\u7247\u4eba": 16,
100
+ "\u5360\u5730\u9762\u79ef": 17,
101
+ "\u53f7": 18,
102
+ "\u5609\u5bbe": 19,
103
+ "\u56fd\u7c4d": 20,
104
+ "\u59bb\u5b50": 21,
105
+ "\u5b57": 22,
106
+ "\u5b98\u65b9\u8bed\u8a00": 23,
107
+ "\u5bfc\u6f14": 24,
108
+ "\u603b\u90e8\u5730\u70b9": 25,
109
+ "\u6210\u7acb\u65e5\u671f": 26,
110
+ "\u6240\u5728\u57ce\u5e02": 27,
111
+ "\u6240\u5c5e\u4e13\u8f91": 28,
112
+ "\u6539\u7f16\u81ea": 29,
113
+ "\u671d\u4ee3": 30,
114
+ "\u6b4c\u624b": 31,
115
+ "\u6bcd\u4eb2": 32,
116
+ "\u6bd5\u4e1a\u9662\u6821": 33,
117
+ "\u6c11\u65cf": 34,
118
+ "\u6c14\u5019": 35,
119
+ "\u6ce8\u518c\u8d44\u672c": 36,
120
+ "\u6d77\u62d4": 37,
121
+ "\u7236\u4eb2": 38,
122
+ "\u76ee": 39,
123
+ "\u7956\u7c4d": 40,
124
+ "\u7b80\u79f0": 41,
125
+ "\u7f16\u5267": 42,
126
+ "\u8463\u4e8b\u957f": 43,
127
+ "\u8eab\u9ad8": 44,
128
+ "\u8fde\u8f7d\u7f51\u7ad9": 45,
129
+ "\u90ae\u653f\u7f16\u7801": 46,
130
+ "\u9762\u79ef": 47,
131
+ "\u9996\u90fd": 48
132
+ },
133
+ "torch_dtype": "float32",
134
+ "transformers_version": "4.22.1",
135
+ "type_vocab_size": 2,
136
+ "use_cache": true,
137
+ "vocab_size": 21128
138
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5875be88fd45bc4585ad0cc60f12727749df74a62f8bf383a9e7e1978bc5c920
3
+ size 408328921
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "name_or_path": "hfl/chinese-roberta-wwm-ext",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "special_tokens_map_file": "/home/xusenlin/.cache/huggingface/hub/models--hfl--chinese-roberta-wwm-ext/snapshots/5c58d0b8ec1d9014354d691c538661bf00bfdb44/special_tokens_map.json",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "BertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }