upload
Browse files
- README.md +21 -4
- config.json +138 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +13 -0
README.md
CHANGED
@@ -1,5 +1,22 @@
|
|
1 |
-
|
2 |
-
license: apache-2.0
|
3 |
-
---
|
4 |
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# GPLinker关系抽取模型
|
|
|
|
|
2 |
|
3 |
+
## 模型介绍
|
4 |
+
|
5 |
+
+ 数据集:百度 `DuIE 2.0` 信息抽取数据集
|
6 |
+
|
7 |
+
+ 模型方法:[GPLinker:基于GlobalPointer的实体关系联合抽取](https://kexue.fm/archives/8888)
|
8 |
+
|
9 |
+
## 使用方法
|
10 |
+
|
11 |
+
```commandline
|
12 |
+
pip install lightningnlp
|
13 |
+
```
|
14 |
+
|
15 |
+
```python
|
16 |
+
from pprint import pprint
|
17 |
+
from lightningnlp.task.relation_extraction import RelationExtractionPipeline
|
18 |
+
|
19 |
+
pipline = RelationExtractionPipeline(model_name_or_path="xusenlin/duie-gplinker", model_name="gplinker", model_type="bert")
|
20 |
+
text = "查尔斯·阿兰基斯(Charles Aránguiz),1989年4月17日出生于智利圣地亚哥,智利职业足球运动员,司职中场,效力于德国足球甲级联赛勒沃库森足球俱乐部。"
|
21 |
+
pprint(pipline(text))
|
22 |
+
```
|
config.json
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "hfl/chinese-roberta-wwm-ext",
|
3 |
+
"architectures": [
|
4 |
+
"GPLinker"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"eos_token_id": 2,
|
11 |
+
"head_size": 64,
|
12 |
+
"hidden_act": "gelu",
|
13 |
+
"hidden_dropout_prob": 0.1,
|
14 |
+
"hidden_size": 768,
|
15 |
+
"id2predicate": {
|
16 |
+
"0": "\u4e08\u592b",
|
17 |
+
"1": "\u4e0a\u6620\u65f6\u95f4",
|
18 |
+
"2": "\u4e13\u4e1a\u4ee3\u7801",
|
19 |
+
"3": "\u4e3b\u6301\u4eba",
|
20 |
+
"4": "\u4e3b\u6f14",
|
21 |
+
"5": "\u4e3b\u89d2",
|
22 |
+
"6": "\u4eba\u53e3\u6570\u91cf",
|
23 |
+
"7": "\u4f5c\u66f2",
|
24 |
+
"8": "\u4f5c\u8005",
|
25 |
+
"9": "\u4f5c\u8bcd",
|
26 |
+
"10": "\u4fee\u4e1a\u5e74\u9650",
|
27 |
+
"11": "\u51fa\u54c1\u516c\u53f8",
|
28 |
+
"12": "\u51fa\u7248\u793e",
|
29 |
+
"13": "\u51fa\u751f\u5730",
|
30 |
+
"14": "\u51fa\u751f\u65e5\u671f",
|
31 |
+
"15": "\u521b\u59cb\u4eba",
|
32 |
+
"16": "\u5236\u7247\u4eba",
|
33 |
+
"17": "\u5360\u5730\u9762\u79ef",
|
34 |
+
"18": "\u53f7",
|
35 |
+
"19": "\u5609\u5bbe",
|
36 |
+
"20": "\u56fd\u7c4d",
|
37 |
+
"21": "\u59bb\u5b50",
|
38 |
+
"22": "\u5b57",
|
39 |
+
"23": "\u5b98\u65b9\u8bed\u8a00",
|
40 |
+
"24": "\u5bfc\u6f14",
|
41 |
+
"25": "\u603b\u90e8\u5730\u70b9",
|
42 |
+
"26": "\u6210\u7acb\u65e5\u671f",
|
43 |
+
"27": "\u6240\u5728\u57ce\u5e02",
|
44 |
+
"28": "\u6240\u5c5e\u4e13\u8f91",
|
45 |
+
"29": "\u6539\u7f16\u81ea",
|
46 |
+
"30": "\u671d\u4ee3",
|
47 |
+
"31": "\u6b4c\u624b",
|
48 |
+
"32": "\u6bcd\u4eb2",
|
49 |
+
"33": "\u6bd5\u4e1a\u9662\u6821",
|
50 |
+
"34": "\u6c11\u65cf",
|
51 |
+
"35": "\u6c14\u5019",
|
52 |
+
"36": "\u6ce8\u518c\u8d44\u672c",
|
53 |
+
"37": "\u6d77\u62d4",
|
54 |
+
"38": "\u7236\u4eb2",
|
55 |
+
"39": "\u76ee",
|
56 |
+
"40": "\u7956\u7c4d",
|
57 |
+
"41": "\u7b80\u79f0",
|
58 |
+
"42": "\u7f16\u5267",
|
59 |
+
"43": "\u8463\u4e8b\u957f",
|
60 |
+
"44": "\u8eab\u9ad8",
|
61 |
+
"45": "\u8fde\u8f7d\u7f51\u7ad9",
|
62 |
+
"46": "\u90ae\u653f\u7f16\u7801",
|
63 |
+
"47": "\u9762\u79ef",
|
64 |
+
"48": "\u9996\u90fd"
|
65 |
+
},
|
66 |
+
"initializer_range": 0.02,
|
67 |
+
"intermediate_size": 3072,
|
68 |
+
"layer_norm_eps": 1e-12,
|
69 |
+
"max_position_embeddings": 512,
|
70 |
+
"model_type": "bert",
|
71 |
+
"num_attention_heads": 12,
|
72 |
+
"num_hidden_layers": 12,
|
73 |
+
"num_predicates": 49,
|
74 |
+
"output_past": true,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"pooler_fc_size": 768,
|
77 |
+
"pooler_num_attention_heads": 12,
|
78 |
+
"pooler_num_fc_layers": 3,
|
79 |
+
"pooler_size_per_head": 128,
|
80 |
+
"pooler_type": "first_token_transform",
|
81 |
+
"position_embedding_type": "absolute",
|
82 |
+
"predicate2id": {
|
83 |
+
"\u4e08\u592b": 0,
|
84 |
+
"\u4e0a\u6620\u65f6\u95f4": 1,
|
85 |
+
"\u4e13\u4e1a\u4ee3\u7801": 2,
|
86 |
+
"\u4e3b\u6301\u4eba": 3,
|
87 |
+
"\u4e3b\u6f14": 4,
|
88 |
+
"\u4e3b\u89d2": 5,
|
89 |
+
"\u4eba\u53e3\u6570\u91cf": 6,
|
90 |
+
"\u4f5c\u66f2": 7,
|
91 |
+
"\u4f5c\u8005": 8,
|
92 |
+
"\u4f5c\u8bcd": 9,
|
93 |
+
"\u4fee\u4e1a\u5e74\u9650": 10,
|
94 |
+
"\u51fa\u54c1\u516c\u53f8": 11,
|
95 |
+
"\u51fa\u7248\u793e": 12,
|
96 |
+
"\u51fa\u751f\u5730": 13,
|
97 |
+
"\u51fa\u751f\u65e5\u671f": 14,
|
98 |
+
"\u521b\u59cb\u4eba": 15,
|
99 |
+
"\u5236\u7247\u4eba": 16,
|
100 |
+
"\u5360\u5730\u9762\u79ef": 17,
|
101 |
+
"\u53f7": 18,
|
102 |
+
"\u5609\u5bbe": 19,
|
103 |
+
"\u56fd\u7c4d": 20,
|
104 |
+
"\u59bb\u5b50": 21,
|
105 |
+
"\u5b57": 22,
|
106 |
+
"\u5b98\u65b9\u8bed\u8a00": 23,
|
107 |
+
"\u5bfc\u6f14": 24,
|
108 |
+
"\u603b\u90e8\u5730\u70b9": 25,
|
109 |
+
"\u6210\u7acb\u65e5\u671f": 26,
|
110 |
+
"\u6240\u5728\u57ce\u5e02": 27,
|
111 |
+
"\u6240\u5c5e\u4e13\u8f91": 28,
|
112 |
+
"\u6539\u7f16\u81ea": 29,
|
113 |
+
"\u671d\u4ee3": 30,
|
114 |
+
"\u6b4c\u624b": 31,
|
115 |
+
"\u6bcd\u4eb2": 32,
|
116 |
+
"\u6bd5\u4e1a\u9662\u6821": 33,
|
117 |
+
"\u6c11\u65cf": 34,
|
118 |
+
"\u6c14\u5019": 35,
|
119 |
+
"\u6ce8\u518c\u8d44\u672c": 36,
|
120 |
+
"\u6d77\u62d4": 37,
|
121 |
+
"\u7236\u4eb2": 38,
|
122 |
+
"\u76ee": 39,
|
123 |
+
"\u7956\u7c4d": 40,
|
124 |
+
"\u7b80\u79f0": 41,
|
125 |
+
"\u7f16\u5267": 42,
|
126 |
+
"\u8463\u4e8b\u957f": 43,
|
127 |
+
"\u8eab\u9ad8": 44,
|
128 |
+
"\u8fde\u8f7d\u7f51\u7ad9": 45,
|
129 |
+
"\u90ae\u653f\u7f16\u7801": 46,
|
130 |
+
"\u9762\u79ef": 47,
|
131 |
+
"\u9996\u90fd": 48
|
132 |
+
},
|
133 |
+
"torch_dtype": "float32",
|
134 |
+
"transformers_version": "4.22.1",
|
135 |
+
"type_vocab_size": 2,
|
136 |
+
"use_cache": true,
|
137 |
+
"vocab_size": 21128
|
138 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5875be88fd45bc4585ad0cc60f12727749df74a62f8bf383a9e7e1978bc5c920
|
3 |
+
size 408328921
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"do_lower_case": true,
|
4 |
+
"mask_token": "[MASK]",
|
5 |
+
"name_or_path": "hfl/chinese-roberta-wwm-ext",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"special_tokens_map_file": "/home/xusenlin/.cache/huggingface/hub/models--hfl--chinese-roberta-wwm-ext/snapshots/5c58d0b8ec1d9014354d691c538661bf00bfdb44/special_tokens_map.json",
|
9 |
+
"strip_accents": null,
|
10 |
+
"tokenize_chinese_chars": true,
|
11 |
+
"tokenizer_class": "BertTokenizer",
|
12 |
+
"unk_token": "[UNK]"
|
13 |
+
}
|