ageng-anugrah
committed on
Commit
•
a2afebd
1
Parent(s):
016cba2
modify model
Browse files
- README.md +7 -14
- config.json +14 -14
- pytorch_model.bin +1 -1
README.md
CHANGED
@@ -9,31 +9,24 @@ tags:
|
|
9 |
|
10 |
### Load model and tokenizer
|
11 |
```python
|
12 |
-
from transformers import AutoTokenizer,
|
13 |
|
14 |
tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
|
15 |
-
model =
|
16 |
```
|
17 |
|
18 |
### Extract NER Tag
|
19 |
```python
|
20 |
import torch
|
21 |
def predict(model, tokenizer, sentence):
|
22 |
-
# will be moved to config later
|
23 |
-
ids_to_labels = {
|
24 |
-
0: 'B-ORGANISATION',
|
25 |
-
1: 'B-PERSON',
|
26 |
-
2: 'B-PLACE',
|
27 |
-
3: 'I-ORGANISATION',
|
28 |
-
4: 'I-PERSON',
|
29 |
-
5: 'I-PLACE',
|
30 |
-
6: 'O',
|
31 |
-
}
|
32 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
33 |
inputs = tokenizer(sentence.split(),
|
34 |
is_split_into_words = True,
|
35 |
return_offsets_mapping=True,
|
36 |
-
return_tensors="pt"
|
|
|
|
|
|
|
37 |
|
38 |
model.to(device)
|
39 |
# move to gpu
|
@@ -48,7 +41,7 @@ def predict(model, tokenizer, sentence):
|
|
48 |
flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
|
49 |
|
50 |
tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
|
51 |
-
token_predictions = [
|
52 |
wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
|
53 |
|
54 |
prediction = []
|
|
|
9 |
|
10 |
### Load model and tokenizer
|
11 |
```python
|
12 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification
|
13 |
|
14 |
tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
|
15 |
+
model = AutoModelForTokenClassification.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
|
16 |
```
|
17 |
|
18 |
### Extract NER Tag
|
19 |
```python
|
20 |
import torch
|
21 |
def predict(model, tokenizer, sentence):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
23 |
inputs = tokenizer(sentence.split(),
|
24 |
is_split_into_words = True,
|
25 |
return_offsets_mapping=True,
|
26 |
+
return_tensors="pt",
|
27 |
+
padding='max_length',
|
28 |
+
truncation=True,
|
29 |
+
max_length=512)
|
30 |
|
31 |
model.to(device)
|
32 |
# move to gpu
|
|
|
41 |
flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
|
42 |
|
43 |
tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
|
44 |
+
token_predictions = [model.config.id2label[i] for i in flattened_predictions.cpu().numpy()]
|
45 |
wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
|
46 |
|
47 |
prediction = []
|
config.json
CHANGED
@@ -11,24 +11,24 @@
|
|
11 |
"hidden_dropout_prob": 0.1,
|
12 |
"hidden_size": 1024,
|
13 |
"id2label": {
|
14 |
-
"0": "
|
15 |
-
"1": "
|
16 |
-
"2": "
|
17 |
-
"3": "
|
18 |
-
"4": "
|
19 |
-
"5": "
|
20 |
-
"6": "
|
21 |
},
|
22 |
"initializer_range": 0.02,
|
23 |
"intermediate_size": 4096,
|
24 |
"label2id": {
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
"layer_norm_eps": 1e-12,
|
34 |
"max_position_embeddings": 512,
|
|
|
11 |
"hidden_dropout_prob": 0.1,
|
12 |
"hidden_size": 1024,
|
13 |
"id2label": {
|
14 |
+
"0": "B-ORGANISATION",
|
15 |
+
"1": "B-PERSON",
|
16 |
+
"2": "B-PLACE",
|
17 |
+
"3": "I-ORGANISATION",
|
18 |
+
"4": "I-PERSON",
|
19 |
+
"5": "I-PLACE",
|
20 |
+
"6": "O"
|
21 |
},
|
22 |
"initializer_range": 0.02,
|
23 |
"intermediate_size": 4096,
|
24 |
"label2id": {
|
25 |
+
"B-ORGANISATION": 0,
|
26 |
+
"B-PERSON": 1,
|
27 |
+
"B-PLACE": 2,
|
28 |
+
"I-ORGANISATION": 3,
|
29 |
+
"I-PERSON": 4,
|
30 |
+
"I-PLACE": 5,
|
31 |
+
"O": 6
|
32 |
},
|
33 |
"layer_norm_eps": 1e-12,
|
34 |
"max_position_embeddings": 512,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1336536045
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c763564e134825e0b325b98415ec3dfaab39d4e8bbd1644372a94489050ade21
|
3 |
size 1336536045
|