ageng-anugrah
/

indobert-large-p2-finetuned-ner

@@ -9,31 +9,24 @@ tags:
 ### Load model and tokenizer
 ```python
-from transformers import AutoTokenizer, AutoModel
 tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
-model = AutoModel.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
 ```
 ### Extract NER Tag
 ```python
 import torch
 def predict(model, tokenizer, sentence):
-    # will be moved to config later
-    ids_to_labels = {
-      0: 'B-ORGANISATION',
-      1: 'B-PERSON',
-      2: 'B-PLACE',
-      3: 'I-ORGANISATION',
-      4: 'I-PERSON',
-      5: 'I-PLACE',
-      6: 'O',
-    }
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     inputs = tokenizer(sentence.split(),
                     is_split_into_words = True,
                     return_offsets_mapping=True,
-                    return_tensors="pt")
     model.to(device)
     # move to the selected device (GPU if available, otherwise CPU)
@@ -48,7 +41,7 @@ def predict(model, tokenizer, sentence):
     flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
     tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
-    token_predictions = [ids_to_labels[i] for i in flattened_predictions.cpu().numpy()]
     wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
     prediction = []

 ### Load model and tokenizer
 ```python
+from transformers import AutoTokenizer, AutoModelForTokenClassification
 tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
+model = AutoModelForTokenClassification.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
 ```
 ### Extract NER Tag
 ```python
 import torch
 def predict(model, tokenizer, sentence):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     inputs = tokenizer(sentence.split(),
                     is_split_into_words = True,
                     return_offsets_mapping=True,
+                    return_tensors="pt",
+                    padding='max_length',
+                    truncation=True,
+                    max_length=512)
     model.to(device)
     # move to the selected device (GPU if available, otherwise CPU)
     flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
     tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
+    token_predictions = [model.config.id2label[i] for i in flattened_predictions.cpu().numpy()]
     wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
     prediction = []

config.json CHANGED Viewed

@@ -11,24 +11,24 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,
   "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1",
-    "2": "LABEL_2",
-    "3": "LABEL_3",
-    "4": "LABEL_4",
-    "5": "LABEL_5",
-    "6": "LABEL_6"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1,
-    "LABEL_2": 2,
-    "LABEL_3": 3,
-    "LABEL_4": 4,
-    "LABEL_5": 5,
-    "LABEL_6": 6
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,
   "id2label": {
+    "0": "B-ORGANISATION",
+    "1": "B-PERSON",
+    "2": "B-PLACE",
+    "3": "I-ORGANISATION",
+    "4": "I-PERSON",
+    "5": "I-PLACE",
+    "6": "O"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
+    "B-ORGANISATION": 0,
+    "B-PERSON": 1,
+    "B-PLACE": 2,
+    "I-ORGANISATION": 3,
+    "I-PERSON": 4,
+    "I-PLACE": 5,
+    "O": 6
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:393718ff7be5d13f68dd695793eefcf0273e9bcf6dee91d6613989cdb96072a5
 size 1336536045

 version https://git-lfs.github.com/spec/v1
+oid sha256:c763564e134825e0b325b98415ec3dfaab39d4e8bbd1644372a94489050ade21
 size 1336536045