---
datasets:
- pubmed
language:
- en
tags:
- BERT
---

# Model Card for MDDDDR/bert_large_uncased_NER

## Model details

- base_model : [google-bert/bert-large-uncased](https://huggingface.co/google-bert/bert-large-uncased)
- hidden_size : 1024
- max_position_embeddings : 512
- num_attention_heads : 16
- num_hidden_layers : 24
- vocab_size : 30522
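
These values can be checked against the checkpoint's own configuration; a minimal sketch using the standard `AutoConfig` API (the field names are the usual `BertConfig` attributes):

```python
from transformers import AutoConfig

# load the config that ships with the checkpoint
config = AutoConfig.from_pretrained('MDDDDR/bert_large_uncased_NER')

# the values listed above map to these standard BertConfig fields
for field in ('hidden_size', 'max_position_embeddings',
              'num_attention_heads', 'num_hidden_layers', 'vocab_size'):
    print(field, '=', getattr(config, field))
```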

# Basic usage

```python
from transformers import AutoTokenizer, AutoModelForTokenClassification
import numpy as np
import torch

# label-id to tag mapping (BIO scheme)
id2tag = {0: 'O', 1: 'B_MT', 2: 'I_MT'}

# load model & tokenizer
MODEL_NAME = 'MDDDDR/bert_large_uncased_NER'
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# prepare input
text = 'mental disorder can also contribute to the development of diabetes through various mechanism including increased stress, poor self care behavior, and adverse effect on glucose metabolism.'
tokenized = tokenizer(text, return_tensors='pt')

# forward pass (inference only, so no gradients are needed)
with torch.no_grad():
    output = model(**tokenized)

# argmax over the label dimension; drop the [CLS] and [SEP] positions
preds = np.argmax(output.logits.cpu().numpy(), axis=2)[0][1:-1]

# print the predicted tag for each wordpiece token
for token, tag_id in zip(tokenizer.tokenize(text), preds):
    print("{}\t{}".format(id2tag[tag_id], token))
# B_MT mental
# B_MT disorder
```
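
The loop above prints one tag per wordpiece. To recover whole entity mentions, consecutive `B_MT`/`I_MT` pieces can be grouped and the `##` continuation pieces merged back into words. A minimal sketch reusing `text`, `tokenizer`, `preds`, and `id2tag` from above (the grouping logic is illustrative, not part of the released checkpoint):

```python
# group consecutive B_MT/I_MT wordpieces into entity mentions
tokens = tokenizer.tokenize(text)
entities, current = [], []
for token, tag_id in zip(tokens, preds):
    tag = id2tag[tag_id]
    if tag == 'B_MT':                # a new mention starts here
        if current:
            entities.append(current)
        current = [token]
    elif tag == 'I_MT' and current:  # continue the open mention
        current.append(token)
    else:                            # 'O' (or a stray I_MT) closes it
        if current:
            entities.append(current)
        current = []
if current:
    entities.append(current)

# merge wordpieces: '##' marks a continuation of the previous piece
mentions = [''.join(t[2:] if t.startswith('##') else ' ' + t
                    for t in ent).strip() for ent in entities]
print(mentions)
```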

## Framework versions

- transformers : 4.39.1
- torch : 2.1.0+cu121
- datasets : 2.18.0
- tokenizers : 0.15.2
- numpy : 1.20.0