Update README.md
Browse files
README.md
CHANGED
@@ -51,8 +51,25 @@ The largest Spanish biomedical and health corpus to date gathered from a massive
WIP 🚧

## How to use the discriminator in `transformers`

```py
from transformers import ElectraForPreTraining, ElectraTokenizerFast
import torch

discriminator = ElectraForPreTraining.from_pretrained("mrm8488/biomedtra-small-es")
tokenizer = ElectraTokenizerFast.from_pretrained("mrm8488/biomedtra-small-es")

sentence = "Los españoles tienden a sufir déficit de vitamina c"
fake_sentence = "Los españoles tienden a déficit sufrir de vitamina c"

fake_tokens = tokenizer.tokenize(fake_sentence)
fake_inputs = tokenizer.encode(fake_sentence, return_tensors="pt")
discriminator_outputs = discriminator(fake_inputs)
predictions = torch.round((torch.sign(discriminator_outputs[0]) + 1) / 2)

[print("%7s" % token, end="") for token in fake_tokens]

[print("%7s" % prediction, end="") for prediction in predictions.tolist()]
```

## Acknowledgments