Update README.md
Browse files
README.md
CHANGED
@@ -62,10 +62,11 @@ from medkit.core.text import TextDocument
|
|
62 |
from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
|
63 |
|
64 |
matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
|
|
|
65 |
test_doc = TextDocument("Elle souffre d'asthme mais n'a pas besoin d'Allegra")
|
|
|
66 |
|
67 |
-
#
|
68 |
-
detected_entities = matcher.run([test_doc.raw_segment])
|
69 |
msg = "|".join(f"'{entity.label}':{entity.text}" for entity in detected_entities)
|
70 |
print(f"Text: '{test_doc.text}'\n{msg}")
|
71 |
```
|
@@ -119,10 +120,18 @@ from medkit.text.metrics.ner import SeqEvalEvaluator
|
|
119 |
matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
|
120 |
predicted_entities = [matcher.run([doc.raw_segment]) for doc in test_documents]
|
121 |
|
122 |
-
# define seqeval evaluator
|
123 |
evaluator = SeqEvalEvaluator(tagging_scheme="iob2")
|
124 |
evaluator.compute(test_documents,predicted_entities=predicted_entities)
|
125 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
# Citation
|
128 |
|
|
|
62 |
from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
|
63 |
|
64 |
matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
|
65 |
+
|
66 |
test_doc = TextDocument("Elle souffre d'asthme mais n'a pas besoin d'Allegra")
|
67 |
+
detected_entities = matcher.run([test_doc.raw_segment])
|
68 |
|
69 |
+
# print the label and text of each detected entity
|
|
|
70 |
msg = "|".join(f"'{entity.label}':{entity.text}" for entity in detected_entities)
|
71 |
print(f"Text: '{test_doc.text}'\n{msg}")
|
72 |
```
|
|
|
120 |
matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
|
121 |
predicted_entities = [matcher.run([doc.raw_segment]) for doc in test_documents]
|
122 |
|
|
|
123 |
evaluator = SeqEvalEvaluator(tagging_scheme="iob2")
|
124 |
evaluator.compute(test_documents,predicted_entities=predicted_entities)
|
125 |
```
|
126 |
+
You can also pass the Hugging Face tokenizer to the evaluator to evaluate by tokens instead of by characters:
|
127 |
+
```python
|
128 |
+
from transformers import AutoTokenizer
|
129 |
+
|
130 |
+
tokenizer_drbert = AutoTokenizer.from_pretrained("camila-ud/DrBERT-CASM2", use_fast=True)
|
131 |
+
|
132 |
+
evaluator = SeqEvalEvaluator(tokenizer=tokenizer_drbert,tagging_scheme="iob2")
|
133 |
+
evaluator.compute(test_documents,predicted_entities=predicted_entities)
|
134 |
+
```
|
135 |
|
136 |
# Citation
|
137 |
|