Update README.md
README.md CHANGED
@@ -68,24 +68,31 @@ Here is how to use this model to extract radiological sentence embeddings and obtain their cosine similarity:
```python
import torch
from transformers import AutoModel, AutoTokenizer

# Load the model and tokenizer
url = "microsoft/BiomedVLP-BioViL-T"
tokenizer = AutoTokenizer.from_pretrained(url, trust_remote_code=True)
model = AutoModel.from_pretrained(url, trust_remote_code=True)

# Input text prompts describing findings.
# The order of prompts is adjusted to capture the spectrum from absence of a finding to its temporal progression.
text_prompts = ["No pleural effusion or pneumothorax is seen",
                "There is no pneumothorax or pleural effusion",
                "The extent of the pleural effusion is reduced.",
                "The extent of the pleural effusion remains constant.",
                "Interval enlargement of pleural effusion"]

# Tokenize and compute the sentence embeddings
with torch.no_grad():
    tokenizer_output = tokenizer.batch_encode_plus(batch_text_or_text_pairs=text_prompts,
                                                   add_special_tokens=True,
                                                   padding='longest',
                                                   return_tensors='pt')
    embeddings = model.get_projected_text_embeddings(input_ids=tokenizer_output.input_ids,
                                                     attention_mask=tokenizer_output.attention_mask)

    # Compute the cosine similarity of sentence embeddings obtained from input text prompts.
    sim = torch.mm(embeddings, embeddings.t())
```
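The `sim` matrix above holds pairwise dot products of the projected sentence embeddings, which correspond to cosine similarities when the embeddings are unit-norm. A minimal follow-up sketch, continuing from the variables defined in the snippet above and normalizing explicitly rather than assuming unit norm, prints how each prompt scores against the first one:

```python
import torch.nn.functional as F

# Explicit L2 normalization: a no-op if the projected embeddings are already unit-norm,
# in which case `sim` above already contains cosine similarities.
normalized = F.normalize(embeddings, dim=-1)
cosine_sim = normalized @ normalized.t()

# Compare every prompt against the first one ("No pleural effusion or pneumothorax is seen").
for prompt, score in zip(text_prompts, cosine_sim[0].tolist()):
    print(f"{score:+.3f}  {prompt}")
```

In line with the comment in the snippet, paraphrases of the same finding are expected to score higher against each other than prompts describing the finding's temporal progression.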

## Data