anicolson committed
Commit
7f9decd
Parent: 6eee4a2

Upload model

Files changed (1)
  1. modelling_single.py +9 -7
modelling_single.py CHANGED
@@ -4,7 +4,7 @@ from typing import Any, Optional, Tuple, Union
 import torch
 import transformers
 from torch.nn import CrossEntropyLoss
-from transformers import VisionEncoderDecoderModel
+from transformers import PreTrainedTokenizerFast, VisionEncoderDecoderModel
 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
 from transformers.modeling_utils import PreTrainedModel
@@ -94,6 +94,10 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
         decoder: Optional[PreTrainedModel] = None,
     ):
 
+        if decoder:
+            assert decoder.config.add_cross_attention, '"add_cross_attention" must be True for the given decoder'
+            assert decoder.config.is_decoder, '"is_decoder" must be True for the given decoder'
+
         if config is None and (encoder is None or decoder is None):
             raise ValueError("Either a configuration or an encoder and a decoder has to be provided.")
         if config is None:
@@ -132,9 +136,6 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
         self.encoder.config = self.config.encoder
         self.decoder.config = self.config.decoder
 
-        # config.add_cross_attention = True
-        # config.is_decoder = True
-
     def forward(
         self,
         pixel_values: Optional[torch.FloatTensor] = None,
@@ -317,7 +318,7 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
 
         return token_type_ids
 
-    def tokenize_report_teacher_forcing(self, findings: str, impression: str, tokenizer):
+    def tokenize_report_teacher_forcing(self, findings: str, impression: str, tokenizer: PreTrainedTokenizerFast, max_len: int):
         """
         Tokenize the reports and creates the inputs and targets for teacher forcing.
 
@@ -326,6 +327,7 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
             impression - impression section.
             return_token_type_ids - return the token type identifiers.
             tokenizer - Hugging Face tokenizer.
+            max_len - maximum number of tokens.
 
         Returns:
             decoder_input_ids - the token identifiers for the input of the decoder.
@@ -342,7 +344,7 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
             report,
             padding='longest',
             truncation=True,
-            max_length=self.decoder_max_len + 1, # +1 to account for the bias between input and target.
+            max_length=max_len + 1, # +1 to account for the bias between input and target.
             return_tensors='pt',
             return_token_type_ids=False,
             add_special_tokens=False,
@@ -363,7 +365,7 @@ class SingleCXREncoderDecoderModel(VisionEncoderDecoderModel):
 
         return batch_dict
 
-    def split_and_decode_sections(self, token_ids, special_token_ids, tokenizer):
+    def split_and_decode_sections(self, token_ids, special_token_ids, tokenizer: PreTrainedTokenizerFast):
         """
         Split the token identifiers into sections, then convert the token identifiers into strings.
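
A minimal sketch of how the updated interface would be called. The ViT/BERT sub-models, the tokenizer checkpoint, and the max_len value below are illustrative assumptions, not part of this commit, and the tokenizer is assumed to carry whatever special tokens the report sections require:

import transformers

from modelling_single import SingleCXREncoderDecoderModel

# The decoder must now be built with cross-attention enabled; otherwise the
# assertions added to __init__ fail at construction time.
decoder = transformers.BertLMHeadModel(
    transformers.BertConfig(is_decoder=True, add_cross_attention=True),
)
encoder = transformers.ViTModel(transformers.ViTConfig())
model = SingleCXREncoderDecoderModel(encoder=encoder, decoder=decoder)

# The maximum report length is now passed explicitly, rather than being read
# from self.decoder_max_len inside the method.
tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
batch = model.tokenize_report_teacher_forcing(
    findings='Heart size is normal. Lungs are clear.',
    impression='No acute cardiopulmonary abnormality.',
    tokenizer=tokenizer,
    max_len=256,  # assumed token budget, not a value from this commit.
)

Replacing the commented-out config flags with assertions surfaces a misconfigured decoder as a clear error at construction time, instead of a shape mismatch deep inside cross-attention.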