Upload modeling_cocom.py

modeling_cocom.py (+2 -11)
@@ -196,7 +196,6 @@ class COCOM(PreTrainedModel):
         else:
             compressed_embs = self.compr_decoder(enc_input_ids, enc_attention_mask)
             input_embeds = self.replace_embeddings(compressed_embs, dec_input_ids, indices)
-            inputs_embeds = inputs_embeds.to(compressed_embs.device)
         return input_embeds
 
     def compr_decoder(self, input_ids, attention_mask):
@@ -220,9 +219,7 @@ class COCOM(PreTrainedModel):
         for i in range(batch_size):
             for j in range(indices[i], indices[i + 1]):
                 start_idx = first_mem_token_indices[i].item() + (j-indices[i]) * slot_len
-
-                inputs_embeds = inputs_embeds.to(compressed_embs.device)
-                inputs_embeds[i, start_idx:start_idx + num_embs, :] = compressed_embs[j].to(inputs_embeds.device)
+                inputs_embeds[i, start_idx:start_idx + num_embs, :] = compressed_embs[j]
         return inputs_embeds
 
 
@@ -239,13 +236,7 @@ class COCOM(PreTrainedModel):
         # dec_attention_mask: attention mask of dec_input_ids
 
         # Perform compression with gradient tracking
-
-        inputs_embeds = self.compress_and_replace_emb(
-            enc_input_ids.to(self.decoder.device),
-            enc_attention_mask.to(self.decoder.device),
-            dec_input_ids.to(self.decoder.device),
-        )
-
+        inputs_embeds = self.compress_and_replace_emb(enc_input_ids, enc_attention_mask, dec_input_ids)
         # if training_form is compressor, then detach the inputs_embeds, to make gradient not count in decoder
         if (self.training_form == "compressor") and (self.compr is None):
             inputs_embeds = inputs_embeds.detach()