InstaDeepAI
/

ChatNT

Text Generation

feature-extraction

Model card Files Files and versions Community

Yanisadel committed on Apr 1

Commit

8f1087e

·

1 Parent(s): 80a78d5

Update chatNT.py

Files changed (1) hide show

chatNT.py +10 -0

chatNT.py CHANGED Viewed

@@ -405,7 +405,9 @@ class TorchBioBrainDecoder(nn.Module):
         """
         # Compute English token embeddings
         tokens_embeddings = self.gpt_model.token_embed(english_token_ids)
         if projected_bio_embeddings is not None:
             (
@@ -696,11 +698,14 @@ class TorchMultiOmicsModel(PreTrainedModel):
             if projected_bio_embeddings is None:
                 # Compute bio sequences embeddings
                 bio_embeddings_list = [
                     self.biobrain_encoder(bio_token_ids=bio_token_ids[:, bio_seq_num])
                     for bio_seq_num in range(num_bio_sequences)
                 ]
                 # Project these embeddings
                 projected_bio_embeddings = [
                     self.projection_model(
@@ -710,9 +715,14 @@ class TorchMultiOmicsModel(PreTrainedModel):
                     )
                     for bio_seq_num, bio_embeddings in enumerate(bio_embeddings_list)
                 ]
                 projected_bio_embeddings = torch.stack(projected_bio_embeddings, dim=1)
         # decode
         logits = self.biobrain_decoder(
             english_token_ids=english_token_ids,
             projected_bio_embeddings=projected_bio_embeddings,

         """
         # Compute English token embeddings
+        print("(debug) in biobraindecoder, english tokens ids : ", english_token_ids.shape)
         tokens_embeddings = self.gpt_model.token_embed(english_token_ids)
+        print("(debug) tokens_embeddings shape : ", tokens_embeddings.shape)
         if projected_bio_embeddings is not None:
             (
             if projected_bio_embeddings is None:
                 # Compute bio sequences embeddings
+                print("(debug) shape bio tokens ids : ", bio_tokens_ids.shape)
                 bio_embeddings_list = [
                     self.biobrain_encoder(bio_token_ids=bio_token_ids[:, bio_seq_num])
                     for bio_seq_num in range(num_bio_sequences)
                 ]
+                print("(debug) shape of embeddings : ", bio_embeddings_list[0].shape)
                 # Project these embeddings
                 projected_bio_embeddings = [
                     self.projection_model(
                     )
                     for bio_seq_num, bio_embeddings in enumerate(bio_embeddings_list)
                 ]
+                print("(debug) Shape output projection model : ", projected_bio_embeddings[0].shape)
                 projected_bio_embeddings = torch.stack(projected_bio_embeddings, dim=1)
+                print("(debug) Shape projected bio embeddings : "), projected_bio_embeddings.shape)
         # decode
+        print("(debug) Going in biobrain decoder : ")
+        print("(debug) English token ids : ", english_token_ids.shape)
+        print("(debug) Projected bio embeddings : ", projected_bio_embeddings.shape)
         logits = self.biobrain_decoder(
             english_token_ids=english_token_ids,
             projected_bio_embeddings=projected_bio_embeddings,