isoformer-anonymous committed on
Commit
8b8a26d
1 Parent(s): dae1abd

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +31 -0
README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Example: tokenize one DNA / RNA / protein input triple, run the Isoformer
# checkpoint on it, and print the two outputs read from the result.
from transformers import AutoTokenizer, AutoModelForMaskedLM
import numpy as np
import torch

# Token id excluded from the RNA and protein attention masks below.
# NOTE(review): presumably the padding token id — confirm against the tokenizer.
MASKED_TOKEN_ID = 1

# Import the tokenizer and the model
# (trust_remote_code=True executes the Python code shipped in the model repo.)
tokenizer = AutoTokenizer.from_pretrained("isoformer-anonymous/Isoformer", trust_remote_code=True)
model = AutoModelForMaskedLM.from_pretrained("isoformer-anonymous/Isoformer", trust_remote_code=True)

# Toy inputs: repeated motifs for protein and RNA, and a seeded random DNA
# string so that the example is reproducible.
protein_sequences = ["RSRSRSRSRSRSRSRSRSRSRL" * 9]
rna_sequences = ["ATTCCGGTTTTCA" * 9]
# NOTE(review): presumably the DNA input length the model expects — confirm.
sequence_length = 196_608
rng = np.random.default_rng(seed=0)
dna_sequences = ["".join(rng.choice(list("ATCGN"), size=(sequence_length,)))]

# The tokenizer result is indexed positionally: 0 = DNA, 1 = RNA, 2 = protein.
torch_tokens = tokenizer(
    dna_input=dna_sequences, rna_input=rna_sequences, protein_input=protein_sequences
)
dna_torch_tokens = torch.tensor(torch_tokens[0]["input_ids"])
rna_torch_tokens = torch.tensor(torch_tokens[1]["input_ids"])
protein_torch_tokens = torch.tensor(torch_tokens[2]["input_ids"])

# Inference only: disable gradient tracking so no autograd graph is retained
# for the very long DNA input, and call the module itself rather than
# `.forward` so that registered hooks run as usual.
with torch.no_grad():
    torch_output = model(
        tensor_dna=dna_torch_tokens,
        tensor_rna=rna_torch_tokens,
        tensor_protein=protein_torch_tokens,
        attention_mask_rna=rna_torch_tokens != MASKED_TOKEN_ID,
        attention_mask_protein=protein_torch_tokens != MASKED_TOKEN_ID,
    )

print(f"Gene expression predictions: {torch_output['gene_expression_predictions']}")
print(f"Final DNA embedding: {torch_output['final_dna_embeddings']}")