antichronology committed on
Commit
c767554
·
verified ·
1 Parent(s): e00d5c4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +48 -8
README.md CHANGED
@@ -38,12 +38,35 @@ The 4-track model requires only a one-hot encoded sequence of your mRNA. This re
38
  Here is example code
39
  ```
40
  # Sequence for short mRNA
41
-
 
 
 
 
 
 
 
 
42
  # One hot encode function
43
-
 
 
 
 
 
 
 
44
  # Load Orthrus
45
-
 
 
 
 
 
46
  # Generate embedding
 
 
 
47
  ```
48
 
49
  #### 6-Track Model (Recommended)
@@ -64,14 +87,31 @@ chmod +x starter_build.sh
64
 
65
  We can now generate six track encodings for any transcript!
66
  ```
67
- # import six hot encoding function
68
-
69
  # import Genome, Interval, instantiate Genome
 
 
 
 
70
 
71
  # Load Orthrus 6 track
72
-
 
 
 
 
 
73
  # Generate embedding
74
-
 
 
 
 
 
 
 
 
 
 
75
  ```
76
 
77
- Alternatively, this information can be extracted from gene pred files available for download from the UCSC Genome Browser [here](https://genome.ucsc.edu/cgi-bin/hgTables).
 
38
  Here is example code
39
  ```
40
  # Sequence for short mRNA
41
+ > seq=(
42
+ 'TCATCTGGATTATACATATTTCGCAATGAAAGAGAGGAAGAAAAGGAAGCAGCAAAATATGTGGAGGCCCA'
43
+ 'ACAAAAGAGACTAGAAGCCTTATTCACTAAAATTCAGGAGGAATTTGAAGAACATGAAGTTACTTCCTCC'
44
+ 'ACTGAAGTCTTGAACCCCCCAAAGTCATCCATGAGGGTTGGAATCAACTTCTGAAAACACAACAAAACCA'
45
+ 'TATTTACCATCACGTGCACTAACAAGACAGCAAGTTCGTGCTTTGCAAGATGGTGCAGAGCTTTATGAAG'
46
+ 'CAGTGAAGAATGCAGCAGACCCAGCTTACCTTGAGGGTTATTTCAGTGAAGAGCAGTTAAGAGCCTTGAA'
47
+ 'TAATCACAGGCAAATGTTGAATGATAAGAAACAAGCTCAGATCCAGTTGGAAATTAGGAAGGCCATGGAA'
48
+ 'TCTGCTGAACAAAAGGAACAAGGTTTATCAAGGGATGTCACAACCGTGTGGAAGTTGCGTATTGTAAGCTATTC'
49
+ )
50
  # One hot encode function
51
+ > oh = seq_to_oh(seq)
52
+ > one_hot = seq_to_oh(seq)
53
+ > one_hot = one_hot.T
54
+ > torch_one_hot = torch.tensor(one_hot, dtype=torch.float32)
55
+ > torch_one_hot = torch_one_hot.unsqueeze(0)
56
+ > print(torch_one_hot.shape)
57
+ > torch_one_hot = torch_one_hot.to(device='cuda')
58
+ > lengths = torch.tensor([torch_one_hot.shape[2]]).to(device='cuda')
59
  # Load Orthrus
60
+ > run_name="orthrus_base_4_track"
61
+ > checkpoint="epoch=18-step=20000.ckpt"
62
+ > model_repository="./models"
63
+ > model = load_model(f"{model_repository}/{run_name}", checkpoint_name=checkpoint)
64
+ > model = model.to(torch.device('cuda'))
65
+ > print(model)
66
  # Generate embedding
67
+ > reps = model.representation(torch_one_hot, lengths)
68
+ > print(reps.shape)
69
+ # torch.Size([1, 256])
70
  ```
71
 
72
  #### 6-Track Model (Recommended)
 
87
 
88
  We can now generate six track encodings for any transcript!
89
  ```
 
 
90
  # import Genome, Interval, instantiate Genome
91
+ > genome = Genome("gencode.v29")
92
+ > interval = Interval("chr7", "+", 117120016, 117120201, genome)
93
+ > genome.dna(interval)
94
+ # CTCTTATGCTCGGGTGATCC
95
 
96
  # Load Orthrus 6 track
97
+ > run_name="orthrus_large_6_track"
98
+ > checkpoint="epoch=22-step=20000.ckpt"
99
+ > model_repository="./models"
100
+ > model = load_model(f"{model_repository}/{run_name}", checkpoint_name=checkpoint)
101
+ > model = model.to(torch.device('cuda'))
102
+ > print(model)
103
  # Generate embedding
104
+ > transcripts = find_transcript_by_gene_name(genome, 'BCL2L1')
105
+ > print(transcripts)
106
+ > t = transcripts[0]
107
+ > sixt = create_six_track_encoding(t)
108
+ > sixt = torch.tensor(sixt, dtype=torch.float32)
109
+ > sixt = sixt.unsqueeze(0)
110
+ > sixt = sixt.to(device='cuda')
111
+ > lengths = torch.tensor([sixt.shape[2]]).to(device='cuda')
112
+ > embedding = model.representation(sixt, lengths)
113
+ > print(embedding.shape)
114
+ # torch.Size([1, 512])
115
  ```
116
 
117
+ Alternatively, this information can be extracted from genePred files available for download from the UCSC Genome Browser [here](https://genome.ucsc.edu/cgi-bin/hgTables).