Caleb Ellington
committed on
Commit
•
6f00a8b
1
Parent(s):
2197b40
update with major refactor
Browse files
- README.md +3 -3
- config.yaml +5 -5
README.md
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
from huggingface_hub import snapshot_download
|
5 |
from pathlib import Path
|
6 |
|
7 |
-
model_name = "genbio-ai/
|
8 |
genbio_models_path = Path.home().joinpath('genbio_models', model_name)
|
9 |
genbio_models_path.mkdir(parents=True, exist_ok=True)
|
10 |
snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
@@ -12,12 +12,12 @@ snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
|
12 |
### Load model for inference
|
13 |
```python
|
14 |
import torch
|
15 |
-
from
|
16 |
|
17 |
ckpt_path = genbio_models_path.joinpath('model.ckpt')
|
18 |
model = SequenceClassification.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
|
19 |
|
20 |
-
collated_batch = model.
|
21 |
logits = model(collated_batch)
|
22 |
print(logits)
|
23 |
print(torch.argmax(logits, dim=-1))
|
|
|
4 |
from huggingface_hub import snapshot_download
|
5 |
from pathlib import Path
|
6 |
|
7 |
+
model_name = "genbio-ai/aido_dna_7b-7b-gue-splice-reconstruction-ckpt"
|
8 |
genbio_models_path = Path.home().joinpath('genbio_models', model_name)
|
9 |
genbio_models_path.mkdir(parents=True, exist_ok=True)
|
10 |
snapshot_download(repo_id=model_name, local_dir=genbio_models_path)
|
|
|
12 |
### Load model for inference
|
13 |
```python
|
14 |
import torch
|
15 |
+
from modelgenerator.tasks import SequenceClassification
|
16 |
|
17 |
ckpt_path = genbio_models_path.joinpath('model.ckpt')
|
18 |
model = SequenceClassification.load_from_checkpoint(ckpt_path, strict_loading=False).eval()
|
19 |
|
20 |
+
collated_batch = model.transform({"sequences": ["ACGT", "AGCT"]})
|
21 |
logits = model(collated_batch)
|
22 |
print(logits)
|
23 |
print(torch.argmax(logits, dim=-1))
|
config.yaml
CHANGED
@@ -141,10 +141,10 @@ trainer:
|
|
141 |
reload_dataloaders_every_n_epochs: 0
|
142 |
default_root_dir: logs
|
143 |
model:
|
144 |
-
class_path:
|
145 |
init_args:
|
146 |
backbone:
|
147 |
-
class_path:
|
148 |
init_args:
|
149 |
from_scratch: false
|
150 |
use_peft: true
|
@@ -156,7 +156,7 @@ model:
|
|
156 |
model_init_args: null
|
157 |
max_length: 402
|
158 |
adapter:
|
159 |
-
class_path:
|
160 |
init_args:
|
161 |
pooling: mean_pooling
|
162 |
hidden_sizes:
|
@@ -181,14 +181,14 @@ model:
|
|
181 |
differentiable: false
|
182 |
fused: null
|
183 |
lr_scheduler:
|
184 |
-
class_path:
|
185 |
init_args:
|
186 |
warmup_ratio: 0.1
|
187 |
use_legacy_adapter: false
|
188 |
strict_loading: true
|
189 |
reset_optimizer_states: false
|
190 |
data:
|
191 |
-
class_path:
|
192 |
init_args:
|
193 |
hf_name: leannmlindsey/GUE
|
194 |
task: splice_reconstructed
|
|
|
141 |
reload_dataloaders_every_n_epochs: 0
|
142 |
default_root_dir: logs
|
143 |
model:
|
144 |
+
class_path: modelgenerator.tasks.SequenceClassification
|
145 |
init_args:
|
146 |
backbone:
|
147 |
+
class_path: modelgenerator.backbones.aido_dna_7b
|
148 |
init_args:
|
149 |
from_scratch: false
|
150 |
use_peft: true
|
|
|
156 |
model_init_args: null
|
157 |
max_length: 402
|
158 |
adapter:
|
159 |
+
class_path: modelgenerator.adapters.MLPPoolAdapter
|
160 |
init_args:
|
161 |
pooling: mean_pooling
|
162 |
hidden_sizes:
|
|
|
181 |
differentiable: false
|
182 |
fused: null
|
183 |
lr_scheduler:
|
184 |
+
class_path: modelgenerator.lr_schedulers.CosineWithWarmup
|
185 |
init_args:
|
186 |
warmup_ratio: 0.1
|
187 |
use_legacy_adapter: false
|
188 |
strict_loading: true
|
189 |
reset_optimizer_states: false
|
190 |
data:
|
191 |
+
class_path: modelgenerator.data.GUEClassification
|
192 |
init_args:
|
193 |
hf_name: leannmlindsey/GUE
|
194 |
task: splice_reconstructed
|