ligeti committed on
Commit
c2af699
·
verified ·
1 Parent(s): ab722a4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +10 -17
README.md CHANGED
@@ -23,24 +23,17 @@ ProkBERT-mini-long (also prokbert-mini-k6s2) is part of the ProkBERT family of g
23
  The following example demonstrates how to use the ProkBERT-mini-long model for processing a DNA sequence:
24
 
25
  ```python
26
- from transformers import MegatronBertForMaskedLM
27
- from prokbert.prokbert_tokenizer import ProkBERTTokenizer
28
 
29
- # Tokenization parameters
30
- tokenization_parameters = {
31
- 'kmer': 6,
32
- 'shift': 2
33
- }
34
- # Initialize the tokenizer and model
35
- tokenizer = ProkBERTTokenizer(tokenization_params=tokenization_parameters, operation_space='sequence')
36
- model = MegatronBertForMaskedLM.from_pretrained("neuralbioinfo/prokbert-mini-long")
37
- # Example DNA sequence
38
- sequence = 'ATGTCCGCGGGACCT'
39
- # Tokenize the sequence
40
- inputs = tokenizer(sequence, return_tensors="pt")
41
- # Ensure that inputs have a batch dimension
42
- inputs = {key: value.unsqueeze(0) for key, value in inputs.items()}
43
- # Generate outputs from the model
44
  outputs = model(**inputs)
45
  ```
46
 
 
23
  The following example demonstrates how to use the ProkBERT-mini-long model for processing a DNA sequence:
24
 
25
  ```python
26
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
 
27
 
28
+ tokenizer = AutoTokenizer.from_pretrained("neuralbioinfo/prokbert-mini-long", trust_remote_code=True)
29
+ model = AutoModelForMaskedLM.from_pretrained("neuralbioinfo/prokbert-mini-long", trust_remote_code=True)
30
+
31
+ segment = "ATGTCCGCGGGACCT"
32
+
33
+ # Tokenize the input and return as PyTorch tensors
34
+ inputs = tokenizer(segment, return_tensors="pt")
35
+
36
+ # Pass the tokenized input to the model
 
 
 
 
 
 
37
  outputs = model(**inputs)
38
  ```
39