ThorbenF committed on
Commit
a2460df
·
1 Parent(s): 4ed9ef0

Update requirements and make necessary code changes

Browse files
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -39,8 +39,9 @@ from scipy.special import expit
39
  #from peft import get_peft_config, PeftModel, PeftConfig, inject_adapter_in_model, LoraConfig
40
 
41
  checkpoint='ThorbenF/prot_t5_xl_uniref50'
 
42
 
43
- model, tokenizer = load_model(checkpoint)
44
 
45
  def create_dataset(tokenizer,seqs,labels,checkpoint):
46
 
@@ -80,7 +81,7 @@ def predict_protein_sequence(test_one_letter_sequence):
80
  test_one_letter_sequence = test_one_letter_sequence.replace("O", "X").replace("B", "X").replace("U", "X").replace("Z", "X").replace("J", "X")
81
 
82
  # Add spaces between each amino acid for ProtT5 and ProstT5 models
83
- if "prot_t5" in checkpoint:
84
  test_one_letter_sequence = " ".join(test_one_letter_sequence)
85
 
86
  # Add <AA2fold> for ProstT5 model input format
 
39
  #from peft import get_peft_config, PeftModel, PeftConfig, inject_adapter_in_model, LoraConfig
40
 
41
  checkpoint='ThorbenF/prot_t5_xl_uniref50'
42
+ max_length=1500
43
 
44
+ model, tokenizer = load_model(checkpoint,max_length)
45
 
46
  def create_dataset(tokenizer,seqs,labels,checkpoint):
47
 
 
81
  test_one_letter_sequence = test_one_letter_sequence.replace("O", "X").replace("B", "X").replace("U", "X").replace("Z", "X").replace("J", "X")
82
 
83
  # Add spaces between each amino acid for ProtT5 and ProstT5 models
84
+ if ("prot_t5" in checkpoint) or ("ProstT5" in checkpoint):
85
  test_one_letter_sequence = " ".join(test_one_letter_sequence)
86
 
87
  # Add <AA2fold> for ProstT5 model input format
.ipynb_checkpoints/model_loader-checkpoint.py CHANGED
@@ -613,7 +613,7 @@ def load_esm_model_classification(checkpoint, num_labels, half_precision, full=F
613
 
614
  return model, tokenizer
615
 
616
- def load_model(checkpoint):
617
  #checkpoint='ThorbenF/prot_t5_xl_uniref50'
618
  #best_model_path='ThorbenF/prot_t5_xl_uniref50/cpt.pth'
619
  full=False
 
613
 
614
  return model, tokenizer
615
 
616
+ def load_model(checkpoint,max_length):
617
  #checkpoint='ThorbenF/prot_t5_xl_uniref50'
618
  #best_model_path='ThorbenF/prot_t5_xl_uniref50/cpt.pth'
619
  full=False
app.py CHANGED
@@ -39,8 +39,9 @@ from scipy.special import expit
39
  #from peft import get_peft_config, PeftModel, PeftConfig, inject_adapter_in_model, LoraConfig
40
 
41
  checkpoint='ThorbenF/prot_t5_xl_uniref50'
 
42
 
43
- model, tokenizer = load_model(checkpoint)
44
 
45
  def create_dataset(tokenizer,seqs,labels,checkpoint):
46
 
@@ -80,7 +81,7 @@ def predict_protein_sequence(test_one_letter_sequence):
80
  test_one_letter_sequence = test_one_letter_sequence.replace("O", "X").replace("B", "X").replace("U", "X").replace("Z", "X").replace("J", "X")
81
 
82
  # Add spaces between each amino acid for ProtT5 and ProstT5 models
83
- if "prot_t5" in checkpoint:
84
  test_one_letter_sequence = " ".join(test_one_letter_sequence)
85
 
86
  # Add <AA2fold> for ProstT5 model input format
 
39
  #from peft import get_peft_config, PeftModel, PeftConfig, inject_adapter_in_model, LoraConfig
40
 
41
  checkpoint='ThorbenF/prot_t5_xl_uniref50'
42
+ max_length=1500
43
 
44
+ model, tokenizer = load_model(checkpoint,max_length)
45
 
46
  def create_dataset(tokenizer,seqs,labels,checkpoint):
47
 
 
81
  test_one_letter_sequence = test_one_letter_sequence.replace("O", "X").replace("B", "X").replace("U", "X").replace("Z", "X").replace("J", "X")
82
 
83
  # Add spaces between each amino acid for ProtT5 and ProstT5 models
84
+ if ("prot_t5" in checkpoint) or ("ProstT5" in checkpoint):
85
  test_one_letter_sequence = " ".join(test_one_letter_sequence)
86
 
87
  # Add <AA2fold> for ProstT5 model input format
model_loader.py CHANGED
@@ -613,7 +613,7 @@ def load_esm_model_classification(checkpoint, num_labels, half_precision, full=F
613
 
614
  return model, tokenizer
615
 
616
- def load_model(checkpoint):
617
  #checkpoint='ThorbenF/prot_t5_xl_uniref50'
618
  #best_model_path='ThorbenF/prot_t5_xl_uniref50/cpt.pth'
619
  full=False
 
613
 
614
  return model, tokenizer
615
 
616
+ def load_model(checkpoint,max_length):
617
  #checkpoint='ThorbenF/prot_t5_xl_uniref50'
618
  #best_model_path='ThorbenF/prot_t5_xl_uniref50/cpt.pth'
619
  full=False