samsl committed on
Commit
a70187d
·
1 Parent(s): 8465e5c

fix dependencies and update versions

Browse files
Files changed (2) hide show
  1. app.py +10 -13
  2. requirements.txt +8 -7
app.py CHANGED
@@ -1,20 +1,17 @@
1
- import time
2
-
3
  import gradio as gr
4
  import pandas as pd
5
  import torch
6
  from pathlib import Path
7
- from Bio import SeqIO
8
- from dscript.pretrained import get_pretrained
9
  from dscript.language_model import lm_embed
10
- from tqdm.auto import tqdm
11
  from uuid import uuid4
12
  from predict_3di import get_3di_sequences, predictions_to_dict, one_hot_3di_sequence
13
 
14
  model_map = {
15
- "D-SCRIPT": "human_v1",
16
- "Topsy-Turvy": "human_v2",
17
- "TT3D": "human_tt3d",
18
  }
19
 
20
  theme = "Default"
@@ -91,11 +88,11 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
91
  # gr.Info("Loading model...")
92
  _ = lm_embed("M", use_cuda = (device.type == "cuda"))
93
 
94
- model = get_pretrained(model_map[model_name]).to(device)
95
 
96
  # gr.Info("Loading files...")
97
  try:
98
- seqs = SeqIO.to_dict(SeqIO.parse(sequence_file.name, "fasta"))
99
  except ValueError as e:
100
  print(e)
101
  raise gr.Error("Invalid FASTA file - duplicate entry")
@@ -115,7 +112,7 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
115
  do_foldseek = True
116
 
117
  need_to_translate = set(pairs["protein1"]).union(set(pairs["protein2"]))
118
- seqs_to_translate = {k: str(seqs[k].seq) for k in need_to_translate if k in seqs}
119
 
120
  half_precision = False
121
  assert not (half_precision and device=="cpu"), print("Running fp16 on CPU is not supported, yet")
@@ -147,8 +144,8 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
147
  prot1 = r["protein1"]
148
  prot2 = r["protein2"]
149
 
150
- seq1 = str(seqs[prot1].seq)
151
- seq2 = str(seqs[prot2].seq)
152
 
153
  fold1 = foldseek_embeddings[prot1].to(device) if do_foldseek else None
154
  fold2 = foldseek_embeddings[prot2].to(device) if do_foldseek else None
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import torch
4
  from pathlib import Path
5
+ from biotite.sequence.io import fasta
6
+ from dscript.models.interaction import DSCRIPTModel
7
  from dscript.language_model import lm_embed
 
8
  from uuid import uuid4
9
  from predict_3di import get_3di_sequences, predictions_to_dict, one_hot_3di_sequence
10
 
11
  model_map = {
12
+ "D-SCRIPT": "samsl/dscript_human_v1",
13
+ "Topsy-Turvy": "samsl/topsy_turvy_human_v1",
14
+ "TT3D": "samsl/tt3d_human_v1",
15
  }
16
 
17
  theme = "Default"
 
88
  # gr.Info("Loading model...")
89
  _ = lm_embed("M", use_cuda = (device.type == "cuda"))
90
 
91
+ model = DSCRIPTModel.from_pretrained(model_map[model_name], use_cuda=torch.cuda.is_available())
92
 
93
  # gr.Info("Loading files...")
94
  try:
95
+ seqs = fasta.get_sequences(fasta.FastaFile.read(sequence_file))
96
  except ValueError as e:
97
  print(e)
98
  raise gr.Error("Invalid FASTA file - duplicate entry")
 
112
  do_foldseek = True
113
 
114
  need_to_translate = set(pairs["protein1"]).union(set(pairs["protein2"]))
115
+ seqs_to_translate = {k: str(seqs[k]) for k in need_to_translate if k in seqs}
116
 
117
  half_precision = False
118
  assert not (half_precision and device=="cpu"), print("Running fp16 on CPU is not supported, yet")
 
144
  prot1 = r["protein1"]
145
  prot2 = r["protein2"]
146
 
147
+ seq1 = str(seqs[prot1])
148
+ seq2 = str(seqs[prot2])
149
 
150
  fold1 = foldseek_embeddings[prot1].to(device) if do_foldseek else None
151
  fold2 = foldseek_embeddings[prot2].to(device) if do_foldseek else None
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- dscript>=0.2.6
2
- biopython
3
- pandas
4
- tqdm
5
- transformers
6
- sentencepiece
7
- protobuf
 
 
1
+ dscript>=0.3.0
2
+ pandas==1.5.3
3
+ tqdm==4.65.0
4
+ transformers==4.30.2
5
+ gradio==4.44.1
6
+ pydantic==2.3.0
7
+ biotite==1.4.0
8
+ numpy==1.26.4