Spaces:

samsl
/

D-SCRIPT

Sleeping

App Files Files Community

samsl committed on 9 days ago

Commit

a70187d

1 Parent(s): 8465e5c

fix dependencies and update versions

Browse files

Files changed (2) hide show

app.py +10 -13
requirements.txt +8 -7

app.py CHANGED Viewed

@@ -1,20 +1,17 @@
-import time
 import gradio as gr
 import pandas as pd
 import torch
 from pathlib import Path
-from Bio import SeqIO
-from dscript.pretrained import get_pretrained
 from dscript.language_model import lm_embed
-from tqdm.auto import tqdm
 from uuid import uuid4
 from predict_3di import get_3di_sequences, predictions_to_dict, one_hot_3di_sequence
 model_map = {
-    "D-SCRIPT": "human_v1",
-    "Topsy-Turvy": "human_v2",
-    "TT3D": "human_tt3d",
 }
 theme = "Default"
@@ -91,11 +88,11 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
         # gr.Info("Loading model...")
         _ = lm_embed("M", use_cuda = (device.type == "cuda"))
-        model = get_pretrained(model_map[model_name]).to(device)
         # gr.Info("Loading files...")
         try:
-            seqs = SeqIO.to_dict(SeqIO.parse(sequence_file.name, "fasta"))
         except ValueError as e:
             print(e)
             raise gr.Error("Invalid FASTA file - duplicate entry")
@@ -115,7 +112,7 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
             do_foldseek = True
             need_to_translate = set(pairs["protein1"]).union(set(pairs["protein2"]))
-            seqs_to_translate = {k: str(seqs[k].seq) for k in need_to_translate if k in seqs}
             half_precision = False
             assert not (half_precision and device=="cpu"), print("Running fp16 on CPU is not supported, yet")
@@ -147,8 +144,8 @@ def predict(model_name, pairs_file, sequence_file, progress = gr.Progress()):
             prot1 = r["protein1"]
             prot2 = r["protein2"]
-            seq1 = str(seqs[prot1].seq)
-            seq2 = str(seqs[prot2].seq)
             fold1 = foldseek_embeddings[prot1].to(device) if do_foldseek else None
             fold2 = foldseek_embeddings[prot2].to(device) if do_foldseek else None

 import gradio as gr
 import pandas as pd
 import torch
 from pathlib import Path
+from biotite.sequence.io import fasta
+from dscript.models.interaction import DSCRIPTModel
 from dscript.language_model import lm_embed
 from uuid import uuid4
 from predict_3di import get_3di_sequences, predictions_to_dict, one_hot_3di_sequence
 model_map = {
+    "D-SCRIPT": "samsl/dscript_human_v1",
+    "Topsy-Turvy": "samsl/topsy_turvy_human_v1",
+    "TT3D": "samsl/tt3d_human_v1",
 }
 theme = "Default"
         # gr.Info("Loading model...")
         _ = lm_embed("M", use_cuda = (device.type == "cuda"))
+        model = DSCRIPTModel.from_pretrained(model_map[model_name], use_cuda=torch.cuda.is_available())
         # gr.Info("Loading files...")
         try:
+           seqs = fasta.get_sequences(fasta.FastaFile.read(sequence_file))
         except ValueError as e:
             print(e)
             raise gr.Error("Invalid FASTA file - duplicate entry")
             do_foldseek = True
             need_to_translate = set(pairs["protein1"]).union(set(pairs["protein2"]))
+            seqs_to_translate = {k: str(seqs[k]) for k in need_to_translate if k in seqs}
             half_precision = False
             assert not (half_precision and device=="cpu"), print("Running fp16 on CPU is not supported, yet")
             prot1 = r["protein1"]
             prot2 = r["protein2"]
+            seq1 = str(seqs[prot1])
+            seq2 = str(seqs[prot2])
             fold1 = foldseek_embeddings[prot1].to(device) if do_foldseek else None
             fold2 = foldseek_embeddings[prot2].to(device) if do_foldseek else None

requirements.txt CHANGED Viewed

@@ -1,7 +1,8 @@
-dscript>=0.2.6
-biopython
-pandas
-tqdm
-transformers
-sentencepiece
-protobuf

+dscript>=0.3.0
+pandas==1.5.3
+tqdm==4.65.0
+transformers==4.30.2
+gradio==4.44.1
+pydantic==2.3.0
+biotite==1.4.0
+numpy==1.26.4