Spaces:

ThorbenF
/

test_webpage

Running

App Files Community

ThorbenF committed on Dec 3, 2024

Commit

8bef2d8

1 Parent(s): 1f960e0

Update

Browse files

Files changed (2) hide show

.ipynb_checkpoints/app-checkpoint.py +22 -5
app.py +22 -5

.ipynb_checkpoints/app-checkpoint.py CHANGED Viewed

@@ -40,8 +40,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
 model.eval()
-reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
 # Function to fetch a PDB file
 def fetch_pdb(pdb_id):
     pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
@@ -60,7 +58,6 @@ def normalize_scores(scores):
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
-# Extract sequence and predict binding scores
 def process_pdb(pdb_id, segment):
     pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
@@ -70,25 +67,45 @@ def process_pdb(pdb_id, segment):
     structure = parser.get_structure('protein', pdb_path)
     chain = structure[0][segment]
-    sequence = "".join(residue.get_resname().strip() for residue in chain)
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
     result_str = "\n".join([
         f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
-        for i, res in enumerate(chain)
     ])
     with open(f"{pdb_id}_predictions.txt", "w") as f:
         f.write(result_str)
     return result_str, pdb_path, f"{pdb_id}_predictions.txt"
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")

 model.to(device)
 model.eval()
 # Function to fetch a PDB file
 def fetch_pdb(pdb_id):
     pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def process_pdb(pdb_id, segment):
     pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
     structure = parser.get_structure('protein', pdb_path)
     chain = structure[0][segment]
+    # Comprehensive amino acid mapping
+    aa_dict = {
+        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
+        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
+        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
+        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
+        'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
+    }
+    # Exclude non-amino acid residues
+    sequence = "".join(
+        aa_dict[residue.get_resname().strip()]
+        for residue in chain
+        if residue.get_resname().strip() in aa_dict
+    )
+    # Prepare input for model prediction
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
+    # Calculate scores and normalize them
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
+    # Prepare the result string, including only amino acid residues
     result_str = "\n".join([
         f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
+        for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
     ])
+    # Save predictions to file
     with open(f"{pdb_id}_predictions.txt", "w") as f:
         f.write(result_str)
     return result_str, pdb_path, f"{pdb_id}_predictions.txt"
+reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")

app.py CHANGED Viewed

@@ -40,8 +40,6 @@ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model.to(device)
 model.eval()
-reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
 # Function to fetch a PDB file
 def fetch_pdb(pdb_id):
     pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
@@ -60,7 +58,6 @@ def normalize_scores(scores):
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
-# Extract sequence and predict binding scores
 def process_pdb(pdb_id, segment):
     pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
@@ -70,25 +67,45 @@ def process_pdb(pdb_id, segment):
     structure = parser.get_structure('protein', pdb_path)
     chain = structure[0][segment]
-    sequence = "".join(residue.get_resname().strip() for residue in chain)
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
     result_str = "\n".join([
         f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
-        for i, res in enumerate(chain)
     ])
     with open(f"{pdb_id}_predictions.txt", "w") as f:
         f.write(result_str)
     return result_str, pdb_path, f"{pdb_id}_predictions.txt"
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")

 model.to(device)
 model.eval()
 # Function to fetch a PDB file
 def fetch_pdb(pdb_id):
     pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def process_pdb(pdb_id, segment):
     pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
     structure = parser.get_structure('protein', pdb_path)
     chain = structure[0][segment]
+    # Comprehensive amino acid mapping
+    aa_dict = {
+        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
+        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
+        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
+        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
+        'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
+    }
+    # Exclude non-amino acid residues
+    sequence = "".join(
+        aa_dict[residue.get_resname().strip()]
+        for residue in chain
+        if residue.get_resname().strip() in aa_dict
+    )
+    # Prepare input for model prediction
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
+    # Calculate scores and normalize them
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
+    # Prepare the result string, including only amino acid residues
     result_str = "\n".join([
         f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
+        for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
     ])
+    # Save predictions to file
     with open(f"{pdb_id}_predictions.txt", "w") as f:
         f.write(result_str)
     return result_str, pdb_path, f"{pdb_id}_predictions.txt"
+reps = [{"model": 0, "style": "cartoon", "color": "spectrum"}]
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")