Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Files Community

folding-studio-demo / secretion_scores.py

jfaustin

secretion-scores (#4)

a3f3d91 verified 6 days ago

raw

history blame contribute delete

2.87 kB

	from __future__ import annotations
	import pathlib
	import pyrosetta
	from pyrosetta.rosetta.protocols.protein_interface_design import movers

	import aggrescan

	# Initialise PyRosetta once, as an import-time side effect of this module,
	# before any of the scoring functions below load a pose.
	pyrosetta.init()

	def get_sap_scores(structure_path: str | pathlib.Path) -> list[float]:
	    """Calculate per-residue SAP scores.

	    The structure is loaded into a pose and ``calculate_per_res_sap`` is
	    called with the same all-residue selector for each of its three
	    selector arguments.

	    Args:
	        structure_path: Path to the structure file.

	    Returns:
	        List of per-residue SAP scores, in the order PyRosetta yields them.
	    """
	    pose = pyrosetta.pose_from_file(str(structure_path))
	    # One select-everything selector is reused for all three selector slots.
	    select_all = pyrosetta.rosetta.core.select.residue_selector.TrueResidueSelector()
	    sap = pyrosetta.rosetta.core.pack.guidance_scoreterms.sap
	    return list(sap.calculate_per_res_sap(pose, select_all, select_all, select_all))


	def get_dG_ins_pred_scores(structure_path: str | pathlib.Path) -> list[float]:
	    """Calculate per-residue dG_ins_pred scores.

	    For every residue of every chain, a sub-sequence centred on that residue
	    is scored with ``SecretionOptimizationMover.dG_ins_for_window``. Away
	    from the chain ends the window spans 19 residues (9 on each side of the
	    residue); near either end it is cut off at the chain boundary and is
	    therefore shorter.

	    Args:
	        structure_path: Path to the structure file.

	    Returns:
	        List of per-residue dG_ins scores, one chain after another in the
	        order the chains appear in the pose.
	    """
	    pose = pyrosetta.pose_from_file(str(structure_path))
	    secoptmover = movers.SecretionOptimizationMover()
	    max_window_size = 19
	    half_window = max_window_size // 2
	    per_res_scores = []

	    # Chain ids are iterated from 1 up to and including num_chains().
	    for chain_id in range(1, pose.num_chains() + 1):
	        sequence = pose.chain_sequence(chain_id)
	        seq_len = len(sequence)
	        for i in range(seq_len):
	            # Clamp the window to the chain so that chains shorter than the
	            # full window are handled explicitly rather than by slice clamping.
	            start = max(0, i - half_window)
	            end = min(seq_len, i + half_window + 1)
	            per_res_scores.append(secoptmover.dG_ins_for_window(sequence[start:end]))

	    return per_res_scores


	def get_aggrescan_scores(structure_path: str | pathlib.Path) -> list[float]:
	    """Calculate Aggrescan scores for a structure.

	    Args:
	        structure_path: Path to the structure file.

	    Returns:
	        List holding the ``score`` attribute of each entry returned by
	        ``aggrescan.run_aggrescan3d``, in the order returned (presumably one
	        entry per residue -- confirm against the aggrescan module).
	    """
	    # NOTE(review): the element type is assumed to be float, following the
	    # original annotation's value type; verify against the aggrescan module.
	    return [residue.score for residue in aggrescan.run_aggrescan3d(str(structure_path))]