prettify scores
Browse files
- folding_studio_demo/correlate.py +28 -23
folding_studio_demo/correlate.py
CHANGED
@@ -7,29 +7,31 @@ from scipy.stats import spearmanr, pearsonr, linregress
|
|
7 |
|
8 |
logger = logging.getLogger(__name__)
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
33 |
|
34 |
def compute_correlation_data(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> pd.DataFrame:
|
35 |
corr_data_file = Path("corr_data.csv")
|
@@ -93,6 +95,9 @@ def plot_correlation_ranking(corr_data: pd.DataFrame, correlation_type: str) ->
|
|
93 |
def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
|
94 |
"""Fake predict structures of all complexes and correlate the results."""
|
95 |
|
|
|
|
|
|
|
96 |
corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
|
97 |
corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
|
98 |
|
|
|
7 |
|
8 |
logger = logging.getLogger(__name__)
|
9 |
|
10 |
+
SCORE_COLUMN_NAMES = {
|
11 |
+
"confidence_score_boltz": "Boltz Confidence Score",
|
12 |
+
"ptm_boltz": "Boltz pTM Score",
|
13 |
+
"iptm_boltz": "Boltz ipTM Score",
|
14 |
+
"complex_plddt_boltz": "Boltz Complex pLDDT",
|
15 |
+
"complex_iplddt_boltz": "Boltz Complex ipLDDT",
|
16 |
+
"complex_pde_boltz": "Boltz Complex pDE",
|
17 |
+
"complex_ipde_boltz": "Boltz Complex ipDE",
|
18 |
+
"interchain_pae_monomer": "Monomer Interchain PAE",
|
19 |
+
"interface_pae_monomer": "Monomer Interface PAE",
|
20 |
+
"overall_pae_monomer": "Monomer Overall PAE",
|
21 |
+
"interface_plddt_monomer": "Monomer Interface pLDDT",
|
22 |
+
"average_plddt_monomer": "Monomer Average pLDDT",
|
23 |
+
"ptm_monomer": "Monomer pTM Score",
|
24 |
+
"interface_ptm_monomer": "Monomer Interface pTM",
|
25 |
+
"interchain_pae_multimer": "Multimer Interchain PAE",
|
26 |
+
"interface_pae_multimer": "Multimer Interface PAE",
|
27 |
+
"overall_pae_multimer": "Multimer Overall PAE",
|
28 |
+
"interface_plddt_multimer": "Multimer Interface pLDDT",
|
29 |
+
"average_plddt_multimer": "Multimer Average pLDDT",
|
30 |
+
"ptm_multimer": "Multimer pTM Score",
|
31 |
+
"interface_ptm_multimer": "Multimer Interface pTM"
|
32 |
+
}
|
33 |
+
|
34 |
+
SCORE_COLUMNS = list(SCORE_COLUMN_NAMES.values())
|
35 |
|
36 |
def compute_correlation_data(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> pd.DataFrame:
|
37 |
corr_data_file = Path("corr_data.csv")
|
|
|
95 |
def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str], main_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
|
96 |
"""Fake predict structures of all complexes and correlate the results."""
|
97 |
|
98 |
+
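# Rename the raw score columns to their display names using SCORE_COLUMN_NAMES.
# rename() returns a new DataFrame, so the caller's frame is not modified.
# NOTE(review): after this step score_cols must contain the display names
# (e.g. SCORE_COLUMNS), not the raw keys - confirm against the caller.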
|
99 |
+
spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
|
100 |
+
|
101 |
corr_data = compute_correlation_data(spr_data_with_scores, score_cols)
|
102 |
corr_ranking_plot = plot_correlation_ranking(corr_data, "Spearman")
|
103 |
|