jfaustin committed on
Commit
99ed182
·
1 Parent(s): b21645a

better describe model

Browse files
folding_studio_demo/app.py CHANGED
@@ -13,7 +13,8 @@ from folding_studio_demo.correlate import (
13
  fake_predict_and_correlate,
14
  make_regression_plot,
15
  compute_correlation_data,
16
- plot_correlation_ranking
 
17
  )
18
  from folding_studio_demo.predict import predict, predict_comparison
19
 
@@ -235,32 +236,6 @@ def create_correlation_tab():
235
  with gr.Row():
236
  log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
237
  with gr.Row():
238
- def get_score_description(score: str) -> str:
239
- descriptions = {
240
- "Boltz Confidence Score": "The Boltz confidence score provides an overall assessment of prediction quality (0-1, higher is better).",
241
- "Boltz pTM Score": "The Boltz predicted TM-score (pTM) assesses the overall fold accuracy of the predicted structure (0-1, higher is better).",
242
- "Boltz ipTM Score": "The Boltz interface pTM score (ipTM) specifically evaluates the accuracy of interface regions (0-1, higher is better).",
243
- "Boltz Complex pLDDT": "The Boltz Complex pLDDT measures confidence in local structure predictions across the entire complex (0-100, higher is better).",
244
- "Boltz Complex ipLDDT": "The Boltz Complex interface pLDDT (ipLDDT) focuses on confidence in interface region predictions (0-100, higher is better).",
245
- "Boltz Complex pDE": "The Boltz Complex predicted distance error (pDE) estimates the confidence in predicted distances between residues (0-1, higher is better).",
246
- "Boltz Complex ipDE": "The Boltz Complex interface pDE (ipDE) estimates confidence in predicted distances specifically at interfaces (0-1, higher is better).",
247
- "Monomer Interchain PAE": "The monomer interchain predicted aligned error (PAE) estimates position errors between chains in monomeric predictions (lower is better).",
248
- "Monomer Interface PAE": "The monomer interface PAE estimates position errors specifically at interfaces in monomeric predictions (lower is better).",
249
- "Monomer Overall PAE": "The monomer overall PAE estimates position errors across the entire structure in monomeric predictions (lower is better).",
250
- "Monomer Interface pLDDT": "The monomer interface pLDDT measures confidence in interface region predictions for monomeric models (0-100, higher is better).",
251
- "Monomer Average pLDDT": "The monomer average pLDDT provides the mean confidence across all residues in monomeric predictions (0-100, higher is better).",
252
- "Monomer pTM Score": "The monomer pTM score assesses overall fold accuracy in monomeric predictions (0-1, higher is better).",
253
- "Monomer Interface pTM": "The monomer interface pTM specifically evaluates accuracy of interface regions in monomeric predictions (0-1, higher is better).",
254
- "Multimer Interchain PAE": "The multimer interchain PAE estimates position errors between chains in multimeric predictions (lower is better).",
255
- "Multimer Interface PAE": "The multimer interface PAE estimates position errors specifically at interfaces in multimeric predictions (lower is better).",
256
- "Multimer Overall PAE": "The multimer overall PAE estimates position errors across the entire structure in multimeric predictions (lower is better).",
257
- "Multimer Interface pLDDT": "The multimer interface pLDDT measures confidence in interface region predictions for multimeric models (0-100, higher is better).",
258
- "Multimer Average pLDDT": "The multimer average pLDDT provides the mean confidence across all residues in multimeric predictions (0-100, higher is better).",
259
- "Multimer pTM Score": "The multimer pTM score assesses overall fold accuracy in multimeric predictions (0-1, higher is better).",
260
- "Multimer Interface pTM": "The multimer interface pTM specifically evaluates accuracy of interface regions in multimeric predictions (0-1, higher is better)."
261
- }
262
- return descriptions.get(score, "No description available for this score.")
263
-
264
  score_description = gr.Markdown(get_score_description(correlation_column.value))
265
  correlation_column.change(
266
  fn=lambda x: get_score_description(x),
 
13
  fake_predict_and_correlate,
14
  make_regression_plot,
15
  compute_correlation_data,
16
+ plot_correlation_ranking,
17
+ get_score_description
18
  )
19
  from folding_studio_demo.predict import predict, predict_comparison
20
 
 
236
  with gr.Row():
237
  log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
238
  with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  score_description = gr.Markdown(get_score_description(correlation_column.value))
240
  correlation_column.change(
241
  fn=lambda x: get_score_description(x),
folding_studio_demo/correlate.py CHANGED
@@ -15,24 +15,50 @@ SCORE_COLUMN_NAMES = {
15
  "complex_iplddt_boltz": "Boltz Complex ipLDDT",
16
  "complex_pde_boltz": "Boltz Complex pDE",
17
  "complex_ipde_boltz": "Boltz Complex ipDE",
18
- "interchain_pae_monomer": "Monomer Interchain PAE",
19
- "interface_pae_monomer": "Monomer Interface PAE",
20
- "overall_pae_monomer": "Monomer Overall PAE",
21
- "interface_plddt_monomer": "Monomer Interface pLDDT",
22
- "average_plddt_monomer": "Monomer Average pLDDT",
23
- "ptm_monomer": "Monomer pTM Score",
24
- "interface_ptm_monomer": "Monomer Interface pTM",
25
- "interchain_pae_multimer": "Multimer Interchain PAE",
26
- "interface_pae_multimer": "Multimer Interface PAE",
27
- "overall_pae_multimer": "Multimer Overall PAE",
28
- "interface_plddt_multimer": "Multimer Interface pLDDT",
29
- "average_plddt_multimer": "Multimer Average pLDDT",
30
- "ptm_multimer": "Multimer pTM Score",
31
- "interface_ptm_multimer": "Multimer Interface pTM"
32
  }
33
 
34
  SCORE_COLUMNS = list(SCORE_COLUMN_NAMES.values())
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def compute_correlation_data(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> pd.DataFrame:
37
  corr_data_file = Path("corr_data.csv")
38
  if corr_data_file.exists():
 
15
  "complex_iplddt_boltz": "Boltz Complex ipLDDT",
16
  "complex_pde_boltz": "Boltz Complex pDE",
17
  "complex_ipde_boltz": "Boltz Complex ipDE",
18
+ "interchain_pae_monomer": "AlphaFold2 GapTrick Interchain PAE",
19
+ "interface_pae_monomer": "AlphaFold2 GapTrick Interface PAE",
20
+ "overall_pae_monomer": "AlphaFold2 GapTrick Overall PAE",
21
+ "interface_plddt_monomer": "AlphaFold2 GapTrick Interface pLDDT",
22
+ "average_plddt_monomer": "AlphaFold2 GapTrick Average pLDDT",
23
+ "ptm_monomer": "AlphaFold2 GapTrick pTM Score",
24
+ "interface_ptm_monomer": "AlphaFold2 GapTrick Interface pTM",
25
+ "interchain_pae_multimer": "AlphaFold2 Multimer Interchain PAE",
26
+ "interface_pae_multimer": "AlphaFold2 Multimer Interface PAE",
27
+ "overall_pae_multimer": "AlphaFold2 Multimer Overall PAE",
28
+ "interface_plddt_multimer": "AlphaFold2 Multimer Interface pLDDT",
29
+ "average_plddt_multimer": "AlphaFold2 Multimer Average pLDDT",
30
+ "ptm_multimer": "AlphaFold2 Multimer pTM Score",
31
+ "interface_ptm_multimer": "AlphaFold2 Multimer Interface pTM"
32
  }
33
 
34
  SCORE_COLUMNS = list(SCORE_COLUMN_NAMES.values())
35
 
36
# Human-readable explanation for each score, keyed by the score's display name
# (the same strings used as values in SCORE_COLUMN_NAMES). Every key must be
# unique: in a dict literal a repeated key silently overwrites the earlier
# entry, which would leave one score without a description.
_SCORE_DESCRIPTIONS = {
    "Boltz Confidence Score": "The Boltz model confidence score provides an overall assessment of prediction quality (0-1, higher is better).",
    "Boltz pTM Score": "The Boltz model predicted TM-score (pTM) assesses the overall fold accuracy of the predicted structure (0-1, higher is better).",
    "Boltz ipTM Score": "The Boltz model interface pTM score (ipTM) specifically evaluates the accuracy of interface regions (0-1, higher is better).",
    "Boltz Complex pLDDT": "The Boltz model Complex pLDDT measures confidence in local structure predictions across the entire complex (0-100, higher is better).",
    "Boltz Complex ipLDDT": "The Boltz model Complex interface pLDDT (ipLDDT) focuses on confidence in interface region predictions (0-100, higher is better).",
    "Boltz Complex pDE": "The Boltz model Complex predicted distance error (pDE) estimates the confidence in predicted distances between residues (0-1, higher is better).",
    "Boltz Complex ipDE": "The Boltz model Complex interface pDE (ipDE) estimates confidence in predicted distances specifically at interfaces (0-1, higher is better).",
    "AlphaFold2 GapTrick Interchain PAE": "The AlphaFold2 GapTrick model interchain predicted aligned error (PAE) estimates position errors between chains in monomeric predictions (lower is better).",
    "AlphaFold2 GapTrick Interface PAE": "The AlphaFold2 GapTrick model interface PAE estimates position errors specifically at interfaces in monomeric predictions (lower is better).",
    "AlphaFold2 GapTrick Overall PAE": "The AlphaFold2 GapTrick model overall PAE estimates position errors across the entire structure in monomeric predictions (lower is better).",
    "AlphaFold2 GapTrick Interface pLDDT": "The AlphaFold2 GapTrick model interface pLDDT measures confidence in interface region predictions for monomeric models (0-100, higher is better).",
    "AlphaFold2 GapTrick Average pLDDT": "The AlphaFold2 GapTrick model average pLDDT provides the mean confidence across all residues in monomeric predictions (0-100, higher is better).",
    "AlphaFold2 GapTrick pTM Score": "The AlphaFold2 GapTrick model pTM score assesses overall fold accuracy in monomeric predictions (0-1, higher is better).",
    "AlphaFold2 GapTrick Interface pTM": "The AlphaFold2 GapTrick model interface pTM specifically evaluates accuracy of interface regions in monomeric predictions (0-1, higher is better).",
    "AlphaFold2 Multimer Interchain PAE": "The AlphaFold2 Multimer model interchain PAE estimates position errors between chains in multimeric predictions (lower is better).",
    "AlphaFold2 Multimer Interface PAE": "The AlphaFold2 Multimer model interface PAE estimates position errors specifically at interfaces in multimeric predictions (lower is better).",
    "AlphaFold2 Multimer Overall PAE": "The AlphaFold2 Multimer model overall PAE estimates position errors across the entire structure in multimeric predictions (lower is better).",
    "AlphaFold2 Multimer Interface pLDDT": "The AlphaFold2 Multimer model interface pLDDT measures confidence in interface region predictions for multimeric models (0-100, higher is better).",
    "AlphaFold2 Multimer Average pLDDT": "The AlphaFold2 Multimer model average pLDDT provides the mean confidence across all residues in multimeric predictions (0-100, higher is better).",
    "AlphaFold2 Multimer pTM Score": "The AlphaFold2 Multimer model pTM score assesses overall fold accuracy in multimeric predictions (0-1, higher is better).",
    "AlphaFold2 Multimer Interface pTM": "The AlphaFold2 Multimer model interface pTM specifically evaluates accuracy of interface regions in multimeric predictions (0-1, higher is better).",
}


def get_score_description(score: str) -> str:
    """Return the human-readable description of a score.

    Args:
        score: Display name of the score, e.g. ``"Boltz pTM Score"`` or
            ``"AlphaFold2 Multimer Interchain PAE"``.

    Returns:
        The description registered for ``score``, or the fallback text
        ``"No description available for this score."`` when the name is
        not known.
    """
    return _SCORE_DESCRIPTIONS.get(score, "No description available for this score.")
61
+
62
  def compute_correlation_data(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> pd.DataFrame:
63
  corr_data_file = Path("corr_data.csv")
64
  if corr_data_file.exists():