Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Community

jfaustin committed 9 days ago

Commit

90a13ac

1 Parent(s): 03588ca

refactor

Browse files

Files changed (2) hide show

folding_studio_demo/app.py +3 -81
folding_studio_demo/correlate.py +90 -0

folding_studio_demo/app.py CHANGED Viewed

@@ -6,11 +6,9 @@ import gradio as gr
 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
 import pandas as pd
-import numpy as np
-from scipy.stats import spearmanr
-import plotly.graph_objects as go
 from folding_studio_demo.predict import predict
 logger = logging.getLogger(__name__)
@@ -144,59 +142,6 @@ def model_comparison(api_key: str) -> None:
         outputs=[mol_output, metrics_plot],
     )
-def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
-    """Fake predict structures of all complexes and correlate the results."""
-    corr_data = []
-    spr_data_with_scores["log_kd"] = np.log10(spr_data_with_scores["KD (nM)"])
-    kd_col = "KD (nM)"
-    for score_col in score_cols:
-        logger.info(f"Computing correlation between {score_col} and KD (nM)")
-        res = spearmanr(spr_data_with_scores[kd_col], spr_data_with_scores[score_col])
-        corr_data.append({"score": score_col, "correlation": res.statistic, "p-value": res.pvalue})
-        logger.info(f"Correlation between {score_col} and KD (nM): {res.statistic}")
-    corr_data = pd.DataFrame(corr_data)
-    # Find the lines in corr_data with NaN values and remove them
-    corr_data = corr_data[corr_data["correlation"].notna()]
-    # Sort correlation data by correlation value
-    corr_data = corr_data.sort_values('correlation', ascending=True)
-    # Create bar plot of correlations
-    corr_ranking_plot = go.Figure(data=[
-        go.Bar(
-            x=corr_data["correlation"],
-            y=corr_data["score"],
-            name="correlation",
-            orientation='h',
-            hovertemplate="<i>Score:</i> %{y}<br><i>Correlation:</i> %{x:.3f}<br>"
-        )
-    ])
-    corr_ranking_plot.update_layout(
-        title="Correlation with Binding Affinity",
-        yaxis_title="Score Type",
-        xaxis_title="Spearman Correlation",
-        template="simple_white",
-        showlegend=False
-    )
-    # corr_plot is a scatter plot of the correlation between the binding affinity and each of the scores
-    scatters = []
-    for score_col in score_cols:
-        scatters.append(
-            go.Scatter(
-                x=spr_data_with_scores[kd_col],
-                y=spr_data_with_scores[score_col],
-                name=f"{kd_col} vs {score_col}",
-                mode='markers',  # Only show markers/dots, no lines
-                hovertemplate="<i>Score:</i> %{y}<br><i>KD:</i> %{x:.2f}<br>"
-            )
-        )
-    corr_plot = go.Figure(data=scatters)
-    cols_to_show = [kd_col]
-    cols_to_show.extend(score_cols)
-    return spr_data_with_scores[cols_to_show], corr_ranking_plot, corr_plot
 def create_correlation_tab():
     gr.Markdown("# Upload binding affinity data")
@@ -217,36 +162,13 @@ def create_correlation_tab():
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
             correlation_plot = gr.Plot(label="Correlation with binding affinity")
-    cols = [
-        "confidence_score_boltz",
-        "ptm_boltz",
-        "iptm_boltz",
-        "complex_plddt_boltz",
-        "complex_iplddt_boltz",
-        "complex_pde_boltz",
-        "complex_ipde_boltz",
-        "interchain_pae_monomer",
-        "interface_pae_monomer",
-        "overall_pae_monomer",
-        "interface_plddt_monomer",
-        "average_plddt_monomer",
-        "ptm_monomer",
-        "interface_ptm_monomer",
-        "interchain_pae_multimer",
-        "interface_pae_multimer",
-        "overall_pae_multimer",
-        "interface_plddt_multimer",
-        "average_plddt_multimer",
-        "ptm_multimer",
-        "interface_ptm_multimer"
-    ]
     csv_file.change(
-        fn=lambda file: spr_data_with_scores.drop(columns=cols) if file else None,
         inputs=csv_file,
         outputs=dataframe
     )
     fake_predict_btn.click(
-        fn=lambda x: fake_predict_and_correlate(spr_data_with_scores, cols),
         inputs=None,
         outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot]
     )

 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
 import pandas as pd
 from folding_studio_demo.predict import predict
+from folding_studio_demo.correlate import fake_predict_and_correlate, COLUMNS
 logger = logging.getLogger(__name__)
         outputs=[mol_output, metrics_plot],
     )
 def create_correlation_tab():
     gr.Markdown("# Upload binding affinity data")
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
             correlation_plot = gr.Plot(label="Correlation with binding affinity")
     csv_file.change(
+        fn=lambda file: spr_data_with_scores.drop(columns=COLUMNS) if file else None,
         inputs=csv_file,
         outputs=dataframe
     )
     fake_predict_btn.click(
+        fn=lambda x: fake_predict_and_correlate(spr_data_with_scores, COLUMNS),
         inputs=None,
         outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot]
     )

folding_studio_demo/correlate.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import logging
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+from scipy.stats import spearmanr
+logger = logging.getLogger(__name__)
+COLUMNS = [
+        "confidence_score_boltz",
+        "ptm_boltz",
+        "iptm_boltz",
+        "complex_plddt_boltz",
+        "complex_iplddt_boltz",
+        "complex_pde_boltz",
+        "complex_ipde_boltz",
+        "interchain_pae_monomer",
+        "interface_pae_monomer",
+        "overall_pae_monomer",
+        "interface_plddt_monomer",
+        "average_plddt_monomer",
+        "ptm_monomer",
+        "interface_ptm_monomer",
+        "interchain_pae_multimer",
+        "interface_pae_multimer",
+        "overall_pae_multimer",
+        "interface_plddt_multimer",
+        "average_plddt_multimer",
+        "ptm_multimer",
+        "interface_ptm_multimer"
+    ]
+def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: list[str]) -> tuple[pd.DataFrame, go.Figure]:
+    """Fake predict structures of all complexes and correlate the results."""
+    corr_data = []
+    spr_data_with_scores["log_kd"] = np.log10(spr_data_with_scores["KD (nM)"])
+    kd_col = "KD (nM)"
+    for score_col in score_cols:
+        logger.info(f"Computing correlation between {score_col} and KD (nM)")
+        res = spearmanr(spr_data_with_scores[kd_col], spr_data_with_scores[score_col])
+        corr_data.append({"score": score_col, "correlation": res.statistic, "p-value": res.pvalue})
+        logger.info(f"Correlation between {score_col} and KD (nM): {res.statistic}")
+    corr_data = pd.DataFrame(corr_data)
+    # Find the lines in corr_data with NaN values and remove them
+    corr_data = corr_data[corr_data["correlation"].notna()]
+    # Sort correlation data by correlation value
+    corr_data = corr_data.sort_values('correlation', ascending=True)
+    # Create bar plot of correlations
+    corr_ranking_plot = go.Figure(data=[
+        go.Bar(
+            x=corr_data["correlation"],
+            y=corr_data["score"],
+            name="correlation",
+            orientation='h',
+            hovertemplate="<i>Score:</i> %{y}<br><i>Correlation:</i> %{x:.3f}<br>"
+        )
+    ])
+    corr_ranking_plot.update_layout(
+        title="Correlation with Binding Affinity",
+        yaxis_title="Score Type",
+        xaxis_title="Spearman Correlation",
+        template="simple_white",
+        showlegend=False
+    )
+    # corr_plot is a scatter plot of the correlation between the binding affinity and each of the scores
+    scatters = []
+    for score_col in score_cols:
+        scatters.append(
+            go.Scatter(
+                x=spr_data_with_scores[kd_col],
+                y=spr_data_with_scores[score_col],
+                name=f"{kd_col} vs {score_col}",
+                mode='markers',  # Only show markers/dots, no lines
+                hovertemplate="<i>Score:</i> %{y}<br><i>KD:</i> %{x:.2f}<br>"
+            )
+        )
+    corr_plot = go.Figure(data=scatters)
+    corr_plot.update_layout(
+        xaxis_title="KD (nM)",
+        yaxis_title="Score",
+        template="simple_white",
+        xaxis_type="log"  # Set x-axis to logarithmic scale
+    )
+    cols_to_show = [kd_col]
+    cols_to_show.extend(score_cols)
+    return spr_data_with_scores[cols_to_show], corr_ranking_plot, corr_plot