Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Community

jfaustin committed 3 days ago

Commit

354bfc2

verified ·

1 Parent(s): f601557

Improve aesthetics of correlation tab (#7)

Browse files

- git-ignore draft notebooks and np output (1521e428e0cd285be1e9d19853270de06eb08e0a)
- improve correlation legend (fe7692a5de1e886c3d22d6b39a5b9d6839b8af71)
- show corr plot on button click (30947eff1f1b60460a326f0ffe27110a55ad6bb2)
- add option to show x-axis as log (640e97433b3ecd0cdd3d7c9e689ade72c060e196)
- add text to explain the correlation tab purpose (924cdcdcd7d3a52a25f994bb52d163ef371b2b8f)
- use blue/purple theme (ca02709b5b7f00c3e9cefa66bf7f3e17c94fa59a)

Files changed (3) hide show

.gitignore +3 -0
folding_studio_demo/app.py +66 -15
folding_studio_demo/correlate.py +9 -7

.gitignore CHANGED Viewed

@@ -3,3 +3,6 @@
 output/
 sequences/
 boltz_results/

 output/
 sequences/
 boltz_results/
+*.ipynb
+*.npz

folding_studio_demo/app.py CHANGED Viewed

@@ -10,9 +10,10 @@ from gradio_molecule3d import Molecule3D
 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
     fake_predict_and_correlate,
-    select_correlation_plot,
 )
 from folding_studio_demo.predict import predict, predict_comparison
 logger = logging.getLogger(__name__)
@@ -102,7 +103,13 @@ def simple_prediction(api_key: str) -> None:
         with gr.Column():
             sequence = sequence_input()
-    predict_btn = gr.Button("Predict")
     with gr.Row():
         mol_output = Molecule3D(label="Protein Structure", reps=MOLECULE_REPS)
@@ -134,7 +141,13 @@ def model_comparison(api_key: str) -> None:
         with gr.Column():
             sequence = sequence_input()
-    predict_btn = gr.Button("Compare Models")
     with gr.Row():
         mol_outputs = Molecule3D(
@@ -154,6 +167,20 @@ def model_comparison(api_key: str) -> None:
 def create_correlation_tab():
     gr.Markdown("# Correlation with experimental binding affinity data")
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
     prettified_columns = {
         "antibody_name": "Antibody Name",
@@ -179,36 +206,60 @@ def create_correlation_tab():
     gr.Markdown("# Prediction and correlation")
     with gr.Row():
-        fake_predict_btn = gr.Button("Predict structures of all complexes")
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
         correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
-        # User can select the columns to display in the correlation plot
-        correlation_column = gr.Dropdown(
-            label="Score data to display", choices=SCORE_COLUMNS, multiselect=False
-        )
-        correlation_plot = gr.Plot(label="Correlation with binding affinity")
     fake_predict_btn.click(
         fn=lambda x: fake_predict_and_correlate(
             spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
         ),
         inputs=None,
-        outputs=[prediction_dataframe, correlation_ranking_plot],
     )
-    # Call function to update the correlation plot when the user selects the columns
     correlation_column.change(
-        fn=lambda score: select_correlation_plot(spr_data_with_scores, score),
-        inputs=correlation_column,
         outputs=correlation_plot,
     )
 def __main__():
-    with gr.Blocks(title="Folding Studio Demo") as demo:
         gr.Markdown(
             """
             # Folding Studio: Harness the Power of Protein Folding 🧬

 from folding_studio_demo.correlate import (
     SCORE_COLUMNS,
     fake_predict_and_correlate,
+    make_correlation_plot,
 )
 from folding_studio_demo.predict import predict, predict_comparison
+from folding_studio_demo.config import BLUE, PURPLE
 logger = logging.getLogger(__name__)
         with gr.Column():
             sequence = sequence_input()
+    predict_btn = gr.Button(
+        "Predict",
+        elem_classes="gradient-button",
+        elem_id="predict-btn",
+        variant="primary",
+        # css=f".gradio-container #predict-btn {{background: linear-gradient(90deg, {BLUE}, {PURPLE});}}",
+    )
     with gr.Row():
         mol_output = Molecule3D(label="Protein Structure", reps=MOLECULE_REPS)
         with gr.Column():
             sequence = sequence_input()
+    predict_btn = gr.Button(
+        "Compare Models",
+        elem_classes=["gradient-button"],
+        elem_id="compare-models-btn",
+        variant="primary",
+        # css=f".gradio-container #compare-models-btn {{background: linear-gradient(90deg, {BLUE}, {PURPLE});}}"
+    )
     with gr.Row():
         mol_outputs = Molecule3D(
 def create_correlation_tab():
     gr.Markdown("# Correlation with experimental binding affinity data")
+    gr.Markdown("""
+        This analysis explores the relationship between protein folding model confidence scores and experimental binding affinity data.
+        The experimental dataset contains binding affinity measurements (KD in nM) between antibody-antigen pairs.
+        Each data point includes:
+        - The antibody's light and heavy chain sequences
+        - The antigen sequence
+        - The experimental KD value
+        The analysis involves submitting these sequences to protein folding models for 3D structure prediction.
+        The models generate various confidence scores for each prediction. These scores are then correlated
+        with the experimental binding affinity measurements to evaluate their effectiveness as predictors
+        of binding strength.
+    """)
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
     prettified_columns = {
         "antibody_name": "Antibody Name",
     gr.Markdown("# Prediction and correlation")
     with gr.Row():
+        fake_predict_btn = gr.Button(
+            "Predict structures of all complexes",
+            elem_classes="gradient-button",
+            variant="primary",
+            # css=f".gradio-container #fake-predict-btn {{background: linear-gradient(90deg, {BLUE}, {PURPLE});}}",
+        )
     with gr.Row():
         prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
     with gr.Row():
         correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                # User can select the columns to display in the correlation plot
+                correlation_column = gr.Dropdown(
+                    label="Score data to display", choices=SCORE_COLUMNS, multiselect=False, value=SCORE_COLUMNS[0]
+                )
+                # Add checkbox for log scale and update plot when either input changes
+            with gr.Row():
+                log_scale = gr.Checkbox(label="Display x-axis on logarithmic scale", value=False)
+        with gr.Column():
+            correlation_plot = gr.Plot(label="Correlation with binding affinity")
     fake_predict_btn.click(
         fn=lambda x: fake_predict_and_correlate(
             spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
         ),
         inputs=None,
+        outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot],
     )
+    def update_plot(score, use_log):
+        return make_correlation_plot(spr_data_with_scores, score, use_log)
     correlation_column.change(
+        fn=update_plot,
+        inputs=[correlation_column, log_scale],
+        outputs=correlation_plot,
+    )
+    log_scale.change(
+        fn=update_plot,
+        inputs=[correlation_column, log_scale],
         outputs=correlation_plot,
     )
 def __main__():
+    theme = gr.themes.Ocean(
+        primary_hue="blue",
+        secondary_hue="purple",
+    )
+    with gr.Blocks(theme=theme, title="Folding Studio Demo") as demo:
         gr.Markdown(
             """
             # Folding Studio: Harness the Power of Protein Folding 🧬

folding_studio_demo/correlate.py CHANGED Viewed

@@ -68,15 +68,17 @@ def fake_predict_and_correlate(spr_data_with_scores: pd.DataFrame, score_cols: l
     cols_to_show = main_cols[:]
     cols_to_show.extend(score_cols)
-    return spr_data_with_scores[cols_to_show].round(2), corr_ranking_plot
-def select_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str) -> go.Figure:
     """Select the correlation plot to display."""
     # corr_plot is a scatter plot of the correlation between the binding affinity and each of the scores
     scatter =  go.Scatter(
             x=spr_data_with_scores["KD (nM)"],
             y=spr_data_with_scores[score],
-            name=f"KD (nM) vs {score}",
             mode='markers',  # Only show markers/dots, no lines
             hovertemplate="<i>Score:</i> %{y}<br><i>KD:</i> %{x:.2f}<br>",
             marker=dict(color='#1f77b4')  # Set color to match default first color
@@ -91,9 +93,9 @@ def select_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str) -> g
             yanchor="bottom",
             y=1.02,
             xanchor="right",
-            x=1
-        )
-        # xaxis_type="log"  # Set x-axis to logarithmic scale
     )
     # compute the correlation line
     corr_line = np.polyfit(spr_data_with_scores["KD (nM)"], spr_data_with_scores[score], 1)
@@ -104,7 +106,7 @@ def select_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str) -> g
         x=corr_line_x,
         y=corr_line_y,
         mode='lines',
-        name=f"Correlation",
         line=dict(color='#1f77b4')  # Set same color as scatter points
     ))
     return corr_plot

     cols_to_show = main_cols[:]
     cols_to_show.extend(score_cols)
+    corr_plot = make_correlation_plot(spr_data_with_scores, score_cols[0], use_log=False)
+    return spr_data_with_scores[cols_to_show].round(2), corr_ranking_plot, corr_plot
+def make_correlation_plot(spr_data_with_scores: pd.DataFrame, score: str, use_log: bool) -> go.Figure:
     """Select the correlation plot to display."""
     # corr_plot is a scatter plot of the correlation between the binding affinity and each of the scores
     scatter =  go.Scatter(
             x=spr_data_with_scores["KD (nM)"],
             y=spr_data_with_scores[score],
+            name=f"Samples",
             mode='markers',  # Only show markers/dots, no lines
             hovertemplate="<i>Score:</i> %{y}<br><i>KD:</i> %{x:.2f}<br>",
             marker=dict(color='#1f77b4')  # Set color to match default first color
             yanchor="bottom",
             y=1.02,
             xanchor="right",
+            x=1,
+        ),
+        xaxis_type="log" if use_log else "linear"  # Set x-axis to logarithmic scale
     )
     # compute the correlation line
     corr_line = np.polyfit(spr_data_with_scores["KD (nM)"], spr_data_with_scores[score], 1)
         x=corr_line_x,
         y=corr_line_y,
         mode='lines',
+        name=f"Regression line",
         line=dict(color='#1f77b4')  # Set same color as scatter points
     ))
     return corr_plot