|
"""Folding Studio Demo App.""" |
|
|
|
import logging |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
from folding_studio_data_models import FoldingModel |
|
from gradio_molecule3d import Molecule3D |
|
|
|
from folding_studio_demo.correlate import ( |
|
SCORE_COLUMN_NAMES, |
|
SCORE_COLUMNS, |
|
compute_correlation_data, |
|
fake_predict_and_correlate, |
|
get_score_description, |
|
make_regression_plot, |
|
plot_correlation_ranking, |
|
) |
|
from folding_studio_demo.predict import filter_predictions, predict, predict_comparison |
|
|
|
logger = logging.getLogger(__name__)


# Representation settings handed to the Molecule3D viewers in this module
# through their ``reps=`` argument: a single entry for model 0, drawn as a
# cartoon with the "alphafold" color scheme.
MOLECULE_REPS = [
    dict(
        model=0,
        style="cartoon",
        color="alphafold",
        around=0,
        byres=False,
    ),
]
|
|
|
|
|
# (display label, FoldingModel member) pairs offered by the model selectors,
# in the order they are shown.
MODEL_CHOICES = list(
    {
        "AlphaFold2": FoldingModel.AF2,
        "OpenFold": FoldingModel.OPENFOLD,
        "Boltz-1": FoldingModel.BOLTZ,
        "Chai-1": FoldingModel.CHAI,
        "Protenix": FoldingModel.PROTENIX,
    }.items()
)
|
|
|
# Example inputs in FASTA form; each literal below is one line of the record.
MONOMER_SEQ_EXAMPLE = (
    ">A|protein\n"
    "MALWMRLLPLLALLALWGPDPAAA"
)
MULTIMER_SEQ_EXAMPLE = (
    ">A|protein\n"
    "SQIPASEQETLVRPKPLLLKLLKSVGAQKDTYTMKEVLFYLGQYIMTKRLYDAAQQHIVYCSNDLLGDLFGVPSFSVKEHRKIYTMIYRNLVVVNQQESSDSGTSVSEN\n"
    ">B|protein\n"
    "SQETFSDLWKLLPEN"
)
# [label, sequence] rows pairing each example with a short name.
EXAMPLES = [
    [label, fasta]
    for label, fasta in (
        ("Monomer", MONOMER_SEQ_EXAMPLE),
        ("Multimer", MULTIMER_SEQ_EXAMPLE),
    )
]
|
|
|
|
|
def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
    """Sequence input component.

    Lays out a textbox pre-filled with the monomer example, a FASTA upload
    widget whose content is copied into the textbox, and two buttons that
    load the monomer / multimer example sequences. Intended to be called while
    a ``gr.Blocks`` layout is being built.

    Args:
        dropdown (gr.Dropdown | None): Accepted so existing callers keep
            working; it is not used by this function.

    Returns:
        gr.Textbox: Sequence input component
    """
    with gr.Column():
        with gr.Row():
            with gr.Row():
                with gr.Column():
                    sequence = gr.Textbox(
                        label="Protein Sequence",
                        placeholder="Enter a protein sequence or upload a FASTA file",
                        value=MONOMER_SEQ_EXAMPLE,
                        lines=5,
                    )
                    gr.Markdown(
                        "Select an example below, enter a sequence manually or upload a FASTA file."
                    )

                file_input = gr.File(
                    label="Upload a FASTA file",
                    file_types=[".fasta", ".fa"],
                    scale=0,
                    height=150,
                )

        with gr.Row(equal_height=True):
            with gr.Column():
                with gr.Row():
                    gr.Markdown("**Monomer Example:**")
                    gr.Markdown("**Multimer Example:**")
                with gr.Row():
                    gr.Markdown("```\n" + MONOMER_SEQ_EXAMPLE + "\n```")
                    gr.Markdown("```\n" + MULTIMER_SEQ_EXAMPLE + "\n```")
                with gr.Row():
                    gr.Button("Load Monomer Example", size="md").click(
                        fn=lambda: MONOMER_SEQ_EXAMPLE,
                        outputs=[sequence],
                    )
                    gr.Button("Load Multimer Example", size="md").click(
                        fn=lambda: MULTIMER_SEQ_EXAMPLE, outputs=[sequence]
                    )

    def _process_file(file: gr.File | None) -> gr.Textbox:
        """Copy the uploaded file's text into the sequence textbox."""
        if file is None:
            # Nothing uploaded (or the upload was cleared): return a textbox
            # with no arguments rather than a new value.
            return gr.Textbox()
        # NOTE(review): the upload is presumably either an object exposing the
        # path as ``.name`` or the path itself, depending on the Gradio
        # version - accept both instead of assuming ``.name`` exists.
        path = getattr(file, "name", file)
        try:
            # Explicit encoding so decoding does not depend on the host locale.
            with open(path, encoding="utf-8") as f:
                content = f.read().strip()
        except Exception:
            # Best effort: an unreadable upload must not break the UI, but the
            # traceback is kept in the logs for diagnosis.
            logger.exception("Error reading file %s", path)
            return gr.Textbox()
        return gr.Textbox(value=content)

    file_input.change(fn=_process_file, inputs=[file_input], outputs=[sequence])
    return sequence
|
|
|
|
|
def simple_prediction(api_key: gr.Textbox) -> None:
    """Simple prediction tab.

    Builds a model dropdown, the shared sequence input, a "Predict" button and
    the two outputs (3D structure viewer and pLDDT plot), then wires the
    button to ``predict``.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key.
            It is registered as an event input, so the handler receives the
            key's current value on every click.
    """
    gr.Markdown(
        """
        ## Predict a Protein Structure

        It will be run in the background and the results will be displayed in the output section.
        The output will contain the protein structure and the pLDDT plot.

        Select a model to run the inference with and enter a protein sequence or upload a FASTA file.
        """
    )
    with gr.Row():
        dropdown = gr.Dropdown(
            label="Model",
            choices=MODEL_CHOICES,
            scale=0,
            value=FoldingModel.BOLTZ,
        )
        with gr.Column():
            sequence = sequence_input(dropdown)

    predict_btn = gr.Button(
        "Predict",
        elem_classes="gradient-button",
        elem_id="predict-btn",
        variant="primary",
    )

    with gr.Row():
        mol_output = Molecule3D(label="Protein Structure", reps=MOLECULE_REPS)
        metrics_plot = gr.Plot(label="pLDDT")

    # The lambda receives the values of (sequence, api_key, dropdown) in that
    # order and forwards them to predict with FASTA formatting switched on.
    predict_btn.click(
        fn=lambda x, y, z: predict(x, y, z, format_fasta=True),
        inputs=[sequence, api_key, dropdown],
        outputs=[mol_output, metrics_plot],
    )
|
|
|
|
|
def model_comparison(api_key: gr.Textbox) -> None:
    """Model comparison tab.

    Builds the model selection, the shared sequence input, one (initially
    hidden) checkbox group per model family and the shared outputs, then wires
    "Compare Models" to ``predict_comparison`` followed by
    ``filter_predictions``. Changing any checkbox group re-runs
    ``filter_predictions`` on the stored predictions.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key.
            It is registered as an event input, so the handler receives the
            key's current value on every click.
    """
    gr.Markdown(
        """
        ## Compare Folding Models

        This tab allows you to compare predictions from multiple protein folding models side by side.
        Follow these steps to get started:

        1. **Select Models**: Choose one or more models from the list on the left
        2. **Input Sequence** : Either select an example sequence, enter your protein sequence directly in the text box or upload a FASTA file.
        3. **Run Comparison**: Click "Compare Models" to start the prediction
        """
    )
    with gr.Row():
        models = gr.CheckboxGroup(
            label="Model",
            choices=MODEL_CHOICES,
            scale=0,
            min_width=150,
            value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
        )
        with gr.Column():
            sequence = sequence_input()

    predict_btn = gr.Button(
        "Compare Models",
        elem_classes=["gradient-button"],
        elem_id="compare-models-btn",
        variant="primary",
    )
    with gr.Row():
        with gr.Column():
            # The four confidence bands are sub-items of the pLDDT bullet.
            gr.Markdown(
                """
                ### Understanding the Outputs:
                - **3D Structure**: The molecular viewer shows the predicted protein structure
                - **pLDDT Score**: A confidence score (0-100) for each residue:
                    - Very high (>90): Highly accurate
                    - Confident (70-90): Good accuracy
                    - Low (50-70): Limited accuracy
                    - Very low (<50): Poor accuracy
                """
            )
    gr.Markdown(
        "### Model Predictions\nUse the checkboxes to toggle which model predictions to compare:"
    )
    with gr.Row():
        af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
        openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
        solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
        chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
        protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
        boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
    with gr.Row():
        mol_outputs = Molecule3D(
            label="Protein Structure", reps=MOLECULE_REPS, height=1000
        )
        metrics_plot = gr.Plot(label="pLDDT")

    # Holds whatever predict_comparison returns first, so the checkbox
    # handlers can re-filter without running the prediction again.
    prediction_outputs = gr.State()

    # Wiring order used for predict_comparison's outputs and
    # filter_predictions' inputs. It intentionally differs from the on-screen
    # order above (boltz comes before protenix here); keep it in one place.
    prediction_checkboxes = [
        af2_predictions,
        openfold_predictions,
        solo_predictions,
        chai_predictions,
        boltz_predictions,
        protenix_predictions,
    ]

    predict_btn.click(
        fn=predict_comparison,
        inputs=[sequence, api_key, models],
        outputs=[prediction_outputs, *prediction_checkboxes],
    ).then(
        fn=filter_predictions,
        inputs=[prediction_outputs, *prediction_checkboxes],
        outputs=[mol_outputs, metrics_plot],
    )

    # Toggling any checkbox group refreshes the viewer and the plot.
    for checkbox in prediction_checkboxes:
        checkbox.change(
            fn=filter_predictions,
            inputs=[prediction_outputs, *prediction_checkboxes],
            outputs=[mol_outputs, metrics_plot],
        )
|
|
|
|
|
def create_antibody_discovery_tab():
    """Build the antibody discovery tab.

    Reads ``spr_af_scores_mapped.csv`` (path relative to the working
    directory), shows its experimental columns in a table, and adds a button
    that calls ``fake_predict_and_correlate`` and reveals the explanation,
    correlation-ranking and regression rows. Changing the score dropdown, the
    correlation type or the log-scale checkbox redraws both plots.
    """
    gr.Markdown(
        "# Accelerating Antibody Discovery: In-Silico and Experimental Insights"
    )
    gr.Markdown("""
    Let's dive into how we're using AI to accelerate antibody drug discovery by looking at how protein folding models stack up against real lab data.

    We've got this dataset that shows how well different antibodies stick to a specific target (we measure this as KD in nM).
    For each antibody-target pair, we've recorded:
    - The antibody's light and heavy chain sequences (think of them as the antibody's building blocks)
    - The target (antigen) sequence
    - How strongly they bind together in the lab (the KD value, lower means stronger binding)

    Why is it interesting? We take these sequences and feed them into protein folding models
    that predict their 3D structures. The models tell us how confident they are about their predictions.
    By comparing these confidence scores with our lab results, we can figure out which model scores
    are actually good at predicting real binding strength!

    Why is this useful for drug discovery? Once we know which computational scores to trust,
    we can use them to quickly check thousands of potential antibodies without having to test each one
    in the lab. We can then focus our lab work on testing just the most promising candidates.
    This means we can find effective antibody drugs much faster than before!
    """)

    # Raw column name -> label shown in the table.
    prettified_columns = {
        "antibody_name": "Antibody Name",
        "KD (nM)": "KD (nM)",
        "antibody_vh_sequence": "Antibody VH Sequence",
        "antibody_vl_sequence": "Antibody VL Sequence",
        "antigen_sequence": "Antigen Sequence",
    }
    # Score columns are renamed first, then the experimental ones.
    affinity_df = (
        pd.read_csv("spr_af_scores_mapped.csv")
        .rename(columns=SCORE_COLUMN_NAMES)
        .rename(columns=prettified_columns)
    )
    # Only the experimental columns are displayed, in mapping order.
    display_columns = list(prettified_columns.values())

    gr.DataFrame(
        value=affinity_df[display_columns].round(2),
        label="Experimental Antibody-Antigen Binding Affinity Data",
    )

    gr.Markdown("# Prediction and correlation")

    with gr.Row():
        with gr.Column(min_width=150):
            gr.Markdown(
                "Now, let's see how well the protein folding models can predict the binding affinity of these antibodies to the target antigen."
            )
        with gr.Column(min_width=150):
            fake_predict_btn = gr.Button(
                "Predict structures of all complexes",
                elem_classes="gradient-button",
                variant="primary",
            )
    prediction_dataframe = gr.Dataframe(
        label="Predicted Structures Data", visible=False
    )
    # The table starts hidden; once it receives a value it is made visible.
    prediction_dataframe.change(
        fn=lambda x: gr.Dataframe(x, visible=True),
        inputs=[prediction_dataframe],
        outputs=[prediction_dataframe],
    )
    with gr.Row(visible=False) as explanation_row:
        gr.Markdown(
            """
            We now have the predicted structures along with the models confidence scores of all complexes. Let's see if we can find a correlation
            between the confidence scores and the binding affinity.
            Spearman and Pearson are statistical methods commonly used to measure the correlation between
            two variables. Higher values indicate a stronger correlation.
            Here **Boltz Complex ipLDDT** is the best predictor of binding affinity.
            """,
        )
    with gr.Row(visible=False) as correlation_row:
        with gr.Column(scale=0):
            with gr.Row():
                correlation_type = gr.Radio(
                    choices=["Spearman", "Pearson"],
                    value="Spearman",
                    label="Correlation Type",
                    interactive=True,
                    min_width=150,
                )
            with gr.Row():
                log_scale = gr.Checkbox(
                    label="Use log scale for KD",
                    value=True,
                    min_width=150,
                )
        with gr.Column():
            correlation_ranking_plot = gr.Plot(label="Correlation ranking")
    with gr.Row(visible=False) as regression_row:
        with gr.Column(scale=0):
            correlation_column = gr.Dropdown(
                label="Score data to display",
                choices=SCORE_COLUMNS,
                multiselect=False,
                value="Boltz Complex ipLDDT",
            )
            score_description = gr.Markdown(
                get_score_description(correlation_column.value)
            )
            # Keep the description in sync with the selected score.
            correlation_column.change(
                fn=lambda x: get_score_description(x),
                inputs=correlation_column,
                outputs=score_description,
            )
        with gr.Column():
            regression_plot = gr.Plot(label="Correlation with binding affinity")

    # The click handler ignores its input value; it returns the helper's
    # results followed by three updates that reveal the hidden rows.
    fake_predict_btn.click(
        fn=lambda _correlation_type: (
            *fake_predict_and_correlate(
                affinity_df, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
            ),
            gr.Row(visible=True),
            gr.Row(visible=True),
            gr.Row(visible=True),
        ),
        inputs=[correlation_type],
        outputs=[
            prediction_dataframe,
            correlation_ranking_plot,
            regression_plot,
            explanation_row,
            correlation_row,
            regression_row,
        ],
    )

    def update_plots_with_log(correlation_type, score, use_log):
        """Recompute both plots for the current control values."""
        logger.info("Updating correlation plot for %s", correlation_type)
        corr_data = compute_correlation_data(affinity_df, SCORE_COLUMNS)
        logger.info("Correlation data: %s", corr_data)
        kd_column = "log_kd" if use_log else "KD (nM)"
        ranking_fig = plot_correlation_ranking(
            corr_data, correlation_type, kd_col=kd_column
        )
        regression_fig = make_regression_plot(affinity_df, score, use_log)
        return regression_fig, ranking_fig

    # Any of the three controls triggers the same redraw.
    for control in (correlation_column, correlation_type, log_scale):
        control.change(
            fn=update_plots_with_log,
            inputs=[correlation_type, correlation_column, log_scale],
            outputs=[regression_plot, correlation_ranking_plot],
        )
|
|
|
|
|
def __main__() -> None:
    """Assemble the Folding Studio demo UI and launch it.

    Creates the themed ``gr.Blocks`` app with an API-key accordion and three
    tabs (basic folding, model comparison, antibody discovery), then calls
    ``demo.launch()``.
    """
    theme = gr.themes.Ocean(
        primary_hue="blue",
        secondary_hue="purple",
    )
    with gr.Blocks(theme=theme, title="Folding Studio Demo") as demo:
        gr.Markdown(
            """
            # Folding Studio: Harness the Power of Protein Folding 🧬

            Folding Studio is a platform for protein structure prediction.
            It uses the latest AI-powered folding models to predict the structure of a protein.

            Available models are : AlphaFold2, OpenFold, Boltz-1, Chai and Protenix.
            """
        )
        with gr.Accordion("API Key", open=False):
            gr.Markdown(
                """
                To use the Folding Studio API, you need to provide an API key.
                You can get your API key by asking to the Folding Studio team.
                """
            )
            # The textbox component itself is handed to the tabs, which
            # register it as an input of their prediction events.
            api_key = gr.Textbox(
                placeholder="Enter your Folding Studio API key",
                type="password",
                show_label=False,
            )
        gr.Markdown("## Demo Usage")
        # NOTE(review): the leading "π" in the next two labels looks like the
        # remains of a mis-decoded emoji; the intended glyph cannot be
        # recovered from this file - confirm and restore it.
        with gr.Tab("π Basic Folding"):
            simple_prediction(api_key)
        with gr.Tab("π Model Comparison"):
            model_comparison(api_key)
        with gr.Tab("🧪 Antibody Discovery Pipeline"):
            create_antibody_discovery_tab()

    demo.launch()
|
|