"""Folding Studio Demo App."""
|
|
|
import logging

import gradio as gr
import pandas as pd
import plotly.graph_objects as go
from folding_studio_data_models import FoldingModel
from gradio_molecule3d import Molecule3D

from folding_studio_demo.correlate import (
    SCORE_COLUMN_NAMES,
    SCORE_COLUMNS,
    compute_correlation_data,
    fake_predict_and_correlate,
    get_score_description,
    make_regression_plot,
    plot_correlation_ranking,
)
from folding_studio_demo.predict import filter_predictions, predict, predict_comparison
|
|
|
# Module-level logger; used by the upload handler and the correlation plot
# refresh callback further down in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
# Representation settings handed as ``reps`` to every Molecule3D viewer built
# in this file: one entry requesting the "cartoon" style with the "alphafold"
# colour option.
MOLECULE_REPS = [
    {
        "model": 0,
        "style": "cartoon",
        "color": "alphafold",
        "around": 0,
        "byres": False,
    }
]
|
|
|
|
|
# (display label, value) pairs offered by the model dropdown and the model
# checkbox group.
# NOTE(review): FoldingModel.SOLOSEQ is not selectable here although
# MODEL_EXAMPLES has an entry for it and the comparison tab creates a "SoloSeq"
# result checkbox group - confirm this omission is intentional.
MODEL_CHOICES = [
    ("AlphaFold2", FoldingModel.AF2),
    ("OpenFold", FoldingModel.OPENFOLD),
    ("Boltz-1", FoldingModel.BOLTZ),
    ("Chai-1", FoldingModel.CHAI),
    ("Protenix", FoldingModel.PROTENIX),
]
|
|
|
# Short amino-acid sequence reused by every example below.
DEFAULT_SEQ = "MALWMRLLPLLALLALWGPDPAAA"

# Example rows per model, each row being [complex type label, FASTA text].
# The FASTA header syntax differs between models, hence one entry per model
# even where the sequences are the same.
MODEL_EXAMPLES = {
    FoldingModel.AF2: [
        ["Monomer", f">A\n{DEFAULT_SEQ}"],
        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
    ],
    FoldingModel.OPENFOLD: [
        ["Monomer", f">A\n{DEFAULT_SEQ}"],
        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
    ],
    FoldingModel.SOLOSEQ: [["Monomer", f">A\n{DEFAULT_SEQ}"]],
    FoldingModel.BOLTZ: [
        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
    ],
    FoldingModel.CHAI: [
        ["Monomer", f">protein|name=A\n{DEFAULT_SEQ}"],
        [
            "Multimer",
            f">protein|name=A\n{DEFAULT_SEQ}\n>protein|name=B\n{DEFAULT_SEQ}",
        ],
    ],
    FoldingModel.PROTENIX: [
        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
    ],
}
|
|
|
|
|
def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
    """Build the sequence entry widgets shared by the prediction tabs.

    Creates a sequence textbox with clickable examples and a FASTA upload
    field. Uploading a file copies its stripped text into the textbox.

    Args:
        dropdown (gr.Dropdown | None): Optional model selector. When given,
            the example list is swapped for ``MODEL_EXAMPLES[<selection>]``
            each time the selection changes. When omitted, the Boltz examples
            stay in place.

    Returns:
        gr.Textbox: Sequence input component
    """
    # NOTE(review): container nesting was reconstructed from a flattened
    # source - confirm the upload field is meant to sit beside the column.
    with gr.Row(equal_height=True):
        with gr.Column():
            sequence = gr.Textbox(
                label="Protein Sequence",
                lines=2,
                placeholder="Enter a protein sequence or upload a FASTA file",
            )
            # Hidden sink for the first cell of each example row (the
            # "Monomer"/"Multimer" label); only the FASTA text is shown.
            dummy = gr.Textbox(label="Complex type", visible=False)

            examples = gr.Examples(
                examples=MODEL_EXAMPLES[FoldingModel.BOLTZ],
                inputs=[dummy, sequence],
            )
        file_input = gr.File(
            label="Upload a FASTA file",
            file_types=[".fasta", ".fa"],
            scale=0,
        )

    if dropdown is not None:
        dropdown.change(
            fn=lambda x: gr.Dataset(samples=MODEL_EXAMPLES[x]),
            inputs=[dropdown],
            outputs=[examples.dataset],
        )

    def _process_file(file: gr.File | None) -> gr.Textbox:
        """Return a textbox update carrying the uploaded file's stripped text."""
        if file is None:
            # Nothing uploaded (or upload cleared): send an update with no value.
            return gr.Textbox()
        try:
            # Explicit encoding so decoding does not depend on the host locale.
            with open(file.name, "r", encoding="utf-8") as f:
                content = f.read().strip()
        except Exception as e:
            # Best-effort handler: a bad upload is logged, never raised.
            logger.error("Error reading file: %s", e)
            return gr.Textbox()
        return gr.Textbox(value=content)

    file_input.change(fn=_process_file, inputs=[file_input], outputs=[sequence])
    return sequence
|
|
|
|
|
def simple_prediction(api_key: gr.Textbox) -> None:
    """Simple prediction tab.

    Builds a model dropdown, the shared sequence input, a "Predict" button and
    two outputs (structure viewer and pLDDT plot), then wires the button to
    ``predict``.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key. It
            is passed as an event input, so its value is read when the button
            is clicked.
    """
    gr.Markdown(
        """
        ## Predict a Protein Structure

        It will be run in the background and the results will be displayed in the output section.
        The output will contain the protein structure and the pLDDT plot.

        Select a model to run the inference with and enter a protein sequence or upload a FASTA file.
        """
    )
    with gr.Row():
        dropdown = gr.Dropdown(
            label="Model",
            choices=MODEL_CHOICES,
            scale=0,
            value=FoldingModel.BOLTZ,
        )
        with gr.Column():
            # Passing the dropdown lets the examples follow the selected model.
            sequence = sequence_input(dropdown)

    predict_btn = gr.Button(
        "Predict",
        elem_classes="gradient-button",
        elem_id="predict-btn",
        variant="primary",
    )

    with gr.Row():
        mol_output = Molecule3D(label="Protein Structure", reps=MOLECULE_REPS)
        metrics_plot = gr.Plot(label="pLDDT")

    predict_btn.click(
        fn=predict,
        inputs=[sequence, api_key, dropdown],
        outputs=[mol_output, metrics_plot],
    )
|
|
|
|
|
def model_comparison(api_key: gr.Textbox) -> None:
    """Model comparison tab.

    Builds a model checkbox group, the shared sequence input, a "Compare
    Models" button, one initially hidden checkbox group per model and the
    shared outputs. A click runs ``predict_comparison`` and then
    ``filter_predictions``; changing any per-model checkbox group re-runs
    ``filter_predictions``.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key. It
            is passed as an event input, so its value is read when the button
            is clicked.
    """
    gr.Markdown(
        """
        ## Compare Folding Models

        Select multiple models to compare their predictions on your protein sequence.
        You can either enter the sequence directly or upload a FASTA file.

        The selected models will run in parallel and generate:
        - 3D structures of your protein that you can visualize and compare
        - pLDDT confidence scores plotted for each residue

        """
    )
    with gr.Row():
        models = gr.CheckboxGroup(
            label="Model",
            choices=MODEL_CHOICES,
            scale=0,
            min_width=300,
            value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
        )
        with gr.Column():
            # No dropdown is passed, so the examples stay on the Boltz format.
            sequence = sequence_input()

    predict_btn = gr.Button(
        "Compare Models",
        elem_classes=["gradient-button"],
        elem_id="compare-models-btn",
        variant="primary",
    )
    # NOTE(review): container nesting was reconstructed from a flattened
    # source - confirm the two rows below.
    with gr.Row():
        af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
        openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
        solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
        chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
        protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
        boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
    with gr.Row():
        mol_outputs = Molecule3D(
            label="Protein Structure", reps=MOLECULE_REPS, height=1000
        )
        metrics_plot = gr.Plot(label="pLDDT")

    prediction_outputs = gr.State()

    # Wiring order shared by every event below. It intentionally keeps the
    # original argument order, in which Boltz precedes Protenix (the reverse of
    # their on-screen order above).
    prediction_checkboxes = [
        af2_predictions,
        openfold_predictions,
        solo_predictions,
        chai_predictions,
        boltz_predictions,
        protenix_predictions,
    ]
    filter_inputs = [prediction_outputs, *prediction_checkboxes]
    filter_outputs = [mol_outputs, metrics_plot]

    predict_btn.click(
        fn=predict_comparison,
        inputs=[sequence, api_key, models],
        outputs=[prediction_outputs, *prediction_checkboxes],
    ).then(
        fn=filter_predictions,
        inputs=filter_inputs,
        outputs=filter_outputs,
    )

    # Re-filter the displayed predictions whenever a per-model selection changes.
    for checkbox in prediction_checkboxes:
        checkbox.change(
            fn=filter_predictions,
            inputs=filter_inputs,
            outputs=filter_outputs,
        )
|
|
|
|
|
def create_antibody_discovery_tab() -> None:
    """Antibody discovery tab.

    Reads ``spr_af_scores_mapped.csv`` (path relative to the working
    directory), renames its columns, shows the experimental columns in a table
    and builds the prediction / correlation controls. The correlation and
    regression rows start hidden and are revealed by the predict button.
    """
    gr.Markdown("# Accelerating Antibody Discovery: In-Silico and Experimental Insights")
    # NOTE(review): the emoji in this text arrived mis-decoded. Most were
    # recoverable from their byte pattern; the waving hand and the rocket are
    # best-effort replacements - confirm against the intended copy.
    gr.Markdown("""
    Hey there! 👋 Let's dive into how we're using AI to accelerate antibody drug discovery by looking at how protein folding models stack up against real lab data.

    We've got this fascinating dataset that shows how well different antibodies stick to a specific target (we measure this as KD in nM). 🧪
    For each antibody-target pair, we've recorded:
    - The antibody's light and heavy chain sequences (think of them as the antibody's building blocks) 🧬
    - The target (antigen) sequence 🎯
    - How strongly they bind together in the lab (the KD value, lower means stronger binding) 💪

    Here's where it gets interesting! We take these sequences and feed them into protein folding models
    that predict their 3D structures. The models tell us how confident they are about their predictions.
    By comparing these confidence scores with our lab results, we can figure out which model scores
    are actually good at predicting real binding strength! 🎯

    Why is this exciting for drug discovery? 🚀 Once we know which computational scores to trust,
    we can use them to quickly check thousands of potential antibodies without having to test each one
    in the lab. It's like having a high-speed screening tool! We can then focus our lab work on testing
    just the most promising candidates. This means we can find effective antibody drugs much faster than
    before! 🔬✨
    """)
    spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
    spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
    prettified_columns = {
        "antibody_name": "Antibody Name",
        "KD (nM)": "KD (nM)",
        "antibody_vh_sequence": "Antibody VH Sequence",
        "antibody_vl_sequence": "Antibody VL Sequence",
        "antigen_sequence": "Antigen Sequence",
    }
    spr_data_with_scores = spr_data_with_scores.rename(columns=prettified_columns)
    columns = [
        "Antibody Name",
        "KD (nM)",
        "Antibody VH Sequence",
        "Antibody VL Sequence",
        "Antigen Sequence",
    ]

    # Display-only table; no event reads it, so the component is not bound.
    gr.DataFrame(
        value=spr_data_with_scores[columns].round(2),
        label="Experimental Antibody-Antigen Binding Affinity Data",
    )

    gr.Markdown("# Prediction and correlation")

    # NOTE(review): container nesting below was reconstructed from a flattened
    # source - confirm the layout.
    with gr.Row():
        with gr.Column(min_width=150):
            gr.Markdown("Now, let's see how well the protein folding models can predict the binding affinity of these antibodies to the target antigen.")
        with gr.Column(min_width=150):
            fake_predict_btn = gr.Button(
                "Predict structures of all complexes",
                elem_classes="gradient-button",
                variant="primary",
            )
    prediction_dataframe = gr.Dataframe(
        label="Predicted Structures Data", visible=False
    )
    # Reveal the table as soon as it receives data.
    prediction_dataframe.change(
        fn=lambda x: gr.Dataframe(x, visible=True),
        inputs=[prediction_dataframe],
        outputs=[prediction_dataframe],
    )
    with gr.Row(visible=False) as correlation_row:
        with gr.Column(scale=0):
            with gr.Row():
                correlation_type = gr.Radio(
                    choices=["Spearman", "Pearson"],
                    value="Spearman",
                    label="Correlation Type",
                    interactive=True,
                    min_width=150,
                )
            with gr.Row():
                log_scale = gr.Checkbox(
                    label="Use log scale for KD",
                    value=False,
                    min_width=150,
                )
        with gr.Column():
            correlation_ranking_plot = gr.Plot(label="Correlation ranking")
    with gr.Row(visible=False) as regression_row:
        with gr.Column(scale=0):
            correlation_column = gr.Dropdown(
                label="Score data to display",
                choices=SCORE_COLUMNS,
                multiselect=False,
                value=SCORE_COLUMNS[0],
            )
            score_description = gr.Markdown(
                get_score_description(correlation_column.value)
            )
            correlation_column.change(
                fn=lambda x: get_score_description(x),
                inputs=correlation_column,
                outputs=score_description,
            )
        with gr.Column():
            regression_plot = gr.Plot(label="Correlation with binding affinity")

    def _predict_and_reveal(_correlation_type):
        """Return the prediction results plus updates revealing both plot rows.

        The event input is accepted but not used, as in the original handler.
        """
        results = fake_predict_and_correlate(
            spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
        )
        return (*results, gr.Row(visible=True), gr.Row(visible=True))

    fake_predict_btn.click(
        fn=_predict_and_reveal,
        inputs=[correlation_type],
        outputs=[
            prediction_dataframe,
            correlation_ranking_plot,
            regression_plot,
            correlation_row,
            regression_row,
        ],
    )

    def update_plots_with_log(correlation_type, score, use_log):
        """Rebuild both plots from the current control values.

        Args:
            correlation_type: Selected value of the correlation radio.
            score: Selected value of the score dropdown.
            use_log: State of the log-scale checkbox.

        Returns:
            tuple: ``(regression_plot, correlation_ranking_plot)``.
        """
        logger.info("Updating correlation plot for %s", correlation_type)
        corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
        logger.info("Correlation data: %s", corr_data)
        kd_col = "log_kd" if use_log else "KD (nM)"
        corr_ranking_plot = plot_correlation_ranking(
            corr_data, correlation_type, kd_col=kd_col
        )
        regression_plot = make_regression_plot(spr_data_with_scores, score, use_log)
        return regression_plot, corr_ranking_plot

    # Any of the three controls refreshes both plots with the same wiring.
    for control in (correlation_column, correlation_type, log_scale):
        control.change(
            fn=update_plots_with_log,
            inputs=[correlation_type, correlation_column, log_scale],
            outputs=[regression_plot, correlation_ranking_plot],
        )
|
|
|
|
|
def __main__() -> None:
    """Build the demo UI and launch it.

    Creates the themed ``gr.Blocks`` app with the intro text, the API key
    field and the three tabs, then calls ``demo.launch()``.
    """
    theme = gr.themes.Ocean(
        primary_hue="blue",
        secondary_hue="purple",
    )
    with gr.Blocks(theme=theme, title="Folding Studio Demo") as demo:
        gr.Markdown(
            """
            # Folding Studio: Harness the Power of Protein Folding 🧬

            Folding Studio is a platform for protein structure prediction.
            It uses the latest AI-powered folding models to predict the structure of a protein.

            Available models are : AlphaFold2, OpenFold, SoloSeq, Boltz-1, Chai and Protenix.

            ## API Key
            To use the Folding Studio API, you need to provide an API key.
            You can get your API key by asking to the Folding Studio team.
            """
        )
        # The component itself is handed to the tabs, which use it as an event
        # input so the key is read at click time.
        api_key = gr.Textbox(label="Folding Studio API Key", type="password")
        gr.Markdown("## Demo Usage")
        # NOTE(review): the leading emoji of the tab labels arrived mis-decoded.
        # The test tube is recoverable from its byte pattern; the first two are
        # best-effort replacements - confirm against the intended labels.
        with gr.Tab("🚀 Basic Folding"):
            simple_prediction(api_key)
        with gr.Tab("📊 Model Comparison"):
            model_comparison(api_key)
        with gr.Tab("🧪 Antibody Discovery Pipeline"):
            create_antibody_discovery_tab()

    demo.launch()