|
"""Folding Studio Demo App.""" |
|
|
|
import logging |
|
|
|
import gradio as gr |
|
import pandas as pd |
|
from folding_studio_data_models import FoldingModel |
|
from gradio_molecule3d import Molecule3D |
|
|
|
from folding_studio_demo.correlate import ( |
|
SCORE_COLUMNS, |
|
fake_predict_and_correlate, |
|
make_correlation_plot, |
|
) |
|
from folding_studio_demo.predict import predict, predict_comparison |
|
from folding_studio_demo.config import BLUE, PURPLE |
|
|
|
logger = logging.getLogger(__name__)


# Representation list passed as ``reps=`` to the Molecule3D viewers below:
# a single entry drawing model 0 in "cartoon" style with the "alphafold"
# color scheme.
# NOTE(review): the empty ``chain`` / ``resname`` values presumably mean
# "no restriction" — confirm against the gradio_molecule3d documentation.
MOLECULE_REPS = [
    dict(
        model=0,
        chain="",
        resname="",
        style="cartoon",
        color="alphafold",
        around=0,
        byres=False,
    ),
]

# FASTA-style text (header line + sequence line) pre-filled in the sequence
# textbox.
DEFAULT_PROTEIN_SEQ = (
    ">protein description\n"
    "MALWMRLLPLLALLALWGPDPAAA"
)
|
|
|
# (display label, value) pairs offered by the model dropdowns; the selected
# value(s) are what the prediction callbacks receive.
MODEL_CHOICES = list(
    {
        "Boltz-1": FoldingModel.BOLTZ,
        "Chai-1": FoldingModel.CHAI,
        "Protenix": FoldingModel.PROTENIX,
    }.items()
)
|
|
|
|
|
def sequence_input() -> gr.Textbox:
    """Sequence input component.

    Creates a textbox pre-filled with ``DEFAULT_PROTEIN_SEQ`` together with a
    FASTA file upload component. Whenever the uploaded file changes, the
    stripped content of the file is written into the textbox.

    Returns:
        gr.Textbox: Sequence input component
    """
    sequence = gr.Textbox(
        label="Protein Sequence",
        value=DEFAULT_PROTEIN_SEQ,
        lines=2,
        placeholder="Enter a protein sequence or upload a FASTA file",
    )
    file_input = gr.File(
        label="Upload a FASTA file",
        file_types=[".fasta", ".fa"],
    )

    def _process_file(file: str | None) -> gr.Textbox:
        """Turn the uploaded file into an update for the sequence textbox.

        Args:
            file: Value emitted by the upload component, or None when there
                is no file.

        Returns:
            gr.Textbox: Carries the file content on success; created without
                a value when there is no file or it cannot be read.
        """
        if file is None:
            return gr.Textbox()

        # Accept both a plain path and an object exposing the path as
        # ``.name`` (the original code relied on ``.name`` only).
        path = getattr(file, "name", file)
        try:
            # FASTA is plain text; pin the encoding so decoding does not
            # depend on the platform's default locale.
            with open(path, encoding="utf-8") as fasta:
                content = fasta.read().strip()
        except Exception:
            # UI callback boundary: stay best-effort and never propagate, but
            # keep the traceback in the logs for diagnosis.
            logger.exception("Error reading file: %s", path)
            return gr.Textbox()
        return gr.Textbox(value=content)

    file_input.change(fn=_process_file, inputs=[file_input], outputs=[sequence])
    return sequence
|
|
|
|
|
def simple_prediction(api_key: gr.Textbox) -> None:
    """Simple prediction tab.

    Lays out a model dropdown, the sequence input, a "Predict" button and two
    outputs (3D structure viewer and pLDDT plot), then wires the button to
    ``predict``.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key.
            It is passed as an event input, so it must be the component
            itself rather than the key string.
    """
    gr.Markdown(
        """
        ### Predict a Protein Structure

        It will be run in the background and the results will be displayed in the output section.
        The output will contain the protein structure and the pLDDT plot.

        Select a model to run the inference with and enter a protein sequence or upload a FASTA file.
        """
    )

    # Inputs: model selector next to the sequence textbox / FASTA upload.
    with gr.Row():
        dropdown = gr.Dropdown(
            label="Model",
            choices=MODEL_CHOICES,
            scale=0,
            value=FoldingModel.BOLTZ,
        )
        with gr.Column():
            sequence = sequence_input()

    predict_btn = gr.Button(
        "Predict",
        elem_classes="gradient-button",
        elem_id="predict-btn",
        variant="primary",
    )

    # Outputs: predicted structure and its confidence plot side by side.
    with gr.Row():
        mol_output = Molecule3D(label="Protein Structure", reps=MOLECULE_REPS)
        metrics_plot = gr.Plot(label="pLDDT")

    # ``predict`` receives the current values of the three input components
    # in this order and must return one value per output component.
    predict_btn.click(
        fn=predict,
        inputs=[sequence, api_key, dropdown],
        outputs=[mol_output, metrics_plot],
    )
|
|
|
|
|
def model_comparison(api_key: gr.Textbox) -> None:
    """Model comparison tab.

    Lays out a multi-select model dropdown (all models in ``MODEL_CHOICES``
    selected by default), the sequence input, a "Compare Models" button and a
    multi-file 3D structure viewer, then wires the button to
    ``predict_comparison``.

    Args:
        api_key (gr.Textbox): Component holding the Folding Studio API key.
            It is passed as an event input, so it must be the component
            itself rather than the key string.
    """
    # Inputs: model multi-selector next to the sequence textbox / FASTA upload.
    with gr.Row():
        models = gr.Dropdown(
            label="Model",
            choices=MODEL_CHOICES,
            multiselect=True,
            scale=0,
            min_width=300,
            value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
        )
        with gr.Column():
            sequence = sequence_input()

    predict_btn = gr.Button(
        "Compare Models",
        elem_classes=["gradient-button"],
        elem_id="compare-models-btn",
        variant="primary",
    )

    # Output: a single viewer configured to accept several structure files.
    with gr.Row():
        mol_outputs = Molecule3D(
            label="Protein Structure",
            reps=MOLECULE_REPS,
            file_count="multiple",
        )

    # ``predict_comparison`` receives the current values of the three input
    # components in this order.
    predict_btn.click(
        fn=predict_comparison,
        inputs=[sequence, api_key, models],
        outputs=[mol_outputs],
    )
|
|
|
|
|
def create_correlation_tab() -> None:
    """Correlation tab.

    Reads ``spr_af_scores_mapped.csv`` (path relative to the current working
    directory), renames its descriptive columns to display names and shows
    them in a table. A button runs ``fake_predict_and_correlate`` on the
    loaded table and fills the prediction table, the correlation ranking plot
    and the correlation plot. The score dropdown and the log-scale checkbox
    redraw the correlation plot through ``make_correlation_plot``.
    """
    gr.Markdown("# Correlation with experimental binding affinity data")
    gr.Markdown("""
    This analysis explores the relationship between protein folding model confidence scores and experimental binding affinity data.

    The experimental dataset contains binding affinity measurements (KD in nM) between antibody-antigen pairs.
    Each data point includes:
    - The antibody's light and heavy chain sequences
    - The antigen sequence
    - The experimental KD value

    The analysis involves submitting these sequences to protein folding models for 3D structure prediction.
    The models generate various confidence scores for each prediction. These scores are then correlated
    with the experimental binding affinity measurements to evaluate their effectiveness as predictors
    of binding strength.
    """)

    # Raw column name -> display name. The insertion order is also the order
    # in which the columns are shown in the table below.
    prettified_columns = {
        "antibody_name": "Antibody Name",
        "KD (nM)": "KD (nM)",
        "antibody_vh_sequence": "Antibody VH Sequence",
        "antibody_vl_sequence": "Antibody VL Sequence",
        "antigen_sequence": "Antigen Sequence",
    }
    spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv").rename(
        columns=prettified_columns
    )

    with gr.Row():
        # Only the descriptive columns are displayed; the remaining columns
        # stay in ``spr_data_with_scores`` for the callbacks below.
        display_columns = list(prettified_columns.values())
        gr.DataFrame(
            value=spr_data_with_scores[display_columns].round(2),
            label="Experimental Antibody-Antigen Binding Affinity Data",
        )

    gr.Markdown("# Prediction and correlation")
    with gr.Row():
        fake_predict_btn = gr.Button(
            "Predict structures of all complexes",
            elem_classes="gradient-button",
            variant="primary",
        )
    with gr.Row():
        prediction_dataframe = gr.Dataframe(label="Predicted Structures Data")
    with gr.Row():
        correlation_ranking_plot = gr.Plot(label="Correlation ranking")
    with gr.Row():
        with gr.Column():
            with gr.Row():
                correlation_column = gr.Dropdown(
                    label="Score data to display",
                    choices=SCORE_COLUMNS,
                    multiselect=False,
                    value=SCORE_COLUMNS[0],
                )
            with gr.Row():
                log_scale = gr.Checkbox(
                    label="Display x-axis on logarithmic scale", value=False
                )
        with gr.Column():
            correlation_plot = gr.Plot(label="Correlation with binding affinity")

    def run_fake_prediction():
        """Run the fake prediction and correlation on the loaded table."""
        return fake_predict_and_correlate(
            spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
        )

    # No input component is wired to this event, so the callback must not
    # declare any positional parameter.
    fake_predict_btn.click(
        fn=run_fake_prediction,
        inputs=None,
        outputs=[prediction_dataframe, correlation_ranking_plot, correlation_plot],
    )

    def update_plot(score, use_log):
        """Redraw the correlation plot for the selected score and axis scale."""
        return make_correlation_plot(spr_data_with_scores, score, use_log)

    # Both controls trigger the same redraw with the same inputs.
    for control in (correlation_column, log_scale):
        control.change(
            fn=update_plot,
            inputs=[correlation_column, log_scale],
            outputs=correlation_plot,
        )
|
|
|
|
|
def __main__() -> None:
    """Build the Folding Studio demo UI and launch it.

    Creates a themed ``gr.Blocks`` app with an introduction, an API key
    textbox shared by the prediction tabs, and three tabs (simple prediction,
    model comparison, correlations), then calls ``demo.launch()``.
    """
    theme = gr.themes.Ocean(
        primary_hue="blue",
        secondary_hue="purple",
    )
    with gr.Blocks(theme=theme, title="Folding Studio Demo") as demo:
        # NOTE(review): the model list in this text names more models than
        # MODEL_CHOICES offers in the dropdowns — confirm which is intended.
        gr.Markdown(
            """
            # Folding Studio: Harness the Power of Protein Folding 🧬

            Folding Studio is a platform for protein structure prediction.
            It uses the latest AI-powered folding models to predict the structure of a protein.

            Available models are : AlphaFold2, OpenFold, SoloSeq, Boltz-1, Chai and Protenix.

            ## API Key
            To use the Folding Studio API, you need to provide an API key.
            You can get your API key by asking to the Folding Studio team.
            """
        )
        # The component itself (not its value) is handed to the tabs so they
        # can wire it as an event input.
        api_key = gr.Textbox(label="Folding Studio API Key", type="password")
        gr.Markdown("## Demo Usage")
        # NOTE(review): the leading "π" in the tab labels looks like a
        # mis-encoded emoji; the intended glyphs cannot be recovered from
        # this file — confirm and restore them.
        with gr.Tab("π Simple Prediction"):
            simple_prediction(api_key)
        with gr.Tab("π Model Comparison"):
            model_comparison(api_key)
        with gr.Tab("π Correlations"):
            create_correlation_tab()

    demo.launch()
|
|