Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Files Community

chengzhang1006

AchilleSoulieID committed 2 days ago

Commit

01fba1c

verified ·

1 Parent(s): 250a4a2

add more information (#15)

Browse files

- big update (3886d2a205b5c68eca597261497e84da6981b4ec)
- tune down cursor (172390e01a5e86ccf7bea1f14fd8087f1b321c7b)
- remove chain id (11230ea6ecf54c076b67cad64a9901ae3b4bbdd5)

Co-authored-by: Achille Soulie <[email protected]>

Files changed (7) hide show

folding-studio/folding_studio/api_call/predict/simple_predict.py +3 -2
folding-studio/folding_studio/commands/experiment.py +6 -2
folding-studio/folding_studio/utils/data_model.py +2 -2
folding-studio/folding_studio/utils/headers.py +4 -1
folding_studio_demo/app.py +105 -86
folding_studio_demo/models.py +31 -2
folding_studio_demo/predict.py +27 -28

folding-studio/folding_studio/api_call/predict/simple_predict.py CHANGED Viewed

@@ -23,6 +23,7 @@ def single_job_prediction(
     fasta_file: Path,
     parameters: AF2Parameters | OpenFoldParameters | None = None,
     project_code: str | None = None,
     *,
     ignore_cache: bool = False,
     **kwargs,
@@ -74,7 +75,7 @@ def single_job_prediction(
         if parameters.templates_masks_file
         else None,
     )
-    _ = custom_files.upload()
     params = parameters.model_dump(mode="json")
     pdb_ids, _ = partition_template_pdb_from_file(
@@ -107,7 +108,7 @@ def single_job_prediction(
     response = requests.post(
         url,
         data=params,
-        headers=get_auth_headers(),
         files=[("fasta_file", fasta_file.open("rb"))],
         params={"project_code": project_code},
         timeout=REQUEST_TIMEOUT,

     fasta_file: Path,
     parameters: AF2Parameters | OpenFoldParameters | None = None,
     project_code: str | None = None,
+    api_key: str | None = None,
     *,
     ignore_cache: bool = False,
     **kwargs,
         if parameters.templates_masks_file
         else None,
     )
+    _ = custom_files.upload(api_key=api_key)
     params = parameters.model_dump(mode="json")
     pdb_ids, _ = partition_template_pdb_from_file(
     response = requests.post(
         url,
         data=params,
+        headers=get_auth_headers(api_key),
         files=[("fasta_file", fasta_file.open("rb"))],
         params={"project_code": project_code},
         timeout=REQUEST_TIMEOUT,

folding-studio/folding_studio/commands/experiment.py CHANGED Viewed

@@ -35,6 +35,7 @@ def _download_file_from_signed_url(
     output: Path,
     force: bool,
     unzip: bool = False,
 ) -> None:
     """Download a zip file from an experiment id.
@@ -71,7 +72,7 @@ def _download_file_from_signed_url(
             )
             raise typer.Exit(code=1)
-    headers = get_auth_headers()
     url = API_URL + endpoint
     response = requests.get(
@@ -104,9 +105,10 @@ def _download_file_from_signed_url(
 @app.command()
 def status(
     exp_id: Annotated[str, experiment_ID_argument],
 ):
     """Get an experiment status."""
-    headers = get_auth_headers()
     url = API_URL + "getExperimentStatus"
     response = requests.get(
         url,
@@ -224,6 +226,7 @@ def features(
 @app.command()
 def results(
     exp_id: Annotated[str, experiment_ID_argument],
     output: Annotated[
         Optional[Path],
         typer.Option(
@@ -254,6 +257,7 @@ def results(
         output=output,
         force=force,
         unzip=unzip,
     )

     output: Path,
     force: bool,
     unzip: bool = False,
+    api_key: str | None = None,
 ) -> None:
     """Download a zip file from an experiment id.
             )
             raise typer.Exit(code=1)
+    headers = get_auth_headers(api_key)
     url = API_URL + endpoint
     response = requests.get(
 @app.command()
 def status(
     exp_id: Annotated[str, experiment_ID_argument],
+    api_key: Annotated[str, typer.Option("--api-key", "-k")],
 ):
     """Get an experiment status."""
+    headers = get_auth_headers(api_key)
     url = API_URL + "getExperimentStatus"
     response = requests.get(
         url,
 @app.command()
 def results(
     exp_id: Annotated[str, experiment_ID_argument],
+    api_key: Annotated[str, typer.Option("--api-key", "-k")],
     output: Annotated[
         Optional[Path],
         typer.Option(
         output=output,
         force=force,
         unzip=unzip,
+        api_key=api_key,
     )

folding-studio/folding_studio/utils/data_model.py CHANGED Viewed

@@ -207,7 +207,7 @@ class PredictRequestCustomFiles(BaseModel):
                 f"Unsupported file type {batch_jobs_file.suffix}: {batch_jobs_file}"
             )
-    def upload(self) -> None:
         """Upload local custom paths to GCP through an API request.
         Returns:
             A dict mapping local to uploaded files path.
@@ -218,7 +218,7 @@ class PredictRequestCustomFiles(BaseModel):
         local_to_uploaded = {}
-        headers = get_auth_headers()
         if len(self.templates) > 0:
             _, templates_to_upload = partition_template_pdb_from_file(
                 custom_templates=self.templates

                 f"Unsupported file type {batch_jobs_file.suffix}: {batch_jobs_file}"
             )
+    def upload(self, api_key: str | None = None) -> None:
         """Upload local custom paths to GCP through an API request.
         Returns:
             A dict mapping local to uploaded files path.
         local_to_uploaded = {}
+        headers = get_auth_headers(api_key)
         if len(self.templates) > 0:
             _, templates_to_upload = partition_template_pdb_from_file(
                 custom_templates=self.templates

folding-studio/folding_studio/utils/headers.py CHANGED Viewed

@@ -4,7 +4,7 @@ from folding_studio.config import FOLDING_API_KEY
 from folding_studio.utils.gcp import get_id_token
-def get_auth_headers() -> dict[str, str]:
     """
     Create authentication headers based on available credentials.
@@ -14,6 +14,9 @@ def get_auth_headers() -> dict[str, str]:
     Returns:
         dict: Authentication headers for API requests.
     """
     if FOLDING_API_KEY:
         return {"X-API-Key": FOLDING_API_KEY}

 from folding_studio.utils.gcp import get_id_token
+def get_auth_headers(api_key: str | None = None) -> dict[str, str]:
     """
     Create authentication headers based on available credentials.
     Returns:
         dict: Authentication headers for API requests.
     """
+    if api_key is not None:
+        return {"X-API-Key": api_key}
     if FOLDING_API_KEY:
         return {"X-API-Key": FOLDING_API_KEY}

folding_studio_demo/app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import logging
 import gradio as gr
 import pandas as pd
-import plotly.graph_objects as go
 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
@@ -47,30 +46,12 @@ MODEL_CHOICES = [
     ("Protenix", FoldingModel.PROTENIX),
 ]
-DEFAULT_SEQ = "MALWMRLLPLLALLALWGPDPAAA"
-MODEL_EXAMPLES = {
-    FoldingModel.AF2: [
-        ["Monomer", f">A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.OPENFOLD: [
-        ["Monomer", f">A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.SOLOSEQ: [["Monomer", f">A\n{DEFAULT_SEQ}"]],
-    FoldingModel.BOLTZ: [
-        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.CHAI: [
-        ["Monomer", f">protein|name=A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">protein|name=A\n{DEFAULT_SEQ}\n>protein|name=B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.PROTENIX: [
-        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
-    ],
-}
 def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
@@ -79,31 +60,43 @@ def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
     Returns:
         gr.Textbox: Sequence input component
     """
-    with gr.Row(equal_height=True):
-        with gr.Column():
-            sequence = gr.Textbox(
-                label="Protein Sequence",
-                lines=2,
-                placeholder="Enter a protein sequence or upload a FASTA file",
-            )
-            dummy = gr.Textbox(label="Complex type", visible=False)
-            examples = gr.Examples(
-                examples=MODEL_EXAMPLES[FoldingModel.BOLTZ],
-                inputs=[dummy, sequence],
-            )
-        file_input = gr.File(
-            label="Upload a FASTA file",
-            file_types=[".fasta", ".fa"],
-            scale=0,
-        )
-    if dropdown is not None:
-        dropdown.change(
-            fn=lambda x: gr.Dataset(samples=MODEL_EXAMPLES[x]),
-            inputs=[dropdown],
-            outputs=[examples.dataset],
-        )
     def _process_file(file: gr.File | None) -> gr.Textbox:
         if file is None:
@@ -158,7 +151,7 @@ def simple_prediction(api_key: str) -> None:
         metrics_plot = gr.Plot(label="pLDDT")
     predict_btn.click(
-        fn=predict,
         inputs=[sequence, api_key, dropdown],
         outputs=[mol_output, metrics_plot],
     )
@@ -174,13 +167,12 @@ def model_comparison(api_key: str) -> None:
         """
         ## Compare Folding Models
-        Select multiple models to compare their predictions on your protein sequence.
-        You can either enter the sequence directly or upload a FASTA file.
-        The selected models will run in parallel and generate:
-        - 3D structures of your protein that you can visualize and compare
-        - pLDDT confidence scores plotted for each residue
         """
     )
     with gr.Row():
@@ -188,7 +180,7 @@ def model_comparison(api_key: str) -> None:
             label="Model",
             choices=MODEL_CHOICES,
             scale=0,
-            min_width=300,
             value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
         )
         with gr.Column():
@@ -201,12 +193,28 @@ def model_comparison(api_key: str) -> None:
         variant="primary",
     )
     with gr.Row():
-        af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
-        openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
-        solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
-        chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
-        protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
-        boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
     with gr.Row():
         mol_outputs = Molecule3D(
             label="Protein Structure", reps=MOLECULE_REPS, height=1000
@@ -267,26 +275,27 @@ def model_comparison(api_key: str) -> None:
 def create_antibody_discovery_tab():
-    gr.Markdown("# Accelerating Antibody Discovery: In-Silico and Experimental Insights")
     gr.Markdown("""
-        Hey there! 👋 Let's dive into how we're using AI to accelerate antibody drug discovery by looking at how protein folding models stack up against real lab data.
-        We've got this fascinating dataset that shows how well different antibodies stick to a specific target (we measure this as KD in nM). 🧪
         For each antibody-target pair, we've recorded:
-        - The antibody's light and heavy chain sequences (think of them as the antibody's building blocks) 🧬
-        - The target (antigen) sequence 🎯
-        - How strongly they bind together in the lab (the KD value, lower means stronger binding) 💪
-        Here's where it gets interesting! We take these sequences and feed them into protein folding models
         that predict their 3D structures. The models tell us how confident they are about their predictions.
         By comparing these confidence scores with our lab results, we can figure out which model scores
-        are actually good at predicting real binding strength! 🎯
-        Why is this exciting for drug discovery? 🚀 Once we know which computational scores to trust,
         we can use them to quickly check thousands of potential antibodies without having to test each one
-        in the lab. It's like having a high-speed screening tool! We can then focus our lab work on testing
-        just the most promising candidates. This means we can find effective antibody drugs much faster than
-        before! 🔬✨
     """)
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
     spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
@@ -306,7 +315,7 @@ def create_antibody_discovery_tab():
         "Antigen Sequence",
     ]
     # Display dataframe with floating point values rounded to 2 decimal places
-    spr_data = gr.DataFrame(
         value=spr_data_with_scores[columns].round(2),
         label="Experimental Antibody-Antigen Binding Affinity Data",
     )
@@ -315,7 +324,9 @@ def create_antibody_discovery_tab():
     with gr.Row():
         with gr.Column(min_width=150):
-            gr.Markdown("Now, let's see how well the protein folding models can predict the binding affinity of these antibodies to the target antigen.")
         with gr.Column(min_width=150):
             fake_predict_btn = gr.Button(
                 "Predict structures of all complexes",
@@ -350,7 +361,6 @@ def create_antibody_discovery_tab():
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row(visible=False) as regression_row:
         with gr.Column(scale=0):
             # User can select the columns to display in the correlation plot
             correlation_column = gr.Dropdown(
                 label="Score data to display",
@@ -375,7 +385,7 @@ def create_antibody_discovery_tab():
                 spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
             ),
             gr.Row(visible=True),
-            gr.Row(visible=True)
         ),
         inputs=[correlation_type],
         outputs=[
@@ -391,7 +401,9 @@ def create_antibody_discovery_tab():
         logger.info(f"Updating correlation plot for {correlation_type}")
         corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
         logger.info(f"Correlation data: {corr_data}")
-        corr_ranking_plot = plot_correlation_ranking(corr_data, correlation_type, kd_col="KD (nM)" if not use_log else "log_kd")
         regression_plot = make_regression_plot(spr_data_with_scores, score, use_log)
         return regression_plot, corr_ranking_plot
@@ -426,14 +438,21 @@ def __main__():
             Folding Studio is a platform for protein structure prediction.
             It uses the latest AI-powered folding models to predict the structure of a protein.
-            Available models are : AlphaFold2, OpenFold, SoloSeq, Boltz-1, Chai and Protenix.
-            ## API Key
-            To use the Folding Studio API, you need to provide an API key.
-            You can get your API key by asking to the Folding Studio team.
             """
         )
-        api_key = gr.Textbox(label="Folding Studio API Key", type="password")
         gr.Markdown("## Demo Usage")
         with gr.Tab("🚀 Basic Folding"):
             simple_prediction(api_key)

 import gradio as gr
 import pandas as pd
 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
     ("Protenix", FoldingModel.PROTENIX),
 ]
+MONOMER_SEQ_EXAMPLE = ">A|protein\nMALWMRLLPLLALLALWGPDPAAA"
+MULTIMER_SEQ_EXAMPLE = ">A|protein\nSQIPASEQETLVRPKPLLLKLLKSVGAQKDTYTMKEVLFYLGQYIMTKRLYDAAQQHIVYCSNDLLGDLFGVPSFSVKEHRKIYTMIYRNLVVVNQQESSDSGTSVSEN\n>B|protein\nSQETFSDLWKLLPEN"
+EXAMPLES = [
+    ["Monomer", MONOMER_SEQ_EXAMPLE],
+    ["Multimer", MULTIMER_SEQ_EXAMPLE],
+]
 def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
     Returns:
         gr.Textbox: Sequence input component
     """
+    with gr.Column():
+        with gr.Row():
+            with gr.Row():
+                with gr.Column():
+                    sequence = gr.Textbox(
+                        label="Protein Sequence",
+                        placeholder="Enter a protein sequence or upload a FASTA file",
+                        value=MONOMER_SEQ_EXAMPLE,
+                        lines=5,
+                    )
+                    gr.Markdown(
+                        "Select an example below, enter a sequence manually or upload a FASTA file."
+                    )
+                file_input = gr.File(
+                    label="Upload a FASTA file",
+                    file_types=[".fasta", ".fa"],
+                    scale=0,
+                    height=150,
+                )
+        with gr.Row(equal_height=True):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown("**Monomer Example:**")
+                    gr.Markdown("**Multimer Example:**")
+                with gr.Row():
+                    gr.Markdown("```\n" + MONOMER_SEQ_EXAMPLE + "\n```")
+                    gr.Markdown("```\n" + MULTIMER_SEQ_EXAMPLE + "\n```")
+                with gr.Row():
+                    gr.Button("Load Monomer Example", size="md").click(
+                        fn=lambda: MONOMER_SEQ_EXAMPLE,
+                        outputs=[sequence],
+                    )
+                    gr.Button("Load Multimer Example", size="md").click(
+                        fn=lambda: MULTIMER_SEQ_EXAMPLE, outputs=[sequence]
+                    )
     def _process_file(file: gr.File | None) -> gr.Textbox:
         if file is None:
         metrics_plot = gr.Plot(label="pLDDT")
     predict_btn.click(
+        fn=lambda x, y, z: predict(x, y, z, format_fasta=True),
         inputs=[sequence, api_key, dropdown],
         outputs=[mol_output, metrics_plot],
     )
         """
         ## Compare Folding Models
+        This tab allows you to compare predictions from multiple protein folding models side by side.
+        Follow these steps to get started:
+        1. **Select Models**: Choose one or more models from the list on the left
+        2. **Input Sequence** : Either select an example sequence, enter your protein sequence directly in the text box or upload a FASTA file.
+        3. **Run Comparison**: Click "Compare Models" to start the prediction
         """
     )
     with gr.Row():
             label="Model",
             choices=MODEL_CHOICES,
             scale=0,
+            min_width=150,
             value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
         )
         with gr.Column():
         variant="primary",
     )
     with gr.Row():
+        with gr.Column():
+            gr.Markdown(
+                """
+                ### Understanding the Outputs:
+                - **3D Structure**: The molecular viewer shows the predicted protein structure
+                - **pLDDT Score**: A confidence score (0-100) for each residue:
+                    - Very high (>90): Highly accurate
+                    - Confident (70-90): Good accuracy
+                    - Low (50-70): Limited accuracy
+                    - Very low (<50): Poor accuracy
+                """
+            )
+            gr.Markdown(
+                "### Model Predictions\nUse the checkboxes to toggle which model predictions to compare:"
+            )
+            with gr.Row():
+                af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
+                openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
+                solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
+                chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
+                protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
+                boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
     with gr.Row():
         mol_outputs = Molecule3D(
             label="Protein Structure", reps=MOLECULE_REPS, height=1000
 def create_antibody_discovery_tab():
+    gr.Markdown(
+        "# Accelerating Antibody Discovery: In-Silico and Experimental Insights"
+    )
     gr.Markdown("""
+        Let's dive into how we're using AI to accelerate antibody drug discovery by looking at how protein folding models stack up against real lab data.
+        We've got this dataset that shows how well different antibodies stick to a specific target (we measure this as KD in nM).
         For each antibody-target pair, we've recorded:
+        - The antibody's light and heavy chain sequences (think of them as the antibody's building blocks)
+        - The target (antigen) sequence
+        - How strongly they bind together in the lab (the KD value, lower means stronger binding)
+        Why is it interesting? We take these sequences and feed them into protein folding models
         that predict their 3D structures. The models tell us how confident they are about their predictions.
         By comparing these confidence scores with our lab results, we can figure out which model scores
+        are actually good at predicting real binding strength!
+        Why is this useful for drug discovery? Once we know which computational scores to trust,
         we can use them to quickly check thousands of potential antibodies without having to test each one
+        in the lab. We can then focus our lab work on testing just the most promising candidates.
+        This means we can find effective antibody drugs much faster than before!
     """)
     spr_data_with_scores = pd.read_csv("spr_af_scores_mapped.csv")
     spr_data_with_scores = spr_data_with_scores.rename(columns=SCORE_COLUMN_NAMES)
         "Antigen Sequence",
     ]
     # Display dataframe with floating point values rounded to 2 decimal places
+    gr.DataFrame(
         value=spr_data_with_scores[columns].round(2),
         label="Experimental Antibody-Antigen Binding Affinity Data",
     )
     with gr.Row():
         with gr.Column(min_width=150):
+            gr.Markdown(
+                "Now, let's see how well the protein folding models can predict the binding affinity of these antibodies to the target antigen."
+            )
         with gr.Column(min_width=150):
             fake_predict_btn = gr.Button(
                 "Predict structures of all complexes",
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row(visible=False) as regression_row:
         with gr.Column(scale=0):
             # User can select the columns to display in the correlation plot
             correlation_column = gr.Dropdown(
                 label="Score data to display",
                 spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
             ),
             gr.Row(visible=True),
+            gr.Row(visible=True),
         ),
         inputs=[correlation_type],
         outputs=[
         logger.info(f"Updating correlation plot for {correlation_type}")
         corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
         logger.info(f"Correlation data: {corr_data}")
+        corr_ranking_plot = plot_correlation_ranking(
+            corr_data, correlation_type, kd_col="KD (nM)" if not use_log else "log_kd"
+        )
         regression_plot = make_regression_plot(spr_data_with_scores, score, use_log)
         return regression_plot, corr_ranking_plot
             Folding Studio is a platform for protein structure prediction.
             It uses the latest AI-powered folding models to predict the structure of a protein.
+            Available models are : AlphaFold2, OpenFold, Boltz-1, Chai and Protenix.
             """
         )
+        with gr.Accordion("API Key", open=False):
+            gr.Markdown(
+                """
+                To use the Folding Studio API, you need to provide an API key.
+                You can get your API key by asking to the Folding Studio team.
+                """
+            )
+            api_key = gr.Textbox(
+                placeholder="Enter your Folding Studio API key",
+                type="password",
+                show_label=False,
+            )
         gr.Markdown("## Demo Usage")
         with gr.Tab("🚀 Basic Folding"):
             simple_prediction(api_key)

folding_studio_demo/models.py CHANGED Viewed

@@ -9,6 +9,7 @@ from io import StringIO
 from pathlib import Path
 from typing import Any
 import gradio as gr
 import numpy as np
 from folding_studio import single_job_prediction
@@ -202,7 +203,33 @@ class ProtenixModel(AF3Model):
     def predictions(self, output_dir: Path) -> list[Path]:
         """Get the path to the prediction."""
-        return list(output_dir.rglob("*_model_[0-9].cif"))
 class BoltzModel(AF3Model):
@@ -259,12 +286,13 @@ class OldModel:
         output = single_job_prediction(
             fasta_file=seq_file,
             parameters=parameters,
         )
         experiment_id = output["message"]["experiment_id"]
         done = False
         while not done:
             with Capturing() as output:
-                get_status(experiment_id)
             status = output[0]
             logger.info(f"Experiment {experiment_id} status: {status}")
             if status == "Done":
@@ -275,6 +303,7 @@ class OldModel:
                     force=True,
                     unzip=True,
                     output=output_dir / "results.zip",
                 )
                 logger.info("Results downloaded to %s", output_dir)
             else:

 from pathlib import Path
 from typing import Any
+import folding_studio
 import gradio as gr
 import numpy as np
 from folding_studio import single_job_prediction
     def predictions(self, output_dir: Path) -> list[Path]:
         """Get the path to the prediction."""
+        prediction = next(output_dir.rglob("sequence_*_sample_[0-9].cif"), None)
+        if prediction is None:
+            return {}
+        cif_files = {
+            int(f.stem[-1]): f
+            for f in prediction.parent.glob("sequence_*_sample_[0-9].cif")
+        }
+        # Get all summary-confidence JSON files and extract their sample indices
+        json_files = {
+            int(f.stem[-1]): f
+            for f in prediction.parent.glob(
+                "sequence_*_summary_confidence_sample_[0-9].json"
+            )
+        }
+        # Find common indices and create pairs
+        common_indices = sorted(set(cif_files.keys()) & set(json_files.keys()))
+        return {
+            idx: {
+                "prediction_path": cif_files[idx],
+                "metrics": json.load(open(json_files[idx])),
+            }
+            for idx in common_indices
+        }
 class BoltzModel(AF3Model):
         output = single_job_prediction(
             fasta_file=seq_file,
             parameters=parameters,
+            api_key=self.api_key,
         )
         experiment_id = output["message"]["experiment_id"]
         done = False
         while not done:
             with Capturing() as output:
+                get_status(experiment_id, api_key=self.api_key)
             status = output[0]
             logger.info(f"Experiment {experiment_id} status: {status}")
             if status == "Done":
                     force=True,
                     unzip=True,
                     output=output_dir / "results.zip",
+                    api_key=self.api_key,
                 )
                 logger.info("Results downloaded to %s", output_dir)
             else:

folding_studio_demo/predict.py CHANGED Viewed

@@ -91,31 +91,29 @@ def convert_cif_to_pdb(cif_path: str, pdb_path: str) -> None:
 def create_plddt_figure(
-    plddt_vals: list[list[float]],
     model_name: str,
     indexes: list[int],
-    residue_codes: list[list[str]] = None,
 ) -> go.Figure:
     """Create a plot of metrics."""
     plddt_traces = []
-    for i, (plddt_val, index) in enumerate(zip(plddt_vals, indexes)):
-        # Create hover text with residue codes if available
-        if residue_codes and i < len(residue_codes):
-            hover_text = [
-                f"<i>{model_name} {index}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue:</i> {code} {idx}"
-                for idx, (plddt, code) in enumerate(zip(plddt_val, residue_codes[i]))
-            ]
-        else:
-            hover_text = [
-                f"<i>{model_name} {index}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue index:</i> {idx}"
-                for idx, plddt in enumerate(plddt_val)
             ]
         plddt_traces.append(
             go.Scatter(
-                x=np.arange(len(plddt_val)),
-                y=plddt_val,
                 hovertemplate="%{text}<extra></extra>",
                 text=hover_text,
                 name=f"{model_name} {index}",
@@ -160,7 +158,9 @@ def _write_fasta_file(
     return seq_id, seq_file
-def extract_plddt_from_structure(structure_path: str) -> tuple[list[float], list[str]]:
     """Extract pLDDT values and residue codes from a structure file.
     Args:
@@ -175,22 +175,24 @@ def extract_plddt_from_structure(structure_path: str) -> tuple[list[float], list
         structure = PDBParser().get_structure("structure", structure_path)
     # Lists to store pLDDT values and residue codes
-    plddt_values = []
-    residue_codes = []
     # Iterate through all atoms
     for model in structure:
         for chain in model:
             for residue in chain:
                 # Get the first atom of each residue (usually CA atom)
                 if "CA" in residue:
                     # The B-factor contains the pLDDT value
                     plddt = residue["CA"].get_bfactor()
-                    plddt_values.append(plddt)
                     # Get residue code and convert to one-letter code
-                    residue_codes.append(convert_to_one_letter(residue.get_resname()))
-    return plddt_values, residue_codes
 def predict(
@@ -253,7 +255,6 @@ def predict(
     predictions = model.predictions(output_dir)
     pdb_paths = []
     model_plddt_vals = []
-    model_residue_codes = []
     total_predictions = len(predictions)
     for i, (model_idx, prediction) in enumerate(predictions.items()):
@@ -270,9 +271,8 @@ def predict(
             pdb_paths.append(converted_pdb_path)
         else:
             pdb_paths.append(str(prediction_path))
-        plddt_vals, residue_codes = extract_plddt_from_structure(prediction_path)
         model_plddt_vals.append(plddt_vals)
-        model_residue_codes.append(residue_codes)
     progress(0.8, desc="Generating plots...")
     indexes = []
@@ -290,7 +290,6 @@ def predict(
         plddt_vals=model_plddt_vals,
         model_name=model.model_name,
         indexes=indexes,
-        residue_codes=model_residue_codes,
     )
     progress(1.0, desc="Done!")
@@ -434,9 +433,8 @@ def run_prediction(
     model_pdb_paths, model_plddt_traces = predict(
         sequence, api_key, model_type, format_fasta=format_fasta
     )
-    model_pdb_paths = sorted(model_pdb_paths)
     model_predictions = {}
-    for pdb_path, plddt_trace in zip(model_pdb_paths, model_plddt_traces.data):
         if model_type in [
             FoldingModel.AF2,
             FoldingModel.OPENFOLD,
@@ -446,7 +444,8 @@ def run_prediction(
         else:
             index = int(Path(pdb_path).stem[-1])
-        model_predictions[index] = {"pdb_path": pdb_path, "plddt_trace": plddt_trace}
     return model_predictions

 def create_plddt_figure(
+    plddt_vals: list[dict[str, dict[str, list[float]]]],
     model_name: str,
     indexes: list[int],
 ) -> go.Figure:
     """Create a plot of metrics."""
     plddt_traces = []
+    for i, (pred_plddt, index) in enumerate(zip(plddt_vals, indexes)):
+        hover_text = []
+        plddt_values = []
+        for chain_id, plddt_val in pred_plddt.items():
+            plddt_values += plddt_val["values"]
+            hover_text += [
+                f"<i>{model_name} {index} - Chain {chain_id}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue:</i> {code} {idx}"
+                for idx, (plddt, code) in enumerate(
+                    zip(plddt_val["values"], plddt_val["residue_codes"])
+                )
             ]
         plddt_traces.append(
             go.Scatter(
+                x=np.arange(len(plddt_values)),
+                y=plddt_values,
                 hovertemplate="%{text}<extra></extra>",
                 text=hover_text,
                 name=f"{model_name} {index}",
     return seq_id, seq_file
+def extract_plddt_from_structure(
+    structure_path: str,
+) -> dict[str, dict[str, list[float]]]:
     """Extract pLDDT values and residue codes from a structure file.
     Args:
         structure = PDBParser().get_structure("structure", structure_path)
     # Per-chain mapping of pLDDT values and residue codes, keyed by chain id
+    plddt_values = {}
     # Iterate through all atoms
     for model in structure:
         for chain in model:
+            plddt_values[chain.id] = {"values": [], "residue_codes": []}
             for residue in chain:
                 # Get the first atom of each residue (usually CA atom)
                 if "CA" in residue:
                     # The B-factor contains the pLDDT value
                     plddt = residue["CA"].get_bfactor()
+                    plddt_values[chain.id]["values"].append(plddt)
                     # Get residue code and convert to one-letter code
+                    plddt_values[chain.id]["residue_codes"].append(
+                        convert_to_one_letter(residue.get_resname())
+                    )
+    return plddt_values
 def predict(
     predictions = model.predictions(output_dir)
     pdb_paths = []
     model_plddt_vals = []
     total_predictions = len(predictions)
     for i, (model_idx, prediction) in enumerate(predictions.items()):
             pdb_paths.append(converted_pdb_path)
         else:
             pdb_paths.append(str(prediction_path))
+        plddt_vals = extract_plddt_from_structure(prediction_path)
         model_plddt_vals.append(plddt_vals)
     progress(0.8, desc="Generating plots...")
     indexes = []
         plddt_vals=model_plddt_vals,
         model_name=model.model_name,
         indexes=indexes,
     )
     progress(1.0, desc="Done!")
     model_pdb_paths, model_plddt_traces = predict(
         sequence, api_key, model_type, format_fasta=format_fasta
     )
     model_predictions = {}
+    for pdb_path, plddt_traces in zip(model_pdb_paths, model_plddt_traces.data):
         if model_type in [
             FoldingModel.AF2,
             FoldingModel.OPENFOLD,
         else:
             index = int(Path(pdb_path).stem[-1])
+        model_predictions[index] = {"pdb_path": pdb_path, "plddt_trace": plddt_traces}
     return model_predictions