Spaces:

InstaDeepAI
/

folding-studio-demo

Running

App Files Files Community

AchilleSoulieID committed on 3 days ago

Commit

3886d2a

1 Parent(s): 250a4a2

big update

Browse files

Files changed (7) hide show

folding-studio/folding_studio/api_call/predict/simple_predict.py +3 -2
folding-studio/folding_studio/commands/experiment.py +6 -2
folding-studio/folding_studio/utils/data_model.py +2 -2
folding-studio/folding_studio/utils/headers.py +4 -1
folding_studio_demo/app.py +89 -73
folding_studio_demo/models.py +31 -2
folding_studio_demo/predict.py +28 -29

folding-studio/folding_studio/api_call/predict/simple_predict.py CHANGED Viewed

@@ -23,6 +23,7 @@ def single_job_prediction(
     fasta_file: Path,
     parameters: AF2Parameters | OpenFoldParameters | None = None,
     project_code: str | None = None,
     *,
     ignore_cache: bool = False,
     **kwargs,
@@ -74,7 +75,7 @@ def single_job_prediction(
         if parameters.templates_masks_file
         else None,
     )
-    _ = custom_files.upload()
     params = parameters.model_dump(mode="json")
     pdb_ids, _ = partition_template_pdb_from_file(
@@ -107,7 +108,7 @@ def single_job_prediction(
     response = requests.post(
         url,
         data=params,
-        headers=get_auth_headers(),
         files=[("fasta_file", fasta_file.open("rb"))],
         params={"project_code": project_code},
         timeout=REQUEST_TIMEOUT,

     fasta_file: Path,
     parameters: AF2Parameters | OpenFoldParameters | None = None,
     project_code: str | None = None,
+    api_key: str | None = None,
     *,
     ignore_cache: bool = False,
     **kwargs,
         if parameters.templates_masks_file
         else None,
     )
+    _ = custom_files.upload(api_key=api_key)
     params = parameters.model_dump(mode="json")
     pdb_ids, _ = partition_template_pdb_from_file(
     response = requests.post(
         url,
         data=params,
+        headers=get_auth_headers(api_key),
         files=[("fasta_file", fasta_file.open("rb"))],
         params={"project_code": project_code},
         timeout=REQUEST_TIMEOUT,

folding-studio/folding_studio/commands/experiment.py CHANGED Viewed

@@ -35,6 +35,7 @@ def _download_file_from_signed_url(
     output: Path,
     force: bool,
     unzip: bool = False,
 ) -> None:
     """Download a zip file from an experiment id.
@@ -71,7 +72,7 @@ def _download_file_from_signed_url(
             )
             raise typer.Exit(code=1)
-    headers = get_auth_headers()
     url = API_URL + endpoint
     response = requests.get(
@@ -104,9 +105,10 @@ def _download_file_from_signed_url(
 @app.command()
 def status(
     exp_id: Annotated[str, experiment_ID_argument],
 ):
     """Get an experiment status."""
-    headers = get_auth_headers()
     url = API_URL + "getExperimentStatus"
     response = requests.get(
         url,
@@ -224,6 +226,7 @@ def features(
 @app.command()
 def results(
     exp_id: Annotated[str, experiment_ID_argument],
     output: Annotated[
         Optional[Path],
         typer.Option(
@@ -254,6 +257,7 @@ def results(
         output=output,
         force=force,
         unzip=unzip,
     )

     output: Path,
     force: bool,
     unzip: bool = False,
+    api_key: str | None = None,
 ) -> None:
     """Download a zip file from an experiment id.
             )
             raise typer.Exit(code=1)
+    headers = get_auth_headers(api_key)
     url = API_URL + endpoint
     response = requests.get(
 @app.command()
 def status(
     exp_id: Annotated[str, experiment_ID_argument],
+    api_key: Annotated[str, typer.Option("--api-key", "-k")],
 ):
     """Get an experiment status."""
+    headers = get_auth_headers(api_key)
     url = API_URL + "getExperimentStatus"
     response = requests.get(
         url,
 @app.command()
 def results(
     exp_id: Annotated[str, experiment_ID_argument],
+    api_key: Annotated[str, typer.Option("--api-key", "-k")],
     output: Annotated[
         Optional[Path],
         typer.Option(
         output=output,
         force=force,
         unzip=unzip,
+        api_key=api_key,
     )

folding-studio/folding_studio/utils/data_model.py CHANGED Viewed

@@ -207,7 +207,7 @@ class PredictRequestCustomFiles(BaseModel):
                 f"Unsupported file type {batch_jobs_file.suffix}: {batch_jobs_file}"
             )
-    def upload(self) -> None:
         """Upload local custom paths to GCP through an API request.
         Returns:
             A dict mapping local to uploaded files path.
@@ -218,7 +218,7 @@ class PredictRequestCustomFiles(BaseModel):
         local_to_uploaded = {}
-        headers = get_auth_headers()
         if len(self.templates) > 0:
             _, templates_to_upload = partition_template_pdb_from_file(
                 custom_templates=self.templates

                 f"Unsupported file type {batch_jobs_file.suffix}: {batch_jobs_file}"
             )
+    def upload(self, api_key: str | None = None) -> None:
         """Upload local custom paths to GCP through an API request.
         Returns:
             A dict mapping local to uploaded files path.
         local_to_uploaded = {}
+        headers = get_auth_headers(api_key)
         if len(self.templates) > 0:
             _, templates_to_upload = partition_template_pdb_from_file(
                 custom_templates=self.templates

folding-studio/folding_studio/utils/headers.py CHANGED Viewed

@@ -4,7 +4,7 @@ from folding_studio.config import FOLDING_API_KEY
 from folding_studio.utils.gcp import get_id_token
-def get_auth_headers() -> dict[str, str]:
     """
     Create authentication headers based on available credentials.
@@ -14,6 +14,9 @@ def get_auth_headers() -> dict[str, str]:
     Returns:
         dict: Authentication headers for API requests.
     """
     if FOLDING_API_KEY:
         return {"X-API-Key": FOLDING_API_KEY}

 from folding_studio.utils.gcp import get_id_token
+def get_auth_headers(api_key: str | None = None) -> dict[str, str]:
     """
     Create authentication headers based on available credentials.
     Returns:
         dict: Authentication headers for API requests.
     """
+    if api_key is not None:
+        return {"X-API-Key": api_key}
     if FOLDING_API_KEY:
         return {"X-API-Key": FOLDING_API_KEY}

folding_studio_demo/app.py CHANGED Viewed

@@ -4,7 +4,6 @@ import logging
 import gradio as gr
 import pandas as pd
-import plotly.graph_objects as go
 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
@@ -47,30 +46,12 @@ MODEL_CHOICES = [
     ("Protenix", FoldingModel.PROTENIX),
 ]
-DEFAULT_SEQ = "MALWMRLLPLLALLALWGPDPAAA"
-MODEL_EXAMPLES = {
-    FoldingModel.AF2: [
-        ["Monomer", f">A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.OPENFOLD: [
-        ["Monomer", f">A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A\n{DEFAULT_SEQ}\n>B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.SOLOSEQ: [["Monomer", f">A\n{DEFAULT_SEQ}"]],
-    FoldingModel.BOLTZ: [
-        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.CHAI: [
-        ["Monomer", f">protein|name=A\n{DEFAULT_SEQ}"],
-        ["Multimer", f">protein|name=A\n{DEFAULT_SEQ}\n>protein|name=B\n{DEFAULT_SEQ}"],
-    ],
-    FoldingModel.PROTENIX: [
-        ["Monomer", f">A|protein\n{DEFAULT_SEQ}"],
-        ["Multimer", f">A|protein\n{DEFAULT_SEQ}\n>B|protein\n{DEFAULT_SEQ}"],
-    ],
-}
 def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
@@ -79,31 +60,43 @@ def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
     Returns:
         gr.Textbox: Sequence input component
     """
-    with gr.Row(equal_height=True):
-        with gr.Column():
-            sequence = gr.Textbox(
-                label="Protein Sequence",
-                lines=2,
-                placeholder="Enter a protein sequence or upload a FASTA file",
-            )
-            dummy = gr.Textbox(label="Complex type", visible=False)
-            examples = gr.Examples(
-                examples=MODEL_EXAMPLES[FoldingModel.BOLTZ],
-                inputs=[dummy, sequence],
-            )
-        file_input = gr.File(
-            label="Upload a FASTA file",
-            file_types=[".fasta", ".fa"],
-            scale=0,
-        )
-    if dropdown is not None:
-        dropdown.change(
-            fn=lambda x: gr.Dataset(samples=MODEL_EXAMPLES[x]),
-            inputs=[dropdown],
-            outputs=[examples.dataset],
-        )
     def _process_file(file: gr.File | None) -> gr.Textbox:
         if file is None:
@@ -158,7 +151,7 @@ def simple_prediction(api_key: str) -> None:
         metrics_plot = gr.Plot(label="pLDDT")
     predict_btn.click(
-        fn=predict,
         inputs=[sequence, api_key, dropdown],
         outputs=[mol_output, metrics_plot],
     )
@@ -174,13 +167,12 @@ def model_comparison(api_key: str) -> None:
         """
         ## Compare Folding Models
-        Select multiple models to compare their predictions on your protein sequence.
-        You can either enter the sequence directly or upload a FASTA file.
-        The selected models will run in parallel and generate:
-        - 3D structures of your protein that you can visualize and compare
-        - pLDDT confidence scores plotted for each residue
         """
     )
     with gr.Row():
@@ -188,7 +180,7 @@ def model_comparison(api_key: str) -> None:
             label="Model",
             choices=MODEL_CHOICES,
             scale=0,
-            min_width=300,
             value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
         )
         with gr.Column():
@@ -201,12 +193,28 @@ def model_comparison(api_key: str) -> None:
         variant="primary",
     )
     with gr.Row():
-        af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
-        openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
-        solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
-        chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
-        protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
-        boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
     with gr.Row():
         mol_outputs = Molecule3D(
             label="Protein Structure", reps=MOLECULE_REPS, height=1000
@@ -306,7 +314,7 @@ def create_antibody_discovery_tab():
         "Antigen Sequence",
     ]
     # Display dataframe with floating point values rounded to 2 decimal places
-    spr_data = gr.DataFrame(
         value=spr_data_with_scores[columns].round(2),
         label="Experimental Antibody-Antigen Binding Affinity Data",
     )
@@ -350,7 +358,6 @@ def create_antibody_discovery_tab():
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row(visible=False) as regression_row:
         with gr.Column(scale=0):
             # User can select the columns to display in the correlation plot
             correlation_column = gr.Dropdown(
                 label="Score data to display",
@@ -375,7 +382,7 @@ def create_antibody_discovery_tab():
                 spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
             ),
             gr.Row(visible=True),
-            gr.Row(visible=True)
         ),
         inputs=[correlation_type],
         outputs=[
@@ -391,7 +398,9 @@ def create_antibody_discovery_tab():
         logger.info(f"Updating correlation plot for {correlation_type}")
         corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
         logger.info(f"Correlation data: {corr_data}")
-        corr_ranking_plot = plot_correlation_ranking(corr_data, correlation_type, kd_col="KD (nM)" if not use_log else "log_kd")
         regression_plot = make_regression_plot(spr_data_with_scores, score, use_log)
         return regression_plot, corr_ranking_plot
@@ -426,14 +435,21 @@ def __main__():
             Folding Studio is a platform for protein structure prediction.
             It uses the latest AI-powered folding models to predict the structure of a protein.
-            Available models are : AlphaFold2, OpenFold, SoloSeq, Boltz-1, Chai and Protenix.
-            ## API Key
-            To use the Folding Studio API, you need to provide an API key.
-            You can get your API key by asking to the Folding Studio team.
             """
         )
-        api_key = gr.Textbox(label="Folding Studio API Key", type="password")
         gr.Markdown("## Demo Usage")
         with gr.Tab("🚀 Basic Folding"):
             simple_prediction(api_key)

 import gradio as gr
 import pandas as pd
 from folding_studio_data_models import FoldingModel
 from gradio_molecule3d import Molecule3D
     ("Protenix", FoldingModel.PROTENIX),
 ]
+MONOMER_SEQ_EXAMPLE = ">A|protein\nMALWMRLLPLLALLALWGPDPAAA"
+MULTIMER_SEQ_EXAMPLE = ">A|protein\nSQIPASEQETLVRPKPLLLKLLKSVGAQKDTYTMKEVLFYLGQYIMTKRLYDAAQQHIVYCSNDLLGDLFGVPSFSVKEHRKIYTMIYRNLVVVNQQESSDSGTSVSEN\n>B|protein\nSQETFSDLWKLLPEN"
+EXAMPLES = [
+    ["Monomer", MONOMER_SEQ_EXAMPLE],
+    ["Multimer", MULTIMER_SEQ_EXAMPLE],
+]
 def sequence_input(dropdown: gr.Dropdown | None = None) -> gr.Textbox:
     Returns:
         gr.Textbox: Sequence input component
     """
+    with gr.Column():
+        with gr.Row():
+            with gr.Row():
+                with gr.Column():
+                    sequence = gr.Textbox(
+                        label="Protein Sequence",
+                        placeholder="Enter a protein sequence or upload a FASTA file",
+                        value=MONOMER_SEQ_EXAMPLE,
+                        lines=5,
+                    )
+                    gr.Markdown(
+                        "Select an example below, enter a sequence manually or upload a FASTA file."
+                    )
+                file_input = gr.File(
+                    label="Upload a FASTA file",
+                    file_types=[".fasta", ".fa"],
+                    scale=0,
+                    height=150,
+                )
+        with gr.Row(equal_height=True):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown("**Monomer Example:**")
+                    gr.Markdown("**Multimer Example:**")
+                with gr.Row():
+                    gr.Markdown("```\n" + MONOMER_SEQ_EXAMPLE + "\n```")
+                    gr.Markdown("```\n" + MULTIMER_SEQ_EXAMPLE + "\n```")
+                with gr.Row():
+                    gr.Button("Load Monomer Example", size="md").click(
+                        fn=lambda: MONOMER_SEQ_EXAMPLE,
+                        outputs=[sequence],
+                    )
+                    gr.Button("Load Multimer Example", size="md").click(
+                        fn=lambda: MULTIMER_SEQ_EXAMPLE, outputs=[sequence]
+                    )
     def _process_file(file: gr.File | None) -> gr.Textbox:
         if file is None:
         metrics_plot = gr.Plot(label="pLDDT")
     predict_btn.click(
+        fn=lambda x, y, z: predict(x, y, z, format_fasta=True),
         inputs=[sequence, api_key, dropdown],
         outputs=[mol_output, metrics_plot],
     )
         """
         ## Compare Folding Models
+        This tab allows you to compare predictions from multiple protein folding models side by side.
+        Follow these steps to get started:
+        1. **Select Models**: Choose one or more models from the list on the left
+        2. **Input Sequence** : Either select an example sequence, enter your protein sequence directly in the text box or upload a FASTA file.
+        3. **Run Comparison**: Click "Compare Models" to start the prediction
         """
     )
     with gr.Row():
             label="Model",
             choices=MODEL_CHOICES,
             scale=0,
+            min_width=150,
             value=[FoldingModel.BOLTZ, FoldingModel.CHAI, FoldingModel.PROTENIX],
         )
         with gr.Column():
         variant="primary",
     )
     with gr.Row():
+        with gr.Column():
+            gr.Markdown(
+                """
+                ### Understanding the Outputs:
+                - **3D Structure**: The molecular viewer shows the predicted protein structure
+                - **pLDDT Score**: A confidence score (0-100) for each residue:
+                    - Very high (>90): Highly accurate
+                    - Confident (70-90): Good accuracy
+                    - Low (50-70): Limited accuracy
+                    - Very low (<50): Poor accuracy
+                """
+            )
+            gr.Markdown(
+                "### Model Predictions\nUse the checkboxes to toggle which model predictions to compare:"
+            )
+            with gr.Row():
+                af2_predictions = gr.CheckboxGroup(label="AlphaFold2", visible=False)
+                openfold_predictions = gr.CheckboxGroup(label="OpenFold", visible=False)
+                solo_predictions = gr.CheckboxGroup(label="SoloSeq", visible=False)
+                chai_predictions = gr.CheckboxGroup(label="Chai", visible=False)
+                protenix_predictions = gr.CheckboxGroup(label="Protenix", visible=False)
+                boltz_predictions = gr.CheckboxGroup(label="Boltz", visible=False)
     with gr.Row():
         mol_outputs = Molecule3D(
             label="Protein Structure", reps=MOLECULE_REPS, height=1000
         "Antigen Sequence",
     ]
     # Display dataframe with floating point values rounded to 2 decimal places
+    gr.DataFrame(
         value=spr_data_with_scores[columns].round(2),
         label="Experimental Antibody-Antigen Binding Affinity Data",
     )
             correlation_ranking_plot = gr.Plot(label="Correlation ranking")
     with gr.Row(visible=False) as regression_row:
         with gr.Column(scale=0):
             # User can select the columns to display in the correlation plot
             correlation_column = gr.Dropdown(
                 label="Score data to display",
                 spr_data_with_scores, SCORE_COLUMNS, ["Antibody Name", "KD (nM)"]
             ),
             gr.Row(visible=True),
+            gr.Row(visible=True),
         ),
         inputs=[correlation_type],
         outputs=[
         logger.info(f"Updating correlation plot for {correlation_type}")
         corr_data = compute_correlation_data(spr_data_with_scores, SCORE_COLUMNS)
         logger.info(f"Correlation data: {corr_data}")
+        corr_ranking_plot = plot_correlation_ranking(
+            corr_data, correlation_type, kd_col="KD (nM)" if not use_log else "log_kd"
+        )
         regression_plot = make_regression_plot(spr_data_with_scores, score, use_log)
         return regression_plot, corr_ranking_plot
             Folding Studio is a platform for protein structure prediction.
             It uses the latest AI-powered folding models to predict the structure of a protein.
+            Available models are : AlphaFold2, OpenFold, Boltz-1, Chai and Protenix.
             """
         )
+        with gr.Accordion("API Key", open=False):
+            gr.Markdown(
+                """
+                To use the Folding Studio API, you need to provide an API key.
+                You can get your API key by asking to the Folding Studio team.
+                """
+            )
+            api_key = gr.Textbox(
+                placeholder="Enter your Folding Studio API key",
+                type="password",
+                show_label=False,
+            )
         gr.Markdown("## Demo Usage")
         with gr.Tab("🚀 Basic Folding"):
             simple_prediction(api_key)

folding_studio_demo/models.py CHANGED Viewed

@@ -9,6 +9,7 @@ from io import StringIO
 from pathlib import Path
 from typing import Any
 import gradio as gr
 import numpy as np
 from folding_studio import single_job_prediction
@@ -202,7 +203,33 @@ class ProtenixModel(AF3Model):
     def predictions(self, output_dir: Path) -> list[Path]:
         """Get the path to the prediction."""
-        return list(output_dir.rglob("*_model_[0-9].cif"))
 class BoltzModel(AF3Model):
@@ -259,12 +286,13 @@ class OldModel:
         output = single_job_prediction(
             fasta_file=seq_file,
             parameters=parameters,
         )
         experiment_id = output["message"]["experiment_id"]
         done = False
         while not done:
             with Capturing() as output:
-                get_status(experiment_id)
             status = output[0]
             logger.info(f"Experiment {experiment_id} status: {status}")
             if status == "Done":
@@ -275,6 +303,7 @@ class OldModel:
                     force=True,
                     unzip=True,
                     output=output_dir / "results.zip",
                 )
                 logger.info("Results downloaded to %s", output_dir)
             else:

 from pathlib import Path
 from typing import Any
+import folding_studio
 import gradio as gr
 import numpy as np
 from folding_studio import single_job_prediction
     def predictions(self, output_dir: Path) -> list[Path]:
         """Get the path to the prediction."""
+        prediction = next(output_dir.rglob("sequence_*_sample_[0-9].cif"), None)
+        if prediction is None:
+            return {}
+        cif_files = {
+            int(f.stem[-1]): f
+            for f in prediction.parent.glob("sequence_*_sample_[0-9].cif")
+        }
+        # Get all JSON summary-confidence files and extract their indices
+        json_files = {
+            int(f.stem[-1]): f
+            for f in prediction.parent.glob(
+                "sequence_*_summary_confidence_sample_[0-9].json"
+            )
+        }
+        # Find common indices and create pairs
+        common_indices = sorted(set(cif_files.keys()) & set(json_files.keys()))
+        return {
+            idx: {
+                "prediction_path": cif_files[idx],
+                "metrics": json.load(open(json_files[idx])),
+            }
+            for idx in common_indices
+        }
 class BoltzModel(AF3Model):
         output = single_job_prediction(
             fasta_file=seq_file,
             parameters=parameters,
+            api_key=self.api_key,
         )
         experiment_id = output["message"]["experiment_id"]
         done = False
         while not done:
             with Capturing() as output:
+                get_status(experiment_id, api_key=self.api_key)
             status = output[0]
             logger.info(f"Experiment {experiment_id} status: {status}")
             if status == "Done":
                     force=True,
                     unzip=True,
                     output=output_dir / "results.zip",
+                    api_key=self.api_key,
                 )
                 logger.info("Results downloaded to %s", output_dir)
             else:

folding_studio_demo/predict.py CHANGED Viewed

@@ -91,34 +91,32 @@ def convert_cif_to_pdb(cif_path: str, pdb_path: str) -> None:
 def create_plddt_figure(
-    plddt_vals: list[list[float]],
     model_name: str,
     indexes: list[int],
-    residue_codes: list[list[str]] = None,
 ) -> go.Figure:
     """Create a plot of metrics."""
     plddt_traces = []
-    for i, (plddt_val, index) in enumerate(zip(plddt_vals, indexes)):
-        # Create hover text with residue codes if available
-        if residue_codes and i < len(residue_codes):
-            hover_text = [
-                f"<i>{model_name} {index}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue:</i> {code} {idx}"
-                for idx, (plddt, code) in enumerate(zip(plddt_val, residue_codes[i]))
-            ]
-        else:
-            hover_text = [
-                f"<i>{model_name} {index}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue index:</i> {idx}"
-                for idx, plddt in enumerate(plddt_val)
             ]
         plddt_traces.append(
             go.Scatter(
-                x=np.arange(len(plddt_val)),
-                y=plddt_val,
                 hovertemplate="%{text}<extra></extra>",
                 text=hover_text,
-                name=f"{model_name} {index}",
                 visible=True,
             )
         )
@@ -160,7 +158,9 @@ def _write_fasta_file(
     return seq_id, seq_file
-def extract_plddt_from_structure(structure_path: str) -> tuple[list[float], list[str]]:
     """Extract pLDDT values and residue codes from a structure file.
     Args:
@@ -175,22 +175,24 @@ def extract_plddt_from_structure(structure_path: str) -> tuple[list[float], list
         structure = PDBParser().get_structure("structure", structure_path)
     # Lists to store pLDDT values and residue codes
-    plddt_values = []
-    residue_codes = []
     # Iterate through all atoms
     for model in structure:
         for chain in model:
             for residue in chain:
                 # Get the first atom of each residue (usually CA atom)
                 if "CA" in residue:
                     # The B-factor contains the pLDDT value
                     plddt = residue["CA"].get_bfactor()
-                    plddt_values.append(plddt)
                     # Get residue code and convert to one-letter code
-                    residue_codes.append(convert_to_one_letter(residue.get_resname()))
-    return plddt_values, residue_codes
 def predict(
@@ -253,7 +255,6 @@ def predict(
     predictions = model.predictions(output_dir)
     pdb_paths = []
     model_plddt_vals = []
-    model_residue_codes = []
     total_predictions = len(predictions)
     for i, (model_idx, prediction) in enumerate(predictions.items()):
@@ -270,9 +271,8 @@ def predict(
             pdb_paths.append(converted_pdb_path)
         else:
             pdb_paths.append(str(prediction_path))
-        plddt_vals, residue_codes = extract_plddt_from_structure(prediction_path)
         model_plddt_vals.append(plddt_vals)
-        model_residue_codes.append(residue_codes)
     progress(0.8, desc="Generating plots...")
     indexes = []
@@ -290,7 +290,6 @@ def predict(
         plddt_vals=model_plddt_vals,
         model_name=model.model_name,
         indexes=indexes,
-        residue_codes=model_residue_codes,
     )
     progress(1.0, desc="Done!")
@@ -434,9 +433,8 @@ def run_prediction(
     model_pdb_paths, model_plddt_traces = predict(
         sequence, api_key, model_type, format_fasta=format_fasta
     )
-    model_pdb_paths = sorted(model_pdb_paths)
     model_predictions = {}
-    for pdb_path, plddt_trace in zip(model_pdb_paths, model_plddt_traces.data):
         if model_type in [
             FoldingModel.AF2,
             FoldingModel.OPENFOLD,
@@ -446,7 +444,8 @@ def run_prediction(
         else:
             index = int(Path(pdb_path).stem[-1])
-        model_predictions[index] = {"pdb_path": pdb_path, "plddt_trace": plddt_trace}
     return model_predictions

 def create_plddt_figure(
+    plddt_vals: list[dict[str, dict[str, list[float]]]],
     model_name: str,
     indexes: list[int],
 ) -> go.Figure:
     """Create a plot of metrics."""
     plddt_traces = []
+    for i, (pred_plddt, index) in enumerate(zip(plddt_vals, indexes)):
+        hover_text = []
+        plddt_values = []
+        for chain_id, plddt_val in pred_plddt.items():
+            plddt_values += plddt_val["values"]
+            hover_text += [
+                f"<i>{model_name} {index} - Chain {chain_id}</i><br><i>pLDDT</i>: {plddt:.2f}<br><i>Residue:</i> {code} {idx}"
+                for idx, (plddt, code) in enumerate(
+                    zip(plddt_val["values"], plddt_val["residue_codes"])
+                )
             ]
         plddt_traces.append(
             go.Scatter(
+                x=np.arange(len(plddt_values)),
+                y=plddt_values,
                 hovertemplate="%{text}<extra></extra>",
                 text=hover_text,
+                name=f"{model_name} {index} - Chain {chain_id}",
                 visible=True,
             )
         )
     return seq_id, seq_file
+def extract_plddt_from_structure(
+    structure_path: str,
+) -> dict[str, dict[str, list[float]]]:
     """Extract pLDDT values and residue codes from a structure file.
     Args:
         structure = PDBParser().get_structure("structure", structure_path)
     # Lists to store pLDDT values and residue codes
+    plddt_values = {}
     # Iterate through all atoms
     for model in structure:
         for chain in model:
+            plddt_values[chain.id] = {"values": [], "residue_codes": []}
             for residue in chain:
                 # Get the first atom of each residue (usually CA atom)
                 if "CA" in residue:
                     # The B-factor contains the pLDDT value
                     plddt = residue["CA"].get_bfactor()
+                    plddt_values[chain.id]["values"].append(plddt)
                     # Get residue code and convert to one-letter code
+                    plddt_values[chain.id]["residue_codes"].append(
+                        convert_to_one_letter(residue.get_resname())
+                    )
+    return plddt_values
 def predict(
     predictions = model.predictions(output_dir)
     pdb_paths = []
     model_plddt_vals = []
     total_predictions = len(predictions)
     for i, (model_idx, prediction) in enumerate(predictions.items()):
             pdb_paths.append(converted_pdb_path)
         else:
             pdb_paths.append(str(prediction_path))
+        plddt_vals = extract_plddt_from_structure(prediction_path)
         model_plddt_vals.append(plddt_vals)
     progress(0.8, desc="Generating plots...")
     indexes = []
         plddt_vals=model_plddt_vals,
         model_name=model.model_name,
         indexes=indexes,
     )
     progress(1.0, desc="Done!")
     model_pdb_paths, model_plddt_traces = predict(
         sequence, api_key, model_type, format_fasta=format_fasta
     )
     model_predictions = {}
+    for pdb_path, plddt_traces in zip(model_pdb_paths, model_plddt_traces.data):
         if model_type in [
             FoldingModel.AF2,
             FoldingModel.OPENFOLD,
         else:
             index = int(Path(pdb_path).stem[-1])
+        model_predictions[index] = {"pdb_path": pdb_path, "plddt_trace": plddt_traces}
     return model_predictions