add dockerfile and folding studio cli
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Dockerfile +32 -0
- README.md +1 -2
- folding-studio/CONTRIBUTING.md +8 -0
- folding-studio/README.md +0 -0
- folding-studio/docs/app.yaml +24 -0
- folding-studio/docs/docs/css/main.css +8 -0
- folding-studio/docs/docs/explanation/advanced_algorithms.md +100 -0
- folding-studio/docs/docs/explanation/index.md +4 -0
- folding-studio/docs/docs/explanation/supported_models.md +125 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/gap_trick_job.md +21 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/initial_guess_af2.md +150 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/msa_subsampling_job.md +37 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/template_masking_job.md +549 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/batch_job_from_configuration_file.md +204 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/batch_job_from_directory.md +45 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/cancel_experiment.md +31 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/download_logs.md +19 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/download_prediction_results.md +55 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/fetch_folding_job_status.md +74 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/get_experiment_features.md +60 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/provide_input_data.md +32 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/set_af_folding_parameters.md +407 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/single_af2_job.md +27 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/single_openfold_job.md +27 -0
- folding-studio/docs/docs/how-to-guides/af2_openfold/soloseq_job.md +104 -0
- folding-studio/docs/docs/how-to-guides/af3/batch_job_from_directory.md +40 -0
- folding-studio/docs/docs/how-to-guides/af3/boltz_single_yaml_job.md +38 -0
- folding-studio/docs/docs/how-to-guides/af3/provide_input_data.md +39 -0
- folding-studio/docs/docs/how-to-guides/af3/single_job_boltz.md +72 -0
- folding-studio/docs/docs/how-to-guides/af3/single_job_chai.md +66 -0
- folding-studio/docs/docs/how-to-guides/af3/single_job_from_protein_sequence.md +32 -0
- folding-studio/docs/docs/how-to-guides/af3/single_job_protenix.md +65 -0
- folding-studio/docs/docs/how-to-guides/index.md +54 -0
- folding-studio/docs/docs/how-to-guides/msa_search/download_msa_logs.md +19 -0
- folding-studio/docs/docs/how-to-guides/msa_search/download_msa_search_results.md +53 -0
- folding-studio/docs/docs/how-to-guides/msa_search/fetch_msa_job_status.md +60 -0
- folding-studio/docs/docs/how-to-guides/msa_search/msa_no_cache.md +19 -0
- folding-studio/docs/docs/how-to-guides/msa_search/msa_search_mmseqs2.md +29 -0
- folding-studio/docs/docs/how-to-guides/msa_search/provide_input_data.md +32 -0
- folding-studio/docs/docs/how-to-guides/other/pLDDT_pAE_calculation.md +178 -0
- folding-studio/docs/docs/index.md +82 -0
- folding-studio/docs/docs/reference/cli.md +435 -0
- folding-studio/docs/docs/reference/python_lib_docs.md +719 -0
- folding-studio/docs/docs/tutorials/index.md +9 -0
- folding-studio/docs/docs/tutorials/installation.md +61 -0
- folding-studio/docs/docs/tutorials/msa_search.md +115 -0
- folding-studio/docs/docs/tutorials/single_folding_job_af2.md +154 -0
- folding-studio/docs/docs/tutorials/single_folding_job_af3.md +111 -0
- folding-studio/docs/generate_cli_docs.py +209 -0
- folding-studio/docs/mkdocs.yml +116 -0
Dockerfile
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UV_VERSION="0.6.14"
|
2 |
+
|
3 |
+
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-fetcher
|
4 |
+
|
5 |
+
FROM python:3.11-slim AS runtime
|
6 |
+
|
7 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
8 |
+
wget build-essential git && \
|
9 |
+
rm -rf /var/lib/apt/lists/*
|
10 |
+
|
11 |
+
COPY --from=uv-fetcher /uv /uvx /usr/local/bin/
|
12 |
+
|
13 |
+
ENV VIRTUAL_ENV=/modules/.venv
|
14 |
+
RUN uv venv "$VIRTUAL_ENV" && . "$VIRTUAL_ENV/bin/activate"
|
15 |
+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
|
16 |
+
|
17 |
+
# Set the working directory to the application directory
|
18 |
+
WORKDIR /app
|
19 |
+
|
20 |
+
COPY pyproject.toml uv.lock /app/
|
21 |
+
|
22 |
+
RUN uv sync --frozen --active --directory /app --inexact
|
23 |
+
|
24 |
+
COPY folding-studio /app/folding-studio
|
25 |
+
RUN cd /app/folding-studio && uv pip install -e .
|
26 |
+
|
27 |
+
COPY app.py /app/app.py
|
28 |
+
|
29 |
+
EXPOSE 7860
|
30 |
+
ENV GRADIO_SERVER_NAME="0.0.0.0"
|
31 |
+
|
32 |
+
CMD ["python3", "app.py"]
|
README.md
CHANGED
@@ -3,8 +3,7 @@ title: Fs
|
|
3 |
emoji: 🚀
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
-
sdk:
|
7 |
-
sdk_version: 5.30.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
short_description: folding studio test
|
|
|
3 |
emoji: 🚀
|
4 |
colorFrom: indigo
|
5 |
colorTo: red
|
6 |
+
sdk: docker
|
|
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
short_description: folding studio test
|
folding-studio/CONTRIBUTING.md
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Contributing to the Documentation
|
2 |
+
|
3 |
+
## Serve the Documentation Locally
|
4 |
+
```bash
|
5 |
+
poetry install --with docs
|
6 |
+
poetry run mkdocs serve
|
7 |
+
```
|
8 |
+
This will start a local server at http://127.0.0.1:8000.
|
folding-studio/README.md
ADDED
File without changes
|
folding-studio/docs/app.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This is the configuration file for the Google App Engine instance.
|
2 |
+
# GAE is capable of hosting any sort of web app,
|
3 |
+
# however, here we focus on the static website hosting capabilities.
|
4 |
+
# https://cloud.google.com/appengine/docs/standard/hosting-a-static-website
|
5 |
+
runtime: python39
|
6 |
+
|
7 |
+
handlers:
|
8 |
+
# static files with a URL ending with a file extension
|
9 |
+
# (e.g. favicon.ico, manifest.json, jylade.png)
|
10 |
+
- url: /(.*\..+)$
|
11 |
+
static_files: site/\1
|
12 |
+
upload: site/(.*\..+)$
|
13 |
+
# index page
|
14 |
+
- url: /
|
15 |
+
static_files: site/index.html
|
16 |
+
upload: site/index.html
|
17 |
+
# anything that ends with a slash (e.g. /docs/)
|
18 |
+
- url: /(.*)/$
|
19 |
+
static_files: site/\1/index.html
|
20 |
+
upload: site/(.*)
|
21 |
+
# anything else (e.g. /docs)
|
22 |
+
- url: /(.*)
|
23 |
+
static_files: site/\1/index.html
|
24 |
+
upload: site/(.*)
|
folding-studio/docs/docs/css/main.css
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.md-sidebar__inner .md-nav__title {
|
2 |
+
display: none !important;
|
3 |
+
}
|
4 |
+
|
5 |
+
/* Hide the copy button for .no-copy blocks */
|
6 |
+
.no-copy .md-clipboard {
|
7 |
+
display: none;
|
8 |
+
}
|
folding-studio/docs/docs/explanation/advanced_algorithms.md
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Gap trick
|
2 |
+
|
3 |
+
Gap Trick allows folding multimer complexes using the AlphaFold2/OpenFold **monomer**
|
4 |
+
models.
|
5 |
+
|
6 |
+
It is only available when using custom templates. Additionally, the provided
|
7 |
+
templates **MUST** exclusively include chains that precisely correspond to the
|
8 |
+
query sequences, no more, no less, and in the same order.
|
9 |
+
|
10 |
+
## Initial guess
|
11 |
+
|
12 |
+
The initial guess algorithm allows using a pre-defined structure in the first
|
13 |
+
recycling stage of the AlphaFold2 forward pass. The original algorithm is
|
14 |
+
described in [Bennett et al.](https://www.nature.com/articles/s41467-023-38328-5)
|
15 |
+
and can be found in their [GitHub repo](https://github.com/nrbennet/dl_binder_design/blob/main/af2_initial_guess/).
|
16 |
+
|
17 |
+
## MSA subsampling
|
18 |
+
|
19 |
+
MSA subsampling allows changing the default number of MSA representations to
|
20 |
+
give as input to the model.
|
21 |
+
|
22 |
+
This feature is only supported for monomer models.
|
23 |
+
|
24 |
+
The impact of MSA subsampling has been studied by [D. Del Alamo et al.](https://elifesciences.org/articles/75751).
|
25 |
+
|
26 |
+
!!! quote
|
27 |
+
"Whereas models of most proteins generated using the default AF2
|
28 |
+
pipeline are conformationally homogeneous and nearly identical to one
|
29 |
+
another, reducing the depth of the input multiple sequence alignments by
|
30 |
+
stochastic subsampling led to the generation of accurate models in multiple
|
31 |
+
conformations." - D. Del Alamo et al.
|
32 |
+
|
33 |
+
## Template masking
|
34 |
+
|
35 |
+
Template Masking enables the creation of template features by masking regions
|
36 |
+
of the structures considered less important for resolving the multimer
|
37 |
+
interface.
|
38 |
+
|
39 |
+
This method is exclusively available in Gap Trick mode. Therefore
|
40 |
+
the same constraints on the input template structures apply, i.e. they
|
41 |
+
**MUST** exclusively include chains that precisely correspond to the query
|
42 |
+
sequences, no more, no less, and in the same order.
|
43 |
+
|
44 |
+
Consider an input `FASTA` file containing 3 chains of a multimer complex, for
|
45 |
+
example an antigen chain and two antibody chains:
|
46 |
+
|
47 |
+
```
|
48 |
+
>Antigen
|
49 |
+
VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK
|
50 |
+
>Antibody | Chain 1
|
51 |
+
DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIK
|
52 |
+
>Antibody | Chain 2
|
53 |
+
QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSS
|
54 |
+
```
|
55 |
+
|
56 |
+
The user aims to resolve the binding interface between the antigen and antibody
|
57 |
+
chains, which is a common scenario for applying the template masking algorithm.
|
58 |
+
In this case, a single template file is used, and the masking pattern
|
59 |
+
alternately masks the binding partners and the binding interface. To activate
|
60 |
+
the template masking algorithm, the user must provide a template mask file that
|
61 |
+
defines the masking pattern:
|
62 |
+
|
63 |
+
```json
|
64 |
+
{
|
65 |
+
"templates_masks": [
|
66 |
+
{
|
67 |
+
"template_name": "7si2_chothia_CGF.cif",
|
68 |
+
"masks": [
|
69 |
+
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------",
|
70 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
71 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"template_name": "7si2_chothia_CGF.cif",
|
76 |
+
"masks": [
|
77 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
78 |
+
"------------------------------------------------------------------------------------------------------------",
|
79 |
+
"------------------------------------------------------------------------------------------------------------------------"
|
80 |
+
]
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"template_name": "7si2_chothia_CGF.cif",
|
84 |
+
"masks": [
|
85 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--XXXXXX----------XX--XXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXX---XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
86 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-----XXXXXXXXXXXX",
|
87 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXXXX----XX--X-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XX----XXXXXXXXXXXXXXX"
|
88 |
+
]
|
89 |
+
}
|
90 |
+
]
|
91 |
+
}
|
92 |
+
```
|
93 |
+
|
94 |
+
The `masks` fields must adhere to the following constraints:
|
95 |
+
|
96 |
+
- Contain as many mask lines as chains in the input `FASTA` file,
|
97 |
+
- Each mask line must have as many characters as the corresponding chain in the
|
98 |
+
input `FASTA` file,
|
99 |
+
- Only contain `X` (residue is masked) or `-` (residue is not masked)
|
100 |
+
characters,
|
folding-studio/docs/docs/explanation/index.md
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Explanation
|
2 |
+
This **Explanation section** provides deeper insights into the models and advanced algorithms supported in our custom folding CLI. Unlike the **How-to guides**, which focus on practical steps, these explanations cover the underlying principles that influence model selection and algorithm performance.
|
3 |
+
|
4 |
+
You will find an overview of [each model](./supported_models.md), its strengths, and its limitations, as well as details on [advanced features](./advanced_algorithms.md) like MSA subsampling and template masking. The goal is to help you understand how these components fit together so you can make the right choices for your predictions.
|
folding-studio/docs/docs/explanation/supported_models.md
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
This documentation provides an overview of the supported protein folding models
|
2 |
+
in Folding Studio. Please refer to the official documentation page for more details on a model's features, how it was trained and how it is claimed to perform with respect to similar models. Users are encouraged to run their own benchmarking tests tailored to their use cases.
|
3 |
+
Folding Studio models can be organized into two categories: AlphaFold2-like and AlphaFold3-like architectures.
|
4 |
+
|
5 |
+
- AlphaFold2-like models: This group includes **AlphaFold2**, **OpenFold**, and **SoloSeq**.
|
6 |
+
- AlphaFold3-like models: This group includes **Boltz-1**, **Chai-1**, and **Protenix**.
|
7 |
+
|
8 |
+
AlphaFold3 is the new state-of-the-art method for protein-antibody interface prediction, improving upon AlphaFold2. Key advancements include:
|
9 |
+
|
10 |
+
- The replacement of AlphaFold2's Structure module with a Diffusion module, enhancing predictions without additional constraints.
|
11 |
+
- An expanded vocabulary to support RNA, DNA, and other molecules like ligands.
|
12 |
+
- A smaller MSA module and the removal of the MSA from the Pairformer module, which replaces AlphaFold2's Evoformer module.
|
13 |
+
- Internal changes in the Pairformer module, including information flow from pairs to single representations.
|
14 |
+
|
15 |
+
These modifications improve complex predictions and integrate recent ML advancements.
|
16 |
+
|
17 |
+
!!! Data
|
18 |
+
The key difference in input data between AlphaFold2-like and AlphaFold3-like models is that, while AlphaFold2 primarily requires only a protein sequence as input, AlphaFold3 can accept a wider range of molecular information, including not just protein sequences but also details about other molecules like DNA, RNA, and ligands, allowing it to predict the structures of protein complexes involving these molecules.
|
19 |
+
|
20 |
+
Please note that the MSA search process (applicable to all models with MSA support) runs on servers hosted by InstaDeep. This ensures complete confidentiality of all user inputs.
|
21 |
+
|
22 |
+
## AlphaFold2
|
23 |
+
|
24 |
+
[GitHub repository](https://github.com/google-deepmind/alphafold)
|
25 |
+
[Paper](https://www.nature.com/articles/s41586-021-03819-2)
|
26 |
+
|
27 |
+
**Overview:** AlphaFold2 (AF2) is a deep learning model developed by DeepMind
|
28 |
+
for high-accuracy protein structure prediction. It leverages evolutionary
|
29 |
+
relationships and deep learning techniques to generate atomic-level protein
|
30 |
+
structures.
|
31 |
+
|
32 |
+
**Key Features:**
|
33 |
+
|
34 |
+
- Uses multiple sequence alignments (MSAs) for improved accuracy.
|
35 |
+
- Evoformer module for deep sequence and pairwise residue learning.
|
36 |
+
- Structure module with iterative 3D refinement.
|
37 |
+
- Suitable for high-precision structural biology and drug discovery.
|
38 |
+
|
39 |
+
___
|
40 |
+
|
41 |
+
## OpenFold
|
42 |
+
|
43 |
+
[GitHub repository](https://github.com/aqlaboratory/openfold)
|
44 |
+
[Paper](https://www.nature.com/articles/s41592-024-02272-z)
|
45 |
+
[Documentation](https://openfold.readthedocs.io/en/latest/index.html)
|
46 |
+
|
47 |
+
**Overview:** OpenFold is an open-source reimplementation of AlphaFold2 in
|
48 |
+
PyTorch, designed for efficiency and accessibility.
|
49 |
+
|
50 |
+
**Key Features:**
|
51 |
+
|
52 |
+
- Optimizations in the implementation which allow for faster inference time (at least for proteins of a few hundred residues) and lower RAM consumption.
|
53 |
+
|
54 |
+
___
|
55 |
+
|
56 |
+
## SoloSeq
|
57 |
+
|
58 |
+
[Documentation](https://openfold.readthedocs.io/en/latest/Single_Sequence_Inference.html)
|
59 |
+
|
60 |
+
**Overview:** SoloSeq is a modified version of OpenFold which replaces MSA features with ESM-1b sequence embeddings.
|
61 |
+
|
62 |
+
**Key Features:**
|
63 |
+
|
64 |
+
- MSA-free Prediction: Uses ESM-1b model embeddings to predict structure without multiple sequence alignments (MSA).
|
65 |
+
- Flexible Optimization: Offers flags for controlling relaxation, output saving, and MMCIF file generation.
|
66 |
+
|
67 |
+
___
|
68 |
+
|
69 |
+
## Chai-1
|
70 |
+
|
71 |
+
[GitHub repository](https://github.com/chaidiscovery/chai-lab)
|
72 |
+
[Paper](https://www.biorxiv.org/content/10.1101/2024.10.10.615955v1.full.pdf)
|
73 |
+
|
74 |
+
**Overview:** Chai-1 is a multi-modal protein structure prediction model that
|
75 |
+
supports proteins, RNA, DNA, and small molecules.
|
76 |
+
|
77 |
+
**Key Features:**
|
78 |
+
|
79 |
+
- Multi-modal capabilities for protein-protein and biomolecule interactions.
|
80 |
+
- Transformer-based architecture with deep representation learning.
|
81 |
+
- Best for users looking for an MSA-free alternative with high versatility.
|
82 |
+
|
83 |
+
___
|
84 |
+
|
85 |
+
## Boltz-1
|
86 |
+
|
87 |
+
[GitHub repository](https://github.com/jwohlwend/boltz)
|
88 |
+
[Paper](https://www.biorxiv.org/content/10.1101/2024.11.19.624167v1.full)
|
89 |
+
|
90 |
+
**Overview:** Boltz-1 is a protein folding model developed by MIT Jameel Clinic,
|
91 |
+
optimized for computational efficiency.
|
92 |
+
|
93 |
+
**Key Features:**
|
94 |
+
|
95 |
+
- Hybrid attention mechanisms improve residue interaction modeling.
|
96 |
+
- Optimized inference for fast predictions.
|
97 |
+
- Advanced multi-chain folding for complex biomolecular structures.
|
98 |
+
- Ideal for researchers needing high-speed, scalable structure prediction.
|
99 |
+
|
100 |
+
**Supported data format**
|
101 |
+
|
102 |
+
| Feature | Fasta | YAML |
|
103 |
+
| -------- |--------------------| ------- |
|
104 |
+
| Polymers | :white_check_mark: | :white_check_mark: |
|
105 |
+
| Smiles | :white_check_mark: | :white_check_mark: |
|
106 |
+
| CCD code | :white_check_mark: | :white_check_mark: |
|
107 |
+
| Custom MSA | :white_check_mark: | :white_check_mark: |
|
108 |
+
| Modified Residues | :x: | :white_check_mark: |
|
109 |
+
| Covalent bonds | :x: | :white_check_mark: |
|
110 |
+
| Pocket conditioning | :x: | :white_check_mark: |
|
111 |
+
|
112 |
+
|
113 |
+
___
|
114 |
+
|
115 |
+
## Protenix
|
116 |
+
|
117 |
+
[GitHub repository](https://github.com/bytedance/Protenix)
|
118 |
+
[Paper](https://www.biorxiv.org/content/10.1101/2025.01.08.631967v1.full-text)
|
119 |
+
|
120 |
+
**Overview:** Protenix is an open-source alternative to AlphaFold3,
|
121 |
+
developed for scalability and custom applications.
|
122 |
+
|
123 |
+
**Key Features:**
|
124 |
+
|
125 |
+
- Protenix only supports MSA search with MMSeqs2 and does not support template features.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/gap_trick_job.md
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
The Gap Trick enables the folding of multimer complexes using the AlphaFold2/OpenFold **monomer** models.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
It is only available when using custom templates (i.e. `"template_mode"` must be set to `"custom"`). Additionally, the provided templates **MUST** exclusively include chains that precisely correspond to the query sequences, no more, no less, and in the same order.
|
7 |
+
|
8 |
+
=== ":octicons-command-palette-16: CLI"
|
9 |
+
|
10 |
+
```bash
|
11 |
+
folding predict af2 path/to/my/file.fasta --project-code "your-project-code" --gap-trick --template_mode custom --custom_template /path/to/template_1.cif
|
12 |
+
```
|
13 |
+
|
14 |
+
=== ":material-language-python: Python"
|
15 |
+
|
16 |
+
```python
|
17 |
+
from pathlib import Path
|
18 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
19 |
+
|
20 |
+
af2_predict(source=Path("path/to/my/file.fasta"), project_code="your-project-code", gap_trick=True, template_mode="custom", custom_template=[Path("/path/to/template_1.cif")], num_recycle=3, random_seed=0)
|
21 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/initial_guess_af2.md
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
The initial guess algorithm allows using a pre-defined structure in the first recycling stage of the AlphaFold2 forward pass. The original algorithm is described in [Bennett et al.](https://www.nature.com/articles/s41467-023-38328-5) and in the [GitHub repo](https://github.com/nrbennet/dl_binder_design/blob/main/af2_initial_guess/).
|
4 |
+
|
5 |
+
!!! warning
|
6 |
+
Unlike the other features in the how-to guides, this algorithm is only available with the AlphaFold2 model.
|
7 |
+
|
8 |
+
## Application
|
9 |
+
|
10 |
+
The initial guess structure file **must be** in `.cif` format. This algorithm is only available when disabling MSA and templates (i.e. `"msa_mode"` and `"template_mode"` must both be set to `"none"`). Additionally, the provided initial guess file **MUST** exclusively include chains that precisely correspond to the query sequences, no more, no less.
|
11 |
+
|
12 |
+
=== ":octicons-command-palette-16: CLI single job"
|
13 |
+
|
14 |
+
```bash
|
15 |
+
folding predict af2 3bve.fasta --project-code "your-project-code" --initial-guess-file 3bve_dimer.cif --msa-mode none --template-mode none
|
16 |
+
```
|
17 |
+
|
18 |
+
=== ":octicons-command-palette-16: CLI batch job"
|
19 |
+
|
20 |
+
```bash
|
21 |
+
folding predict af2 batch_jobs.csv --project-code "your-project-code"
|
22 |
+
```
|
23 |
+
|
24 |
+
=== ":material-table: CSV example"
|
25 |
+
|
26 |
+
```csv
|
27 |
+
complex_id,description,fasta_sequence,num_recycle,random_seed,msa_mode,template_mode,initial_guess_file
|
28 |
+
3bve-ig,chain A,PMLSKDIIKLLNEQVNKEMNSSNLYMSMSSWCYTHSLDGAGLFLFDHAAEEYEHAKKLIIFLNENNVPVQLTSISAPEHKFEGLTQIFQKAYEHEQHISESINNIVDHAIKSKDHATFNFLQWYVAEQHEEEVLFKDILDKIELIGNENHGLYLADQYVKGIAKSRK,3,145,none,none,initial_guess/3bve_dimer.cif
|
29 |
+
3bve-ig,chain B,PMLSKDIIKLLNEQVNKEMNSSNLYMSMSSWCYTHSLDGAGLFLFDHAAEEYEHAKKLIIFLNENNVPVQLTSISAPEHKFEGLTQIFQKAYEHEQHISESINNIVDHAIKSKDHATFNFLQWYVAEQHEEEVLFKDILDKIELIGNENHGLYLADQYVKGIAKSRK,3,145,none,none,initial_guess/3bve_dimer.cif
|
30 |
+
1hqk-ig,chain 1,MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR,3,145,none,none,initial_guess/1hqk_pentamer.cif
|
31 |
+
1hqk-ig,chain 2,MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR,3,145,none,none,initial_guess/1hqk_pentamer.cif
|
32 |
+
1hqk-ig,chain 3,MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR,3,145,none,none,initial_guess/1hqk_pentamer.cif
|
33 |
+
1hqk-ig,chain 4,MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR,3,145,none,none,initial_guess/1hqk_pentamer.cif
|
34 |
+
1hqk-ig,chain 5,MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR,3,145,none,none,initial_guess/1hqk_pentamer.cif
|
35 |
+
```
|
36 |
+
|
37 |
+
|
38 |
+
=== ":material-language-python: script for single job"
|
39 |
+
|
40 |
+
To submit jobs with custom files programmatically, we use helper functions made available in the [`folding_studio` package](../../../tutorials/installation.md#cli-and-folding_studio-library).
|
41 |
+
|
42 |
+
```python
|
43 |
+
import json
|
44 |
+
from pathlib import Path
|
45 |
+
|
46 |
+
from folding_studio import get_id_token
|
47 |
+
from folding_studio import single_job_prediction
|
48 |
+
from folding_studio_data_models import AF2Parameters, FeatureMode
|
49 |
+
|
50 |
+
parameters = AF2Parameters(
|
51 |
+
initial_guess_file="3bve_dimer.cif",
|
52 |
+
template_mode=FeatureMode.NONE,
|
53 |
+
msa_mode=FeatureMode.NONE,
|
54 |
+
random_seed=0,
|
55 |
+
num_recycle=3,
|
56 |
+
)
|
57 |
+
|
58 |
+
# Obtain the identity token from gcloud auth
|
59 |
+
identity_token = get_id_token()
|
60 |
+
|
61 |
+
try:
|
62 |
+
response = single_job_prediction(
|
63 |
+
identity_token=identity_token,
|
64 |
+
fasta_file=Path("3bve.fasta"),
|
65 |
+
parameters=parameters,
|
66 |
+
)
|
67 |
+
json.dump(response, open("submission_initial_guess.json", "w"))
|
68 |
+
except Exception as err:
|
69 |
+
print("Error during submission.")
|
70 |
+
print(err)
|
71 |
+
```
|
72 |
+
|
73 |
+
=== ":material-language-python: script for batch job"
|
74 |
+
|
75 |
+
To submit jobs with custom files programmatically, we use helper functions made available in the [`folding_studio` package](../../../tutorials/installation.md#cli-and-folding_studio-library).
|
76 |
+
|
77 |
+
```python
|
78 |
+
import json
|
79 |
+
from pathlib import Path
|
80 |
+
|
81 |
+
from folding_studio import batch_prediction_from_file
|
82 |
+
from folding_studio import get_id_token
|
83 |
+
from folding_studio_data_models import (
|
84 |
+
AF2Parameters,
|
85 |
+
AF2Request,
|
86 |
+
BatchRequest,
|
87 |
+
FeatureMode,
|
88 |
+
Sequence,
|
89 |
+
)
|
90 |
+
|
91 |
+
requests = [
|
92 |
+
AF2Request(
|
93 |
+
complex_id="3bve-ig",
|
94 |
+
sequences=[
|
95 |
+
Sequence(
|
96 |
+
description="chain A",
|
97 |
+
fasta_sequence="PMLSKDIIKLLNEQVNKEMNSSNLYMSMSSWCYTHSLDGAGLFLFDHAAEEYEHAKKLIIFLNENNVPVQLTSISAPEHKFEGLTQIFQKAYEHEQHISESINNIVDHAIKSKDHATFNFLQWYVAEQHEEEVLFKDILDKIELIGNENHGLYLADQYVKGIAKSRK",
|
98 |
+
),
|
99 |
+
Sequence(
|
100 |
+
description="chain B",
|
101 |
+
fasta_sequence="PMLSKDIIKLLNEQVNKEMNSSNLYMSMSSWCYTHSLDGAGLFLFDHAAEEYEHAKKLIIFLNENNVPVQLTSISAPEHKFEGLTQIFQKAYEHEQHISESINNIVDHAIKSKDHATFNFLQWYVAEQHEEEVLFKDILDKIELIGNENHGLYLADQYVKGIAKSRK",
|
102 |
+
),
|
103 |
+
],
|
104 |
+
parameters= AF2Parameters(
|
105 |
+
num_recycle=3,
|
106 |
+
random_seed=145,
|
107 |
+
msa_mode=FeatureMode.NONE,
|
108 |
+
template_mode=FeatureMode.NONE,
|
109 |
+
initial_guess_file="3bve_dimer.cif",
|
110 |
+
),
|
111 |
+
),
|
112 |
+
AF2Request(
|
113 |
+
complex_id="1hqk-ig",
|
114 |
+
sequences=[
|
115 |
+
Sequence(
|
116 |
+
description=f"chain {i + 1}",
|
117 |
+
fasta_sequence="MQIYEGKLTAEGLRFGIVASRFNHALVDRLVEGAIDCIVRHGGREEDITLVRVPGSWEIPVAAGELARKEDIDAVIAIGVLIRGATPHFDYIASEVSKGLANLSLELRKPITFGVITADTLEQAIERAGTKHGNKGWEAALSAIEMANLFKSLR",
|
118 |
+
)
|
119 |
+
for i in range(5)
|
120 |
+
],
|
121 |
+
parameters= AF2Parameters(
|
122 |
+
num_recycle=3,
|
123 |
+
random_seed=145,
|
124 |
+
msa_mode=FeatureMode.NONE,
|
125 |
+
template_mode=FeatureMode.NONE,
|
126 |
+
initial_guess_file="1hqk_pentamer.cif",
|
127 |
+
),
|
128 |
+
),
|
129 |
+
]
|
130 |
+
|
131 |
+
# Build and validate the request
|
132 |
+
batch_request = BatchRequest(requests=requests)
|
133 |
+
|
134 |
+
# Prepare the batch request file for submission
|
135 |
+
json_data = batch_request.model_dump_json()
|
136 |
+
batch_file = Path("batch_request.json")
|
137 |
+
batch_file.write_text(json_data)
|
138 |
+
|
139 |
+
# Obtain the identity token from gcloud auth
|
140 |
+
identity_token = get_id_token()
|
141 |
+
|
142 |
+
try:
|
143 |
+
response = batch_prediction_from_file(
|
144 |
+
identity_token=identity_token, file=batch_file
|
145 |
+
)
|
146 |
+
json.dump(response, open("submission_batch.json", "w"))
|
147 |
+
except Exception as err:
|
148 |
+
print("Error during batch submission.")
|
149 |
+
print(err)
|
150 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/msa_subsampling_job.md
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
MSA subsampling allows changing the default number of MSA representations given as input to the model.
|
3 |
+
|
4 |
+
This feature is only supported for monomer models.
|
5 |
+
|
6 |
+
The impact of MSA subsampling has been studied by [D. Del Alamo et al.](https://elifesciences.org/articles/75751).
|
7 |
+
|
8 |
+
!!! quote
|
9 |
+
"Whereas models of most proteins generated using the default AF2 pipeline are conformationally homogeneous and nearly identical to one another, reducing the depth of the input multiple sequence alignments by stochastic subsampling led to the generation of accurate models in multiple conformations."
|
10 |
+
- D. Del Alamo et al.
|
11 |
+
|
12 |
+
## Application
|
13 |
+
To enable MSA subsampling, you can modify the `max_extra_msa` and `max_msa_cluster` parameters.
|
14 |
+
|
15 |
+
- `max_extra_msa` : maximum number of non-clustered MSA representations to use as input
|
16 |
+
- `max_msa_cluster` : maximum number of clustered MSA representations to use as input
|
17 |
+
|
18 |
+
By default `max_msa_cluster` will be half the value of `max_extra_msa`, up to 512.
|
19 |
+
|
20 |
+
=== ":octicons-command-palette-16: CLI"
|
21 |
+
|
22 |
+
```bash
|
23 |
+
folding predict af2 path/to/my/file.fasta --max-extra-msa 124 --max-msa-cluster 32
|
24 |
+
```
|
25 |
+
|
26 |
+
=== ":material-language-python: Python"
|
27 |
+
|
28 |
+
```python
|
29 |
+
from pathlib import Path
|
30 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
31 |
+
|
32 |
+
# Submit an AF2 prediction with a reduced MSA depth (MSA subsampling).
fasta_path = Path("path/to/my/file.fasta")
af2_predict(
    source=fasta_path,
    max_extra_msa=124,
    max_msa_cluster=32,
)
|
33 |
+
```
|
34 |
+
|
35 |
+
|
36 |
+
!!! warning
|
37 |
+
The specified `max_extra_msa` or `max_msa_cluster` values are applied to all AlphaFold2/OpenFold models.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/advanced_algorithms/template_masking_job.md
ADDED
@@ -0,0 +1,549 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
Template Masking enables the creation of template features by masking regions
|
3 |
+
of the structures considered less important for resolving the multimer
|
4 |
+
interface.
|
5 |
+
|
6 |
+
## Application
|
7 |
+
This method is exclusively available in Gap Trick mode. Therefore
|
8 |
+
the same constraints on the input template structures apply, i.e. they
|
9 |
+
**MUST** exclusively include chains that precisely correspond to the query
|
10 |
+
sequences, no more, no less, and in the same order.
|
11 |
+
|
12 |
+
Consider an input `FASTA` file containing 3 chains of a multimer complex, for example an antigen chain and two antibody chains:
|
13 |
+
|
14 |
+
!!! example
|
15 |
+
```title="antibody_antigen.fasta"
|
16 |
+
>Antigen
|
17 |
+
VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK
|
18 |
+
>Antibody | Chain 1
|
19 |
+
DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIK
|
20 |
+
>Antibody | Chain 2
|
21 |
+
QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSS
|
22 |
+
```
|
23 |
+
|
24 |
+
The user aims to resolve the binding interface between the antigen and antibody
|
25 |
+
chains, which is a common scenario for applying the template masking algorithm.
|
26 |
+
In this case, a single template file is used, and the masking pattern
|
27 |
+
alternately masks the binding partners and the binding interface. To
|
28 |
+
activate the template masking algorithm, the user must provide a template mask
|
29 |
+
file that defines the masking pattern:
|
30 |
+
|
31 |
+
!!! example
|
32 |
+
|
33 |
+
```json title="masks.json"
|
34 |
+
{
|
35 |
+
"templates_masks": [
|
36 |
+
{
|
37 |
+
"template_name": "7si2_chothia_CGF.cif",
|
38 |
+
"masks": [
|
39 |
+
"----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------",
|
40 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
41 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"template_name": "7si2_chothia_CGF.cif",
|
46 |
+
"masks": [
|
47 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
48 |
+
"------------------------------------------------------------------------------------------------------------",
|
49 |
+
"------------------------------------------------------------------------------------------------------------------------"
|
50 |
+
]
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"template_name": "7si2_chothia_CGF.cif",
|
54 |
+
"masks": [
|
55 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX--XXXXXX----------XX--XXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXX---XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX",
|
56 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-----XXXXXXXXXXXX",
|
57 |
+
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XXXXXXXXXXXXXXXX----XX--X-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX-XX----XXXXXXXXXXXXXXX"
|
58 |
+
]
|
59 |
+
}
|
60 |
+
]
|
61 |
+
}
|
62 |
+
```
|
63 |
+
The `masks` fields must adhere to the following constraints:
|
64 |
+
|
65 |
+
- Contain as many mask lines as chains in the input `FASTA` file,
|
66 |
+
- Each mask line must have as many characters as the corresponding chain in the input `FASTA` file,
|
67 |
+
- Only contain `X` (residue is masked) or `-` (residue is not masked) characters.
|
68 |
+
|
69 |
+
## Submission
|
70 |
+
|
71 |
+
=== ":octicons-command-palette-16: CLI"
|
72 |
+
|
73 |
+
```bash
|
74 |
+
folding predict af2 antibody_antigen.fasta --gap-trick --template-mode custom --custom-template 7si2_chothia_CGF.cif --templates-masks-file masks.json
|
75 |
+
```
|
76 |
+
|
77 |
+
=== ":octicons-command-palette-16: CLI batch job"
|
78 |
+
|
79 |
+
```bash
|
80 |
+
folding predict af2 batch_jobs.csv
|
81 |
+
```
|
82 |
+
=== ":material-table: CSV example"
|
83 |
+
|
84 |
+
```csv
|
85 |
+
complex_id,description,fasta_sequence,num_recycle,random_seed,gap_trick,msa_mode,template_mode,custom_templates,templates_masks_file
|
86 |
+
Antibody + Antigen,Antigen,VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK,3,145,1,none,custom,['templates/7si2_chothia_CGF.cif'],templates_masks/ab_ag_mask_0.json
|
87 |
+
Antibody + Antigen,Antibody | Chain 1,DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIK,3,145,1,none,custom,['templates/7si2_chothia_CGF.cif'],templates_masks/ab_ag_mask_0.json
|
88 |
+
Antibody + Antigen,Antibody | Chain 2,QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSS,3,145,1,none,custom,['templates/7si2_chothia_CGF.cif'],templates_masks/ab_ag_mask_0.json
|
89 |
+
```
|
90 |
+
|
91 |
+
=== ":material-language-python: script for single job"
|
92 |
+
|
93 |
+
To submit jobs with custom files programmatically, we use helper functions made available in the [`folding_studio` package](../../../tutorials/installation.md#cli-and-folding_studio-library).
|
94 |
+
|
95 |
+
```python
|
96 |
+
import json
|
97 |
+
from pathlib import Path
|
98 |
+
|
99 |
+
from folding_studio import get_id_token
|
100 |
+
from folding_studio import single_job_prediction
|
101 |
+
from folding_studio_data_models import AF2Parameters, FeatureMode
|
102 |
+
|
103 |
+
# Folding parameters: custom template in Gap Trick mode with a templates
# masks file, fixed seed and three recycles.
parameters = AF2Parameters(
    custom_templates=["7si2_chothia_CGF.cif"],
    template_mode=FeatureMode.CUSTOM,
    gap_trick=True,
    templates_masks_file="ab_ag_mask_0.json",
    random_seed=0,
    num_recycle=3,
)

# Obtain the identity token from gcloud auth
identity_token = get_id_token()

try:
    response = single_job_prediction(
        identity_token=identity_token,
        fasta_file=Path("ab_ag.fasta"),
        parameters=parameters,
        ignore_cache=True,
    )
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle.
    with open("submission_tm.json", "w") as output_file:
        json.dump(response, output_file)
except Exception as err:
    print("Error during submission.")
    print(err)
|
126 |
+
```
|
127 |
+
|
128 |
+
=== ":material-language-python: script for batch job"
|
129 |
+
|
130 |
+
To submit jobs with custom files programmatically, we use helper functions made available in the [`folding_studio` package](../../../tutorials/installation.md#cli-and-folding_studio-library).
|
131 |
+
|
132 |
+
```python
|
133 |
+
import json
|
134 |
+
from pathlib import Path
|
135 |
+
|
136 |
+
from folding_studio import batch_prediction_from_file
|
137 |
+
from folding_studio import get_id_token
|
138 |
+
from folding_studio_data_models import (
|
139 |
+
AF2Parameters,
|
140 |
+
AF2Request,
|
141 |
+
BatchRequest,
|
142 |
+
FeatureMode,
|
143 |
+
Sequence,
|
144 |
+
)
|
145 |
+
|
146 |
+
# Two requests sharing the same template and masks file: the reference
# complex and a variant whose antibody chains differ in their last residue.
requests = [
    AF2Request(
        complex_id="Antibody + Antigen",
        sequences=[
            Sequence(
                description="Antigen",
                fasta_sequence="VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK",
            ),
            Sequence(
                description="Antibody | Chain 1",
                fasta_sequence="DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIK",
            ),
            Sequence(
                description="Antibody | Chain 2",
                fasta_sequence="QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSS",
            ),
        ],
        parameters=AF2Parameters(
            template_mode=FeatureMode.CUSTOM,
            gap_trick=True,
            custom_templates=["templates/7si2_chothia_CGF.cif"],
            templates_masks_file="templates_masks/ab_ag_mask_0.json",
        ),
    ),
    AF2Request(
        complex_id="Mutated Antibody + Antigen",
        sequences=[
            Sequence(
                description="Antigen",
                fasta_sequence="VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK",
            ),
            Sequence(
                description="Antibody | Chain 1",
                fasta_sequence="DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIM",
            ),
            Sequence(
                description="Antibody | Chain 2",
                fasta_sequence="QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSM",
            ),
        ],
        parameters=AF2Parameters(
            template_mode=FeatureMode.CUSTOM,
            gap_trick=True,
            custom_templates=["templates/7si2_chothia_CGF.cif"],
            templates_masks_file="templates_masks/ab_ag_mask_0.json",
        ),
    ),
]
# Build and validate the request
batch_request = BatchRequest(requests=requests)

# Prepare the batch request file for submission
json_data = batch_request.model_dump_json()
batch_file = Path("batch_request.json")
batch_file.write_text(json_data)

# Obtain the identity token from gcloud auth
identity_token = get_id_token()

try:
    response = batch_prediction_from_file(
        identity_token=identity_token, file=batch_file
    )
    # Use a context manager so the output file is flushed and closed
    # deterministically instead of leaking the handle.
    with open("submission_batch.json", "w") as output_file:
        json.dump(response, output_file)
except Exception as err:
    print("Error during batch submission.")
    print(err)
|
213 |
+
```
|
214 |
+
|
215 |
+
## Generate mask
|
216 |
+
|
217 |
+
We provide the following guide for generating the mask file for binding
|
218 |
+
partners: A (antigen chain) and B and C (antibody chains). This process
|
219 |
+
requires the `bioblocks` package. Please refer to this
|
220 |
+
[page](https://instadeep.gitlab.io/bioai-group/BioBlocks/setup/setup.html) for
|
221 |
+
installation instructions.
|
222 |
+
|
223 |
+
```python
|
224 |
+
|
225 |
+
from __future__ import annotations
|
226 |
+
import numpy as np
|
227 |
+
from pathlib import Path
|
228 |
+
import warnings
|
229 |
+
import json
|
230 |
+
|
231 |
+
from bioblocks.blocks import Model, Residue, Chain
|
232 |
+
from bioblocks.io import read_model
|
233 |
+
from bioblocks.sequence import get_alignment_score, align_sequences
|
234 |
+
|
235 |
+
|
236 |
+
THRESHOLD: float = 10.0
|
237 |
+
MASK_TOKEN: str = "X"
|
238 |
+
KEEP_TOKEN: str = "-"
|
239 |
+
|
240 |
+
|
241 |
+
def get_ca_atom_coord(residue: Residue) -> np.ndarray:
    """Return the coordinates of a residue's CA atom.

    Args:
        residue (Residue): Residue entity.

    Returns:
        np.ndarray: Array built from ``residue["CA"].coord``. When that lookup
            fails, a warning is emitted and an object array
            ``[None, None, None]`` is returned as a "missing" marker.
    """
    try:
        coords = np.array(residue["CA"].coord)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that KeyboardInterrupt
        # and SystemExit are not silently turned into a "missing CA" warning.
        chain_id = residue.parent.id
        model_id = residue.parent.parent.id
        warnings.warn(
            f"{residue.id}, chain {chain_id}, model {model_id} misses CA atom"
        )
        coords = np.array([None, None, None])
    return coords
|
260 |
+
|
261 |
+
|
262 |
+
def fix_nan_ca_coords(ca_atoms: np.ndarray) -> np.ndarray:
    """Fill in missing CA atom coordinates from neighbouring residues.

    A row is considered missing when it contains ``None``. Missing rows that
    precede the first valid row are filled with that first valid row; every
    other missing row is filled with the row just before it (which is either
    valid or has already been filled).

    Args:
        ca_atoms (np.ndarray): CA atom coordinates, one row per residue.

    Returns:
        np.ndarray: The input array itself when nothing is missing, otherwise
            a ``float32`` copy with the missing rows filled. If no row is
            valid, the copy is cast as-is without filling.
    """
    # Elementwise comparison against None is intentional here.
    missing_rows = np.unique(np.where(ca_atoms == None)[0])  # noqa: E711
    if missing_rows.size == 0:
        return ca_atoms

    ca_atoms_out = ca_atoms.copy()
    valid_rows = np.setdiff1d(np.arange(ca_atoms_out.shape[0]), missing_rows)
    if valid_rows.size:
        first_valid = valid_rows[0]
        # missing_rows is sorted, so row idx - 1 is always usable once the
        # leading gap has been handled.
        for idx in missing_rows:
            source = first_valid if idx < first_valid else idx - 1
            ca_atoms_out[idx, :] = ca_atoms_out[source, :]
    return ca_atoms_out.astype(np.float32)
|
282 |
+
|
283 |
+
|
284 |
+
def compute_chain_contact_mask(
    chain: Chain, docking_partner_ca_atoms: np.ndarray, threshold: float = THRESHOLD
) -> str:
    """Build the contact mask string of one chain against its docking partner.

    Args:
        chain (Chain): Chain entity of a loaded structure.
        docking_partner_ca_atoms (np.ndarray): CA coordinates of the docking
            partner opposite to ``chain``.
        threshold (float): Threshold on CA-CA distance.

    Returns:
        str: One token per residue of ``chain``: ``KEEP_TOKEN`` when at least
            one partner CA atom is closer than ``threshold``, ``MASK_TOKEN``
            otherwise.
    """
    own_ca = fix_nan_ca_coords(np.array([get_ca_atom_coord(res) for res in chain]))

    # Pairwise CA-CA distances: one row per chain residue, one column per
    # docking-partner atom.
    deltas = own_ca[:, None, :] - docking_partner_ca_atoms[None, :, :]
    pairwise = np.linalg.norm(deltas, axis=-1)
    in_contact = (pairwise < threshold).any(axis=-1)

    tokens = [KEEP_TOKEN if flag else MASK_TOKEN for flag in in_contact]
    return "".join(tokens)
|
310 |
+
|
311 |
+
|
312 |
+
def generate_contact_masks_from_template(
    model: Model, docking_partners: str, threshold: float = THRESHOLD
) -> dict[str, str]:
    """Generate contact masks from a template.

    Args:
        model (Model): Model entity of a loaded structure.
        docking_partners (str): Docking partners split by "_", e.g. A_BC.
            Exactly two partners are expected; each character is a chain id.
        threshold (float): Threshold to compute contact residues based on CA distances.

    Returns:
        dict[str, str]: Contact mask string for each chain id, left partner
            chains first, then right partner chains.
    """
    left_partners, right_partners = docking_partners.split("_")

    def _partner_ca_atoms(chain_ids: str) -> np.ndarray:
        """Stack the CA coordinates of every residue of the given chains."""
        return np.array(
            [get_ca_atom_coord(r) for chain_id in chain_ids for r in model[chain_id]]
        )

    def _masks_against(chain_ids: str, opposite_atoms: np.ndarray) -> dict[str, str]:
        """Compute the contact mask of each chain against the opposite partner."""
        return {
            chain_id: compute_chain_contact_mask(
                model[chain_id], opposite_atoms, threshold
            )
            for chain_id in chain_ids
        }

    left_partner_atoms = _partner_ca_atoms(left_partners)
    right_partner_atoms = _partner_ca_atoms(right_partners)
    left_partner_atoms = fix_nan_ca_coords(left_partner_atoms)
    right_partner_atoms = fix_nan_ca_coords(right_partner_atoms)

    # Each side is masked with respect to the atoms of the other side.
    mask_left = _masks_against(left_partners, right_partner_atoms)
    mask_right = _masks_against(right_partners, left_partner_atoms)
    return {**mask_left, **mask_right}
|
347 |
+
|
348 |
+
|
349 |
+
def parse_fasta_as_dict(fasta_string: str) -> dict[str, str]:
    """Parse a FASTA string into a description -> sequence dictionary.

    Args:
        fasta_string (str): The string contents of a FASTA file.

    Returns:
        dict[str, str]: Mapping between each description (header line without
            the leading '>') and its sequence, with multi-line sequences
            concatenated.
    """
    headers = []
    chunks = []
    for raw_line in fasta_string.splitlines():
        text = raw_line.strip()
        if not text:
            # Blank lines carry no information.
            continue
        if text.startswith(">"):
            # New record: drop the '>' marker and open an empty sequence.
            headers.append(text[1:])
            chunks.append([])
        else:
            chunks[-1].append(text)

    return dict(zip(headers, ("".join(parts) for parts in chunks)))
|
373 |
+
|
374 |
+
|
375 |
+
def fasta_to_template_chains(fasta_map: dict[str, str], model: Model) -> dict[str, str]:
    """Assign each FASTA entry to the best-scoring, still unassigned template chain.

    Entries are processed in ``fasta_map`` order; a chain is removed from the
    candidates as soon as it has been assigned.

    Note:
        Template structure must have at least the same number of chains as a number of query fasta sequences.

    Args:
        fasta_map (dict[str, str]): Mapping between description and sequences.
        model (Model): Model entity of a loaded structure.

    Returns:
        dict[str, str | None]: Mapping between FASTA descriptions and structure
            chain labels; ``None`` for entries left without a chain.
    """
    # Candidate chains, with "X" characters stripped from their sequences.
    remaining = {}
    for chain in model.get_chains():
        remaining[chain.id] = chain.sequence.replace("X", "")

    assignment = dict.fromkeys(fasta_map)
    for description, query_seq in fasta_map.items():
        if not remaining:
            # No chain left: the remaining descriptions stay mapped to None.
            break
        ranked = sorted(
            ((cid, get_alignment_score(cseq, query_seq)) for cid, cseq in remaining.items()),
            key=lambda pair: pair[1],
        )
        best_chain = ranked[-1][0]
        assignment[description] = best_chain
        del remaining[best_chain]
    return assignment
|
399 |
+
|
400 |
+
|
401 |
+
def get_alignment_index_mapping(
    seq_1: str, seq_2: str, aligned: bool = True
) -> dict[int, int]:
    """Mapping of aligned residue indices between two sequences.

    Args:
        seq_1 (str): First sequence.
        seq_2 (str): Second sequence.
        aligned (bool): Flag denoting whether the sequences are aligned or not.
            When False, the sequences are first aligned with ``align_sequences``.

    Returns:
        dict[int, int]: Dictionary containing the mapping between ``seq_1`` and ``seq_2``
            indices. Keys are gap-free residue indices in ``seq_1``, values the
            gap-free residue indices in ``seq_2``. Positions where either
            sequence has a gap ("-") are not part of the mapping.
    """
    # ``align_sequences`` output must be unpacked with ``*`` so that zip pairs
    # the residues column by column; zipping the result itself would yield
    # 1-tuples and break the two-name unpacking below.
    # NOTE(review): assumes align_sequences returns the two aligned strings,
    # as suggested by its other call site in this file; confirm.
    residue_pairs = (
        zip(seq_1, seq_2) if aligned else zip(*align_sequences(seq_1, seq_2))
    )

    index_mapping: dict[int, int] = {}
    res_idx_1, res_idx_2 = 0, 0
    for res_1, res_2 in residue_pairs:
        # Only columns where both sequences have a residue are mapped.
        if res_1 != "-" and res_2 != "-":
            index_mapping[res_idx_1] = res_idx_2
        # Each index only advances past real residues, never past gaps.
        res_idx_1 += res_1 != "-"
        res_idx_2 += res_2 != "-"
    return index_mapping
|
431 |
+
|
432 |
+
|
433 |
+
def get_sequence_contact_mask(
    fasta_map: dict[str, str],
    model: Model,
    docking_partners: str,
    threshold: float = THRESHOLD,
    not_mapped_fill_token: str = MASK_TOKEN,
) -> dict[str, str]:
    """Project the template contact masks onto each FASTA sequence.

    Args:
        fasta_map (dict[str, str]): Mapping between description and sequences.
        model (Model): Model entity of a loaded structure.
        docking_partners (str): Docking partners split by "_", e.g. A_BC.
        threshold (float): Threshold to compute contact residues based on CA distances.
        not_mapped_fill_token (str): Token used for residues (or whole
            sequences) that could not be mapped onto the template.

    Returns:
        dict[str, str]: Mapping between fasta descriptions and their contact
            mask, one token per FASTA residue.
    """
    fasta2template = fasta_to_template_chains(fasta_map, model)
    template_contact_masks = generate_contact_masks_from_template(
        model, docking_partners, threshold
    )

    contact_masks: dict[str, str] = {}
    for description, chain_id in fasta2template.items():
        query_seq = fasta_map[description]
        if not chain_id:
            # No template chain for this sequence: fill the whole mask.
            contact_masks[description] = not_mapped_fill_token * len(query_seq)
            continue

        # NOTE(review): the chain mask has one token per chain residue while
        # the aligned template sequence has "X" removed; presumably these
        # agree for the intended inputs; confirm.
        template_seq = model[chain_id].sequence.replace("X", "")
        aligned_pair = align_sequences(query_seq, template_seq)
        chain_mask = template_contact_masks[chain_id]
        query_to_template = get_alignment_index_mapping(*aligned_pair)

        tokens = []
        for position in range(len(query_seq)):
            if position in query_to_template:
                tokens.append(chain_mask[query_to_template[position]])
            else:
                tokens.append(not_mapped_fill_token)
        contact_masks[description] = "".join(tokens)
    return contact_masks
|
474 |
+
|
475 |
+
|
476 |
+
def get_partners_masks(
    fasta_map: dict[str, str],
    model: Model,
    docking_partners: str,
    not_mapped_fill_token: str = MASK_TOKEN,
) -> list[dict[str, str]]:
    """Build one whole-chain mask set per docking partner.

    For each partner, sequences mapped to a chain of that partner are fully
    kept (``KEEP_TOKEN``), sequences mapped to any other chain are fully
    masked (``MASK_TOKEN``) and sequences without a template chain are filled
    with ``not_mapped_fill_token``.

    Args:
        fasta_map (dict[str, str]): Mapping between description and sequences.
        model (Model): Model entity of a loaded structure.
        docking_partners (str): Docking partners split by "_", e.g. A_BC.
        not_mapped_fill_token (str): Fill token for sequences that were not mapped to template.

    Returns:
        list[dict[str,str]]: One description -> mask mapping per docking
            partner, in the order the partners appear in ``docking_partners``.
    """
    fasta2template = fasta_to_template_chains(fasta_map, model)
    # Chain label -> description, restricted to sequences that got a chain.
    chain_to_desc = {
        chain_id: desc for desc, chain_id in fasta2template.items() if chain_id
    }

    output_masks: list[dict[str, str]] = []
    for partner_chains in docking_partners.split("_"):
        # Start with every sequence filled, then overwrite the mapped ones.
        partner_masks: dict[str, str] = {}
        for desc in fasta2template:
            partner_masks[desc] = not_mapped_fill_token * len(fasta_map[desc])

        for chain_id, desc in chain_to_desc.items():
            token = KEEP_TOKEN if chain_id in partner_chains else MASK_TOKEN
            partner_masks[desc] = token * len(fasta_map[desc])

        output_masks.append(partner_masks)
    return output_masks
|
509 |
+
|
510 |
+
|
511 |
+
if __name__ == "__main__":
    # Template structure and the two docking partners: chain A versus chains B and C.
    model_path = Path("7si2_chothia_CGF.cif")
    docking_partners = "A_BC"

    model = read_model(model_path)

    fasta = (
        ">Antigen\n"
        "VRFPNITNLCPFHEVFNATTFASVYAWNRKRISNCVADYSVIYNFAPFFAFKCYGVSPTKLNDLCFTNVYADSFVIRGNEVSQIAPGQTGNIADYNYKLPDDFTGCVIAWNSNKLDSKPSGNYNYLYRLFRKSKLKPFERDISTEIYQAGNKPCNGVAGPNCYSPLQSYGFRPTYGVGHQPYRVVVLSFELLHAPATVCGPK\n"
        ">Antibody | Chain 1\n"
        "DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPGVTFGPGTKVDIK\n"
        ">Antibody | Chain 2\n"
        "QVQLVESGGGVVQPGRSLRLSCAASGFTFSSYDMHWVRQAPGKGLEWVAVISYDGSSKFYAESVKGRFTISRDNSKNTLYLQMNSLRAEETAVYYCVKDGEQLVPLFDYWGQGTLVTVSS"
    )

    fasta_map = parse_fasta_as_dict(fasta_string=fasta)

    contact_masks = get_sequence_contact_mask(fasta_map, model, docking_partners)
    partners_masks = get_partners_masks(fasta_map, model, docking_partners)

    # One entry per docking partner, followed by the interface (contact) entry.
    mask_sets = [*partners_masks, contact_masks]
    output_dict = {
        "templates_masks": [
            {
                "template_name": model_path.name,
                "masks": list(mask_set.values()),
            }
            for mask_set in mask_sets
        ]
    }

    mask_file = "masks.json"
    with open(mask_file, "w") as f:
        json.dump(output_dict, f, indent=2)
|
549 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/batch_job_from_configuration_file.md
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
A batch job allows you to submit multiple jobs at once. This avoids making too many API calls.
|
4 |
+
To submit a batch job through the folding CLI, you can pass a configuration file in `json` or `csv` format.
|
5 |
+
|
6 |
+
|
7 |
+
## Defining a batch job configuration file
|
8 |
+
A common way of submitting a batch job is via a configuration file in `json` or `csv` format.
|
9 |
+
When submitting a batch job through a configuration file, all the options passed to the predict command will be ignored.
|
10 |
+
|
11 |
+
### JSON
|
12 |
+
|
13 |
+
For a `json` format configuration file, its structure needs to be the same as a [`BatchRequest`](https://folding-studio-data-models-dot-int-bio-foldingstudio-gcp.nw.r.appspot.com/request/#folding_studio_data_models.request.BatchRequest) object.
|
14 |
+
|
15 |
+
=== "json"
|
16 |
+
|
17 |
+
```json
|
18 |
+
{
|
19 |
+
"requests": [
|
20 |
+
{
|
21 |
+
"complex_id": "mono_2LIS_auto_msa_custom_template",
|
22 |
+
"sequences": [
|
23 |
+
{
|
24 |
+
"description": ">mono_2LIS_auto_msa_custom_template|2LIS_1|Chain A|SPERM LYSIN|Haliotis rufescens (6454)",
|
25 |
+
"fasta_sequence": "RSWHYVEPKFLNKAFEVALKVQIIAGFDRGLVKWLRVHGRTLSTVQKKALYFVNRRYMQTHWANYMLWINKKIDALGRTPVVGDYTRLGAEIGRRIDMAYFYDFLKDKNMIPKYLPYMEEINRMRPADVPVKYMGK"
|
26 |
+
}
|
27 |
+
],
|
28 |
+
"folding_model": "af2",
|
29 |
+
"parameters": {
|
30 |
+
"num_recycle": 3,
|
31 |
+
"random_seed": null,
|
32 |
+
"custom_templates": ["5II8"],
|
33 |
+
"gap_trick": false,
|
34 |
+
"msa_mode": "search",
|
35 |
+
"template_mode": "custom"
|
36 |
+
}
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"complex_id": "multi_6M0J_standard",
|
40 |
+
"sequences": [
|
41 |
+
{
|
42 |
+
"description": ">multi_6M0J_standard|6M0J_1|Chain A|Angiotensin-converting enzyme 2|Homo sapiens (9606)",
|
43 |
+
"fasta_sequence": "STIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADHHHHHH"
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"description": ">multi_6M0J_standard|6M0J_2|Chain B[auth E]|Spike protein S1|Severe acute respiratory syndrome coronavirus 2 (2697049)",
|
47 |
+
"fasta_sequence": "RVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFHHHHHH"
|
48 |
+
}
|
49 |
+
],
|
50 |
+
"folding_model": "openfold",
|
51 |
+
"parameters": {
|
52 |
+
"num_recycle": 2,
|
53 |
+
"random_seed": 5,
|
54 |
+
"gap_trick": false,
|
55 |
+
"msa_mode": "search",
|
56 |
+
"template_mode": "search"
|
57 |
+
}
|
58 |
+
}
|
59 |
+
]
|
60 |
+
}
|
61 |
+
```
|
62 |
+
|
63 |
+
### CSV
|
64 |
+
|
65 |
+
In a `CSV` format configuration file, each row represents a different request. The columns `complex_id`, `description` and `fasta_sequence` describe the proteins, while the others define options passed to the folding process.
|
66 |
+
|
67 |
+
Multimer proteins are specified by listing each chain on separate lines and assigning them the same `complex_id`. Parameters can be repeated or left empty after the first sequence, as the API will only keep the parameters defined for the first sequence of the multimer.
|
68 |
+
|
69 |
+
!!! Note
|
70 |
+
The `CSV` format might be a bit tricky to set up in particular when using custom templates and MSAs. In that case, it might be more convenient to use the [`JSON` format](#json).
|
71 |
+
|
72 |
+
=== "csv"
|
73 |
+
|
74 |
+
```csv
|
75 |
+
complex_id,description,fasta_sequence,folding_model,custom_templates,num_recycle,random_seed,msa_mode,template_mode,gap_trick,custom_msas
|
76 |
+
mono_2LIS_auto_msa_custom_template,>2LIS_1|Chain A|SPERM LYSIN|Haliotis rufescens (6454),RSWHYVEPKFLNKAFEVALKVQIIAGFDRGLVKWLRVHGRTLSTVQKKALYFVNRRYMQTHWANYMLWINKKIDALGRTPVVGDYTRLGAEIGRRIDMAYFYDFLKDKNMIPKYLPYMEEINRMRPADVPVKYMGK,af2,"['1agw.cif','1agz.cif']",3,0,search,custom,0,"['1agb_A.sto','1agb_B.sto']"
|
77 |
+
multi_6M0J_standard,>6M0J_1|Chain A|Angiotensin-converting enzyme 2|Homo sapiens (9606),STIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADHHHHHH,openfold,,2,5,search,search,0,
|
78 |
+
multi_6M0J_standard,>6M0J_2|Chain B[auth E]|Spike protein S1|Severe acute respiratory syndrome coronavirus 2 (2697049),RVQPTESIVRFPNITNLCPFGEVFNATRFASVYAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIADYNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYFPLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFHHHHHH,,,2,5,search,search,0,
|
79 |
+
```
|
80 |
+
|
81 |
+
## Application
|
82 |
+
|
83 |
+
### Using the CLI
|
84 |
+
|
85 |
+
=== ":octicons-command-palette-16: CLI"
|
86 |
+
|
87 |
+
```bash
|
88 |
+
folding predict af2 batch_job.csv # or batch_job.json
|
89 |
+
```
|
90 |
+
|
91 |
+
### Using Python scripts
|
92 |
+
|
93 |
+
To submit a batch job with scripts, we advise using helper functions made
|
94 |
+
available in the [`folding_studio` package](../../tutorials/installation.md#cli-and-folding_studio-library) library. This helper
|
95 |
+
function will automatically upload the custom files (MSA, templates, initial guess, template
|
96 |
+
masks) you specified in your configuration file.
|
97 |
+
You can use a configuration file (see [Defining a batch job configuration file](#defining-a-batch-job-configuration-file)) or define a `JSON` object.
|
98 |
+
|
99 |
+
#### From a json object
|
100 |
+
|
101 |
+
To submit a batch job from a `JSON` object defined programmatically, we must
|
102 |
+
build a
|
103 |
+
[`BatchRequest`](https://folding-studio-data-models-dot-int-bio-foldingstudio-gcp.nw.r.appspot.com/request/#folding_studio_data_models.request.BatchRequest)
|
104 |
+
object. Once the `BatchRequest` object is built, it is written to a `JSON` file
|
105 |
+
and submitted with the `batch_prediction_from_file` helper function from the
|
106 |
+
[`folding_studio` package](../../tutorials/installation.md#cli-and-folding_studio-library) library. This helper function will
|
107 |
+
automatically upload the custom files (MSA, templates, initial guess, template
|
108 |
+
masks) you specified in your configuration file.
|
109 |
+
|
110 |
+
```python
|
111 |
+
import json
|
112 |
+
from pathlib import Path
|
113 |
+
|
114 |
+
from folding_studio import batch_prediction_from_file
|
115 |
+
from folding_studio import get_id_token
|
116 |
+
from folding_studio_data_models import (
|
117 |
+
AF2Parameters,
|
118 |
+
AF2Request,
|
119 |
+
BatchRequest,
|
120 |
+
FeatureMode,
|
121 |
+
OpenFoldParameters,
|
122 |
+
OpenFoldRequest,
|
123 |
+
Sequence,
|
124 |
+
)
|
125 |
+
|
126 |
+
folding_requests = [
|
127 |
+
# Monomer job with default AF2 parameters
|
128 |
+
AF2Request(
|
129 |
+
complex_id="Monomer Construct 0001",
|
130 |
+
sequences=[
|
131 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
132 |
+
],
|
133 |
+
parameters= AF2Parameters(),
|
134 |
+
),
|
135 |
+
# Monomer job with default OpenFold parameters
|
136 |
+
OpenFoldRequest(
|
137 |
+
complex_id="Monomer Construct 0001",
|
138 |
+
sequences=[
|
139 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
140 |
+
],
|
141 |
+
parameters=OpenFoldParameters(),
|
142 |
+
),
|
143 |
+
# Multimer job with deactivated template and 5 recycles.
|
144 |
+
AF2Request(
|
145 |
+
complex_id="Multimer Construct id 0001",
|
146 |
+
sequences=[
|
147 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP"),
|
148 |
+
Sequence(description="Antibody S203 Heavy Chain", fasta_sequence="MPAAFFF"),
|
149 |
+
Sequence(description="Antibody S203 Light Chain", fasta_sequence="MPAKK"),
|
150 |
+
],
|
151 |
+
parameters= AF2Parameters(
|
152 |
+
msa_mode=FeatureMode.SEARCH,
|
153 |
+
template_mode=FeatureMode.NONE,
|
154 |
+
random_seed=0,
|
155 |
+
num_recycle=5,
|
156 |
+
),
|
157 |
+
),
|
158 |
+
]
|
159 |
+
|
160 |
+
# Build and validate the request
|
161 |
+
batch_request = BatchRequest(requests=folding_requests)
|
162 |
+
|
163 |
+
# Prepare the batch request file for submission
|
164 |
+
json_data = batch_request.model_dump_json()
|
165 |
+
batch_file = Path("batch_request.json")
|
166 |
+
batch_file.write_text(json_data)
|
167 |
+
|
168 |
+
# Obtain the identity token from gcloud auth
|
169 |
+
identity_token = get_id_token()
|
170 |
+
|
171 |
+
try:
|
172 |
+
response = batch_prediction_from_file(
|
173 |
+
identity_token=identity_token, file=batch_file
|
174 |
+
)
|
175 |
+
json.dump(response, open("submission_batch.json", "w"))
|
176 |
+
except Exception as err:
|
177 |
+
print("Error during batch submission.")
|
178 |
+
print(err)
|
179 |
+
```
|
180 |
+
|
181 |
+
#### From a configuration file
|
182 |
+
|
183 |
+
To submit a batch job from a configuration file, simply pass it to the `batch_prediction_from_file` helper function from the [`folding_studio` package](../../tutorials/installation.md#cli-and-folding_studio-library). This helper function will automatically upload the custom files (MSA, templates, initial guess, template masks) you specified in your configuration file.
|
184 |
+
|
185 |
+
```python
|
186 |
+
import json
|
187 |
+
from pathlib import Path
|
188 |
+
|
189 |
+
from folding_studio import batch_prediction_from_file
|
190 |
+
from folding_studio import get_id_token
|
191 |
+
|
192 |
+
# Obtain the identity token from gcloud auth
|
193 |
+
identity_token = get_id_token()
|
194 |
+
|
195 |
+
batch_file = Path("my-batch-file.csv")
|
196 |
+
try:
|
197 |
+
response = batch_prediction_from_file(
|
198 |
+
identity_token=identity_token, file=batch_file
|
199 |
+
)
|
200 |
+
json.dump(response, open("submission_batch.json", "w"))
|
201 |
+
except Exception as err:
|
202 |
+
print("Error during batch submission.")
|
203 |
+
print(err)
|
204 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/batch_job_from_directory.md
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
A batch job allows you to submit multiple jobs at once. This avoids making too many API calls.
|
4 |
+
To submit a batch job through the folding CLI, you can simply pass a directory of FASTA files.
|
5 |
+
|
6 |
+
## Application
|
7 |
+
|
8 |
+
=== ":octicons-command-palette-16: CLI"
|
9 |
+
|
10 |
+
```bash
|
11 |
+
folding predict af2 path/to/my/fasta/directory --num-recycle 3 --random-seed 0
|
12 |
+
```
|
13 |
+
|
14 |
+
=== ":material-language-python: Python"
|
15 |
+
|
16 |
+
```python
|
17 |
+
from pathlib import Path
|
18 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
19 |
+
|
20 |
+
af2_predict(source=Path("path/to/my/fasta/directory"), num_recycle=3, random_seed=0)
|
21 |
+
```
|
22 |
+
|
23 |
+
Using the CLI, you will get the following information if the job was successfully submitted.
|
24 |
+
|
25 |
+
``` { .shell .no-copy }
|
26 |
+
Batch prediction job metadata written to batch_prediction_20250305172626.json
|
27 |
+
This file contains your experiments ids.
|
28 |
+
Batch prediction job submitted successfully !
|
29 |
+
The following experiments have been submitted (see batch_prediction_20250305172626.json for the full list):
|
30 |
+
['dfdddbc4e2969e327863260ba50f5a3cc1c62992']
|
31 |
+
For example, you can query an experiment status with the command:
|
32 |
+
|
33 |
+
folding experiment status dfdddbc4e2969e327863260ba50f5a3cc1c62992
|
34 |
+
|
35 |
+
The results of the following experiments were found in the cache (see batch_prediction_20250305172626.json for the full list):
|
36 |
+
['a13e8c9003695773b2623179fd0eafdf6296602d']
|
37 |
+
Use the `folding experiment results id` command to download the prediction results. For example:
|
38 |
+
|
39 |
+
folding experiment results a13e8c9003695773b2623179fd0eafdf6296602d
|
40 |
+
|
41 |
+
```
|
42 |
+
|
43 |
+
!!! warning
|
44 |
+
If you submit a batch job using a directory of `FASTA` files, the options passed to the `predict` command will be applied to **ALL** the proteins.
|
45 |
+
If you want to pass protein-specific options, you need to submit your batch job through a [configuration file](./batch_job_from_configuration_file.md).
|
folding-studio/docs/docs/how-to-guides/af2_openfold/cancel_experiment.md
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
You can cancel one or more ongoing submissions using their `experiment_id`.
|
4 |
+
|
5 |
+
For a batch job submission, cancelling one submission won't interfere with the other jobs in the batch.
|
6 |
+
|
7 |
+
If you want to cancel a whole batch submission, you have to cancel every experiment id that belongs to it.
|
8 |
+
|
9 |
+
## Application
|
10 |
+
|
11 |
+
=== ":octicons-command-palette-16: CLI"
|
12 |
+
|
13 |
+
```bash
|
14 |
+
folding experiment cancel experiment_id_1
|
15 |
+
```
|
16 |
+
|
17 |
+
=== ":material-language-python: Python"
|
18 |
+
|
19 |
+
```python
|
20 |
+
from folding_studio.commands.experiment import cancel
|
21 |
+
|
22 |
+
cancel(exp_id=["experiment_id_1"])
|
23 |
+
```
|
24 |
+
|
25 |
+
!!! Note
|
26 |
+
Requests cannot be cancelled with keyboard interruption
|
27 |
+
|
28 |
+
Cancelling a running process of the CLI or a Python script with a keyboard
|
29 |
+
interruption (`ctrl+c`) **will most likely not** cancel the job submission process.
|
30 |
+
This is because once the `POST` request has reached the server, there is no way
|
31 |
+
to send an interruption signal to the running process on the server.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/download_logs.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This How-to guide explains how to download the logs of an experiment.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
=== ":octicons-command-palette-16: CLI"
|
7 |
+
|
8 |
+
```bash
|
9 |
+
folding experiment logs b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./logs_exp_b21b09.zip
|
10 |
+
```
|
11 |
+
|
12 |
+
=== ":material-language-python: Python"
|
13 |
+
|
14 |
+
```python
|
15 |
+
from pathlib import Path
|
16 |
+
from folding_studio.commands.experiment import logs
|
17 |
+
|
18 |
+
logs(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./logs_exp_b21b09.zip"))
|
19 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/download_prediction_results.md
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
AlphaFold2/OpenFold models generate not only predictions but also confidence metrics. Once the prediction process of an experiment has finished, all of them are saved into a zip file.
|
4 |
+
|
5 |
+
The zip file contains :
|
6 |
+
|
7 |
+
- The confidence metrics of the models in `.json` format.
|
8 |
+
- The relaxed predictions per model in `.pdb` format.
|
9 |
+
- The raw predictions per model in `.pkl` format.
|
10 |
+
|
11 |
+
Here is an example of the zip file structure :
|
12 |
+
|
13 |
+
``` { .shell .no-copy }
|
14 |
+
results
|
15 |
+
├── metrics_per_model.json
|
16 |
+
├── msa_coverage.json
|
17 |
+
├── relaxed_predictions
|
18 |
+
│ ├── model_1_ptm.pdb
|
19 |
+
│ ├── model_2_ptm.pdb
|
20 |
+
│ ├── model_3_ptm.pdb
|
21 |
+
│ ├── model_4_ptm.pdb
|
22 |
+
│ └── model_5_ptm.pdb
|
23 |
+
├── rmsd_per_model.json
|
24 |
+
└── unrelaxed_predictions
|
25 |
+
├── model_1_ptm.pdb
|
26 |
+
├── model_2_ptm.pdb
|
27 |
+
├── model_3_ptm.pdb
|
28 |
+
├── model_4_ptm.pdb
|
29 |
+
└── model_5_ptm.pdb
|
30 |
+
```
|
31 |
+
|
32 |
+
## Application
|
33 |
+
|
34 |
+
Once the experiment has finished, you can download the results zip file.
|
35 |
+
|
36 |
+
=== ":octicons-command-palette-16: CLI"
|
37 |
+
|
38 |
+
```bash
|
39 |
+
folding experiment results b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./result_exp_b21b09.zip
|
40 |
+
```
|
41 |
+
|
42 |
+
=== ":material-language-python: Python"
|
43 |
+
|
44 |
+
```python
|
45 |
+
from pathlib import Path
|
46 |
+
from folding_studio.commands.experiment import results
|
47 |
+
|
48 |
+
results(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./result_exp_b21b09.zip"))
|
49 |
+
```
|
50 |
+
|
51 |
+
You will get the message:
|
52 |
+
|
53 |
+
``` { .shell .no-copy }
|
54 |
+
File downloaded successfully to result_exp_b21b09.zip.
|
55 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/fetch_folding_job_status.md
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This how-to guide explains how to manage folding jobs using the `experiment_id`. Each submission creates a unique experiment, enabling caching and avoiding redundant computations. You can track the status of individual or batch jobs with the `experiment_id`.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
### Fetch a job `experiment_id`
|
7 |
+
Submitting a folding job creates an experiment. This allows caching and avoids
|
8 |
+
useless re-computation of previously submitted folding jobs.
|
9 |
+
This applies also to batch jobs, if you submit a batch of `n` jobs, `n`
|
10 |
+
experiments will be created.
|
11 |
+
|
12 |
+
Each experiment is associated with a unique `experiment_id`. Its generation is
|
13 |
+
deterministic, created from the submitted FASTA sequence (without taking into
|
14 |
+
account the description) and the job parameters.
|
15 |
+
|
16 |
+
Once your folding job has been submitted, and thus the experiment created, you
|
17 |
+
can get various information from the `experiment_id`.
|
18 |
+
|
19 |
+
You can get the list of your experiment ids that succeeded or are still pending
|
20 |
+
using :
|
21 |
+
|
22 |
+
=== ":octicons-command-palette-16: CLI"
|
23 |
+
|
24 |
+
```bash
|
25 |
+
folding experiment list
|
26 |
+
```
|
27 |
+
|
28 |
+
=== ":material-language-python: Python"
|
29 |
+
|
30 |
+
```python
|
31 |
+
from folding_studio.commands.experiment import list
|
32 |
+
|
33 |
+
list()
|
34 |
+
```
|
35 |
+
|
36 |
+
You will get a table with the different experiments launched:
|
37 |
+
|
38 |
+
``` { .shell .no-copy }
|
39 |
+
Done and pending experiments list written to None
|
40 |
+
Done and pending experiments
|
41 |
+
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
|
42 |
+
┃ Experiment ID ┃ Status ┃
|
43 |
+
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩
|
44 |
+
│ a13e8c9003695773b2623179fd0eafdf6296602d │ Done │
|
45 |
+
│ 37d816fd1ad0461dd4291963ec10ca5c631058db │Pending │
|
46 |
+
└──────────────────────────────────────────┴────────┘
|
47 |
+
```
|
48 |
+
|
49 |
+
### Retrieve a job status
|
50 |
+
|
51 |
+
=== ":octicons-command-palette-16: CLI"
|
52 |
+
|
53 |
+
```bash
|
54 |
+
folding experiment status b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9
|
55 |
+
```
|
56 |
+
|
57 |
+
=== ":material-language-python: Python"
|
58 |
+
|
59 |
+
```python
|
60 |
+
from folding_studio.commands.experiment import status
|
61 |
+
|
62 |
+
status(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9")
|
63 |
+
```
|
64 |
+
|
65 |
+
The experiment status is the current state of the experiment.
|
66 |
+
|
67 |
+
| VALUE | DESCRIPTION |
|
68 |
+
| ----------- | ------------------------------------------------------------------------------- |
|
69 |
+
| `Done` | The experiment is done and its features and results are available for download. |
|
70 |
+
| `Pending` | The experiment is still ongoing. |
|
71 |
+
| `Failed` | The experiment has failed. |
|
72 |
+
| `Cancelled` | The experiment was cancelled. |
|
73 |
+
|
74 |
+
Once you have submitted a folding job, you can get its status at any time.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/get_experiment_features.md
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
Before making a folding prediction, a feature generation process will build the two main features needed by the AlphaFold2 model :
|
3 |
+
|
4 |
+
- The [Multiple Sequence Alignment](https://en.wikipedia.org/wiki/Multiple_sequence_alignment) (MSA) search results. By default, the Folding Studio pipeline will trigger an MSA search on Uniref90, small BFD, Mgnify and Uniprot (multimer jobs only) using the `jackhmmer` algorithm.
|
5 |
+
|
6 |
+
- The protein [template](https://en.wikipedia.org/wiki/Homology_modeling) search results. By default, the Folding Studio pipeline will trigger a template search on the PDB70 structure databases using the `hhsearch` algorithm (`hhblits` for multimer jobs).
|
7 |
+
|
8 |
+
Once the feature generation process of an experiment has finished, all the generated features are saved into a zip file.
|
9 |
+
|
10 |
+
The zip file contains :
|
11 |
+
|
12 |
+
- The full pickled features in `.pkl` format.
|
13 |
+
- The output of the MSA search: the MSA search results on multiple databases in `.a3m` format.
|
14 |
+
- The output of the template search : the four best matching templates in `.cif` format.
|
15 |
+
|
16 |
+
Here is an example of the zip file structure for a monomer :
|
17 |
+
|
18 |
+
``` { .shell .no-copy }
|
19 |
+
extracted_experiment_features_zip
|
20 |
+
├── features.pkl
|
21 |
+
├── msas
|
22 |
+
│ ├── mgnify_hits.a3m
|
23 |
+
│ ├── pdb_hits.hhr
|
24 |
+
│ ├── small_bfd_hits.a3m
|
25 |
+
│ └── uniref90_hits.a3m
|
26 |
+
└── templates
|
27 |
+
├── 5kwb.cif
|
28 |
+
├── 6m0j.cif
|
29 |
+
├── 6vsj.cif
|
30 |
+
├── 6vw1.cif
|
31 |
+
└── selected_templates.json
|
32 |
+
```
|
33 |
+
|
34 |
+
For multimer, the structure is similar except that there is a dedicated subdirectory for each protein in the `msas` and `templates` directory.
|
35 |
+
|
36 |
+
You can download this zip file to check the generated features and use its content for specifying [custom MSA feature](./../af2_openfold/set_af_folding_parameters.md#custom-msas) or [custom template features](../af2_openfold/set_af_folding_parameters.md#custom-templates) in further experiments.
|
37 |
+
|
38 |
+
|
39 |
+
## Application
|
40 |
+
|
41 |
+
=== ":octicons-command-palette-16: CLI"
|
42 |
+
|
43 |
+
```bash
|
44 |
+
folding experiment features b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./features_exp_b21b09.zip
|
45 |
+
```
|
46 |
+
|
47 |
+
=== ":material-language-python: Python"
|
48 |
+
|
49 |
+
```python
|
50 |
+
from pathlib import Path
|
51 |
+
from folding_studio.commands.experiment import features
|
52 |
+
|
53 |
+
features(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./features_exp_b21b09.zip"))
|
54 |
+
```
|
55 |
+
|
56 |
+
Once the features are downloaded, you will get the following message:
|
57 |
+
|
58 |
+
``` { .shell .no-copy }
|
59 |
+
File downloaded successfully to features_exp_b21b09.zip.
|
60 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/provide_input_data.md
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This guide will help you determine whether a given input sequence is compatible with the AlphaFold2 and OpenFold models.
|
3 |
+
|
4 |
+
## Supported inputs
|
5 |
+
|
6 |
+
To submit a folding job with AlphaFold2 and OpenFold, you need the sequence input file in
|
7 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your
|
8 |
+
protein sequence.
|
9 |
+
|
10 |
+
It can be a monomer or a multimer sequence.
|
11 |
+
|
12 |
+
=== "monomer"
|
13 |
+
|
14 |
+
```text
|
15 |
+
>SARS-CoV-2|RBD|Omicron variant
|
16 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
17 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
18 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
19 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
20 |
+
```
|
21 |
+
|
22 |
+
=== "multimer"
|
23 |
+
|
24 |
+
```text
|
25 |
+
>SARS-CoV-2|RBD|Omicron variant
|
26 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
27 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
28 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
29 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
30 |
+
>Processed angiotensin-converting enzyme 2|Homo sapiens (9606)
|
31 |
+
STIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADRHHHHHH
|
32 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/set_af_folding_parameters.md
ADDED
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
The different folding parameters are detailed in the [reference section](../../reference/cli.md#predict) but this how-to guide gives some examples of how to use them.
|
4 |
+
|
5 |
+
## Application
|
6 |
+
|
7 |
+
### Number of recycle steps
|
8 |
+
|
9 |
+
You can choose the number of recycling steps that the input undergoes.
|
10 |
+
Recycling steps refer to the iterative refinement process where the input data
|
11 |
+
is repeatedly processed through the entire network. During each recycling step,
|
12 |
+
the network uses the output from the previous cycle as a new input, allowing it
|
13 |
+
to progressively refine and improve its predictions.
|
14 |
+
|
15 |
+
This iterative approach can enhance the accuracy of the final output,
|
16 |
+
especially for complex structures or cases where more nuanced adjustments are
|
17 |
+
needed. By adjusting the number of recycling steps, you can control the balance
|
18 |
+
between computational time and the desired level of refinement for your
|
19 |
+
predictions.
|
20 |
+
|
21 |
+
By default, it is set to 3.
|
22 |
+
|
23 |
+
=== ":octicons-command-palette-16: CLI"
|
24 |
+
|
25 |
+
```bash
|
26 |
+
folding predict af2 path/to/my/monomer.fasta --num-recycle 5
|
27 |
+
```
|
28 |
+
|
29 |
+
=== ":material-language-python: Python"
|
30 |
+
|
31 |
+
```python
|
32 |
+
from pathlib import Path
|
33 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
34 |
+
|
35 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), num_recycle=5)
|
36 |
+
```
|
37 |
+
|
38 |
+
### Random seed
|
39 |
+
|
40 |
+
To generate different results from the same input sequence, you can change the random seed used during the forward pass.
|
41 |
+
|
42 |
+
By default, it is set to 0.
|
43 |
+
|
44 |
+
=== ":octicons-command-palette-16: CLI"
|
45 |
+
|
46 |
+
```bash
|
47 |
+
folding predict af2 path/to/my/monomer.fasta --random-seed 42
|
48 |
+
```
|
49 |
+
|
50 |
+
=== ":material-language-python: Python"
|
51 |
+
|
52 |
+
```python
|
53 |
+
from pathlib import Path
|
54 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
55 |
+
|
56 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), random_seed=42)
|
57 |
+
```
|
58 |
+
|
59 |
+
If you would like to submit a random seed scan job with the CLI, you can use the `--num-seed` option which specifies the number of random seed values to submit.
|
60 |
+
|
61 |
+
=== ":octicons-command-palette-16: CLI"
|
62 |
+
|
63 |
+
```bash
|
64 |
+
folding predict af2 path/to/my/monomer.fasta --num-seed 10
|
65 |
+
```
|
66 |
+
|
67 |
+
=== ":material-language-python: Python"
|
68 |
+
|
69 |
+
```python
|
70 |
+
from pathlib import Path
|
71 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
72 |
+
|
73 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), num_seed=10)
|
74 |
+
```
|
75 |
+
|
76 |
+
### Ignore cached experiments
|
77 |
+
|
78 |
+
By default, if you submit a job that has already been submitted, it will not run, and the cached results will be returned.
|
79 |
+
This is determined by the job experiment id, see [Fetch a job experiment_id](../../how-to-guides/af2_openfold/fetch_folding_job_status.md#fetch-a-job-experiment_id) for more details.
|
80 |
+
|
81 |
+
However, you can override this behavior and force the job to run, even if it was submitted earlier.
|
82 |
+
|
83 |
+
!!! warning
|
84 |
+
This will overwrite the previous experiment results, replacing them with the most recent ones.
|
85 |
+
|
86 |
+
=== ":octicons-command-palette-16: CLI"
|
87 |
+
|
88 |
+
```bash
|
89 |
+
folding predict af2 path/to/my/monomer.fasta --no-cache
|
90 |
+
```
|
91 |
+
|
92 |
+
=== ":material-language-python: Python"
|
93 |
+
|
94 |
+
```python
|
95 |
+
from pathlib import Path
|
96 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
97 |
+
|
98 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), cache=False)
|
99 |
+
```
|
100 |
+
|
101 |
+
### Use specific AlphaFold2/OpenFold models
|
102 |
+
|
103 |
+
By default, predictions are generated using all five AlphaFold2/OpenFold models. They
|
104 |
+
each have slight variations in how they predict protein structures, providing a
|
105 |
+
range of potential outcomes.
|
106 |
+
Leveraging all five models increases the robustness of the predictions, as it
|
107 |
+
allows for a more comprehensive exploration of possible protein conformations.
|
108 |
+
|
109 |
+
However, if you wish to narrow down the prediction process to specific models,
|
110 |
+
you can do so by specifying the IDs of the models you want to use.
|
111 |
+
It allows you to focus on particular models that may be better suited for your
|
112 |
+
specific use case or to reduce computational time by excluding models that are
|
113 |
+
less relevant for your needs.
|
114 |
+
|
115 |
+
Find more details about the difference between the model training procedures
|
116 |
+
and inputs
|
117 |
+
[here](https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf#page=47).
|
118 |
+
|
119 |
+
=== ":octicons-command-palette-16: CLI"
|
120 |
+
|
121 |
+
```bash
|
122 |
+
folding predict af2 path/to/my/monomer.fasta --model-subset 1 --model-subset 2 --model-subset 3
|
123 |
+
```
|
124 |
+
|
125 |
+
=== ":material-language-python: Python"
|
126 |
+
|
127 |
+
```python
|
128 |
+
from pathlib import Path
from folding_studio.commands.predict import af2 as af2_predict
|
129 |
+
|
130 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), model_subset=[1, 2, 3])
|
131 |
+
```
|
132 |
+
|
133 |
+
### Features generation mode
|
134 |
+
|
135 |
+
=== "MSA"
|
136 |
+
|
137 |
+
| Value | Description |
|
138 |
+
| -------------------- | -------------------------------------------------------------------------------------------- |
|
139 |
+
| `"search"` (default) | automated MSA search of sequence databases using `JackHMMer` (Uniref90, small_bfd and mgnify) |
|
140 |
+
| `"mmseqs"` | automated search of Uniref30 and Colabfold_env_db using the **self-hosted** `MMSeqs2` server |
|
141 |
+
| `"none"` | deactivate MSA features |
|
142 |
+
| `"custom"` | use user provided MSA input (`.sto` or `.a3m` format) |
|
143 |
+
|
144 |
+
!!! note
|
145 |
+
`MMSeqs2` produces fairly different MSA results compared to JackHMMer, as they use different datasets (Uniref30 and colabfold_env_db) and different search algorithms. However, the MSA produced by `MMSeqs2` is generally more diverse and can be leveraged to predict structures with higher accuracy (see this [publication](https://www.nature.com/articles/s41592-023-02130-4)). For more information about MMSeqs2, please refer to the corresponding [paper](https://www.biorxiv.org/content/10.1101/079681v5).
|
146 |
+
|
147 |
+
=== "Templates"
|
148 |
+
|
149 |
+
| Value | Description |
|
150 |
+
| -------------------- | -------------------------------------------------------------------------------------------- |
|
151 |
+
| `"search"` (default) | automated search of PDB70 structure databases using `hhsearch` (`hhblits` for multimer jobs) |
|
152 |
+
| `"mmseqs"` | automated search of pdb100 structure database using the **self-hosted** `mmseqs2` server |
|
153 |
+
| `"none"` | deactivate template features |
|
154 |
+
| `"custom"` | use user provided template structures (`.cif` format) or PDB code. |
|
155 |
+
|
156 |
+
#### Custom features
|
157 |
+
|
158 |
+
Instead of using the default experiment feature generation process, you might want to specify your own MSA or template features, or even remove them altogether.
|
159 |
+
|
160 |
+
We see two main use cases where you might want to override the default feature generation process :
|
161 |
+
|
162 |
+
- You want to utilize custom features specifically tailored for your folding job. For example, you have obtained a protein structure from a crystallography experiment and want to use it as a custom template.
|
163 |
+
|
164 |
+
- You already submitted a folding job for the same protein and you want to reuse its features to speed up the new jobs.
|
165 |
+
|
166 |
+
##### Custom MSAs
|
167 |
+
|
168 |
+
To modify the default MSA feature computation behavior, you need to specify the MSA feature computation mode using the `msa_mode="custom"` mode. If you pass custom MSA features but didn't set the MSA feature mode to `custom`, they won't be taken into account and the MSA feature mode will be the default (`search`).
|
169 |
+
|
170 |
+
We support specifying custom MSA features as files in `.sto` or `.a3m` format:
|
171 |
+
|
172 |
+
- You need to provide **at least as many** `.sto` or `.a3m` files as chains in the FASTA file. You may provide several MSA files for a chain.
|
173 |
+
|
174 |
+
- To assign a custom MSA to a specific chain, append the chain identifier as a suffix to the file name.
|
175 |
+
|
176 |
+
!!! example
|
177 |
+
For instance, in a complex with two chains A and B (as listed in the FASTA file), name your custom MSA files `my_custom_msa_A.sto` and `my_custom_msa_B.sto`.
|
178 |
+
|
179 |
+
!!! warning
|
180 |
+
If you use `.a3m` files downloaded from a
|
181 |
+
[ColabFold](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb)
|
182 |
+
notebook run, make sure to remove lines starting with a `#` before submitting
|
183 |
+
the job. These are artifacts added by the ColabFold pipeline after the MMSeqs2
|
184 |
+
run which are not relevant to Folding Studio but **will cause a crash**.
|
185 |
+
|
186 |
+
=== ":octicons-command-palette-16: CLI"
|
187 |
+
|
188 |
+
```bash
|
189 |
+
# monomer folding job submission with custom msa features
|
190 |
+
folding predict af2 path/to/my/monomer.fasta --msa-mode custom --custom-msa /path/to/monomer_custom_msa_A.sto
|
191 |
+
|
192 |
+
# multimer folding job submission with custom msa features
|
193 |
+
folding predict af2 path/to/my/multimer.fasta --msa-mode custom --custom-msa path/to/custom_msa_1_A.sto --custom-msa path/to/custom_msa_2_A.sto --custom-msa path/to/custom_msa_B.sto
|
194 |
+
|
195 |
+
# monomer folding job submission with mmseqs for msa and templates
|
196 |
+
folding predict af2 path/to/my/monomer.fasta --msa-mode mmseqs --template-mode mmseqs
|
197 |
+
```
|
198 |
+
|
199 |
+
=== ":material-language-python: Python"
|
200 |
+
|
201 |
+
```python
|
202 |
+
from pathlib import Path
|
203 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
204 |
+
|
205 |
+
# monomer folding job submission with custom msa features
|
206 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), msa_mode="custom", custom_msa=[Path("/path/to/monomer_custom_msa_A.sto")])
|
207 |
+
|
208 |
+
# multimer folding job submission with custom msa features
|
209 |
+
af2_predict(source=Path("path/to/my/multimer.fasta"), msa_mode="custom", custom_msa=[Path("/path/to/custom_msa_1_A.sto"), Path("/path/to/custom_msa_2_A.sto"), Path("/path/to/custom_msa_B.sto")])
|
210 |
+
|
211 |
+
# monomer folding job submission with mmseqs for msa and templates
|
212 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), msa_mode="mmseqs", template_mode="mmseqs")
|
213 |
+
```
|
214 |
+
|
215 |
+
##### Custom templates
|
216 |
+
|
217 |
+
To modify the default template feature computation behavior, you need to specify the template feature computation mode using the `template_mode` parameter :
|
218 |
+
|
219 |
+
We support specifying custom templates as files in `.cif` format or as PDB codes of crystal structures. Both options can be used at the same time for the same job.
|
220 |
+
|
221 |
+
You are free to provide as many templates as you wish. However, the AlphaFold2 pipeline will only keep the 4 best-matching templates.
|
222 |
+
|
223 |
+
Note that by design, AlphaFold2 **monomer** models 3, 4, and 5 do not incorporate template features. As a result, modifying the template feature calculations will not affect their predictions.
|
224 |
+
|
225 |
+
!!! note
|
226 |
+
If `"template_mode": "search"` is used with `"msa_mode": "none"`, an automated MSA search on Uniref90 will still be run in order to obtain an MSA necessary for `hhsearch/hhblits` to complete the template search.
|
227 |
+
However these MSA search results will not be included in the features.
|
228 |
+
|
229 |
+
!!! warning
|
230 |
+
If you pass custom template features without setting the template feature mode to `custom`, they will not be taken into account and the template feature mode will remain the default (`search`).
|
231 |
+
|
232 |
+
=== ":octicons-command-palette-16: CLI"
|
233 |
+
|
234 |
+
```bash
|
235 |
+
folding predict af2 path/to/my/monomer.fasta --template-mode custom --custom-template-id 5ii8 --custom-template-id 6m0j --custom-template /path/to/template_1.cif --custom-template /path/to/template_2.cif --custom-template /path/to/template_3.cif
|
236 |
+
```
|
237 |
+
|
238 |
+
=== ":material-language-python: Python"
|
239 |
+
|
240 |
+
```python
|
241 |
+
from pathlib import Path
|
242 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
243 |
+
|
244 |
+
af2_predict(source=Path("path/to/my/monomer.fasta"), template_mode="custom", custom_template_id=["5ii8", "6m0j"], custom_template=[Path("/path/to/template_1.cif"), Path("/path/to/template_2.cif"), Path("/path/to/template_3.cif")])
|
245 |
+
```
|
246 |
+
|
247 |
+
### Specification for batch jobs
|
248 |
+
|
249 |
+
To submit jobs with custom files programmatically, we use helper functions made available in the `folding_studio` [package](../../tutorials/installation.md#cli-and-folding_studio-library).
|
250 |
+
|
251 |
+
=== "Custom templates"
|
252 |
+
|
253 |
+
```python
|
254 |
+
import json
|
255 |
+
from pathlib import Path
|
256 |
+
|
257 |
+
from folding_studio import batch_prediction_from_file
|
258 |
+
from folding_studio import get_id_token
|
259 |
+
from folding_studio_data_models import (
|
260 |
+
AF2Parameters,
|
261 |
+
AF2Request,
|
262 |
+
BatchRequest,
|
263 |
+
FeatureMode,
|
264 |
+
OpenFoldParameters,
|
265 |
+
OpenFoldRequest,
|
266 |
+
Sequence,
|
267 |
+
)
|
268 |
+
|
269 |
+
# Define local templates path
|
270 |
+
template_A_local_path = "/path/to/custom_template_A.cif"
|
271 |
+
template_B_local_path = "/path/to/custom_template_B.cif"
|
272 |
+
template_C_local_path = "/path/to/custom_template_C.cif"
|
273 |
+
|
274 |
+
# Build the batch request
|
275 |
+
requests = [
|
276 |
+
AF2Request(
|
277 |
+
complex_id="Monomer Construct 0001",
|
278 |
+
sequences=[
|
279 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
280 |
+
],
|
281 |
+
parameters= AF2Parameters(
|
282 |
+
template_mode=FeatureMode.CUSTOM, custom_templates=[template_A_local_path]
|
283 |
+
),
|
284 |
+
),
|
285 |
+
OpenFoldRequest(
|
286 |
+
complex_id="Monomer Construct 0001 with OpenFold",
|
287 |
+
sequences=[
|
288 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
289 |
+
],
|
290 |
+
parameters=OpenFoldParameters(
|
291 |
+
template_mode=FeatureMode.CUSTOM, custom_templates=[template_A_local_path]
|
292 |
+
),
|
293 |
+
),
|
294 |
+
AF2Request(
|
295 |
+
complex_id="Multimer Construct id 0001",
|
296 |
+
sequences=[
|
297 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP"),
|
298 |
+
Sequence(description="Antibody S203 Heavy Chain", fasta_sequence="MPAAFFF"),
|
299 |
+
Sequence(description="Antibody S203 Light Chain", fasta_sequence="MPAKK"),
|
300 |
+
],
|
301 |
+
parameters= AF2Parameters(
|
302 |
+
template_mode=FeatureMode.CUSTOM,
|
303 |
+
custom_templates=[
|
304 |
+
template_A_local_path,
|
305 |
+
template_B_local_path,
|
306 |
+
template_C_local_path,
|
307 |
+
],
|
308 |
+
),
|
309 |
+
),
|
310 |
+
]
|
311 |
+
# Build and validate the request
|
312 |
+
batch_request = BatchRequest(requests=requests)
|
313 |
+
|
314 |
+
# Prepare the batch request file for submission
|
315 |
+
json_data = batch_request.model_dump_json()
|
316 |
+
batch_file = Path("batch_request.json")
|
317 |
+
batch_file.write_text(json_data)
|
318 |
+
|
319 |
+
# Obtain the identity token from gcloud auth
|
320 |
+
identity_token = get_id_token()
|
321 |
+
|
322 |
+
try:
|
323 |
+
response = batch_prediction_from_file(
|
324 |
+
identity_token=identity_token, file=batch_file
|
325 |
+
)
|
326 |
+
json.dump(response, open("submission_batch.json", "w"))
|
327 |
+
except Exception as err:
|
328 |
+
print("Error during batch submission.")
|
329 |
+
print(err)
|
330 |
+
```
|
331 |
+
|
332 |
+
=== "Custom MSAs"
|
333 |
+
|
334 |
+
```python
|
335 |
+
import json
|
336 |
+
from pathlib import Path
|
337 |
+
|
338 |
+
from folding_studio import batch_prediction_from_file
|
339 |
+
from folding_studio import get_id_token
|
340 |
+
from folding_studio_data_models import (
|
341 |
+
AF2Parameters,
|
342 |
+
AF2Request,
|
343 |
+
BatchRequest,
|
344 |
+
FeatureMode,
|
345 |
+
OpenFoldParameters,
|
346 |
+
OpenFoldRequest,
|
347 |
+
Sequence,
|
348 |
+
)
|
349 |
+
|
350 |
+
# Define local MSA path
|
351 |
+
msa_A_local_path = "/path/to/custom_msa_A.sto"
|
352 |
+
msa_B_local_path = "/path/to/custom_msa_B.sto"
|
353 |
+
msa_C_local_path = "/path/to/custom_msa_C.sto"
|
354 |
+
|
355 |
+
# Build the batch request
|
356 |
+
requests = [
|
357 |
+
AF2Request(
|
358 |
+
complex_id="Monomer Construct 0001",
|
359 |
+
sequences=[
|
360 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
361 |
+
],
|
362 |
+
parameters= AF2Parameters(
|
363 |
+
msa_mode=FeatureMode.CUSTOM, custom_msas=[msa_A_local_path]
|
364 |
+
),
|
365 |
+
),
|
366 |
+
OpenFoldRequest(
|
367 |
+
complex_id="Monomer Construct 0001 with OpenFold",
|
368 |
+
sequences=[
|
369 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP")
|
370 |
+
],
|
371 |
+
parameters=OpenFoldParameters(
|
372 |
+
msa_mode=FeatureMode.CUSTOM, custom_msas=[msa_A_local_path]
|
373 |
+
),
|
374 |
+
),
|
375 |
+
AF2Request(
|
376 |
+
complex_id="Multimer Construct id 0001",
|
377 |
+
sequences=[
|
378 |
+
Sequence(description="Wild Type + mutation X", fasta_sequence="MVFKLLLP"),
|
379 |
+
Sequence(description="Antibody S203 Heavy Chain", fasta_sequence="MPAAFFF"),
|
380 |
+
Sequence(description="Antibody S203 Light Chain", fasta_sequence="MPAKK"),
|
381 |
+
],
|
382 |
+
parameters= AF2Parameters(
|
383 |
+
msa_mode=FeatureMode.CUSTOM,
|
384 |
+
custom_msas=[msa_A_local_path, msa_B_local_path, msa_C_local_path],
|
385 |
+
),
|
386 |
+
),
|
387 |
+
]
|
388 |
+
# Build and validate the request
|
389 |
+
batch_request = BatchRequest(requests=requests)
|
390 |
+
|
391 |
+
# Prepare the batch request file for submission
|
392 |
+
json_data = batch_request.model_dump_json()
|
393 |
+
batch_file = Path("batch_request.json")
|
394 |
+
batch_file.write_text(json_data)
|
395 |
+
|
396 |
+
# Obtain the identity token from gcloud auth
|
397 |
+
identity_token = get_id_token()
|
398 |
+
|
399 |
+
try:
|
400 |
+
response = batch_prediction_from_file(
|
401 |
+
identity_token=identity_token, file=batch_file
|
402 |
+
)
|
403 |
+
json.dump(response, open("submission_batch.json", "w"))
|
404 |
+
except Exception as err:
|
405 |
+
print("Error during batch submission.")
|
406 |
+
print(err)
|
407 |
+
```
|
folding-studio/docs/docs/how-to-guides/af2_openfold/single_af2_job.md
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This how-to guide shows how to run a folding job using AlphaFold2.
|
3 |
+
|
4 |
+
!!! Note
|
5 |
+
All the other how-to guides in the **AlphaFold2/OpenFold section** of **How-to guides** apply to AlphaFold2.
|
6 |
+
|
7 |
+
## Application
|
8 |
+
|
9 |
+
=== ":octicons-command-palette-16: CLI"
|
10 |
+
|
11 |
+
```bash
|
12 |
+
folding predict af2 path/to/my/file.fasta --num-recycle 3 --random-seed 0
|
13 |
+
```
|
14 |
+
|
15 |
+
=== ":material-language-python: Python"
|
16 |
+
|
17 |
+
```python
|
18 |
+
from pathlib import Path
|
19 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
20 |
+
|
21 |
+
af2_predict(source=Path("path/to/my/file.fasta"), num_recycle=3, random_seed=0)
|
22 |
+
```
|
23 |
+
|
24 |
+
!!! Warning
|
25 |
+
If you plan to submit more than 10 folding jobs, it is **strongly** advised to
|
26 |
+
use batch job submission from a [directory](./batch_job_from_directory.md) or from a [configuration file](./batch_job_from_configuration_file.md). A batch job shares the feature generation steps across jobs, significantly speeding up processing if
|
27 |
+
they use similar features.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/single_openfold_job.md
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This how-to guide shows how to run a folding job using OpenFold, an alternative to AlphaFold2 in Folding Studio.
|
3 |
+
|
4 |
+
!!! Note
|
5 |
+
All the other how-to guides in **AlphaFold2/OpenFold section** of **How-to guides** apply to OpenFold.
|
6 |
+
|
7 |
+
## Application
|
8 |
+
|
9 |
+
=== ":octicons-command-palette-16: CLI"
|
10 |
+
|
11 |
+
```bash
|
12 |
+
folding predict openfold path/to/my/file.fasta --num-recycle 3 --random-seed 0
|
13 |
+
```
|
14 |
+
|
15 |
+
=== ":material-language-python: Python"
|
16 |
+
|
17 |
+
```python
|
18 |
+
from pathlib import Path
|
19 |
+
from folding_studio.commands.predict import openfold as openfold_predict
|
20 |
+
|
21 |
+
openfold_predict(source=Path("path/to/my/file.fasta"), num_recycle=3, random_seed=0)
|
22 |
+
```
|
23 |
+
|
24 |
+
!!! Warning
|
25 |
+
If you plan to submit more than 10 folding jobs, it is **strongly** advised to
|
26 |
+
use batch job submission from a [directory](./batch_job_from_directory.md) or from a [configuration file](./batch_job_from_configuration_file.md). A batch job shares the feature generation steps across jobs, significantly speeding up processing if
|
27 |
+
they use similar features.
|
folding-studio/docs/docs/how-to-guides/af2_openfold/soloseq_job.md
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
This tutorial explains how to run structure predictions using SoloSeq, an OpenFold-like model.
|
4 |
+
|
5 |
+
!!! Warning
|
6 |
+
SoloSeq is a synchronous model and uses a different feature engineering process compared to AlphaFold2/OpenFold. Therefore, the process of submitting SoloSeq jobs and retrieving their output differs significantly from the processes described for AlphaFold2/OpenFold, and the other how-to guides in the **AlphaFold2/OpenFold** folder do not apply to SoloSeq.
|
7 |
+
|
8 |
+
## Application
|
9 |
+
|
10 |
+
### Launch a job from a FASTA file
|
11 |
+
|
12 |
+
=== ":octicons-command-palette-16: CLI"
|
13 |
+
|
14 |
+
```bash
|
15 |
+
folding predict soloseq path/to/my/fasta/file.fasta --project-code "your-project-code" --output-dir ./
|
16 |
+
```
|
17 |
+
|
18 |
+
=== ":material-language-python: Python"
|
19 |
+
|
20 |
+
```python
|
21 |
+
from folding_studio.client import Client
|
22 |
+
from folding_studio.query.soloseq import SoloSeqQuery
|
23 |
+
|
24 |
+
inference_parameters = {"project_code": "your-project-code",
|
25 |
+
"seed":42}
|
26 |
+
|
27 |
+
file_path = "path/to/my/fasta/file.fasta"
|
28 |
+
|
29 |
+
# Create client
|
30 |
+
client = Client.from_jwt()
|
31 |
+
|
32 |
+
# Define query
|
33 |
+
query = SoloSeqQuery.from_file(path=file_path, parameters=inference_parameters)
|
34 |
+
|
35 |
+
# Send request
|
36 |
+
response = client.send_request(query)
|
37 |
+
|
38 |
+
# Download results
|
39 |
+
output_path = "./output.zip"
|
40 |
+
response.download_results(output_path, force=True, unzip=True)
|
41 |
+
```
|
42 |
+
|
43 |
+
### Launch a job from a directory of FASTA files
|
44 |
+
|
45 |
+
=== ":octicons-command-palette-16: CLI"
|
46 |
+
|
47 |
+
```bash
|
48 |
+
folding predict soloseq path/to/my/fasta/directory --project-code "your-project-code" --output-dir ./
|
49 |
+
```
|
50 |
+
|
51 |
+
=== ":material-language-python: Python"
|
52 |
+
|
53 |
+
```python
|
54 |
+
from folding_studio.client import Client
|
55 |
+
from folding_studio.query.soloseq import SoloSeqQuery
|
56 |
+
|
57 |
+
inference_parameters = {"project_code": "your-project-code",
|
58 |
+
"seed":42}
|
59 |
+
|
60 |
+
directory_path = "path/to/my/fasta/directory"
|
61 |
+
|
62 |
+
# Create client
|
63 |
+
client = Client.from_jwt()
|
64 |
+
|
65 |
+
# Define query
|
66 |
+
query = SoloSeqQuery.from_directory(path=directory_path, parameters=inference_parameters)
|
67 |
+
|
68 |
+
# Send request
|
69 |
+
response = client.send_request(query)
|
70 |
+
|
71 |
+
# Download results
|
72 |
+
output_path = "./output.zip"
|
73 |
+
response.download_results(output_path, force=True, unzip=True)
|
74 |
+
```
|
75 |
+
|
76 |
+
### Launch a job from a protein sequence directly
|
77 |
+
|
78 |
+
`SoloSeq` allows passing the protein sequence directly as input.
|
79 |
+
This makes the prediction job easier to integrate with your code.
|
80 |
+
|
81 |
+
=== ":material-language-python: Python"
|
82 |
+
|
83 |
+
```python
|
84 |
+
from folding_studio.client import Client
|
85 |
+
from folding_studio.query.soloseq import SoloSeqQuery
|
86 |
+
|
87 |
+
inference_parameters = {"project_code": "your-project-code",
|
88 |
+
"seed":42}
|
89 |
+
|
90 |
+
sequence = ">A|protein\nQLEDSEVEAVAKGLEEMYANGVTEDNFKNYVKNNFAQQEISSVEEELNVNIS"
|
91 |
+
|
92 |
+
# Create client
|
93 |
+
client = Client.from_jwt()
|
94 |
+
|
95 |
+
# Define query
|
96 |
+
query = SoloSeqQuery.from_protein_sequence(sequence=sequence, parameters=inference_parameters)
|
97 |
+
|
98 |
+
# Send request
|
99 |
+
response = client.send_request(query)
|
100 |
+
|
101 |
+
# Download results
|
102 |
+
output_path = "./output.zip"
|
103 |
+
response.download_results(output_path, force=True, unzip=True)
|
104 |
+
```
|
folding-studio/docs/docs/how-to-guides/af3/batch_job_from_directory.md
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
A batch job allows you to submit multiple jobs at once. This avoids making too many API calls.
|
4 |
+
To submit a batch job through the folding CLI, you can simply pass a directory of FASTA files (or both FASTA and YAML files if you are using the **Boltz-1** model).
|
5 |
+
|
6 |
+
## Application
|
7 |
+
|
8 |
+
=== ":octicons-command-palette-16: CLI"
|
9 |
+
|
10 |
+
```bash
|
11 |
+
folding predict boltz path/to/my/fasta/directory --num-recycle 3 --random-seed 0 --output ./ --unzip
|
12 |
+
```
|
13 |
+
|
14 |
+
=== ":material-language-python: Python"
|
15 |
+
|
16 |
+
```python
|
17 |
+
from folding_studio.client import Client
|
18 |
+
from folding_studio.query.boltz import BoltzQuery
|
19 |
+
|
20 |
+
inference_parameters = {"project_code": "your-project-code",
|
21 |
+
"seed":42}
|
22 |
+
|
23 |
+
directory_path = "path/to/my/fasta/directory"
|
24 |
+
|
25 |
+
# Create client
|
26 |
+
client = Client.authenticate()
|
27 |
+
|
28 |
+
# Define query
|
29 |
+
query = BoltzQuery.from_directory(path=directory_path, parameters=inference_parameters)
|
30 |
+
|
31 |
+
# Send request
|
32 |
+
response = client.send_request(query)
|
33 |
+
|
34 |
+
# Download results
|
35 |
+
output_path = "./output.zip"
|
36 |
+
response.download_results(output_path, force=True, unzip=True)
|
37 |
+
```
|
38 |
+
|
39 |
+
!!! Note
|
40 |
+
Depending on the model you want to use, you can replace `predict boltz` with `predict chai` or `predict protenix`.
|
folding-studio/docs/docs/how-to-guides/af3/boltz_single_yaml_job.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
**Boltz-1** model allows passing a YAML file (or a directory containing YAML files, see [batch job from directory section](./batch_job_from_directory.md) for more details about launching jobs from a directory) as input. The YAML format is more flexible and allows for more complex inputs, particularly around covalent bonds.
|
3 |
+
This YAML has to follow the Boltz-1 format below.
|
4 |
+
|
5 |
+
See [Boltz-1 documentation about prediction](https://github.com/jwohlwend/boltz/blob/main/docs/prediction.md) for details about YAML input format.
|
6 |
+
|
7 |
+
## Application
|
8 |
+
|
9 |
+
=== ":octicons-command-palette-16: CLI"
|
10 |
+
|
11 |
+
```bash
|
12 |
+
folding predict boltz path/to/my/file.yaml --project-code "your-project-code" --output ./
|
13 |
+
```
|
14 |
+
|
15 |
+
=== ":material-language-python: Python"
|
16 |
+
|
17 |
+
```python
|
18 |
+
from folding_studio.client import Client
|
19 |
+
from folding_studio.query.boltz import BoltzQuery
|
20 |
+
|
21 |
+
inference_parameters = {"project_code": "your-project-code",
|
22 |
+
"seed":42}
|
23 |
+
|
24 |
+
file_path = "path/to/my/file.yaml"
|
25 |
+
|
26 |
+
# Create client
|
27 |
+
client = Client.authenticate()
|
28 |
+
|
29 |
+
# Define query
|
30 |
+
query = BoltzQuery.from_file(path=file_path, parameters=inference_parameters)
|
31 |
+
|
32 |
+
# Send request
|
33 |
+
response = client.send_request(query)
|
34 |
+
|
35 |
+
# Download results
|
36 |
+
output_path = "./output.zip"
|
37 |
+
response.download_results(output_path, force=True)
|
38 |
+
```
|
folding-studio/docs/docs/how-to-guides/af3/provide_input_data.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This guide will help you determine whether a given input sequence is compatible with the AlphaFold3-like models.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
### Alphafold3-like models supported inputs
|
7 |
+
To submit a folding job with AlphaFold3-like models, you need the sequence input file in
|
8 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your sequence.
|
9 |
+
|
10 |
+
AlphaFold3-like models support various molecular types for structure prediction, and these can be provided as input in different formats. Below is a summary of the supported molecular types:
|
11 |
+
|
12 |
+
- **Proteins**: Protein sequences are widely supported and can be provided for structure prediction tasks.
|
13 |
+
- monomer
|
14 |
+
- multimer
|
15 |
+
- **DNA** and **RNA**: Both DNA and RNA sequences are supported for structure prediction.
|
16 |
+
- **Ligands**: Ligands can be specified in two ways:
|
17 |
+
- SMILES: A textual format describing the chemical structure of molecules.
|
18 |
+
- CCD Code: A standard identifier for chemical compounds, defined in the Chemical Component Dictionary.
|
19 |
+
|
20 |
+
**You can find detailed explanations of the input data formats for each model at the links below.**
|
21 |
+
|
22 |
+
### Boltz-1
|
23 |
+
Here is an explanation of the different input data formats that can be used for Boltz-1 prediction: [Boltz-1 documentation](https://github.com/jwohlwend/boltz/blob/main/docs/prediction.md). You can also find some input [examples](https://github.com/jwohlwend/boltz/tree/main/examples).
|
24 |
+
|
25 |
+
### Chai-1
|
26 |
+
Here is an explanation of the different input data formats that can be used for Chai-1 with restraints: [Chai-1 documentation](https://github.com/chaidiscovery/chai-lab/blob/main/examples/restraints/README.md). You can also find some input [examples](https://github.com/chaidiscovery/chai-lab/tree/main/examples).
|
27 |
+
|
28 |
+
### Protenix
|
29 |
+
Here is an explanation of the different input data formats that can be used for Protenix: [Protenix documentation](https://github.com/bytedance/Protenix/blob/main/docs/infer_json_format.md).
|
30 |
+
|
31 |
+
!!! Warning
|
32 |
+
The Protenix endpoint currently does not support JSON format. It is a work in progress.
|
33 |
+
|
34 |
+
!!! note
|
35 |
+
The Protenix endpoint also accepts RCSB FASTA format following this structure. The number of chains will automatically be derived from the description (e.g. 2 chains in the example below):
|
36 |
+
``` { .shell .no-copy }
|
37 |
+
>1HSG_1|Chains A, B|HIV-1 PROTEASE|Human immunodeficiency virus 1 (11676)
|
38 |
+
PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF
|
39 |
+
```
|
folding-studio/docs/docs/how-to-guides/af3/single_job_boltz.md
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
**Boltz-1** is one of the AlphaFold3-like models supported by Folding Studio. Here is a how-to guide to learn how to launch a simple single folding job on a FASTA file using **Boltz-1**.
|
4 |
+
|
5 |
+
## Application
|
6 |
+
|
7 |
+
=== ":octicons-command-palette-16: CLI"
|
8 |
+
|
9 |
+
```bash
|
10 |
+
folding predict boltz path/to/my/file.fasta --output ./ --unzip --project-code "your-project-code"
|
11 |
+
```
|
12 |
+
|
13 |
+
=== ":material-language-python: Python"
|
14 |
+
|
15 |
+
```python
|
16 |
+
from folding_studio.client import Client
|
17 |
+
from folding_studio.query.boltz import BoltzQuery
|
18 |
+
|
19 |
+
inference_parameters = {"project_code": "your-project-code"}
|
20 |
+
|
21 |
+
file_path = "path/to/my/file.fasta"
|
22 |
+
|
23 |
+
# Create client
|
24 |
+
client = Client.authenticate()
|
25 |
+
|
26 |
+
# Define query
|
27 |
+
query = BoltzQuery.from_file(path=file_path, parameters=inference_parameters)
|
28 |
+
|
29 |
+
# Send request
|
30 |
+
response = client.send_request(query)
|
31 |
+
|
32 |
+
# Download results
|
33 |
+
output_path = "./output.zip"
|
34 |
+
response.download_results(output_path, force=True, unzip=True)
|
35 |
+
```
|
36 |
+
|
37 |
+
!!! note
|
38 |
+
Do not forget that Boltz-1 accepts FASTA format following this structure
|
39 |
+
``` { .shell .no-copy }
|
40 |
+
>CHAIN_ID|ENTITY_TYPE|MSA_PATH
|
41 |
+
SEQUENCE
|
42 |
+
```
|
43 |
+
`MSA_PATH` is ignored if `--use-msa-server` is used.
|
44 |
+
|
45 |
+
For further information on YAML format, check [documentation](https://github.com/jwohlwend/boltz/blob/main/docs/prediction.md).
|
46 |
+
|
47 |
+
Using the CLI, you will get the following information if the job was successfully submitted.
|
48 |
+
|
49 |
+
``` { .shell .no-copy }
|
50 |
+
╭───────────────────────────────╮
|
51 |
+
│ 🧬 Boltz1 Folding submission │
|
52 |
+
╰───────────────────────────────╯
|
53 |
+
🔑 Authenticating client ✅
|
54 |
+
📦 Generating query ✅
|
55 |
+
Generated query: {
|
56 |
+
"fasta_files": {
|
57 |
+
"file": ">A|protein|\nQLEDSEVEAVAKGLEEMYANGVTEDNFKNYVKNNFAQQEISSVEEELNVNISDSCVANKIKDEFFAMISISAIVKAAQKKAWKELAVTVLRFAKANGLKTNAIIVAGQLALWAVQCG"
|
58 |
+
},
|
59 |
+
"yaml_files": {},
|
60 |
+
"parameters": {
|
61 |
+
"seed": 42,
|
62 |
+
"recycling_steps": 3,
|
63 |
+
"sampling_steps": 200,
|
64 |
+
"diffusion_samples": 1,
|
65 |
+
"step_scale": 1.638,
|
66 |
+
"msa_pairing_strategy": "greedy",
|
67 |
+
"write_full_pae": false,
|
68 |
+
"write_full_pde": false
|
69 |
+
}
|
70 |
+
}
|
71 |
+
🧠 Processing folding job ✅
|
72 |
+
```
|
folding-studio/docs/docs/how-to-guides/af3/single_job_chai.md
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
**Chai-1** is one of the AlphaFold3-like models supported by Folding Studio. Here is a how-to guide to learn how to launch a simple single folding job on a FASTA file using **Chai-1**.
|
4 |
+
|
5 |
+
## Application
|
6 |
+
|
7 |
+
=== ":octicons-command-palette-16: CLI"
|
8 |
+
|
9 |
+
```bash
|
10 |
+
folding predict chai path/to/my/file.fasta --output ./ --unzip --project-code "your-project-code"
|
11 |
+
```
|
12 |
+
|
13 |
+
=== ":material-language-python: Python"
|
14 |
+
|
15 |
+
```python
|
16 |
+
from folding_studio.client import Client
|
17 |
+
from folding_studio.query.chai import ChaiQuery
|
18 |
+
|
19 |
+
inference_parameters = {"project_code": "your-project-code"}
|
20 |
+
|
21 |
+
file_path = "path/to/my/file.fasta"
|
22 |
+
|
23 |
+
# Create client
|
24 |
+
client = Client.authenticate()
|
25 |
+
|
26 |
+
# Define query
|
27 |
+
query = ChaiQuery.from_file(path=file_path, parameters=inference_parameters)
|
28 |
+
|
29 |
+
# Send request
|
30 |
+
response = client.send_request(query)
|
31 |
+
|
32 |
+
# Download results
|
33 |
+
output_path = "./output.zip"
|
34 |
+
response.download_results(output_path, force=True, unzip=True)
|
35 |
+
```
|
36 |
+
|
37 |
+
!!! note
|
38 |
+
Do not forget that Chai-1 accepts FASTA format following this structure
|
39 |
+
``` { .shell .no-copy }
|
40 |
+
>ENTITY_TYPE|STRUCTURE_ID
|
41 |
+
SEQUENCE
|
42 |
+
```
|
43 |
+
|
44 |
+
Using the CLI, you will get the following information if the job was successfully submitted.
|
45 |
+
|
46 |
+
``` { .shell .no-copy }
|
47 |
+
╭───────────────────────────────╮
|
48 |
+
│ 🧬 Chai-1 Folding submission │
|
49 |
+
╰───────────────────────────────╯
|
50 |
+
🔑 Authenticating client ✅
|
51 |
+
📦 Generating query ✅
|
52 |
+
Generated query: {
|
53 |
+
"fasta_files": {
|
54 |
+
"file": ">A|protein|\nQLEDSEVEAVAKGLEEMYANGVTEDNFKNYVKNNFAQQEISSVEEELNVNISDSCVANKIKDEFFAMISISAIVKAAQKKAWKELAVTVLRFAKANGLKTNAIIVAGQLALWAVQCG\n"
|
55 |
+
},
|
56 |
+
"use_msa_server": false,
|
57 |
+
"use_templates_server": false,
|
58 |
+
"num_trunk_recycles": 3,
|
59 |
+
"seed": 0,
|
60 |
+
"num_diffn_timesteps": 200,
|
61 |
+
"restraints": null,
|
62 |
+
"recycle_msa_subsample": 0,
|
63 |
+
"num_trunk_samples": 1
|
64 |
+
}
|
65 |
+
🧠 Processing folding job ✅
|
66 |
+
```
|
folding-studio/docs/docs/how-to-guides/af3/single_job_from_protein_sequence.md
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
**Boltz-1**, **Chai-1** and **Protenix** models allow passing the protein sequence directly as input.
|
3 |
+
This makes the prediction jobs easier to integrate with your code.
|
4 |
+
|
5 |
+
## Application
|
6 |
+
=== ":material-language-python: Python"
|
7 |
+
|
8 |
+
```python
|
9 |
+
from folding_studio.client import Client
|
10 |
+
from folding_studio.query.boltz import BoltzQuery
|
11 |
+
|
12 |
+
inference_parameters = {"project_code": "your-project-code",
|
13 |
+
"seed":42}
|
14 |
+
|
15 |
+
sequence = ">A|protein\nQLEDSEVEAVAKGLEEMYANGVTEDNFKNYVKNNFAQQEISSVEEELNVNIS"
|
16 |
+
|
17 |
+
# Create client
|
18 |
+
client = Client.authenticate()
|
19 |
+
|
20 |
+
# Define query
|
21 |
+
query = BoltzQuery.from_protein_sequence(sequence=sequence, parameters=inference_parameters)
|
22 |
+
|
23 |
+
# Send request
|
24 |
+
response = client.send_request(query)
|
25 |
+
|
26 |
+
# Download results
|
27 |
+
output_path = "./output.zip"
|
28 |
+
response.download_results(output_path, force=True, unzip=True)
|
29 |
+
```
|
30 |
+
|
31 |
+
!!! Note
|
32 |
+
Depending on the model you want to use, you can replace `BoltzQuery` with `ChaiQuery` or `ProtenixQuery`.
|
folding-studio/docs/docs/how-to-guides/af3/single_job_protenix.md
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
**Protenix** is one of the AlphaFold3-like models supported by Folding Studio. Here is a how-to guide to learn how to launch a simple single folding job on a FASTA file using **Protenix**.
|
4 |
+
|
5 |
+
## Application
|
6 |
+
|
7 |
+
=== ":octicons-command-palette-16: CLI"
|
8 |
+
|
9 |
+
```bash
|
10 |
+
folding predict protenix path/to/my/file.fasta --output ./ --unzip --project-code "your-project-code"
|
11 |
+
```
|
12 |
+
|
13 |
+
=== ":material-language-python: Python"
|
14 |
+
|
15 |
+
```python
|
16 |
+
from folding_studio.client import Client
|
17 |
+
from folding_studio.query.protenix import ProtenixQuery
|
18 |
+
|
19 |
+
inference_parameters = {"project_code": "your-project-code"}
|
20 |
+
|
21 |
+
file_path = "path/to/my/file.fasta"
|
22 |
+
|
23 |
+
# Create client
|
24 |
+
client = Client.authenticate()
|
25 |
+
|
26 |
+
# Define query
|
27 |
+
query = ProtenixQuery.from_file(path=file_path, parameters=inference_parameters)
|
28 |
+
|
29 |
+
# Send request
|
30 |
+
response = client.send_request(query)
|
31 |
+
|
32 |
+
# Download results
|
33 |
+
output_path = "./output.zip"
|
34 |
+
response.download_results(output_path, force=True, unzip=True)
|
35 |
+
```
|
36 |
+
|
37 |
+
!!! note
|
38 |
+
Protenix also accepts RCSB FASTA format following this structure. The number of chains will automatically be derived from the description (e.g. 2 chains in the example below):
|
39 |
+
``` { .shell .no-copy }
|
40 |
+
>1HSG_1|Chains A, B|HIV-1 PROTEASE|Human immunodeficiency virus 1 (11676)
|
41 |
+
PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMSLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF
|
42 |
+
```
|
43 |
+
|
44 |
+
!!! warning
|
45 |
+
In this preview version, Protenix is only compatible with MSA search mode (`--use-msa-server`), which is enabled by default.
|
46 |
+
|
47 |
+
Using the CLI, you will get the following information if the job was successfully submitted.
|
48 |
+
|
49 |
+
``` { .shell .no-copy }
|
50 |
+
╭─────────────────────────────────╮
|
51 |
+
│ 🧬 Protenix Folding submission │
|
52 |
+
╰─────────────────────────────────╯
|
53 |
+
🔑 Authenticating client ✅
|
54 |
+
📦 Generating query ✅
|
55 |
+
Generated query: {
|
56 |
+
"fasta_files": {
|
57 |
+
"file":
|
58 |
+
">A|protein\nMASWSHPQFEKGGTHVAETSAPTRSEPDTRVLTLPGTASAPEFRLIDIDGLLNNRATTDV\nRDLGSGRLNAWGNSFPAAELPAPGSLITVAGIPFTWANAHAR>GDNIRCEGQVVDIPPGQY\nDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSR
|
59 |
+
MHYPHHV\nQEGLPTTMWLTRVGMPRHGVARSLRLPRSVAMHVFALTLRTAAAVRLAEGATT\n"
|
60 |
+
},
|
61 |
+
"use_msa_server": true,
|
62 |
+
"seeds": "0"
|
63 |
+
}
|
64 |
+
🧠 Processing folding job ✅
|
65 |
+
```
|
folding-studio/docs/docs/how-to-guides/index.md
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# How-to guides
|
2 |
+
|
3 |
+
The **How-to guides section** provides step-by-step instructions for using Folding Studio efficiently. You will find guides for various workflows, from setting up and submitting folding jobs to analyzing the results.
|
4 |
+
|
5 |
+
## AlphaFold2/OpenFold
|
6 |
+
|
7 |
+
Learn how to work with **AlphaFold2/OpenFold** models in Folding Studio:
|
8 |
+
|
9 |
+
- [Provide Input Data](./af2_openfold/provide_input_data.md): Understand what data you can use for folding tasks.
|
10 |
+
- [Launch a Folding Job using AlphaFold2](./af2_openfold/single_af2_job.md): Instructions to launch a folding job with the AlphaFold2 model.
|
11 |
+
- [Launch a Folding Job using OpenFold](./af2_openfold/single_openfold_job.md): Instructions to launch a folding job with the OpenFold model.
|
12 |
+
- [Set Folding Parameters](./af2_openfold/set_af_folding_parameters.md): Discover how to configure custom folding parameters for your jobs.
|
13 |
+
- Launch a Batch Folding Job: Learn how to submit batch jobs [from configuration files](./af2_openfold/batch_job_from_configuration_file.md) or [directories](./af2_openfold/batch_job_from_directory.md) containing multiple fasta files.
|
14 |
+
- [Check Job Status](./af2_openfold/fetch_folding_job_status.md): Instructions on how to fetch the status of your folding job.
|
15 |
+
- [Download Job Logs](./af2_openfold/download_logs.md): Instructions on how to download the logs of your folding job.
|
16 |
+
- [Cancel a Job Submission](./af2_openfold/cancel_experiment.md): Learn how to cancel a folding job that is in progress.
|
17 |
+
- [Retrieve Features](./af2_openfold/get_experiment_features.md): Discover how to extract features generated by your folding jobs, such as structural metrics.
|
18 |
+
- [Download Results](./af2_openfold/download_prediction_results.md): Learn how to download the results from completed folding jobs.
|
19 |
+
- Launch Jobs using advanced algorithms:
|
20 |
+
- [Launch a Folding Job using MSA subsampling](./af2_openfold/advanced_algorithms/msa_subsampling_job.md): Learn how to use MSA subsampling to optimize sequence alignments for structure prediction.
|
21 |
+
- [Launch a Folding Job using the Gap Trick for Folding Multimer Complexes](./af2_openfold/advanced_algorithms/gap_trick_job.md): Discover how to fold multimer complexes using monomer models with the Gap Trick approach.
|
22 |
+
- [Launch a Folding Job using an Initial Guess Structure in AlphaFold2](./af2_openfold/advanced_algorithms/initial_guess_af2.md): Learn how to provide an initial structure guess to guide the AlphaFold2 folding process.
|
23 |
+
- [Launch a Folding Job applying Template Masking in Gap Trick Mode](./af2_openfold/advanced_algorithms/template_masking_job.md): Learn how to mask template regions to refine multimer folding while using Gap Trick mode.
|
24 |
+
- [Launch a Folding Job using SoloSeq](./af2_openfold/soloseq_job.md): Discover how to run a folding job with the **SoloSeq** model, an OpenFold-like model.
|
25 |
+
|
26 |
+
|
27 |
+
## Preview - AlphaFold3-like Models
|
28 |
+
|
29 |
+
For advanced folding tasks, Folding Studio supports models similar to **AlphaFold3**:
|
30 |
+
|
31 |
+
- [Provide Input Data](./af3/provide_input_data.md): Understand what data you can use for AlphaFold3-like jobs.
|
32 |
+
- [Launch a Single Job using Boltz-1](./af3/single_job_boltz.md): Instructions for running a single folding job from a FASTA file using Boltz-1 model.
|
33 |
+
- [Launch a Single Job using Chai-1](./af3/single_job_chai.md): Instructions for running a single folding job from a FASTA file using Chai-1 model.
|
34 |
+
- [Launch a Single Job using Protenix](./af3/single_job_protenix.md): Instructions for running a single folding job from a FASTA file using Protenix model.
|
35 |
+
- [Launch a Single Job from a YAML File using Boltz-1](./af3/boltz_single_yaml_job.md): Instructions for running a single folding job from a YAML file using Boltz-1 model.
|
36 |
+
- [Launch a Batch Job from a Directory](./af3/batch_job_from_directory.md): Learn how to submit a batch job by organizing your fasta files in a directory.
|
37 |
+
- [Launch a Job from a Protein Sequence](./af3/single_job_from_protein_sequence.md): Step-by-step guide to running a job directly from a protein sequence.
|
38 |
+
|
39 |
+
## Post-processing
|
40 |
+
|
41 |
+
Once your folding or alignment jobs are complete, you can perform post-processing tasks:
|
42 |
+
|
43 |
+
- [Calculate Interface pLDDT and pAE](./other/pLDDT_pAE_calculation.md): Learn how to calculate pLDDT and pAE to assess protein model quality.
|
44 |
+
|
45 |
+
## Multiple Sequence Alignment (MSA)
|
46 |
+
|
47 |
+
Folding Studio also supports performing MSA searches:
|
48 |
+
|
49 |
+
- [Provide Input Data for MSA](./msa_search/provide_input_data.md): Understand what data you can use for MSA search tasks.
|
50 |
+
- [Launch an MSA Search with MMSeqs2](./msa_search/msa_search_mmseqs2.md): Step-by-step guide to running an MSA search.
|
51 |
+
- [Launch an MSA Search ignoring cache](./msa_search/msa_no_cache.md): Learn how to run an MSA search while bypassing previously cached results.
|
52 |
+
- [Check MSA Job Status](./msa_search/fetch_msa_job_status.md): Discover how to check the status of your MSA job.
|
53 |
+
- [Download MSA Job Logs](./msa_search/download_msa_logs.md): Instructions on how to download the logs of your MSA job.
|
54 |
+
- [Download MSA Results](./msa_search/download_msa_search_results.md): Learn how to download the results of an MSA search.
|
folding-studio/docs/docs/how-to-guides/msa_search/download_msa_logs.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This How-to guide explains how to download the logs of an MSA experiment.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
=== ":octicons-command-palette-16: CLI"
|
7 |
+
|
8 |
+
```bash
|
9 |
+
folding msa experiment logs b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./msa_logs_exp_b21b09.zip
|
10 |
+
```
|
11 |
+
|
12 |
+
=== ":material-language-python: Python"
|
13 |
+
|
14 |
+
```python
|
15 |
+
from pathlib import Path
|
16 |
+
from folding_studio.commands.msa import logs
|
17 |
+
|
18 |
+
logs(msa_exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./msa_logs_exp_b21b09.zip"))
|
19 |
+
```
|
folding-studio/docs/docs/how-to-guides/msa_search/download_msa_search_results.md
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
|
3 |
+
The MSA search results will comprise:
|
4 |
+
|
5 |
+
- The [Multiple Sequence Alignment](https://en.wikipedia.org/wiki/Multiple_sequence_alignment) (MSA) search results. By default, the Folding Studio pipeline will trigger an MSA search on Uniref90, small BFD, Mgnify and Uniprot (multimer jobs only) using the `jackhmmer` algorithm.
|
6 |
+
- The MSA coverage file.
|
7 |
+
|
8 |
+
Once the MSA search job has finished, all the generated features are saved into a zip file.
|
9 |
+
|
10 |
+
The zip file contains :
|
11 |
+
|
12 |
+
- The output of the MSA search: the MSA search results on multiple databases in `.a3m` format.
|
13 |
+
- The output of the template search : the four best matching templates in `.cif` format.
|
14 |
+
|
15 |
+
Here is an example of the zip file structure for a monomer :
|
16 |
+
|
17 |
+
``` { .shell .no-copy }
|
18 |
+
extracted_experiment_features_zip
|
19 |
+
├── msas
|
20 |
+
│ ├── mgnify_hits.a3m
|
21 |
+
│ ├── pdb_hits.hhr
|
22 |
+
│ ├── small_bfd_hits.a3m
|
23 |
+
│ └── uniref90_hits.a3m
|
24 |
+
├── msa_coverage.json
|
25 |
+
└── logs.txt
|
26 |
+
|
27 |
+
```
|
28 |
+
|
29 |
+
For multimer, the structure is similar except that there is a dedicated subdirectory for each protein.
|
30 |
+
|
31 |
+
## Application
|
32 |
+
|
33 |
+
You can download the zip file above to check the generated features by running this command.
|
34 |
+
|
35 |
+
=== ":octicons-command-palette-16: CLI"
|
36 |
+
|
37 |
+
```bash
|
38 |
+
folding msa experiment features b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./msa_features_exp_b21b09.zip
|
39 |
+
```
|
40 |
+
|
41 |
+
=== ":material-language-python: Python"
|
42 |
+
|
43 |
+
```python
|
44 |
+
from pathlib import Path
|
45 |
+
from folding_studio.commands.msa import features
|
46 |
+
|
47 |
+
features(msa_exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./msa_features_exp_b21b09.zip"))
|
48 |
+
```
|
49 |
+
Once the MSA features are downloaded, you will get the following message:
|
50 |
+
|
51 |
+
``` { .shell .no-copy }
|
52 |
+
File downloaded successfully to msa_features_exp_b21b09.zip.
|
53 |
+
```
|
folding-studio/docs/docs/how-to-guides/msa_search/fetch_msa_job_status.md
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This how-to guide explains how to manage MSA search jobs using the `msa_experiment_id`. Each submission creates a unique experiment, enabling caching and avoiding redundant computations.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
### Fetch a search job `msa_experiment_id`
|
7 |
+
Submitting an MSA search job creates an experiment. This allows caching and avoids
|
8 |
+
useless re-computation of previously submitted MSA search jobs.
|
9 |
+
|
10 |
+
Each experiment is associated with a unique `msa_experiment_id`. Its generation is
|
11 |
+
deterministic, created from the submitted FASTA sequence (without taking into
|
12 |
+
account the description) and the job parameters.
|
13 |
+
|
14 |
+
By default, if you resubmit an MSA experiment with the same sequence and parameters, it will not be triggered and the response will inform you of the status of the original MSA experiment and its results, if any are available. This can be overridden in the options (see [Launch an MSA search ignoring cache](./msa_no_cache.md)).
|
15 |
+
|
16 |
+
Once your MSA job has been submitted, and thus the msa experiment created, you
|
17 |
+
can get various information from the `msa_experiment_id`.
|
18 |
+
|
19 |
+
You can get the list of your msa experiment ids that succeeded or are still pending
|
20 |
+
using :
|
21 |
+
|
22 |
+
=== ":octicons-command-palette-16: CLI"
|
23 |
+
|
24 |
+
```bash
|
25 |
+
folding msa experiment list
|
26 |
+
```
|
27 |
+
|
28 |
+
=== ":material-language-python: Python"
|
29 |
+
|
30 |
+
```python
|
31 |
+
from folding_studio.commands.msa import list
|
32 |
+
|
33 |
+
list()
|
34 |
+
```
|
35 |
+
|
36 |
+
### Retrieve a search job status
|
37 |
+
|
38 |
+
=== ":octicons-command-palette-16: CLI"
|
39 |
+
|
40 |
+
```bash
|
41 |
+
folding msa experiment status b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9
|
42 |
+
```
|
43 |
+
|
44 |
+
=== ":material-language-python: Python"
|
45 |
+
|
46 |
+
```python
|
47 |
+
from pathlib import Path
|
48 |
+
from folding_studio.commands.msa import status
|
49 |
+
|
50 |
+
status(msa_exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9")
|
51 |
+
```
|
52 |
+
|
53 |
+
The experiment status is the current state of the experiment.
|
54 |
+
|
55 |
+
| VALUE | DESCRIPTION |
|
56 |
+
| ----------- | ------------------------------------------------------------------------------- |
|
57 |
+
| `Done` | The experiment is done and its features and results are available for download. |
|
58 |
+
| `Pending` | The experiment is still ongoing. |
|
59 |
+
| `Failed` | The experiment has failed. |
|
60 |
+
| `Cancelled` | The experiment was cancelled.                                                   |
|
folding-studio/docs/docs/how-to-guides/msa_search/msa_no_cache.md
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
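This tutorial guides you through running an MSA (Multiple Sequence Alignment) search while ignoring the cache, so that the search is run again instead of reusing the results of a previously submitted identical job.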
|
3 |
+
|
4 |
+
## Application
|
5 |
+
|
6 |
+
=== ":octicons-command-palette-16: CLI"
|
7 |
+
|
8 |
+
```bash
|
9 |
+
folding msa search path/to/my/file.fasta --no-cache
|
10 |
+
```
|
11 |
+
|
12 |
+
=== ":material-language-python: Python"
|
13 |
+
|
14 |
+
```python
|
15 |
+
from pathlib import Path
|
16 |
+
from folding_studio.commands.msa import search
|
17 |
+
|
18 |
+
search(source=Path("path/to/my/file.fasta"), cache=False)
|
19 |
+
```
|
folding-studio/docs/docs/how-to-guides/msa_search/msa_search_mmseqs2.md
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This tutorial guides you through running an MSA (Multiple Sequence Alignment) search using MMseqs2 instead of JackHMMer, which is the default method when no specific option is provided in the command.
|
3 |
+
|
4 |
+
## Application
|
5 |
+
You can choose between two MSA search modes:
|
6 |
+
|
7 |
+
| Value | Description |
|
8 |
+
| -------------------- | -------------------------------------------------------------------------------------------- |
|
9 |
+
| `"search"` (default) | automated search of Uniref90, small_bfd and MGnify databases using `JackHMMer`               |
|
10 |
+
| `"mmseqs"` | automated search of Uniref30 and Colabfold_env_db using the **self-hosted** `MMSeqs2` server |
|
11 |
+
|
12 |
+
|
13 |
+
!!! note
|
14 |
+
`MMSeqs2` produces fairly different MSA results compared to JackHMMer, because it uses different datasets (Uniref30 and colabfold_env_db) and a different search algorithm. However, the MSA produced by `MMSeqs2` is generally more diverse and can be leveraged to predict structures with higher accuracy (see this [publication](https://www.nature.com/articles/s41592-023-02130-4)). For more information about MMSeqs2, please refer to the corresponding [paper](https://www.biorxiv.org/content/10.1101/079681v5).
|
15 |
+
|
16 |
+
=== ":octicons-command-palette-16: CLI"
|
17 |
+
|
18 |
+
```bash
|
19 |
+
folding msa search path/to/my/file.fasta --msa-mode mmseqs
|
20 |
+
```
|
21 |
+
|
22 |
+
=== ":material-language-python: Python"
|
23 |
+
|
24 |
+
```python
|
25 |
+
from pathlib import Path
|
26 |
+
from folding_studio.commands.msa import search
|
27 |
+
|
28 |
+
search(source=Path("path/to/my/file.fasta"), msa_mode="mmseqs")
|
29 |
+
```
|
folding-studio/docs/docs/how-to-guides/msa_search/provide_input_data.md
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Goal
|
2 |
+
This guide will help you determine whether a given input sequence is compatible for launching an MSA search.
|
3 |
+
|
4 |
+
## Supported inputs
|
5 |
+
|
6 |
+
To submit an MSA search job, you need the sequence input file in
|
7 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your
|
8 |
+
protein sequence.
|
9 |
+
|
10 |
+
It can be a monomer or a multimer sequence.
|
11 |
+
|
12 |
+
=== "monomer"
|
13 |
+
|
14 |
+
```text
|
15 |
+
>SARS-CoV-2|RBD|Omicron variant
|
16 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
17 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
18 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
19 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
20 |
+
```
|
21 |
+
|
22 |
+
=== "multimer"
|
23 |
+
|
24 |
+
```text
|
25 |
+
>SARS-CoV-2|RBD|Omicron variant
|
26 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
27 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
28 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
29 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
30 |
+
>Processed angiotensin-converting enzyme 2|Homo sapiens (9606)
|
31 |
+
STIEEQAKTFLDKFNHEAEDLFYQSSLASWNYNTNITEENVQNMNNAGDKWSAFLKEQSTLAQMYPLQEIQNLTVKLQLQALQQNGSSVLSEDKSKRLNTILNTMSTIYSTGKVCNPDNPQECLLLEPGLNEIMANSLDYNERLWAWESWRSEVGKQLRPLYEEYVVLKNEMARANHYEDYGDYWRGDYEVNGVDGYDYSRGQLIEDVEHTFEEIKPLYEHLHAYVRAKLMNAYPSYISPIGCLPAHLLGDMWGRFWTNLYSLTVPFGQKPNIDVTDAMVDQAWDAQRIFKEAEKFFVSVGLPNMTQGFWENSMLTDPGNVQKAVCHPTAWDLGKGDFRILMCTKVTMDDFLTAHHEMGHIQYDMAYAAQPFLLRNGANEGFHEAVGEIMSLSAATPKHLKSIGLLSPDFQEDNETEINFLLKQALTIVGTLPFTYMLEKWRWMVFKGEIPKDQWMKKWWEMKREIVGVVEPVPHDETYCDPASLFHVSNDYSFIRYYTRTLYQFQFQEALCQAAKHEGPLHKCDISNSTEAGQKLFNMLRLGKSEPWTLALENVVGAKNMNVRPLLNYFEPLFTWLKDQNKNSFVGWSTDWSPYADRHHHHHH
|
32 |
+
```
|
folding-studio/docs/docs/how-to-guides/other/pLDDT_pAE_calculation.md
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Here we propose recipes to compute the interface pLDDT (ipLDDT) and the interface pAE (ipAE). The values of these metrics depend on the **chosen** definition of interface residues.
|
2 |
+
|
3 |
+
These scripts use `bioblocks`. Follow this [link](https://instadeep.gitlab.io/bioai-group/BioBlocks/setup/setup.html) for detailed installation steps; you will also need this specific [dependency](https://instadeep.gitlab.io/bioai-group/BioBlocks/setup/setup.html#manual-msms-installation). We use the `get_interface_residues` function to define the interface residues. Please refer to the [`bioblocks` documentation page](https://instadeep.gitlab.io/bioai-group/BioBlocks/api/generated/bioblocks.geometry.get_interface_residues.html#bioblocks-geometry-get-interface-residues) for more details on the input parameters.
|
4 |
+
|
5 |
+
### ipLDDT
|
6 |
+
|
7 |
+
To calculate the ipLDDT, make sure you downloaded the results of the experiment in question. You will need the metrics file path as well as the prediction path.
|
8 |
+
|
9 |
+
1. Download the results of your desired experiment by running:
|
10 |
+
|
11 |
+
```bash
|
12 |
+
folding experiment results 6fdb36d380c3f9ba49137af47cf2eed5a6774cab --output result_exp_6fdb3.zip
|
13 |
+
unzip result_exp_6fdb3.zip
|
14 |
+
```
|
15 |
+
|
16 |
+
2. Download the code snippet below into a Python file.
|
17 |
+
3. Select a model prediction and edit in the file both `model_name` and `prediction_path`.
|
18 |
+
4. Edit the `metrics_file_path` with the metrics `metrics_per_model.json` file path.
|
19 |
+
5. Run your Python file.
|
20 |
+
|
21 |
+
```python
|
22 |
+
import numpy as np
|
23 |
+
import json
|
24 |
+
from bioblocks.io import read_model
|
25 |
+
from bioblocks.geometry import get_interface_residues
|
26 |
+
|
27 |
+
def calculate_mean_plddt_all_interfaces(
|
28 |
+
prediction_path: str, model_name: str, metrics_path: str
|
29 |
+
) -> float:
|
30 |
+
"""
|
31 |
+
Calculate the mean pLDDT for the interface residues between all chain pairs in the model.
|
32 |
+
|
33 |
+
Args:
|
34 |
+
prediction_path: Path to the prediction PDB file.
|
35 |
+
model_name: Name of the model.
|
36 |
+
metrics_path: Path to the JSON file containing pLDDT scores.
|
37 |
+
|
38 |
+
Returns:
|
39 |
+
float: The mean pLDDT score for the interface residues across all chain pairs.
|
40 |
+
"""
|
41 |
+
pred = read_model(prediction_path)
|
42 |
+
chains = list(pred.get_chains())
|
43 |
+
|
44 |
+
with open(metrics_path, "r") as f:
|
45 |
+
metrics = json.load(f)
|
46 |
+
|
47 |
+
plddt = np.array(metrics[model_name]["plddt"])
|
48 |
+
|
49 |
+
# Create the offset list
|
50 |
+
offset = [0]
|
51 |
+
for i in range(len(chains) - 1):
|
52 |
+
offset.append(offset[-1] + len(list(chains[i].get_residues())))
|
53 |
+
|
54 |
+
all_interface_indices = []
|
55 |
+
|
56 |
+
# Iterate over all pairs of chains to calculate the interface residues
|
57 |
+
for i in range(len(chains)):
|
58 |
+
chain_a = chains[i]
|
59 |
+
for j in range(i + 1, len(chains)):
|
60 |
+
chain_b = chains[j]
|
61 |
+
residues = get_interface_residues(chain_a, chain_b)
|
62 |
+
interface_indexes_a = [
|
63 |
+
res.id.residue_index - 1 + offset[i] for res in residues[0]
|
64 |
+
]
|
65 |
+
interface_indexes_b = [
|
66 |
+
res.id.residue_index - 1 + offset[j] for res in residues[1]
|
67 |
+
]
|
68 |
+
all_interface_indices.extend(interface_indexes_a + interface_indexes_b)
|
69 |
+
|
70 |
+
all_interface_indices = np.unique(np.array(all_interface_indices))
|
71 |
+
mean_plddt = np.mean(plddt[all_interface_indices])
|
72 |
+
|
73 |
+
return mean_plddt
|
74 |
+
|
75 |
+
if __name__ == "__main__":
|
76 |
+
|
77 |
+
prediction_path = "/PATH/TO/YOUR/PREDICTION/PDB/FILE"
|
78 |
+
metrics_file_path = "/PATH/TO/YOUR/PREDICTION/METRICS/JSON/FILE"
|
79 |
+
model_name = "model_1_multimer_v3"
|
80 |
+
|
81 |
+
iplddt = calculate_mean_plddt_all_interfaces(prediction_path, model_name, metrics_file_path)
|
82 |
+
print(f"Mean Interface PLDDT across all chain pairs: {iplddt}")
|
83 |
+
|
84 |
+
```
|
85 |
+
|
86 |
+
### ipAE
|
87 |
+
|
88 |
+
To calculate the ipAE, make sure you downloaded the results of the experiment in question. You will need the metrics file path as well as the prediction path.
|
89 |
+
|
90 |
+
1. Download the results of your desired experiment by running:
|
91 |
+
|
92 |
+
```bash
|
93 |
+
folding experiment results 6fdb36d380c3f9ba49137af47cf2eed5a6774cab --output result_exp_6fdb3.zip
|
94 |
+
unzip result_exp_6fdb3.zip
|
95 |
+
```
|
96 |
+
|
97 |
+
2. Download the code snippet below into a Python file.
|
98 |
+
3. Select a model prediction and edit in the file both `model_name` and `prediction_path`.
|
99 |
+
4. Edit the `metrics_file_path` with the metrics `metrics_per_model.json` file path.
|
100 |
+
5. Run your Python file.
|
101 |
+
|
102 |
+
```python
|
103 |
+
|
104 |
+
import numpy as np
|
105 |
+
import json
|
106 |
+
from bioblocks.io import read_model
|
107 |
+
from bioblocks.geometry import get_interface_residues
|
108 |
+
|
109 |
+
|
110 |
+
def calculate_interface_pae_all_chains(
|
111 |
+
prediction_path: str, model_name: str, metrics_path: str
|
112 |
+
) -> float:
|
113 |
+
"""
|
114 |
+
Calculate the Interface PAE by averaging the PAE of cross-interface residue pairs between all chain pairs.
|
115 |
+
|
116 |
+
Args:
|
117 |
+
prediction_path: Path to the prediction PDB file.
|
118 |
+
model_name: Name of the model.
|
119 |
+
metrics_path: Path to the JSON file containing PAE matrix.
|
120 |
+
|
121 |
+
Returns:
|
122 |
+
float: The average Interface PAE score across all chain pairs.
|
123 |
+
"""
|
124 |
+
pred = read_model(prediction_path)
|
125 |
+
|
126 |
+
chains = list(pred.get_chains())
|
127 |
+
|
128 |
+
with open(metrics_path, "r") as f:
|
129 |
+
metrics = json.load(f)
|
130 |
+
|
131 |
+
pae_matrix = np.array(metrics[model_name]["pae"])
|
132 |
+
|
133 |
+
# Create the offset list
|
134 |
+
offset = [0]
|
135 |
+
for i in range(len(chains) - 1):
|
136 |
+
offset.append(offset[-1] + len(list(chains[i].get_residues())))
|
137 |
+
|
138 |
+
interface_pae_values = []
|
139 |
+
|
140 |
+
for i in range(len(chains)):
|
141 |
+
chain_a = chains[i]
|
142 |
+
for j in range(i + 1, len(chains)):
|
143 |
+
chain_b = chains[j]
|
144 |
+
|
145 |
+
residues = get_interface_residues(chain_a, chain_b)
|
146 |
+
interface_indexes_a = [
|
147 |
+
res.id.residue_index - 1 + offset[i] for res in residues[0]
|
148 |
+
]
|
149 |
+
interface_indexes_b = [
|
150 |
+
res.id.residue_index - 1 + offset[j] for res in residues[1]
|
151 |
+
]
|
152 |
+
|
153 |
+
# Calculate PAE for cross-interface residue pairs
|
154 |
+
for idx_a in interface_indexes_a:
|
155 |
+
for idx_b in interface_indexes_b:
|
156 |
+
interface_pae_values.append(pae_matrix[idx_a, idx_b])
|
157 |
+
interface_pae_values.append(
|
158 |
+
pae_matrix[idx_b, idx_a]
|
159 |
+
) # Pae is not symmetrical
|
160 |
+
|
161 |
+
# Calculate the average PAE for the cross-interface residue pairs
|
162 |
+
if interface_pae_values:
|
163 |
+
mean_pae = np.mean(interface_pae_values)
|
164 |
+
else:
|
165 |
+
mean_pae = 0.0
|
166 |
+
|
167 |
+
return mean_pae
|
168 |
+
|
169 |
+
if __name__ == "__main__":
|
170 |
+
|
171 |
+
prediction_path = "/PATH/TO/YOUR/PREDICTION/PDB/FILE"
|
172 |
+
metrics_file_path = "/PATH/TO/YOUR/PREDICTION/METRICS/JSON/FILE"
|
173 |
+
model_name = "model_1_multimer_v3"
|
174 |
+
|
175 |
+
ipae = calculate_interface_pae_all_chains(prediction_path, model_name, metrics_file_path)
|
176 |
+
print(f"Mean Interface PAE across all chain pairs: {ipae}")
|
177 |
+
|
178 |
+
```
|
folding-studio/docs/docs/index.md
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: "Folding Studio"
|
3 |
+
---
|
4 |
+
|
5 |
+
## Welcome to Folding Studio
|
6 |
+
|
7 |
+
Folding Studio enables you to perform protein structure predictions within the Google Cloud Platform (GCP) environment. By leveraging GCP's robust and scalable cloud infrastructure, it allows for fast and high-throughput protein folding predictions.
|
8 |
+
|
9 |
+
This documentation will guide you through interacting programmatically with the Folding Studio CLI and python library, helping you efficiently utilize our tools for protein structure prediction. After reading through this documentation, you will be equipped to set up and submit single and batch jobs by defining input and model parameters, monitor the status of your jobs and retrieve job results, including structure predictions and confidence metrics.
|
10 |
+
|
11 |
+
Some of the key features of Folding Studio include:
|
12 |
+
|
13 |
+
- Flexible input options: Support for a variety of input formats, including FASTA sequences, YAML files, and more.
|
14 |
+
- Batch job submission: Submit multiple jobs simultaneously, streamlining the prediction process for large datasets.
|
15 |
+
- Confidence metrics: Retrieve structure predictions along with confidence metrics (e.g., pLDDT, pAE) to assess model quality.
|
16 |
+
- Generated features: Access important features like multiple sequence alignments (MSA), templates, and more.
|
17 |
+
- Real-time job monitoring: Track job progress, status, and results with ease using the CLI and the python library.
|
18 |
+
|
19 |
+
To install Folding Studio, simply follow the [Installation Tutorial](tutorials/installation.md).
|
20 |
+
|
21 |
+
Folding Studio supports a range of structure prediction models, categorized into AlphaFold2-like and AlphaFold3-like architectures.
|
22 |
+
|
23 |
+
- AlphaFold2-like models: This category includes **AlphaFold2**, **OpenFold**, and **SoloSeq**. AlphaFold2 and OpenFold operate asynchronously, allowing users to submit predictions and retrieve results later. SoloSeq follows a similar modeling approach but runs synchronously.
|
24 |
+
|
25 |
+
- AlphaFold3-like models: This category includes **Boltz-1**, **Chai-1**, and **Protenix**. Unlike AlphaFold2 and OpenFold, all models in this category operate synchronously, returning results upon query completion. Users of the `folding-studio` Python library will notice a different interaction pattern when working with these models, as they do not require polling for job status or retrieving results in separate steps.
|
26 |
+
|
27 |
+
See [supported models section](./explanation/supported_models.md) for more details about this subject.
|
28 |
+
|
29 |
+
## Documentation overview
|
30 |
+
|
31 |
+
This documentation is organized as follows:
|
32 |
+
|
33 |
+
- **Tutorials**: Step-by-step guides to get you started with Folding Studio, including installation, job submissions, and MSA searches.
|
34 |
+
- **How-to Guides**: Detailed instructions for using specific features, such as setting folding parameters or calculating pLDDT and pAE.
|
35 |
+
- **Reference**: Comprehensive details on CLI commands, input flags, and the Python library.
|
36 |
+
- **Explanation**: In-depth explanation of supported models and advanced algorithms.
|
37 |
+
|
38 |
+
---
|
39 |
+
|
40 |
+
<div class="grid cards" markdown>
|
41 |
+
|
42 |
+
- :material-clock-fast:{ .lg .middle } **Tutorials**
|
43 |
+
|
44 |
+
---
|
45 |
+
|
46 |
+
[:octicons-arrow-right-24: Install Folding Studio](./tutorials/installation.md)
|
47 |
+
|
48 |
+
[:octicons-arrow-right-24: Run AlphaFold2 on a protein sequence](./tutorials/single_folding_job_af2.md)
|
49 |
+
|
50 |
+
[:octicons-arrow-right-24: Perform a Multiple Sequence Alignment (MSA) search](./tutorials/msa_search.md)
|
51 |
+
|
52 |
+
[:octicons-arrow-right-24: Preview - Run folding jobs with AlphaFold3-like models](./tutorials/single_folding_job_af3.md)
|
53 |
+
|
54 |
+
- :octicons-rocket-16:{ .lg .middle } **How-to Guides**
|
55 |
+
|
56 |
+
---
|
57 |
+
|
58 |
+
[:octicons-arrow-right-24: AlphaFold2 / OpenFold guides](./how-to-guides/af2_openfold/provide_input_data.md)
|
59 |
+
|
60 |
+
[:octicons-arrow-right-24: Multiple Sequence Alignment Search (MSA) guides](./how-to-guides/msa_search/provide_input_data.md)
|
61 |
+
|
62 |
+
[:octicons-arrow-right-24: Preview - AlphaFold3-like models guides](./how-to-guides/af3/provide_input_data.md)
|
63 |
+
|
64 |
+
[:octicons-arrow-right-24: Post-processing recipes](./how-to-guides/other/pLDDT_pAE_calculation.md)
|
65 |
+
|
66 |
+
- :material-dna:{ .lg .middle } **Explanation**
|
67 |
+
|
68 |
+
---
|
69 |
+
|
70 |
+
[:octicons-arrow-right-24: Supported models](./explanation/supported_models.md)
|
71 |
+
|
72 |
+
[:octicons-arrow-right-24: Advanced algorithms](./explanation/advanced_algorithms.md)
|
73 |
+
|
74 |
+
- :octicons-book-16:{ .lg .middle } **Reference**
|
75 |
+
|
76 |
+
---
|
77 |
+
|
78 |
+
[:octicons-arrow-right-24: CLI](./reference/cli.md)
|
79 |
+
|
80 |
+
[:octicons-arrow-right-24: Python Library](./reference/python_lib_docs.md)
|
81 |
+
|
82 |
+
</div>
|
folding-studio/docs/docs/reference/cli.md
ADDED
@@ -0,0 +1,435 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## `experiment`
|
2 |
+
### `experiment status`
|
3 |
+
|
4 |
+
Get an experiment status.
|
5 |
+
|
6 |
+
**Usage**:
|
7 |
+
|
8 |
+
```console
|
9 |
+
folding experiment status EXP_ID
|
10 |
+
```
|
11 |
+
|
12 |
+
**Arguments**:
|
13 |
+
|
14 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
15 |
+
| -------- | ----------- | ----------- |
|
16 |
+
| EXP_ID | ID of the experiment. | str |
|
17 |
+
|
18 |
+
### `experiment list`
|
19 |
+
|
20 |
+
Get all your done and pending experiment ids. The IDs are provided in the order of submission, starting with the most recent.
|
21 |
+
|
22 |
+
**Usage**:
|
23 |
+
|
24 |
+
```console
|
25 |
+
folding experiment list [OPTIONS]
|
26 |
+
```
|
27 |
+
|
28 |
+
**Options**:
|
29 |
+
|
30 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
31 |
+
| ------- | ----------- | ---------- | ------------- |
|
32 |
+
| --limit | Max number of experiments to display in the terminal. | int | 100 |
|
33 |
+
| --output | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
34 |
+
|
35 |
+
### `experiment features`
|
36 |
+
|
37 |
+
Get an experiment's features.
|
38 |
+
|
39 |
+
**Usage**:
|
40 |
+
|
41 |
+
```console
|
42 |
+
folding experiment features [OPTIONS] EXP_ID
|
43 |
+
```
|
44 |
+
|
45 |
+
**Arguments**:
|
46 |
+
|
47 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
48 |
+
| -------- | ----------- | ----------- |
|
49 |
+
| EXP_ID | ID of the experiment. | str |
|
50 |
+
|
51 |
+
**Options**:
|
52 |
+
|
53 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
54 |
+
| ------- | ----------- | ---------- | ------------- |
|
55 |
+
| --output | Local path to download the zip to. | Path | `<exp_id>_features.zip` |
|
56 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
57 |
+
| --unzip / --no-unzip | Automatically unzip the file after its download. | bool | --no-unzip |
|
58 |
+
|
59 |
+
### `experiment results`
|
60 |
+
|
61 |
+
Get an experiment's results.
|
62 |
+
|
63 |
+
**Usage**:
|
64 |
+
|
65 |
+
```console
|
66 |
+
folding experiment results [OPTIONS] EXP_ID
|
67 |
+
```
|
68 |
+
|
69 |
+
**Arguments**:
|
70 |
+
|
71 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
72 |
+
| -------- | ----------- | ----------- |
|
73 |
+
| EXP_ID | ID of the experiment. | str |
|
74 |
+
|
75 |
+
**Options**:
|
76 |
+
|
77 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
78 |
+
| ------- | ----------- | ---------- | ------------- |
|
79 |
+
| --output | Local path to download the zip to. | Path | `<exp_id>_results.zip` |
|
80 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
81 |
+
| --unzip / --no-unzip | Automatically unzip the file after its download. | bool | --no-unzip |
|
82 |
+
|
83 |
+
### `experiment cancel`
|
84 |
+
|
85 |
+
Cancel experiment job executions. You can pass one or more experiment IDs.
|
86 |
+
|
87 |
+
**Usage**:
|
88 |
+
|
89 |
+
```console
|
90 |
+
folding experiment cancel EXP_ID
|
91 |
+
```
|
92 |
+
|
93 |
+
**Arguments**:
|
94 |
+
|
95 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
96 |
+
| -------- | ----------- | ----------- |
|
97 |
+
| EXP_ID | ID of the experiment. | List[str] |
|
98 |
+
|
99 |
+
### `experiment logs`
|
100 |
+
|
101 |
+
Get an experiment's logs.
|
102 |
+
|
103 |
+
**Usage**:
|
104 |
+
|
105 |
+
```console
|
106 |
+
folding experiment logs [OPTIONS] EXP_ID
|
107 |
+
```
|
108 |
+
|
109 |
+
**Arguments**:
|
110 |
+
|
111 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
112 |
+
| -------- | ----------- | ----------- |
|
113 |
+
| EXP_ID | ID of the experiment. | str |
|
114 |
+
|
115 |
+
**Options**:
|
116 |
+
|
117 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
118 |
+
| ------- | ----------- | ---------- | ------------- |
|
119 |
+
| --output | Local path to download the logs to. | Path | `<exp_id>_logs.txt` |
|
120 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
121 |
+
|
122 |
+
## `msa`
|
123 |
+
### `msa search`
|
124 |
+
|
125 |
+
Run an MSA tool. Read more at <https://int-bio-foldingstudio-gcp.nw.r.appspot.com/tutorials/msa_search/>.
|
126 |
+
|
127 |
+
**Usage**:
|
128 |
+
|
129 |
+
```console
|
130 |
+
folding msa search [OPTIONS] SOURCE
|
131 |
+
```
|
132 |
+
|
133 |
+
**Arguments**:
|
134 |
+
|
135 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
136 |
+
| -------- | ----------- | ----------- |
|
137 |
+
| SOURCE | Path to the input fasta file. | Path |
|
138 |
+
|
139 |
+
**Options**:
|
140 |
+
|
141 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
142 |
+
| ------- | ----------- | ---------- | ------------- |
|
143 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
144 |
+
| --cache / --no-cache | Use cached experiment results if any. | bool | --cache |
|
145 |
+
| --msa-mode | Mode of the MSA features generation. | FeatureMode | search |
|
146 |
+
| --metadata-file | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
147 |
+
|
148 |
+
### `msa experiment`
|
149 |
+
#### `msa experiment status`
|
150 |
+
|
151 |
+
Get an MSA experiment status.
|
152 |
+
|
153 |
+
**Usage**:
|
154 |
+
|
155 |
+
```console
|
156 |
+
folding msa experiment status MSA_EXP_ID
|
157 |
+
```
|
158 |
+
|
159 |
+
**Arguments**:
|
160 |
+
|
161 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
162 |
+
| -------- | ----------- | ----------- |
|
163 |
+
| MSA_EXP_ID | ID of the MSA experiment. | str |
|
164 |
+
|
165 |
+
#### `msa experiment features`
|
166 |
+
|
167 |
+
Get an MSA experiment's features.
|
168 |
+
|
169 |
+
**Usage**:
|
170 |
+
|
171 |
+
```console
|
172 |
+
folding msa experiment features [OPTIONS] MSA_EXP_ID
|
173 |
+
```
|
174 |
+
|
175 |
+
**Arguments**:
|
176 |
+
|
177 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
178 |
+
| -------- | ----------- | ----------- |
|
179 |
+
| MSA_EXP_ID | ID of the MSA experiment. | str |
|
180 |
+
|
181 |
+
**Options**:
|
182 |
+
|
183 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
184 |
+
| ------- | ----------- | ---------- | ------------- |
|
185 |
+
| --output | Local path to download the zip to. | Path | `<msa_exp_id>_features.zip` |
|
186 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
187 |
+
| --unzip / --no-unzip | Automatically unzip the file after its download. | bool | --no-unzip |
|
188 |
+
|
189 |
+
#### `msa experiment logs`
|
190 |
+
|
191 |
+
Get an MSA experiment's logs.
|
192 |
+
|
193 |
+
**Usage**:
|
194 |
+
|
195 |
+
```console
|
196 |
+
folding msa experiment logs [OPTIONS] MSA_EXP_ID
|
197 |
+
```
|
198 |
+
|
199 |
+
**Arguments**:
|
200 |
+
|
201 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
202 |
+
| -------- | ----------- | ----------- |
|
203 |
+
| MSA_EXP_ID | ID of the MSA experiment. | str |
|
204 |
+
|
205 |
+
**Options**:
|
206 |
+
|
207 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
208 |
+
| ------- | ----------- | ---------- | ------------- |
|
209 |
+
| --output | Local path to download the logs to. | Path | `<exp_id>_logs.txt` |
|
210 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
211 |
+
|
212 |
+
#### `msa experiment list`
|
213 |
+
|
214 |
+
Get all your done and pending experiment ids. The IDs are provided in the order of submission, starting with the most recent.
|
215 |
+
|
216 |
+
**Usage**:
|
217 |
+
|
218 |
+
```console
|
219 |
+
folding msa experiment list [OPTIONS]
|
220 |
+
```
|
221 |
+
|
222 |
+
**Options**:
|
223 |
+
|
224 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
225 |
+
| ------- | ----------- | ---------- | ------------- |
|
226 |
+
| --limit | Max number of experiments to display in the terminal. | int | 100 |
|
227 |
+
| --output | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
228 |
+
|
229 |
+
## `predict`
|
230 |
+
### `predict af2`
|
231 |
+
|
232 |
+
Asynchronous AF2 folding submission. Read more at <https://int-bio-foldingstudio-gcp.nw.r.appspot.com/how-to-guides/af2_openfold/single_af2_job/>. If the source is a CSV or JSON file describing a batch prediction request, all the other options will be overlooked.
|
233 |
+
|
234 |
+
**Usage**:
|
235 |
+
|
236 |
+
```console
|
237 |
+
folding predict af2 [OPTIONS] SOURCE
|
238 |
+
```
|
239 |
+
|
240 |
+
**Arguments**:
|
241 |
+
|
242 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
243 |
+
| -------- | ----------- | ----------- |
|
244 |
+
| SOURCE | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path |
|
245 |
+
|
246 |
+
**Options**:
|
247 |
+
|
248 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
249 |
+
| ------- | ----------- | ---------- | ------------- |
|
250 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
251 |
+
| --cache / --no-cache | Use cached experiment results if any. | bool | --cache |
|
252 |
+
| --template-mode | Mode of the template features generation. | FeatureMode | search |
|
253 |
+
| --custom-template | Path to a custom template or a directory of custom templates. To pass multiple inputs, simply repeat the flag (e.g. `--custom-template template_1.cif --custom-template template_2.cif`). | List[Path] | [] |
|
254 |
+
| --custom-template-id | ID of a custom template. To pass multiple inputs, simply repeat the flag (e.g. `--custom-template-id template_ID_1 --custom-template-id template_ID_2`). | List[str] | [] |
|
255 |
+
| --initial-guess-file | Path to an initial guess file. | Path | No default |
|
256 |
+
| --templates-masks-file | Path to a templates masks file. | Path | No default |
|
257 |
+
| --msa-mode | Mode of the MSA features generation. | FeatureMode | search |
|
258 |
+
| --custom-msa | Path to a custom msa or a directory of custom msas. To pass multiple inputs, simply repeat the flag (e.g. `--custom-msa msa_1.sto --custom-msa msa_2.sto`). | List[Path] | [] |
|
259 |
+
| --max-msa-clusters | Max number of MSA clusters to search. | int | -1 |
|
260 |
+
| --max-extra-msa | Max extra non-clustered MSA representation to use as source. | int | -1 |
|
261 |
+
| --gap-trick / --no-gap-trick | Activate gap trick, allowing to model complexes with monomer models. | bool | --no-gap-trick |
|
262 |
+
| --num-recycle | Number of refinement iterations of the predicted structures. | int | 3 |
|
263 |
+
| --model-subset | Subset of AF2 model ids to use, between 1 and 5 included. | List[int] | [] |
|
264 |
+
| --random-seed | Random seed used during the MSA sampling. Different random seed values will introduce variations in the predictions. | int | 0 |
|
265 |
+
| --num-seed | Number of random seeds to use. Creates a batch prediction. | int | No default |
|
266 |
+
| --metadata-file | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
267 |
+
|
268 |
+
### `predict openfold`
|
269 |
+
|
270 |
+
Asynchronous OpenFold folding submission. Read more at <https://int-bio-foldingstudio-gcp.nw.r.appspot.com/how-to-guides/af2_openfold/single_openfold_job/>. If the source is a CSV or JSON file describing a batch prediction request, all the other options will be overlooked.
|
271 |
+
|
272 |
+
**Usage**:
|
273 |
+
|
274 |
+
```console
|
275 |
+
folding predict openfold [OPTIONS] SOURCE
|
276 |
+
```
|
277 |
+
|
278 |
+
**Arguments**:
|
279 |
+
|
280 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
281 |
+
| -------- | ----------- | ----------- |
|
282 |
+
| SOURCE | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path |
|
283 |
+
|
284 |
+
**Options**:
|
285 |
+
|
286 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
287 |
+
| ------- | ----------- | ---------- | ------------- |
|
288 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
289 |
+
| --cache / --no-cache | Use cached experiment results if any. | bool | --cache |
|
290 |
+
| --template-mode | Mode of the template features generation. | FeatureMode | search |
|
291 |
+
| --custom-template | Path to a custom template or a directory of custom templates. To pass multiple inputs, simply repeat the flag (e.g. `--custom-template template_1.cif --custom-template template_2.cif`). | List[Path] | [] |
|
292 |
+
| --custom-template-id | ID of a custom template. To pass multiple inputs, simply repeat the flag (e.g. `--custom-template-id template_ID_1 --custom-template-id template_ID_2`). | List[str] | [] |
|
293 |
+
| --templates-masks-file | Path to a templates masks file. | Path | No default |
|
294 |
+
| --msa-mode | Mode of the MSA features generation. | FeatureMode | search |
|
295 |
+
| --custom-msa | Path to a custom msa or a directory of custom msas. To pass multiple inputs, simply repeat the flag (e.g. `--custom-msa msa_1.sto --custom-msa msa_2.sto`). | List[Path] | [] |
|
296 |
+
| --max-msa-clusters | Max number of MSA clusters to search. | int | -1 |
|
297 |
+
| --max-extra-msa | Max extra non-clustered MSA representation to use as source. | int | -1 |
|
298 |
+
| --gap-trick / --no-gap-trick | Activate gap trick, allowing to model complexes with monomer models. | bool | --no-gap-trick |
|
299 |
+
| --num-recycle | Number of refinement iterations of the predicted structures. | int | 3 |
|
300 |
+
| --model-subset | Subset of AF2 model ids to use, between 1 and 5 included. | List[int] | [] |
|
301 |
+
| --random-seed | Random seed used during the MSA sampling. Different random seed values will introduce variations in the predictions. | int | 0 |
|
302 |
+
| --num-seed | Number of random seeds to use. Creates a batch prediction. | int | No default |
|
303 |
+
| --metadata-file | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
304 |
+
|
305 |
+
### `predict boltz`
|
306 |
+
|
307 |
+
Synchronous Boltz-1 folding submission.
|
308 |
+
|
309 |
+
**Usage**:
|
310 |
+
|
311 |
+
```console
|
312 |
+
folding predict boltz [OPTIONS] SOURCE
|
313 |
+
```
|
314 |
+
|
315 |
+
**Arguments**:
|
316 |
+
|
317 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
318 |
+
| -------- | ----------- | ----------- |
|
319 |
+
| SOURCE | Path to the data source. Either a FASTA file, a YAML file, or a directory containing FASTA and YAML files. | Path |
|
320 |
+
|
321 |
+
**Options**:
|
322 |
+
|
323 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
324 |
+
| ------- | ----------- | ---------- | ------------- |
|
325 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
326 |
+
| --parameters-json | Path to JSON file containing Boltz inference parameters. | Path | No default |
|
327 |
+
| --recycling-steps | Number of recycling steps for prediction. | int | 3 |
|
328 |
+
| --sampling-steps | Number of sampling steps for prediction. | int | 200 |
|
329 |
+
| --diffusion-samples | Number of diffusion samples for prediction. | int | 1 |
|
330 |
+
| --step-scale | Step size related to the temperature at which the diffusion process samples the distribution. | float | 1.638 |
|
331 |
+
| --msa-pairing-strategy | Pairing strategy for MSA generation. | str | greedy |
|
332 |
+
| --write-full-pae / --no-write-full-pae | Whether to save the full PAE matrix as a file. | bool | --no-write-full-pae |
|
333 |
+
| --write-full-pde / --no-write-full-pde | Whether to save the full PDE matrix as a file. | bool | --no-write-full-pde |
|
334 |
+
| --use-msa-server / --no-use-msa-server | Flag to use the MSA server for inference. | bool | --use-msa-server |
|
335 |
+
| --msa-path | Path to the custom MSAs. It can be a .a3m or .aligned.pqt file, or a directory containing these files. | str | No default |
|
336 |
+
| --seed | Seed for random number generation. | int | 0 |
|
337 |
+
| --output | Local path to download the result zip and query parameters to. Defaults to 'boltz_results'. | Path | boltz_results |
|
338 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
339 |
+
| --unzip / --no-unzip | Unzip the file after its download. | bool | --no-unzip |
|
340 |
+
| --spinner / --no-spinner | Use live spinner in log output. | bool | --spinner |
|
341 |
+
|
342 |
+
### `predict chai`
|
343 |
+
|
344 |
+
Synchronous Chai-1 folding submission.
|
345 |
+
|
346 |
+
**Usage**:
|
347 |
+
|
348 |
+
```console
|
349 |
+
folding predict chai [OPTIONS] SOURCE
|
350 |
+
```
|
351 |
+
|
352 |
+
**Arguments**:
|
353 |
+
|
354 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
355 |
+
| -------- | ----------- | ----------- |
|
356 |
+
| SOURCE | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path |
|
357 |
+
|
358 |
+
**Options**:
|
359 |
+
|
360 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
361 |
+
| ------- | ----------- | ---------- | ------------- |
|
362 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
363 |
+
| --use-msa-server / --no-use-msa-server | Flag to enable MSA features. MSA search is performed by InstaDeep's MMseqs2 server. | bool | --use-msa-server |
|
364 |
+
| --use-templates-server / --no-use-templates-server | Flag to enable templates. Templates search is performed by InstaDeep's MMseqs2 server. | bool | --no-use-templates-server |
|
365 |
+
| --num-trunk-recycles | Number of trunk recycles during inference. | int | 3 |
|
366 |
+
| --seed | Random seed for inference. | int | 0 |
|
367 |
+
| --num-diffn-timesteps | Number of diffusion timesteps to run. | int | 200 |
|
368 |
+
| --restraints | Restraints information. | str | No default |
|
369 |
+
| --recycle-msa-subsample | Subsample parameter for recycling MSA during inference. | int | 0 |
|
370 |
+
| --num-trunk-samples | Number of trunk samples to generate during inference. | int | 1 |
|
371 |
+
| --msa-path | Path to the custom MSAs. It can be a .a3m or .aligned.pqt file, or a directory containing these files. | str | No default |
|
372 |
+
| --output | Local path to download the result zip and query parameters to. Defaults to 'chai_results'. | Path | chai_results |
|
373 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
374 |
+
| --unzip / --no-unzip | Unzip the file after its download. | bool | --no-unzip |
|
375 |
+
| --spinner / --no-spinner | Use live spinner in log output. | bool | --spinner |
|
376 |
+
|
377 |
+
### `predict protenix`
|
378 |
+
|
379 |
+
Synchronous Protenix folding submission.
|
380 |
+
|
381 |
+
**Usage**:
|
382 |
+
|
383 |
+
```console
|
384 |
+
folding predict protenix [OPTIONS] SOURCE
|
385 |
+
```
|
386 |
+
|
387 |
+
**Arguments**:
|
388 |
+
|
389 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
390 |
+
| -------- | ----------- | ----------- |
|
391 |
+
| SOURCE | Path to the data source. Either a fasta file or a directory of fasta files describing a batch prediction request. | Path |
|
392 |
+
|
393 |
+
**Options**:
|
394 |
+
|
395 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
396 |
+
| ------- | ----------- | ---------- | ------------- |
|
397 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
398 |
+
| --use-msa-server / --no-use-msa-server | Flag to use the MSA server for inference. Forced to True. | bool | --use-msa-server |
|
399 |
+
| --seed | Random seed. | int | 0 |
|
400 |
+
| --cycle | Pairformer cycle number. | int | 10 |
|
401 |
+
| --step | Number of steps for the diffusion process. | int | 200 |
|
402 |
+
| --sample | Number of samples in each seed. | int | 5 |
|
403 |
+
| --output | Local path to download the result zip and query parameters to. Defaults to 'protenix_results'. | Path | protenix_results |
|
404 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
405 |
+
| --unzip / --no-unzip | Unzip the file after its download. | bool | --no-unzip |
|
406 |
+
| --spinner / --no-spinner | Use live spinner in log output. | bool | --spinner |
|
407 |
+
|
408 |
+
### `predict soloseq`
|
409 |
+
|
410 |
+
Synchronous SoloSeq folding submission.
|
411 |
+
|
412 |
+
**Usage**:
|
413 |
+
|
414 |
+
```console
|
415 |
+
folding predict soloseq [OPTIONS] SOURCE
|
416 |
+
```
|
417 |
+
|
418 |
+
**Arguments**:
|
419 |
+
|
420 |
+
| ARGUMENT | DESCRIPTION | VALUE TYPE |
|
421 |
+
| -------- | ----------- | ----------- |
|
422 |
+
| SOURCE | Path to the data source. Either a fasta file or a directory of fasta files. | Path |
|
423 |
+
|
424 |
+
**Options**:
|
425 |
+
|
426 |
+
| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
427 |
+
| ------- | ----------- | ---------- | ------------- |
|
428 |
+
| --project-code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
429 |
+
| --seed | Random seed. | int | 0 |
|
430 |
+
| --skip-relaxation / --no-skip-relaxation | Skip the relaxation process. | bool | --no-skip-relaxation |
|
431 |
+
| --subtract-plddt / --no-subtract-plddt | Output (100 - pLDDT) instead of the pLDDT itself. | bool | --no-subtract-plddt |
|
432 |
+
| --output | Local path to download the result zip and query parameters to. Defaults to 'soloseq_results'. | Path | soloseq_results |
|
433 |
+
| --force / --no-force | Forces the download to overwrite any existing file with the same name in the specified location. | bool | --no-force |
|
434 |
+
| --unzip / --no-unzip | Unzip the file after its download. | bool | --no-unzip |
|
435 |
+
| --spinner / --no-spinner | Use live spinner in log output. | bool | --spinner |
|
folding-studio/docs/docs/reference/python_lib_docs.md
ADDED
@@ -0,0 +1,719 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# `folding-studio` Python Library
|
2 |
+
|
3 |
+
This document provides an overview of the available functions and classes in `folding_studio`.
|
4 |
+
|
5 |
+
## `get_id_token`
|
6 |
+
|
7 |
+
**Signature:**
|
8 |
+
```{ .python .no-copy }
|
9 |
+
get_id_token() -> str
|
10 |
+
```
|
11 |
+
|
12 |
+
**Description:**
|
13 |
+
|
14 |
+
Get the user's GCP identity token.
|
15 |
+
|
16 |
+
---
|
17 |
+
|
18 |
+
## `single_job_prediction`
|
19 |
+
|
20 |
+
**Signature:**
|
21 |
+
```{ .python .no-copy }
|
22 |
+
single_job_prediction(identity_token: str, fasta_file: pathlib.Path,
|
23 |
+
parameters: folding_studio_data_models.parameters.alphafold.AF2Parameters | folding_studio_data_models.parameters.openfold.OpenFoldParameters | None = None,
|
24 |
+
project_code: str | None = None, *, ignore_cache: bool = False, **kwargs) -> dict
|
25 |
+
```
|
26 |
+
|
27 |
+
**Description:**
|
28 |
+
|
29 |
+
Make a single job prediction from folding parameters and a FASTA file.
|
30 |
+
|
31 |
+
|
32 |
+
### Parameters:
|
33 |
+
|
34 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
35 |
+
| --------- | ----------- | ---------- | ------------- |
|
36 |
+
| identity_token | GCP identity token | str | No default |
|
37 |
+
| fasta_file | Input FASTA file | Path | No default |
|
38 |
+
| parameters | Job parameters | AF2Parameters or OpenFoldParameters or None | None |
|
39 |
+
| project_code | Project code under which the jobs are billed. If None, value is attempted to be read from environment. | str | None |
|
40 |
+
| ignore_cache | Force the job submission or not | bool | False |
|
41 |
+
|
42 |
+
---
|
43 |
+
|
44 |
+
## `batch_prediction_from_file`
|
45 |
+
|
46 |
+
**Signature:**
|
47 |
+
```{ .python .no-copy }
|
48 |
+
batch_prediction_from_file(identity_token: str, file: pathlib.Path, project_code: str | None = None) -> dict
|
49 |
+
```
|
50 |
+
|
51 |
+
**Description:**
|
52 |
+
|
53 |
+
Make a batch prediction from a configuration file.
|
54 |
+
|
55 |
+
### Parameters:
|
56 |
+
|
57 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
58 |
+
| --------- | ----------- | ---------- | ------------- |
|
59 |
+
| identity_token | GCP identity token | str | No default |
|
60 |
+
| file | Configuration file path | Path | No default |
|
61 |
+
| project_code | Project code under which the jobs are billed. If None, value is attempted to be read from environment. | str | None |
|
62 |
+
|
63 |
+
---
|
64 |
+
|
65 |
+
## `af2`
|
66 |
+
|
67 |
+
**Signature:**
|
68 |
+
```{ .python .no-copy }
|
69 |
+
af2(
|
70 |
+
source: Path,
|
71 |
+
project_code: str,
|
72 |
+
cache: bool = True,
|
73 |
+
template_mode: FeatureMode = FeatureMode.SEARCH,
|
74 |
+
custom_template: List[Path] = [],
|
75 |
+
custom_template_id: List[str] = [],
|
76 |
+
initial_guess_file: Optional[Path] = None,
|
77 |
+
templates_masks_file: Optional[Path] = None,
|
78 |
+
msa_mode: FeatureMode = FeatureMode.SEARCH,
|
79 |
+
custom_msa: List[Path] = [],
|
80 |
+
max_msa_clusters: int = -1,
|
81 |
+
max_extra_msa: int = -1,
|
82 |
+
gap_trick: bool = False,
|
83 |
+
num_recycle: int = 3,
|
84 |
+
model_subset: List[int] = [],
|
85 |
+
random_seed: int = 0,
|
86 |
+
num_seed: Optional[int] = None,
|
87 |
+
metadata_file: Optional[Path] = None,
|
88 |
+
)
|
89 |
+
```
|
90 |
+
|
91 |
+
**Description:**
|
92 |
+
|
93 |
+
Asynchronous AF2 folding submission. This command is used to submit a folding job to the AlphaFold2 model for protein structure prediction.
|
94 |
+
If the `source` is a CSV or JSON file describing a batch prediction request, all the other options will be overlooked.
|
95 |
+
|
96 |
+
### Parameters:
|
97 |
+
|
98 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
99 |
+
| ------- | ----------- | ---------- | ------------- |
|
100 |
+
| source | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path | No default |
|
101 |
+
| project_code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
102 |
+
| cache | Use cached experiment results if any. | bool | True |
|
103 |
+
| template_mode | Mode of the template features generation. | FeatureMode | FeatureMode.SEARCH |
|
104 |
+
| custom_template | Path to a custom template or a directory of custom templates. | List[Path] | [] |
|
105 |
+
| custom_template_id | ID of a custom template. | List[str] | [] |
|
106 |
+
| initial_guess_file | Path to an initial guess file. | Path | No default |
|
107 |
+
| templates_masks_file | Path to a templates masks file. | Path | No default |
|
108 |
+
| msa_mode | Mode of the MSA features generation. | FeatureMode | FeatureMode.SEARCH |
|
109 |
+
| custom_msa | Path to a custom msa or a directory of custom msas. | List[Path] | [] |
|
110 |
+
| max_msa_clusters | Max number of MSA clusters to search. | int | -1 |
|
111 |
+
| max_extra_msa | Max extra non-clustered MSA representation to use as source. | int | -1 |
|
112 |
+
| gap_trick | Activate gap trick, allowing to model complexes with monomer models. | bool | False |
|
113 |
+
| num_recycle | Number of refinement iterations of the predicted structures. | int | 3 |
|
114 |
+
| model_subset | Subset of AF2 model ids to use, between 1 and 5 included. | List[int] | [] |
|
115 |
+
| random_seed | Random seed used during the MSA sampling. Different random seed values will introduce variations in the predictions. | int | 0 |
|
116 |
+
| num_seed | Number of random seeds to use. Creates a batch prediction. | int | No default |
|
117 |
+
| metadata_file | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
118 |
+
|
119 |
+
---
|
120 |
+
|
121 |
+
## `openfold`
|
122 |
+
|
123 |
+
**Signature:**
|
124 |
+
```{ .python .no-copy }
|
125 |
+
openfold(
|
126 |
+
source: Path,
|
127 |
+
project_code: str,
|
128 |
+
cache: bool = True,
|
129 |
+
template_mode: FeatureMode = FeatureMode.SEARCH,
|
130 |
+
custom_template: List[Path] = [],
|
131 |
+
custom_template_id: List[str] = [],
|
132 |
+
templates_masks_file: Optional[Path] = None,
|
133 |
+
msa_mode: FeatureMode = FeatureMode.SEARCH,
|
134 |
+
custom_msa: List[Path] = [],
|
135 |
+
max_msa_clusters: int = -1,
|
136 |
+
max_extra_msa: int = -1,
|
137 |
+
gap_trick: bool = False,
|
138 |
+
num_recycle: int = 3,
|
139 |
+
model_subset: List[int] = [],
|
140 |
+
random_seed: int = 0,
|
141 |
+
num_seed: Optional[int] = None,
|
142 |
+
metadata_file: Optional[Path] = None,
|
143 |
+
)
|
144 |
+
```
|
145 |
+
|
146 |
+
**Description:**
|
147 |
+
|
148 |
+
Asynchronous OpenFold folding submission. This command is used to submit a folding job to the OpenFold model for protein structure prediction.
|
149 |
+
|
150 |
+
If the source is a CSV or JSON file describing a batch prediction request, all the other options will be overlooked.
|
151 |
+
|
152 |
+
### Parameters:
|
153 |
+
|
154 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
155 |
+
| ------- | ----------- | ---------- | ------------- |
|
156 |
+
| source | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path | No default |
|
157 |
+
| project_code | Project code. If unknown, contact your PM or the Folding Studio team. | str | No default |
|
158 |
+
| cache | Use cached experiment results if any. | bool | True |
|
159 |
+
| template_mode | Mode of the template features generation. | FeatureMode | FeatureMode.SEARCH |
|
160 |
+
| custom_template | Path to a custom template or a directory of custom templates. | List[Path] | [] |
|
161 |
+
| custom_template_id | ID of a custom template. | List[str] | [] |
|
162 |
+
| templates_masks_file | Path to a templates masks file. | Path | No default |
|
163 |
+
| msa_mode | Mode of the MSA features generation. | FeatureMode | FeatureMode.SEARCH |
|
164 |
+
| custom_msa | Path to a custom msa or a directory of custom msas.| List[Path] | [] |
|
165 |
+
| max_msa_clusters | Max number of MSA clusters to search. | int | -1 |
|
166 |
+
| max_extra_msa | Max extra non-clustered MSA representation to use as source. | int | -1 |
|
167 |
+
| gap_trick | Activate gap trick, allowing to model complexes with monomer models. | bool | False |
|
168 |
+
| num_recycle | Number of refinement iterations of the predicted structures. | int | 3 |
|
169 |
+
| model_subset | Subset of AF2 model ids to use, between 1 and 5 included. | List[int] | [] |
|
170 |
+
| random_seed | Random seed used during the MSA sampling. Different random seed values will introduce variations in the predictions. | int | 0 |
|
171 |
+
| num_seed | Number of random seeds to use. Creates a batch prediction. | int | No default |
|
172 |
+
| metadata_file | Path to the file where the job metadata returned by the server are written. | Path | No default |
|
173 |
+
|
174 |
+
---
|
175 |
+
|
176 |
+
## `list` (experiment)
|
177 |
+
|
178 |
+
**Signature:**
|
179 |
+
```{ .python .no-copy }
|
180 |
+
list()
|
181 |
+
```
|
182 |
+
|
183 |
+
**Description:**
|
184 |
+
|
185 |
+
Fetches a list of all completed and pending experiments.
|
186 |
+
|
187 |
+
---
|
188 |
+
|
189 |
+
## `status` (experiment)
|
190 |
+
|
191 |
+
**Signature:**
|
192 |
+
```{ .python .no-copy }
|
193 |
+
status(
|
194 |
+
exp_id: str
|
195 |
+
)
|
196 |
+
```
|
197 |
+
|
198 |
+
**Description:**
|
199 |
+
|
200 |
+
Fetches the status of a specific experiment using its ID. The function makes a GET request to the server to retrieve the status of the experiment.
|
201 |
+
|
202 |
+
### Parameters:
|
203 |
+
|
204 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE |
|
205 |
+
| ------- | ----------- | ---------- |
|
206 |
+
| exp_id | The experiment ID for which the status needs to be fetched | str |
|
207 |
+
|
208 |
+
---
|
209 |
+
|
210 |
+
## `cancel` (experiment)
|
211 |
+
|
212 |
+
**Signature:**
|
213 |
+
```{ .python .no-copy }
|
214 |
+
cancel(
|
215 |
+
exp_id: List[str]
|
216 |
+
)
|
217 |
+
```
|
218 |
+
|
219 |
+
**Description:**
|
220 |
+
|
221 |
+
Cancels the execution of one or more experiment jobs by their IDs.
|
222 |
+
|
223 |
+
### Parameters:
|
224 |
+
|
225 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE |
|
226 |
+
| ------- | ----------- | ---------- |
|
227 |
+
| exp_id | A list of experiment IDs to cancel | List[str] |
|
228 |
+
|
229 |
+
---
|
230 |
+
|
231 |
+
## `results` (experiment)
|
232 |
+
|
233 |
+
**Signature:**
|
234 |
+
```{ .python .no-copy }
|
235 |
+
results(
|
236 |
+
exp_id: str,
|
237 |
+
output: Optional[Path] = None,
|
238 |
+
force: bool = False,
|
239 |
+
unzip: bool = False
|
240 |
+
)
|
241 |
+
```
|
242 |
+
|
243 |
+
**Description:**
|
244 |
+
|
245 |
+
Downloads the results of a specified experiment, given its experiment ID.
|
246 |
+
|
247 |
+
### Parameters:
|
248 |
+
|
249 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
250 |
+
| ------- | ----------- | ---------- | ------------- |
|
251 |
+
| exp_id | The experiment ID of the results to retrieve. | str | |
|
252 |
+
| output | The local path where the zip file will be downloaded. | Optional[Path] | None |
|
253 |
+
| force | Whether to overwrite an existing file at the specified location. | bool | False |
|
254 |
+
| unzip | Whether to automatically unzip the downloaded file after the download completes. | bool | False |
|
255 |
+
|
256 |
+
---
|
257 |
+
|
258 |
+
## `features` (experiment)
|
259 |
+
|
260 |
+
**Signature:**
|
261 |
+
```{ .python .no-copy }
|
262 |
+
features(
|
263 |
+
exp_id: str,
|
264 |
+
output: Optional[Path] = None,
|
265 |
+
force: bool = False,
|
266 |
+
unzip: bool = False
|
267 |
+
)
|
268 |
+
```
|
269 |
+
|
270 |
+
**Description:**
|
271 |
+
|
272 |
+
Downloads the features of a specified experiment, given its experiment ID.
|
273 |
+
|
274 |
+
### Parameters:
|
275 |
+
|
276 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
277 |
+
| ------- | ----------- | ---------- | ------------- |
|
278 |
+
| exp_id | The experiment ID of the results to retrieve. | str | |
|
279 |
+
| output | The local path where the zip file will be downloaded. | Optional[Path] | None |
|
280 |
+
| force | Whether to overwrite an existing file at the specified location. | bool | False |
|
281 |
+
| unzip | Whether to automatically unzip the downloaded file after the download completes. | bool | False |
|
282 |
+
|
283 |
+
|
284 |
+
---
|
285 |
+
|
286 |
+
## `search` (multiple sequences alignment - msa)
|
287 |
+
|
288 |
+
**Signature:**
|
289 |
+
```{ .python .no-copy }
|
290 |
+
search(
|
291 |
+
source: Path,
|
292 |
+
project_code: str,
|
293 |
+
cache: bool = True,
|
294 |
+
msa_mode: FeatureMode = FeatureMode.SEARCH,
|
295 |
+
)
|
296 |
+
|
297 |
+
```
|
298 |
+
|
299 |
+
**Description:**
|
300 |
+
|
301 |
+
Runs an MSA (Multiple Sequence Alignment).
|
302 |
+
|
303 |
+
### Parameters:
|
304 |
+
|
305 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
306 |
+
| ------- | ----------- | ---------- | ------------- |
|
307 |
+
| source | Path to the data source. Either a fasta file, a directory of fasta files or a csv/json file describing a batch prediction request. | Path | |
|
308 |
+
| project_code | Project code. If unknown, contact your PM or the Folding Studio team. | str | |
|
309 |
+
| cache | Use cached experiment results if any. | bool | True |
|
310 |
+
| msa_mode | Mode of the MSA features generation. | FeatureMode | FeatureMode.SEARCH |
|
311 |
+
|
312 |
+
|
313 |
+
---
|
314 |
+
|
315 |
+
## `list` (multiple sequences alignment - msa)
|
316 |
+
|
317 |
+
**Signature:**
|
318 |
+
```{ .python .no-copy }
|
319 |
+
list()
|
320 |
+
```
|
321 |
+
|
322 |
+
**Description:**
|
323 |
+
|
324 |
+
Fetches a list of all completed and pending experiments.
|
325 |
+
|
326 |
+
---
|
327 |
+
|
328 |
+
## `status` (multiple sequences alignment - msa)
|
329 |
+
|
330 |
+
**Signature:**
|
331 |
+
```{ .python .no-copy }
|
332 |
+
status(
|
333 |
+
msa_exp_id: str
|
334 |
+
)
|
335 |
+
```
|
336 |
+
|
337 |
+
**Description:**
|
338 |
+
|
339 |
+
Fetches the status of a specific MSA experiment using its ID.
|
340 |
+
|
341 |
+
### Parameters:
|
342 |
+
|
343 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE |
|
344 |
+
| ------- | ----------- | ---------- |
|
345 |
+
| msa_exp_id | The MSA experiment ID for which the status needs to be fetched | str |
|
346 |
+
|
347 |
+
|
348 |
+
---
|
349 |
+
|
350 |
+
## `features` (multiple sequences alignment - msa)
|
351 |
+
|
352 |
+
**Signature:**
|
353 |
+
```{ .python .no-copy }
|
354 |
+
features(
|
355 |
+
msa_exp_id: str,
|
356 |
+
output: Optional[Path] = None,
|
357 |
+
force: bool = False,
|
358 |
+
unzip: bool = False
|
359 |
+
)
|
360 |
+
```
|
361 |
+
|
362 |
+
**Description:**
|
363 |
+
|
364 |
+
Fetches the features of a given MSA (Multiple Sequence Alignment) experiment.
|
365 |
+
|
366 |
+
### Parameters:
|
367 |
+
|
368 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
369 |
+
| ------- | ----------- | ---------- | ------------- |
|
370 |
+
| msa_exp_id | The MSA experiment ID of the results to retrieve. | str | |
|
371 |
+
| output | The local path where the zip file will be downloaded. | Optional[Path] | None |
|
372 |
+
| force | Whether to overwrite an existing file at the specified location. | bool | False |
|
373 |
+
| unzip | Whether to automatically unzip the downloaded file after the download completes. | bool | False |
|
374 |
+
|
375 |
+
|
376 |
+
---
|
377 |
+
|
378 |
+
## `Client`
|
379 |
+
|
380 |
+
**Signature:**
|
381 |
+
```{ .python .no-copy }
|
382 |
+
Client(api_key: 'str | None' = None, token_manager: 'TokenManager | None' = None) -> 'None'
|
383 |
+
```
|
384 |
+
|
385 |
+
**Description:**
|
386 |
+
|
387 |
+
The `Client` class is used to send requests to a prediction endpoint. It supports authentication via an API key or a Google Cloud JWT token, and it handles sending queries, managing errors, and receiving responses.
|
388 |
+
|
389 |
+
### Parameters:
|
390 |
+
|
391 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
392 |
+
| --------- | ----------- | ---------- | ------------- |
|
393 |
+
| api_key | API key for authentication | str or None | None |
|
394 |
+
| token_manager | JWT token manager | TokenManager or None | None |
|
395 |
+
|
396 |
+
### Class methods:
|
397 |
+
|
398 |
+
#### `from_api_key`
|
399 |
+
Creates a Client instance using an API key.
|
400 |
+
|
401 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
402 |
+
| --------- | ----------- | ---------- | ------------- |
|
403 |
+
| api_key | API key for authentication | str or None | None |
|
404 |
+
|
405 |
+
#### `from_jwt`
|
406 |
+
Creates a Client instance using a Google Cloud JWT token.
|
407 |
+
|
408 |
+
|
409 |
+
### Instance methods:
|
410 |
+
|
411 |
+
#### `send_request`
|
412 |
+
Sends a prediction request to the server.
|
413 |
+
|
414 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
415 |
+
| --------- | ----------- | ---------- | ------------- |
|
416 |
+
| query | A `Query` object containing the prediction data to send. | Query | |
|
417 |
+
|
418 |
+
---
|
419 |
+
|
420 |
+
## `SoloSeqQuery`
|
421 |
+
|
422 |
+
**Description:**
|
423 |
+
|
424 |
+
The `SoloSeqQuery` class is used to query the SoloSeq prediction API with FASTA data.
|
425 |
+
|
426 |
+
### Class methods:
|
427 |
+
|
428 |
+
#### `from_protein_sequence`
|
429 |
+
Creates a `SoloSeqQuery` instance from a protein sequence given as a string.
|
430 |
+
|
431 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
432 |
+
| --------- | ----------- | ---------- | ------------- |
|
433 |
+
| sequence | Input sequence to make prediction on. | str | |
|
434 |
+
| parameters | Prediction parameters.| SoloSeqParameters | Default values below. |
|
435 |
+
|
436 |
+
with
|
437 |
+
|
438 |
+
```{ .python .no-copy }
|
439 |
+
class SoloSeqParameters(BaseModel):
|
440 |
+
"""SoloSeq inference parameters."""
|
441 |
+
|
442 |
+
data_random_seed: int = Field(alias="seed", default=0)
|
443 |
+
skip_relaxation: bool = False
|
444 |
+
subtract_plddt: bool = False
|
445 |
+
```
|
446 |
+
|
447 |
+
#### `from_file`
|
448 |
+
Creates a `SoloSeqQuery` instance from a FASTA file located at the given path.
|
449 |
+
|
450 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
451 |
+
| --------- | ----------- | ---------- | ------------- |
|
452 |
+
| path | Path to the FASTA file. | str or Path | |
|
453 |
+
| parameters | Prediction parameters.| SoloSeqParameters | Default values below. |
|
454 |
+
|
455 |
+
with
|
456 |
+
|
457 |
+
```{ .python .no-copy }
|
458 |
+
class SoloSeqParameters(BaseModel):
|
459 |
+
"""SoloSeq inference parameters."""
|
460 |
+
|
461 |
+
data_random_seed: int = Field(alias="seed", default=0)
|
462 |
+
skip_relaxation: bool = False
|
463 |
+
subtract_plddt: bool = False
|
464 |
+
```
|
465 |
+
|
466 |
+
#### `from_directory`
|
467 |
+
Creates a `SoloSeqQuery` instance from a directory containing multiple FASTA files.
|
468 |
+
|
469 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
470 |
+
| --------- | ----------- | ---------- | ------------- |
|
471 |
+
| path | Path to the directory containing FASTA files.| str or Path| |
|
472 |
+
| parameters | Prediction parameters.| SoloSeqParameters | Default values below. |
|
473 |
+
|
474 |
+
with
|
475 |
+
|
476 |
+
```{ .python .no-copy }
|
477 |
+
class SoloSeqParameters(BaseModel):
|
478 |
+
"""SoloSeq inference parameters."""
|
479 |
+
|
480 |
+
data_random_seed: int = Field(alias="seed", default=0)
|
481 |
+
skip_relaxation: bool = False
|
482 |
+
subtract_plddt: bool = False
|
483 |
+
```
|
484 |
+
|
485 |
+
---
|
486 |
+
|
487 |
+
## `BoltzQuery`
|
488 |
+
|
489 |
+
**Description:**
|
490 |
+
|
491 |
+
The `BoltzQuery` class is used to query the Boltz-1 prediction API with FASTA data.
|
492 |
+
|
493 |
+
### Class methods:
|
494 |
+
|
495 |
+
#### `from_protein_sequence`
|
496 |
+
Creates a `BoltzQuery` instance from a protein sequence given as a string.
|
497 |
+
|
498 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
499 |
+
| --------- | ----------- | ---------- | ------------- |
|
500 |
+
| sequence | Input sequence to make prediction on. | str | |
|
501 |
+
| parameters | Prediction parameters.| BoltzParameters | Default values below. |
|
502 |
+
|
503 |
+
with
|
504 |
+
|
505 |
+
```{ .python .no-copy }
|
506 |
+
class BoltzParameters(BaseModel):
|
507 |
+
"""Boltz inference parameters."""
|
508 |
+
|
509 |
+
seed: int = 0
|
510 |
+
recycling_steps: int = 3
|
511 |
+
sampling_steps: int = 200
|
512 |
+
diffusion_samples: int = 1
|
513 |
+
step_scale: float = 1.638
|
514 |
+
msa_pairing_strategy: str = "greedy"
|
515 |
+
write_full_pae: bool = False
|
516 |
+
write_full_pde: bool = False
|
517 |
+
use_msa_server: bool = True
|
518 |
+
```
|
519 |
+
|
520 |
+
#### `from_file`
|
521 |
+
Creates a `BoltzQuery` instance from a FASTA file or a YAML file located at the given path.
|
522 |
+
|
523 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
524 |
+
| --------- | ----------- | ---------- | ------------- |
|
525 |
+
| path | Path to the FASTA or YAML file. | str or Path | |
|
526 |
+
| parameters | Prediction parameters.| BoltzParameters | Default values below. |
|
527 |
+
|
528 |
+
with
|
529 |
+
|
530 |
+
```{ .python .no-copy }
|
531 |
+
class BoltzParameters(BaseModel):
|
532 |
+
"""Boltz inference parameters."""
|
533 |
+
|
534 |
+
seed: int = 0
|
535 |
+
recycling_steps: int = 3
|
536 |
+
sampling_steps: int = 200
|
537 |
+
diffusion_samples: int = 1
|
538 |
+
step_scale: float = 1.638
|
539 |
+
msa_pairing_strategy: str = "greedy"
|
540 |
+
write_full_pae: bool = False
|
541 |
+
write_full_pde: bool = False
|
542 |
+
use_msa_server: bool = True
|
543 |
+
```
|
544 |
+
|
545 |
+
#### `from_directory`
|
546 |
+
Creates a `BoltzQuery` instance from a directory containing multiple FASTA and/or YAML files.
|
547 |
+
|
548 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
549 |
+
| --------- | ----------- | ---------- | ------------- |
|
550 |
+
| path | Path to the directory containing FASTA and/or YAML files.| str or Path| |
|
551 |
+
| parameters | Prediction parameters.| BoltzParameters | Default values below. |
|
552 |
+
|
553 |
+
with
|
554 |
+
|
555 |
+
```{ .python .no-copy }
|
556 |
+
class BoltzParameters(BaseModel):
|
557 |
+
"""Boltz inference parameters."""
|
558 |
+
|
559 |
+
seed: int = 0
|
560 |
+
recycling_steps: int = 3
|
561 |
+
sampling_steps: int = 200
|
562 |
+
diffusion_samples: int = 1
|
563 |
+
step_scale: float = 1.638
|
564 |
+
msa_pairing_strategy: str = "greedy"
|
565 |
+
write_full_pae: bool = False
|
566 |
+
write_full_pde: bool = False
|
567 |
+
use_msa_server: bool = True
|
568 |
+
```
|
569 |
+
|
570 |
+
---
|
571 |
+
|
572 |
+
## `ChaiQuery`
|
573 |
+
|
574 |
+
**Description:**
|
575 |
+
|
576 |
+
The `ChaiQuery` class is used to query the Chai-1 prediction API with FASTA data.
|
577 |
+
|
578 |
+
### Class methods:
|
579 |
+
|
580 |
+
#### `from_protein_sequence`
|
581 |
+
Creates a `ChaiQuery` instance from a protein sequence given as a string.
|
582 |
+
|
583 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
584 |
+
| --------- | ----------- | ---------- | ------------- |
|
585 |
+
| sequence | Input sequence to make prediction on. | str | |
|
586 |
+
| parameters | Prediction parameters.| ChaiParameters | Default values below. |
|
587 |
+
|
588 |
+
with
|
589 |
+
|
590 |
+
```{ .python .no-copy }
|
591 |
+
class ChaiParameters(BaseModel):
|
592 |
+
"""Chai inference parameters."""
|
593 |
+
|
594 |
+
seed: int = 0
|
595 |
+
num_trunk_recycles: int = 3
|
596 |
+
num_diffn_timesteps: int = 200
|
597 |
+
recycle_msa_subsample: int = 0
|
598 |
+
num_trunk_samples: int = 1
|
599 |
+
restraints: str | None = None
|
600 |
+
use_msa_server: bool = False
|
601 |
+
use_templates_server: bool = False
|
602 |
+
custom_msa_paths: dict[str, str] | None = None
|
603 |
+
```
|
604 |
+
|
605 |
+
#### `from_file`
|
606 |
+
Creates a `ChaiQuery` instance from a FASTA file located at the given path.
|
607 |
+
|
608 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
609 |
+
| --------- | ----------- | ---------- | ------------- |
|
610 |
+
| path | Path to the FASTA file. | str or Path | |
|
611 |
+
| parameters | Prediction parameters.| ChaiParameters | Default values below. |
|
612 |
+
|
613 |
+
with
|
614 |
+
|
615 |
+
```{ .python .no-copy }
|
616 |
+
class ChaiParameters(BaseModel):
|
617 |
+
"""Chai inference parameters."""
|
618 |
+
|
619 |
+
seed: int = 0
|
620 |
+
num_trunk_recycles: int = 3
|
621 |
+
num_diffn_timesteps: int = 200
|
622 |
+
recycle_msa_subsample: int = 0
|
623 |
+
num_trunk_samples: int = 1
|
624 |
+
restraints: str | None = None
|
625 |
+
use_msa_server: bool = False
|
626 |
+
use_templates_server: bool = False
|
627 |
+
custom_msa_paths: dict[str, str] | None = None
|
628 |
+
```
|
629 |
+
|
630 |
+
#### `from_directory`
|
631 |
+
Creates a `ChaiQuery` instance from a directory containing multiple FASTA files.
|
632 |
+
|
633 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
634 |
+
| --------- | ----------- | ---------- | ------------- |
|
635 |
+
| path | Path to the directory containing FASTA files.| str or Path| |
|
636 |
+
| parameters | Prediction parameters.| ChaiParameters | Default values below. |
|
637 |
+
|
638 |
+
with
|
639 |
+
|
640 |
+
```{ .python .no-copy }
|
641 |
+
class ChaiParameters(BaseModel):
|
642 |
+
"""Chai inference parameters."""
|
643 |
+
|
644 |
+
seed: int = 0
|
645 |
+
num_trunk_recycles: int = 3
|
646 |
+
num_diffn_timesteps: int = 200
|
647 |
+
recycle_msa_subsample: int = 0
|
648 |
+
num_trunk_samples: int = 1
|
649 |
+
restraints: str | None = None
|
650 |
+
use_msa_server: bool = False
|
651 |
+
use_templates_server: bool = False
|
652 |
+
custom_msa_paths: dict[str, str] | None = None
|
653 |
+
```
|
654 |
+
|
655 |
+
---
|
656 |
+
|
657 |
+
## `ProtenixQuery`
|
658 |
+
|
659 |
+
**Description:**
|
660 |
+
|
661 |
+
The `ProtenixQuery` class is used to query the Protenix prediction API with FASTA data.
|
662 |
+
|
663 |
+
### Class methods:
|
664 |
+
|
665 |
+
#### `from_protein_sequence`
|
666 |
+
Creates a `ProtenixQuery` instance from a protein sequence given as a string.
|
667 |
+
|
668 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
669 |
+
| --------- | ----------- | ---------- | ------------- |
|
670 |
+
| sequence | Input sequence to make prediction on. | str | |
|
671 |
+
| parameters | Prediction parameters.| ProtenixParameters | Default values below. |
|
672 |
+
|
673 |
+
with
|
674 |
+
|
675 |
+
```{ .python .no-copy }
|
676 |
+
class ProtenixParameters(BaseModel):
|
677 |
+
"""Protenix inference parameters."""
|
678 |
+
|
679 |
+
seeds: str = Field(alias="seed", default="0", coerce_numbers_to_str=True)
|
680 |
+
use_msa_server: bool = True
|
681 |
+
```
|
682 |
+
|
683 |
+
#### `from_file`
|
684 |
+
Creates a `ProtenixQuery` instance from a FASTA file located at the given path.
|
685 |
+
|
686 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
687 |
+
| --------- | ----------- | ---------- | ------------- |
|
688 |
+
| path | Path to the FASTA file. | str or Path | |
|
689 |
+
| parameters | Prediction parameters.| ProtenixParameters | Default values below. |
|
690 |
+
|
691 |
+
with
|
692 |
+
|
693 |
+
```{ .python .no-copy }
|
694 |
+
class ProtenixParameters(BaseModel):
|
695 |
+
"""Protenix inference parameters."""
|
696 |
+
|
697 |
+
seeds: str = Field(alias="seed", default="0", coerce_numbers_to_str=True)
|
698 |
+
use_msa_server: bool = True
|
699 |
+
```
|
700 |
+
|
701 |
+
#### `from_directory`
|
702 |
+
Creates a `ProtenixQuery` instance from a directory containing multiple FASTA files.
|
703 |
+
|
704 |
+
| PARAMETER | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |
|
705 |
+
| --------- | ----------- | ---------- | ------------- |
|
706 |
+
| path | Path to the directory containing FASTA files.| str or Path| |
|
707 |
+
| parameters | Prediction parameters.| ProtenixParameters | Default values below. |
|
708 |
+
|
709 |
+
with
|
710 |
+
|
711 |
+
```{ .python .no-copy }
|
712 |
+
class ProtenixParameters(BaseModel):
|
713 |
+
"""Protenix inference parameters."""
|
714 |
+
|
715 |
+
seeds: str = Field(alias="seed", default="0", coerce_numbers_to_str=True)
|
716 |
+
use_msa_server: bool = True
|
717 |
+
```
|
718 |
+
|
719 |
+
---
|
folding-studio/docs/docs/tutorials/index.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tutorials
|
2 |
+
The Tutorials section of the documentation provides step-by-step guides on installing and using Folding Studio for protein structure prediction.
|
3 |
+
|
4 |
+
It is structured into the following subsections:
|
5 |
+
|
6 |
+
1. [Install Folding Studio](./installation.md)
|
7 |
+
1. [Make a single prediction using AlphaFold2](./single_folding_job_af2.md)
|
8 |
+
1. [Perform a Multiple Sequence Alignment (MSA) search](./msa_search.md)
|
9 |
+
1. [Preview - Run folding jobs with AlphaFold3-like models](./single_folding_job_af3.md)
|
folding-studio/docs/docs/tutorials/installation.md
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Prerequisites
|
2 |
+
|
3 |
+
1. Install the `gcloud` CLI following the official Google Cloud [instructions page](https://cloud.google.com/sdk/docs/install).
|
4 |
+
1. Create and activate a virtual environment, for example `folding-pipeline`, using your favourite tool (conda, pyenv, venv, ...). **Only Python version `3.11` is supported**.
|
5 |
+
1. It is recommended to make `gcloud` use the same Python interpreter as your virtual environment:
|
6 |
+
|
7 |
+
```bash
|
8 |
+
# Use a Python you have installed in a special location
|
9 |
+
export CLOUDSDK_PYTHON=<path/to/python/interpreter>
|
10 |
+
```
|
11 |
+
|
12 |
+
1. Authenticate to `gcloud`:
|
13 |
+
|
14 |
+
```bash
|
15 |
+
gcloud auth application-default login
|
16 |
+
```
|
17 |
+
|
18 |
+
1. To simplify the `gcloud` authentication process when you install a Python package from InstaDeep's Google Artifact Registry, install the `keyrings.google-artifactregistry-auth` package in your virtual environment:
|
19 |
+
|
20 |
+
```bash
|
21 |
+
pip install keyrings.google-artifactregistry-auth
|
22 |
+
```
|
23 |
+
|
24 |
+
It automatically searches for credentials from the environment and authenticates to Artifact Registry. Otherwise, you will have to specify your GCP credentials during the package installation.
|
25 |
+
|
26 |
+
1. Provide a project code
|
27 |
+
To submit folding jobs, you need to provide a project code. Ensure that you use the project code that corresponds to your project or Statement of Work (SOW).
|
28 |
+
|
29 |
+
- Projects/SOW codes are communicated to PMs.
|
30 |
+
- The project code can be defined as an environment variable or passed as an argument
|
31 |
+
to the CLI
|
32 |
+
|
33 |
+
```bash
|
34 |
+
export FOLDING_PROJECT_CODE=<your_project_code>
|
35 |
+
```
|
36 |
+
|
37 |
+
1. If you do not possess an InstaDeep account or you intend to use Folding Studio on a server or with a service account, you are able to authenticate using an API key.
|
38 |
+
|
39 |
+
- API keys are generated on demand, please contact the Folding Studio team.
|
40 |
+
- The API key must be defined as an environment variable.
|
41 |
+
|
42 |
+
|
43 |
+
```bash
|
44 |
+
export FOLDING_API_KEY=<your_api_key>
|
45 |
+
```
|
46 |
+
|
47 |
+
## <a name="folding-cli-lib"></a> CLI and `folding_studio` library
|
48 |
+
|
49 |
+
To install the CLI and `folding-studio` library of helper functions, simply use pip in your virtual environment:
|
50 |
+
|
51 |
+
```bash
|
52 |
+
pip install --extra-index-url https://europe-west4-python.pkg.dev/instadeep/folding-studio/simple folding-studio
|
53 |
+
```
|
54 |
+
|
55 |
+
This package will install both the CLI, available under the `folding` command, and the `folding_studio` suite of helper functions available in Python.
|
56 |
+
|
57 |
+
## Troubleshooting
|
58 |
+
|
59 |
+
### While installing the CLI I get prompted `User for http://europe-west4-python.pkg.dev/:`
|
60 |
+
|
61 |
+
This message means that the `gcloud` credentials were not found. Please refer to steps 4 and 5 of [Prerequisites](#prerequisites).
|
folding-studio/docs/docs/tutorials/msa_search.md
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
In this tutorial, we will guide you through the process of submitting an MSA (Multiple Sequence Alignment) search job using the command-line interface (CLI). You will learn how to prepare your sequence input file and submit the job to retrieve the aligned sequences.
|
2 |
+
|
3 |
+
An MSA search helps in finding similar sequences in biological databases, allowing you to analyze protein structures and relationships more effectively. We support two types of sequence submissions: monomer and multimer. The process automatically detects the sequence type, so you can focus on your analysis.
|
4 |
+
|
5 |
+
## Input data
|
6 |
+
To submit an MSA search job, you need a sequence input file in
|
7 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your
|
8 |
+
protein sequence as well as the msa search method. You can use the following monomer.
|
9 |
+
|
10 |
+
=== "monomer"
|
11 |
+
|
12 |
+
```text
|
13 |
+
>SARS-CoV-2|RBD|Omicron variant
|
14 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
15 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
16 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
17 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
18 |
+
```
|
19 |
+
|
20 |
+
## Submit an MSA search job
|
21 |
+
You simply use the `msa` command of the CLI to submit your job.
|
22 |
+
|
23 |
+
=== ":octicons-command-palette-16: CLI"
|
24 |
+
|
25 |
+
```bash
|
26 |
+
folding msa search path/to/my/file.fasta --project-code "your-project-code"
|
27 |
+
```
|
28 |
+
|
29 |
+
=== ":material-language-python: Python"
|
30 |
+
|
31 |
+
```python
|
32 |
+
from folding_studio.commands.msa import search
|
33 |
+
|
34 |
+
search(source="path/to/my/file.fasta", project_code="your-project-code")
|
35 |
+
```
|
36 |
+
|
37 |
+
## Identify the `experiment_id` of your search job
|
38 |
+
|
39 |
+
Your experiment is associated with a unique `experiment_id`.
|
40 |
+
|
41 |
+
You get the list of your experiment ids that succeeded or are still pending
|
42 |
+
using:
|
43 |
+
|
44 |
+
=== ":octicons-command-palette-16: CLI"
|
45 |
+
|
46 |
+
```bash
|
47 |
+
folding msa experiment list
|
48 |
+
```
|
49 |
+
|
50 |
+
=== ":material-language-python: Python"
|
51 |
+
|
52 |
+
```python
|
53 |
+
from folding_studio.commands.msa import list
|
54 |
+
|
55 |
+
list()
|
56 |
+
```
|
57 |
+
|
58 |
+
|
59 |
+
Once you have submitted an MSA search job, you can get its status at any time.
|
60 |
+
|
61 |
+
=== ":octicons-command-palette-16: CLI"
|
62 |
+
|
63 |
+
```bash
|
64 |
+
folding msa experiment status b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9
|
65 |
+
```
|
66 |
+
|
67 |
+
=== ":material-language-python: Python"
|
68 |
+
|
69 |
+
```python
|
70 |
+
from folding_studio.commands.msa import status
|
71 |
+
|
72 |
+
status(msa_exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9")
|
73 |
+
```
|
74 |
+
|
75 |
+
The experiment status is the current state of the experiment.
|
76 |
+
|
77 |
+
| VALUE | DESCRIPTION |
|
78 |
+
| ----------- | ------------------------------------------------------------------------------- |
|
79 |
+
| `Done` | The experiment is done and its features and results are available for download. |
|
80 |
+
| `Pending` | The experiment is still ongoing. |
|
81 |
+
| `Failed` | The experiment has failed. |
|
82 |
+
| `Cancelled` | The experiment was cancelled.                                                   |
|
83 |
+
|
84 |
+
## Download Results
|
85 |
+
|
86 |
+
You download the search job results by running the following command.
|
87 |
+
|
88 |
+
=== ":octicons-command-palette-16: CLI"
|
89 |
+
|
90 |
+
```bash
|
91 |
+
folding msa experiment features b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./features_exp_b21b09.zip
|
92 |
+
```
|
93 |
+
|
94 |
+
=== ":material-language-python: Python"
|
95 |
+
|
96 |
+
```python
|
97 |
+
from pathlib import Path
|
98 |
+
from folding_studio.commands.msa import features
|
99 |
+
|
100 |
+
features(msa_exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./features_exp_b21b09.zip"))
|
101 |
+
```
|
102 |
+
|
103 |
+
Here is an example of the zip file structure for a monomer:
|
104 |
+
|
105 |
+
``` { .shell .no-copy }
|
106 |
+
extracted_experiment_features_zip
|
107 |
+
├── msas
|
108 |
+
│ ├── mgnify_hits.a3m
|
109 |
+
│ ├── pdb_hits.hhr
|
110 |
+
│ ├── small_bfd_hits.a3m
|
111 |
+
│ └── uniref90_hits.a3m
|
112 |
+
├── msa_coverage.json
|
113 |
+
└── logs.txt
|
114 |
+
|
115 |
+
```
|
folding-studio/docs/docs/tutorials/single_folding_job_af2.md
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
In this tutorial, you will learn how to run predictions using the AlphaFold2 model.
|
3 |
+
Once the job is submitted, you can track its status, retrieve results, and access various metrics from the generated predictions. To gain a deeper understanding of the different inference parameters and how to fine-tune them for your needs, we recommend reviewing the [How-to guides in the AlphaFold2 and OpenFold section](./../how-to-guides/af2_openfold/provide_input_data.md) for detailed examples and usage tips.
|
4 |
+
|
5 |
+
## Input data
|
6 |
+
|
7 |
+
To submit a folding job, you need a sequence input file in
|
8 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your
|
9 |
+
protein sequence. You can use the following monomer.
|
10 |
+
|
11 |
+
=== "monomer"
|
12 |
+
|
13 |
+
```text
|
14 |
+
>SARS-CoV-2|RBD|Omicron variant
|
15 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
16 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
17 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
18 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
19 |
+
```
|
20 |
+
|
21 |
+
## Submit job
|
22 |
+
|
23 |
+
=== ":octicons-command-palette-16: CLI"
|
24 |
+
|
25 |
+
```bash
|
26 |
+
folding predict af2 path/to/my/file.fasta --project-code "your-project-code"
|
27 |
+
```
|
28 |
+
|
29 |
+
=== ":material-language-python: Python"
|
30 |
+
|
31 |
+
```python
|
32 |
+
from pathlib import Path
|
33 |
+
from folding_studio.commands.predict import af2 as af2_predict
|
34 |
+
|
35 |
+
af2_predict(source=Path("path/to/my/file.fasta"), project_code="your-project-code")
|
36 |
+
```
|
37 |
+
|
38 |
+
Using the CLI, you will get the following information once the job has been successfully submitted.
|
39 |
+
|
40 |
+
``` { .shell .no-copy }
|
41 |
+
Single job successfully submitted.
|
42 |
+
Experiment submitted successfully !
|
43 |
+
The experiment id is b938c1adaec932e8a6ba765c80144492b6a3f1e6
|
44 |
+
Prediction job metadata written to simple_prediction_20250305172707.json
|
45 |
+
You can query your experiment status with the command:
|
46 |
+
|
47 |
+
folding experiment status b938c1adaec932e8a6ba765c80144492b6a3f1e6
|
48 |
+
```
|
49 |
+
|
50 |
+
For details about the inference parameters / flags for each model check the [reference section](../reference/cli.md#predict).
|
51 |
+
|
52 |
+
## Identify the `experiment_id` of your job
|
53 |
+
|
54 |
+
Your experiment is associated with a unique `experiment_id`.
|
55 |
+
|
56 |
+
You get the list of the experiment ids that succeeded or are still pending
|
57 |
+
using:
|
58 |
+
|
59 |
+
=== ":octicons-command-palette-16: CLI"
|
60 |
+
|
61 |
+
```bash
|
62 |
+
folding experiment list
|
63 |
+
```
|
64 |
+
|
65 |
+
=== ":material-language-python: Python"
|
66 |
+
|
67 |
+
```python
|
68 |
+
from folding_studio.commands.experiment import list
|
69 |
+
|
70 |
+
list()
|
71 |
+
```
|
72 |
+
|
73 |
+
You will get a table with the status of the job you just launched:
|
74 |
+
|
75 |
+
``` { .shell .no-copy }
|
76 |
+
Done and pending experiments list written to None
|
77 |
+
Done and pending experiments
|
78 |
+
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━┓
|
79 |
+
┃ Experiment ID ┃ Status ┃
|
80 |
+
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━┩
|
81 |
+
│ b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 │Pending │
|
82 |
+
└──────────────────────────────────────────┴────────┘
|
83 |
+
```
|
84 |
+
|
85 |
+
## Get your job status
|
86 |
+
|
87 |
+
=== ":octicons-command-palette-16: CLI"
|
88 |
+
|
89 |
+
```bash
|
90 |
+
folding experiment status b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9
|
91 |
+
```
|
92 |
+
|
93 |
+
=== ":material-language-python: Python"
|
94 |
+
|
95 |
+
```python
|
96 |
+
from folding_studio.commands.experiment import status
|
97 |
+
|
98 |
+
status(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9")
|
99 |
+
```
|
100 |
+
|
101 |
+
The experiment status is the current state of the experiment.
|
102 |
+
|
103 |
+
| VALUE | DESCRIPTION |
|
104 |
+
| ----------- | ------------------------------------------------------------------------------- |
|
105 |
+
| `Done` | The experiment is done and its features and results are available for download. |
|
106 |
+
| `Pending` | The experiment is still ongoing. |
|
107 |
+
| `Failed` | The experiment has failed. |
|
108 |
+
| `Cancelled` | The experiment was cancelled. |
|
109 |
+
|
110 |
+
## Download results
|
111 |
+
|
112 |
+
After your experiment has finished, you can download the results zip file.
|
113 |
+
|
114 |
+
=== ":octicons-command-palette-16: CLI"
|
115 |
+
|
116 |
+
```bash
|
117 |
+
folding experiment results b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9 --output ./result_exp_b21b09.zip
|
118 |
+
```
|
119 |
+
|
120 |
+
=== ":material-language-python: Python"
|
121 |
+
|
122 |
+
```python
|
123 |
+
from pathlib import Path
|
124 |
+
from folding_studio.commands.experiment import results
|
125 |
+
|
126 |
+
results(exp_id="b21b09a6a43dcfb282bdc00ec79bd7ae06de97b9", output=Path("./result_exp_b21b09.zip"))
|
127 |
+
```
|
128 |
+
|
129 |
+
You will get the message:
|
130 |
+
|
131 |
+
```bash
|
132 |
+
File downloaded successfully to result_exp_b21b09.zip.
|
133 |
+
```
|
134 |
+
|
135 |
+
Here is an example of the zip file structure :
|
136 |
+
|
137 |
+
``` { .shell .no-copy }
|
138 |
+
results
|
139 |
+
├── metrics_per_model.json
|
140 |
+
├── msa_coverage.json
|
141 |
+
├── relaxed_predictions
|
142 |
+
│ ├── model_1_ptm.pdb
|
143 |
+
│ ├── model_2_ptm.pdb
|
144 |
+
│ ├── model_3_ptm.pdb
|
145 |
+
│ ├── model_4_ptm.pdb
|
146 |
+
│ └── model_5_ptm.pdb
|
147 |
+
├── rmsd_per_model.json
|
148 |
+
└── unrelaxed_predictions
|
149 |
+
├── model_1_ptm.pdb
|
150 |
+
├── model_2_ptm.pdb
|
151 |
+
├── model_3_ptm.pdb
|
152 |
+
├── model_4_ptm.pdb
|
153 |
+
└── model_5_ptm.pdb
|
154 |
+
```
|
folding-studio/docs/docs/tutorials/single_folding_job_af3.md
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
In this tutorial, you will learn how to run the **Boltz-1**, **Chai-1** and **Protenix** models, which are all AlphaFold3-like models. The features of these models differ slightly from those of the AlphaFold2 and OpenFold models.
|
2 |
+
To help you get started, we recommend exploring the other How-to guides in the AlphaFold3-like models section for more detailed examples and usage instructions tailored to each model.
|
3 |
+
|
4 |
+
This tutorial focuses on using the Boltz-1 model.
|
5 |
+
|
6 |
+
## Input data
|
7 |
+
|
8 |
+
To submit a folding job, you need a sequence input file in
|
9 |
+
[`FASTA`](https://en.wikipedia.org/wiki/FASTA_format) format containing your
|
10 |
+
protein sequence. You can use the following monomer.
|
11 |
+
|
12 |
+
=== "monomer"
|
13 |
+
|
14 |
+
```text
|
15 |
+
>SARS-CoV-2|RBD|Omicron variant
|
16 |
+
RVQPTESIVRFPNITNLCPFDEVFNATRFASVYAWNRKRISNCVADYSVLYNLAPFFTFK
|
17 |
+
CYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGNIADYNYKLPDDFTGCVIAWNS
|
18 |
+
NKLDSKVSGNYNYLYRLFRKSNLKPFERDISTEIYQAGNKPCNGVAGFNCYFPLRSYSFR
|
19 |
+
PTYGVGHQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNF
|
20 |
+
```
|
21 |
+
|
22 |
+
## Submit job and get results
|
23 |
+
|
24 |
+
You use the `predict boltz` command of the CLI. You can then add the different inference parameters as flags. In particular, the `--output-path` flag allows you to specify the path of the zip file that will contain the results.
|
25 |
+
|
26 |
+
For details about the inference parameters / flags for each model check the [reference section](../reference/cli.md#predict).
|
27 |
+
|
28 |
+
=== ":octicons-command-palette-16: CLI"
|
29 |
+
|
30 |
+
```bash
|
31 |
+
folding predict boltz path/to/my/file.fasta --project-code "your-project-code" --output-path ./output.zip --seed 42
|
32 |
+
```
|
33 |
+
|
34 |
+
=== ":material-language-python: Python"
|
35 |
+
|
36 |
+
```python
|
37 |
+
from folding_studio.client import Client
|
38 |
+
from folding_studio.query.boltz import BoltzQuery
|
39 |
+
|
40 |
+
inference_parameters = {"project_code": "your-project-code",
|
41 |
+
"seed":42}
|
42 |
+
|
43 |
+
file_path = "path/to/my/file.fasta"
|
44 |
+
|
45 |
+
# Create client
|
46 |
+
client = Client.from_jwt()
|
47 |
+
|
48 |
+
# Define query
|
49 |
+
query = BoltzQuery.from_file(path=file_path, parameters=inference_parameters)
|
50 |
+
|
51 |
+
# Send request
|
52 |
+
response = client.send_request(query)
|
53 |
+
|
54 |
+
# Download results
|
55 |
+
output_path = "./output.zip"
|
56 |
+
response.download_results(output_path, force=True, unzip=True)
|
57 |
+
```
|
58 |
+
|
59 |
+
Using the CLI, you will get the following information if the job was successfully submitted.
|
60 |
+
|
61 |
+
``` { .shell .no-copy }
|
62 |
+
╭───────────────────────────────╮
|
63 |
+
│ 🧬 Boltz1 Folding submission │
|
64 |
+
╰───────────────────────────────╯
|
65 |
+
🔑 Authenticating client ✅
|
66 |
+
📦 Generating query ✅
|
67 |
+
Generated query: {
|
68 |
+
"fasta_files": {
|
69 |
+
"file": ">A|protein|\nQLEDSEVEAVAKGLEEMYANGVTEDNFKNYVKNNFAQQEISSVEEELNVNISDSCVANKIKDEFFAMISISAIVKAAQKKAWKELAVTVLRFAKANGLKTNAIIVAGQLALWAVQCG"
|
70 |
+
},
|
71 |
+
"yaml_files": {},
|
72 |
+
"parameters": {
|
73 |
+
"seed": 42,
|
74 |
+
"recycling_steps": 3,
|
75 |
+
"sampling_steps": 200,
|
76 |
+
"diffusion_samples": 1,
|
77 |
+
"step_scale": 1.638,
|
78 |
+
"msa_pairing_strategy": "greedy",
|
79 |
+
"write_full_pae": false,
|
80 |
+
"write_full_pde": false
|
81 |
+
}
|
82 |
+
}
|
83 |
+
🧠 Processing folding job ✅
|
84 |
+
```
|
85 |
+
|
86 |
+
And you will get the following information once the job is completed and the results are downloaded.
|
87 |
+
|
88 |
+
``` { .shell .no-copy }
|
89 |
+
Confidence data: {
|
90 |
+
"prot_73bcabf6-54e5-4762-8745-97e6de0f9c22": {
|
91 |
+
"chains_ptm": {
|
92 |
+
"0": 0.7689766883850098
|
93 |
+
},
|
94 |
+
"complex_ipde": 0,
|
95 |
+
"complex_iplddt": 0,
|
96 |
+
"complex_pde": 0.8002958297729492,
|
97 |
+
"complex_plddt": 0.8243614435195923,
|
98 |
+
"confidence_score": 0.8132845163345337,
|
99 |
+
"iptm": 0,
|
100 |
+
"ligand_iptm": 0,
|
101 |
+
"pair_chains_iptm": {
|
102 |
+
"0": {
|
103 |
+
"0": 0.7689766883850098
|
104 |
+
}
|
105 |
+
},
|
106 |
+
"protein_iptm": 0,
|
107 |
+
"ptm": 0.7689766883850098
|
108 |
+
}
|
109 |
+
}
|
110 |
+
💾 Downloading results to `boltz_results` ✅
|
111 |
+
```
|
folding-studio/docs/generate_cli_docs.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import types
|
3 |
+
from typing import Union, get_args, get_origin, get_type_hints
|
4 |
+
|
5 |
+
import typer
|
6 |
+
from folding_studio.cli import app
|
7 |
+
from typer.models import TyperInfo
|
8 |
+
from typer.utils import get_params_from_function
|
9 |
+
|
10 |
+
|
11 |
+
def extract_base_type(annotation):
|
12 |
+
"""Extract annotation type.
|
13 |
+
|
14 |
+
Handles List[X], Optional[X] and Union[X, Y] cases.
|
15 |
+
"""
|
16 |
+
origin = get_origin(annotation)
|
17 |
+
|
18 |
+
if origin is list:
|
19 |
+
inner_type = get_args(annotation)[0]
|
20 |
+
return (
|
21 |
+
f"List[{inner_type.__name__}]"
|
22 |
+
if hasattr(inner_type, "__name__")
|
23 |
+
else "List[Unknown]"
|
24 |
+
)
|
25 |
+
|
26 |
+
if origin is Union or isinstance(origin, types.UnionType):
|
27 |
+
inner_types = [t.__name__ for t in get_args(annotation) if t is not type(None)]
|
28 |
+
return " | ".join(inner_types) if inner_types else "Unknown"
|
29 |
+
|
30 |
+
if isinstance(annotation, types.UnionType):
|
31 |
+
inner_types = [t.__name__ for t in get_args(annotation) if t is not type(None)]
|
32 |
+
return " | ".join(inner_types) if inner_types else "Unknown"
|
33 |
+
|
34 |
+
return annotation.__name__ if hasattr(annotation, "__name__") else "Unknown"
|
35 |
+
|
36 |
+
|
37 |
+
def update_params_with_default(param_info: dict[str, str]) -> dict[str, str]:
    """
    Update the 'default' value in the params dictionary based on its description.

    Only acts when ``param_info["default"]`` is exactly ``"No default"`` and the
    description contains a ``Default to '<value>'`` phrase. In that case the
    quoted value becomes the new default (with ``<`` and ``>`` HTML-escaped so
    they survive Markdown rendering) and the phrase, when followed by a period,
    is removed from the description.

    Args:
        param_info (dict): A dictionary describing a parameter. It is modified
            in place.

    Returns:
        dict: The updated param_info dictionary (the same object that was passed in).
    """
    if param_info.get("default") != "No default":
        return param_info

    description = param_info.get("description", "")
    # Find a "Default to '<value>'" phrase; the value is whatever sits between
    # the single quotes.
    match = re.search(r"Default to '([^']+)'", description)
    if not match:
        return param_info

    # Drop the "Default to '...'." sentence (only removed when it ends with a period).
    cleaned = re.sub(r"Default to '[^']+'\.\s*", "", description).strip()
    # Keep a single trailing period, but do not turn an empty description into ".".
    if cleaned and not cleaned.endswith("."):
        cleaned += "."
    param_info["description"] = cleaned

    # Escape angle brackets so values such as "<timestamp>" are not swallowed
    # as HTML tags when the Markdown table is rendered.
    default_value = match.group(1)
    param_info["default"] = default_value.replace("<", "&lt;").replace(">", "&gt;")
    return param_info
|
68 |
+
|
69 |
+
|
70 |
+
def extract_command_info(command: typer.models.CommandInfo):
    """Collect the documentation-relevant metadata of a registered Typer command.

    Args:
        command: The command whose callback signature and docstring are inspected.

    Returns:
        dict: A dictionary with the keys ``"docstring"`` (single-line docstring
        with URLs wrapped in ``<...>``), ``"name"`` (explicit command name or
        the callback's function name), ``"params"`` (parameters that are not
        Typer options) and ``"options"`` (Typer options, each with a
        ``"default"`` entry).
    """
    func = command.callback
    parameters = get_params_from_function(func)

    hints = get_type_hints(func)
    docstring = func.__doc__ or "No docstring provided."
    # Flatten to a single line so the text fits in generated Markdown.
    docstring = docstring.replace("\n", " ")
    # Wrap bare URLs in angle brackets so they render as links.
    docstring = re.sub(r"(https?://\S+)", r"<\1>", docstring)
    command_info = {
        "docstring": docstring,
        "name": command.name if command.name else func.__name__,
        "params": [],
        "options": [],
    }

    for name, param in parameters.items():
        raw_type = hints.get(name, None)
        base_type = extract_base_type(raw_type)
        # A missing help text must stay empty (not the string "None") so that
        # callers can substitute their own "No description" placeholder.
        help_text = param.default.help
        description = str(help_text).replace("\n", " ") if help_text else ""
        param_info = {
            "name": name,
            "type": base_type,
            "description": description,
        }

        if isinstance(param.default, typer.models.OptionInfo):
            # Options are displayed as --kebab-case flags; booleans also list
            # their negated --no-<flag> form.
            name = name.replace("_", "-")
            if base_type == "bool":
                name = name + f" / --no-{name}"
            name = f"--{name}"
            param_info["name"] = name
            default_value = param.default.default
            if param_info["type"] == "bool":
                # For booleans the default is shown as whichever flag is active.
                values = param_info["name"].split(" / ")
                default_value = values[0] if param.default.default else values[1]
            if default_value is Ellipsis:
                # Ellipsis is mapped to the "No default" placeholder.
                default_value = "No default"
            param_info["default"] = default_value

            # if No default, check if default value is in description
            param_info = update_params_with_default(param_info)

            command_info["options"].append(param_info)
        else:
            command_info["params"].append(param_info)

    return command_info
|
118 |
+
|
119 |
+
|
120 |
+
def generate_markdown_level_docs(
    f, group: TyperInfo, cli_name: str, level=1, base_name=None
):
    """Write the Markdown reference for one command group and, recursively, its subgroups.

    Args:
        f: Writable text file object receiving the Markdown.
        group: The Typer group to document.
        cli_name: Name of the CLI executable, used as prefix in usage lines.
        level: Markdown heading depth used for the group title; commands are
            written one level deeper.
        base_name: Space-separated path of the parent groups, or ``None`` for
            a top-level group.
    """
    base_level = "#" * level
    # Full path of this group (parents included), e.g. "msa experiment".
    group_path = f"{base_name + ' ' if base_name else ''}{group.name}"
    f.write(f"{base_level} `{group_path}`\n")

    for subcommand in group.typer_instance.registered_commands:
        subcommand_info = extract_command_info(subcommand)
        subcommand_name = (
            subcommand.name
            if subcommand.name is not None
            else subcommand.callback.__name__
        )
        command_name = f"{group_path} {subcommand_name}"
        f.write(f"{base_level}# `{command_name}`\n\n")

        f.write(f"{subcommand_info['docstring']}\n\n")

        usage = "**Usage**:\n\n"
        usage += "```console\n"
        usage += f"{cli_name} {command_name}{' [OPTIONS]' if subcommand_info['options'] else ''}"
        if subcommand_info["params"]:
            usage += f" {' '.join(param['name'].upper() for param in subcommand_info['params'])}"
        usage += "\n```\n\n"
        f.write(usage)

        if subcommand_info["params"]:
            # Arguments
            f.write("**Arguments**:\n\n")
            f.write("| ARGUMENT | DESCRIPTION | VALUE TYPE |\n")
            f.write("| -------- | ----------- | ----------- |\n")
            for param in subcommand_info["params"]:
                # Substitute placeholders for missing description / type.
                param["description"] = (
                    param["description"]
                    if param.get("description")
                    else "No description"
                )
                param["type"] = param["type"] if param["type"] else "No type"

                f.write(
                    f"| {param['name'].upper()} | {param['description']} | {param['type']} |\n"
                )

            f.write("\n")

        # Options
        if subcommand_info["options"]:
            f.write("**Options**:\n\n")
            f.write("| OPTIONS | DESCRIPTION | VALUE TYPE | DEFAULT VALUE |\n")
            f.write("| ------- | ----------- | ---------- | ------------- |\n")
            for param in subcommand_info["options"]:
                param["description"] = (
                    param["description"]
                    if param.get("description")
                    else "No description"
                )
                param["type"] = param["type"] if param["type"] else "No type"
                param["default"] = (
                    param["default"] if param["default"] is not None else "No default"
                )
                # if No default, check if default value is in description
                param = update_params_with_default(param)
                f.write(
                    f"| {param['name']} | {param['description']} | {param['type']} | {param['default']} |\n"
                )

            f.write("\n")

    # Pass the accumulated path down so deeply nested groups keep every
    # parent segment in their headings and usage lines.
    for subgroup in group.typer_instance.registered_groups:
        generate_markdown_level_docs(f, subgroup, cli_name, level + 1, group_path)
|
191 |
+
|
192 |
+
|
193 |
+
def generate_markdown_docs() -> None:
    """
    Generate markdown documentation for all registered commands and subcommands in the application.
    The documentation will include descriptions, arguments, and options.

    The generated markdown is saved in the 'docs/reference/cli.md' file
    (path relative to the current working directory). The "key" group is
    deliberately left out of the generated reference.
    """
    # Write as UTF-8 explicitly so the output does not depend on the
    # platform's default encoding.
    with open("docs/reference/cli.md", "w", encoding="utf-8") as f:
        # Iterate through each group in app.registered_groups
        for group in app.registered_groups:
            if group.name == "key":
                continue
            # Top-level groups start at heading level 2.
            generate_markdown_level_docs(f, group, "folding", 2)


if __name__ == "__main__":
    generate_markdown_docs()
|
folding-studio/docs/mkdocs.yml
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
extra_css:
|
2 |
+
- css/main.css
|
3 |
+
markdown_extensions:
|
4 |
+
- admonition
|
5 |
+
- pymdownx.details
|
6 |
+
- pymdownx.superfences:
|
7 |
+
custom_fences:
|
8 |
+
- class: mermaid
|
9 |
+
format: !!python/name:pymdownx.superfences.fence_code_format ''
|
10 |
+
name: mermaid
|
11 |
+
- pymdownx.tabbed:
|
12 |
+
alternate_style: true
|
13 |
+
- attr_list
|
14 |
+
- md_in_html
|
15 |
+
- pymdownx.emoji:
|
16 |
+
emoji_generator: !!python/name:material.extensions.emoji.to_svg ''
|
17 |
+
emoji_index: !!python/name:material.extensions.emoji.twemoji ''
|
18 |
+
nav:
|
19 |
+
- index.md
|
20 |
+
- Tutorials:
|
21 |
+
- tutorials/index.md
|
22 |
+
- Install Folding Studio: tutorials/installation.md
|
23 |
+
- Run AlphaFold2 on a protein sequence: tutorials/single_folding_job_af2.md
|
24 |
+
- Perform a Multiple Sequence Alignment (MSA) search: tutorials/msa_search.md
|
25 |
+
- Preview - Run folding jobs with AlphaFold3-like models: tutorials/single_folding_job_af3.md
|
26 |
+
- How-to guides:
|
27 |
+
- how-to-guides/index.md
|
28 |
+
- AlphaFold2/OpenFold:
|
29 |
+
- Provide Input Data: how-to-guides/af2_openfold/provide_input_data.md
|
30 |
+
- Launch a Folding Job using AlphaFold2: how-to-guides/af2_openfold/single_af2_job.md
|
31 |
+
- Launch a Folding Job using OpenFold: how-to-guides/af2_openfold/single_openfold_job.md
|
32 |
+
- Launch a Folding Job with custom parameters: how-to-guides/af2_openfold/set_af_folding_parameters.md
|
33 |
+
- Launch a batch Folding Job from a configuration file: how-to-guides/af2_openfold/batch_job_from_configuration_file.md
|
34 |
+
- Launch a batch Folding Job from a directory of fasta files: how-to-guides/af2_openfold/batch_job_from_directory.md
|
35 |
+
- Check Job Status: how-to-guides/af2_openfold/fetch_folding_job_status.md
|
36 |
+
- Download Job Logs: how-to-guides/af2_openfold/download_logs.md
|
37 |
+
- Cancel a Folding Job submission: how-to-guides/af2_openfold/cancel_experiment.md
|
38 |
+
- Retrieve Features from a Folding Job: how-to-guides/af2_openfold/get_experiment_features.md
|
39 |
+
- Download results of a folding job: how-to-guides/af2_openfold/download_prediction_results.md
|
40 |
+
- Advanced Algorithms:
|
41 |
+
- Launch a Folding Job using MSA subsampling: how-to-guides/af2_openfold/advanced_algorithms/msa_subsampling_job.md
|
42 |
+
- Launch a Folding Job using the Gap Trick for Folding Multimer Complexes: how-to-guides/af2_openfold/advanced_algorithms/gap_trick_job.md
|
43 |
+
- Launch a Folding Job using an Initial Guess Structure in AlphaFold2: how-to-guides/af2_openfold/advanced_algorithms/initial_guess_af2.md
|
44 |
+
- Launch a Folding Job applying Template Masking in Gap Trick Mode: how-to-guides/af2_openfold/advanced_algorithms/template_masking_job.md
|
45 |
+
- Preview - Launch a folding job using SoloSeq model: how-to-guides/af2_openfold/soloseq_job.md
|
46 |
+
- Preview - AlphaFold3-like:
|
47 |
+
- Provide Input Data: how-to-guides/af3/provide_input_data.md
|
48 |
+
- Launch a Single Job using Boltz-1: how-to-guides/af3/single_job_boltz.md
|
49 |
+
- Launch a Single Job using Chai-1: how-to-guides/af3/single_job_chai.md
|
50 |
+
- Launch a Single Job using Protenix: how-to-guides/af3/single_job_protenix.md
|
51 |
+
- Launch a Single Job from a YAML file using Boltz-1: how-to-guides/af3/boltz_single_yaml_job.md
|
52 |
+
- Launch a Batch Job from a directory: how-to-guides/af3/batch_job_from_directory.md
|
53 |
+
- Launch a Single Job from a Protein Sequence: how-to-guides/af3/single_job_from_protein_sequence.md
|
54 |
+
- Post-processing recipes:
|
55 |
+
- Calculate Interface pLDDT and pAE: how-to-guides/other/pLDDT_pAE_calculation.md
|
56 |
+
- Multiple Sequence Alignment Search:
|
57 |
+
- Provide Input Data for MSA: how-to-guides/msa_search/provide_input_data.md
|
58 |
+
- Launch an MSA Search with MMSeqs2: how-to-guides/msa_search/msa_search_mmseqs2.md
|
59 |
+
- Launch an MSA Search ignoring cache: how-to-guides/msa_search/msa_no_cache.md
|
60 |
+
- Check an MSA Job Status: how-to-guides/msa_search/fetch_msa_job_status.md
|
61 |
+
- Download MSA Job Logs: how-to-guides/msa_search/download_msa_logs.md
|
62 |
+
- Download Results of an MSA Search: how-to-guides/msa_search/download_msa_search_results.md
|
63 |
+
- Explanation:
|
64 |
+
- explanation/index.md
|
65 |
+
- Supported models: explanation/supported_models.md
|
66 |
+
- Advanced algorithms: explanation/advanced_algorithms.md
|
67 |
+
- Reference:
|
68 |
+
- CLI: reference/cli.md
|
69 |
+
- Python Library: reference/python_lib_docs.md
|
70 |
+
plugins:
|
71 |
+
- swagger-ui-tag
|
72 |
+
- search
|
73 |
+
site_name: Folding Studio
|
74 |
+
site_url: https://int-bio-foldingstudio-gcp.nw.r.appspot.com
|
75 |
+
theme:
|
76 |
+
colormode: auto
|
77 |
+
features:
|
78 |
+
- content.code.select
|
79 |
+
- content.code.copy
|
80 |
+
- navigation.indexes
|
81 |
+
- navigation.tracking
|
82 |
+
- navigation.sections
|
83 |
+
- navigation.top
|
84 |
+
highlightjs: true
|
85 |
+
hljs_languages:
|
86 |
+
- python
|
87 |
+
- bash
|
88 |
+
icon:
|
89 |
+
admonition:
|
90 |
+
abstract: octicons/checklist-16
|
91 |
+
bug: octicons/bug-16
|
92 |
+
danger: octicons/zap-16
|
93 |
+
example: octicons/beaker-16
|
94 |
+
failure: octicons/x-circle-16
|
95 |
+
info: octicons/info-16
|
96 |
+
note: octicons/tag-16
|
97 |
+
question: octicons/question-16
|
98 |
+
quote: octicons/quote-16
|
99 |
+
success: octicons/check-16
|
100 |
+
tip: octicons/squirrel-16
|
101 |
+
warning: octicons/alert-16
|
102 |
+
logo: material/dna
|
103 |
+
name: material
|
104 |
+
nav_style: primary
|
105 |
+
palette:
|
106 |
+
- media: '(prefers-color-scheme: light)'
|
107 |
+
scheme: default
|
108 |
+
toggle:
|
109 |
+
icon: material/brightness-7
|
110 |
+
name: Switch to dark mode
|
111 |
+
- media: '(prefers-color-scheme: dark)'
|
112 |
+
scheme: slate
|
113 |
+
toggle:
|
114 |
+
icon: material/brightness-4
|
115 |
+
name: Switch to light mode
|
116 |
+
user_color_mode_toggle: true
|