Hukuna commited on
Commit
e9e75df
·
verified ·
1 Parent(s): 589196f

Upload 275 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +12 -0
  2. __pycache__/demo.cpython-38.pyc +0 -0
  3. app.py +33 -0
  4. chroma/CONTRIBUTING.md +11 -0
  5. chroma/Dockerfile +24 -0
  6. chroma/LICENSE.txt +202 -0
  7. chroma/README.md +255 -0
  8. chroma/Untitled.ipynb +6 -0
  9. chroma/assets/LiberationSans-Regular.ttf +0 -0
  10. chroma/assets/chroma_logo.svg +85 -0
  11. chroma/assets/chroma_logo_outline.svg +109 -0
  12. chroma/assets/conditioners.png +0 -0
  13. chroma/assets/lattice.png +3 -0
  14. chroma/assets/logo.png +0 -0
  15. chroma/assets/proteins.png +3 -0
  16. chroma/assets/refolding.png +3 -0
  17. chroma/chroma/__init__.py +19 -0
  18. chroma/chroma/__pycache__/__init__.cpython-38.pyc +0 -0
  19. chroma/chroma/constants/__init__.py +16 -0
  20. chroma/chroma/constants/__pycache__/__init__.cpython-38.pyc +0 -0
  21. chroma/chroma/constants/__pycache__/geometry.cpython-38.pyc +0 -0
  22. chroma/chroma/constants/__pycache__/named_models.cpython-38.pyc +0 -0
  23. chroma/chroma/constants/__pycache__/sequence.cpython-38.pyc +0 -0
  24. chroma/chroma/constants/geometry.py +558 -0
  25. chroma/chroma/constants/named_models.py +54 -0
  26. chroma/chroma/constants/sequence.py +112 -0
  27. chroma/chroma/data/__init__.py +19 -0
  28. chroma/chroma/data/__pycache__/__init__.cpython-38.pyc +0 -0
  29. chroma/chroma/data/__pycache__/protein.cpython-38.pyc +0 -0
  30. chroma/chroma/data/__pycache__/system.cpython-38.pyc +0 -0
  31. chroma/chroma/data/__pycache__/xcs.cpython-38.pyc +0 -0
  32. chroma/chroma/data/protein.py +513 -0
  33. chroma/chroma/data/system.py +0 -0
  34. chroma/chroma/data/xcs.py +121 -0
  35. chroma/chroma/layers/__init__.py +18 -0
  36. chroma/chroma/layers/__pycache__/__init__.cpython-38.pyc +0 -0
  37. chroma/chroma/layers/__pycache__/attention.cpython-38.pyc +0 -0
  38. chroma/chroma/layers/__pycache__/basic.cpython-38.pyc +0 -0
  39. chroma/chroma/layers/__pycache__/complexity.cpython-38.pyc +0 -0
  40. chroma/chroma/layers/__pycache__/conv.cpython-38.pyc +0 -0
  41. chroma/chroma/layers/__pycache__/graph.cpython-38.pyc +0 -0
  42. chroma/chroma/layers/__pycache__/linalg.cpython-38.pyc +0 -0
  43. chroma/chroma/layers/__pycache__/norm.cpython-38.pyc +0 -0
  44. chroma/chroma/layers/__pycache__/sde.cpython-38.pyc +0 -0
  45. chroma/chroma/layers/attention.py +347 -0
  46. chroma/chroma/layers/basic.py +467 -0
  47. chroma/chroma/layers/complexity.py +201 -0
  48. chroma/chroma/layers/conv.py +58 -0
  49. chroma/chroma/layers/graph.py +1126 -0
  50. chroma/chroma/layers/linalg.py +98 -0
.gitattributes CHANGED
@@ -33,3 +33,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chroma/assets/lattice.png filter=lfs diff=lfs merge=lfs -text
37
+ chroma/assets/proteins.png filter=lfs diff=lfs merge=lfs -text
38
+ chroma/assets/refolding.png filter=lfs diff=lfs merge=lfs -text
39
+ chroma/notebooks/complex_trajectory.cif filter=lfs diff=lfs merge=lfs -text
40
+ chroma/notebooks/shaped_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
41
+ chroma/notebooks/symmetric_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
42
+ chroma/tests/_streamlit/demoapp/complex_trajectory.cif filter=lfs diff=lfs merge=lfs -text
43
+ chroma/tests/_streamlit/demoapp/shaped_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
44
+ chroma/tests/_streamlit/demoapp/symmetric_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
45
+ output/complex_trajectory.cif filter=lfs diff=lfs merge=lfs -text
46
+ output/shaped_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
47
+ output/symmetric_protein_trajectory.cif filter=lfs diff=lfs merge=lfs -text
__pycache__/demo.cpython-38.pyc ADDED
Binary file (10.4 kB). View file
 
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import demo
3
+
4
+ st.set_page_config(
5
+ page_title="Chroma Demos",
6
+ page_icon="🧬",
7
+ layout="wide",
8
+ initial_sidebar_state="expanded",
9
+ )
10
+
11
+ st.title("Demos for Chroma")
12
+
13
+ # sidebar
14
+ st.sidebar.header("Demo Config")
15
+
16
+ # 创建字典映射demo
17
+ demoDict={
18
+ "getProtein":demo.getProteinDemo,
19
+ "complexSample":demo.complexSampleDemo,
20
+ "symmetricSample":demo.symmetricSampleDemo,
21
+ "shapeSample":demo.shapeSampleDemo,
22
+ "foldSample":demo.foldSampleDemo,
23
+ "ssSample":demo.ssSampleDemo,
24
+ "substructureSample":demo.substructureSampleDemo,
25
+
26
+ }
27
+ # 在侧边栏中添加一个选择框,用于选择demo
28
+ selected_branch = st.sidebar.selectbox("Select demo", list(demoDict.keys()))
29
+ style=st.sidebar.selectbox("Select style:Can be 'stick', 'sphere', 'cross','cartoon'",('stick', 'sphere', 'cross','cartoon'),key='style')
30
+ resn=st.sidebar.selectbox("Select display resn:PDB resn labels:['ALA','ARG','LYS','THR','TRP','TYR','VAL']",('','ALA','ARG','LYS','THR','TRP','TYR','VAL'),key='resn')
31
+
32
+ # 执行选定分支对应的函数
33
+ demoDict[selected_branch](style,resn)
chroma/CONTRIBUTING.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code contributions
2
+
3
+ We welcome contributions to the Chroma code base, including new conditioners, integrators, patches, bug fixes, and others.
4
+
5
+ Note that your contributions will be governed by the Apache 2.0 license, meaning that you will be giving us permission to use your contributed code under the conditions specified in the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0) (also available in [LICENSE.txt](LICENSE.txt)).
6
+
7
+ ## How to Contribute
8
+
9
+ Please use GitHub pull requests to contribute code. See
10
+ [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
11
+ information on using pull requests. We will try to monitor incoming requests with some regularity, but cannot promise a specific timeframe within which we will review your request.
chroma/Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 11.3 development image on Ubuntu 20.04 (matches the pinned torch cu113 wheel below).
FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
# Suppress interactive prompts (e.g. tzdata) during apt-get install.
ARG DEBIAN_FRONTEND=noninteractive
# Build toolchain plus image libraries needed to compile Python dependencies;
# apt lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    curl \
    ca-certificates \
    libjpeg-dev \
    libpng-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /tmp

# Install Miniconda into /opt/conda (batch mode, no prompts), then drop the installer.
RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh
# Dedicated "chroma" environment with a pinned Python version.
RUN /opt/conda/bin/conda create --name chroma python=3.9.7
# Install torch built against CUDA 11.3 from the PyTorch wheel index.
RUN /opt/conda/envs/chroma/bin/pip install torch==1.12.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
WORKDIR /workspace
COPY . .
# Install the chroma package itself from the copied source tree.
RUN /opt/conda/envs/chroma/bin/pip install .
# Put the environment's binaries first on PATH so `python`/`pip` resolve to it.
ENV PATH /opt/conda/envs/chroma/bin:$PATH
chroma/LICENSE.txt ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
chroma/README.md ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <img src="assets/chroma_logo_outline.svg" width="280">
2
+
3
+ [**Get Started**](#get-started)
4
+ | [**Sampling**](#sampling)
5
+ | [**Design**](#design)
6
+ | [**Conditioners**](#conditioners)
7
+ | [**License**](#license)
8
+
9
+ Chroma is a generative model for designing proteins **programmatically**.
10
+
11
+ Protein space is complex and hard to navigate. With Chroma, protein design problems are represented in terms of [composable building blocks](#conditioners) from which diverse, [all-atom protein structures can be automatically generated](#sampling). As a joint model of structure and sequence, Chroma can also be used for common protein modeling tasks such as [generating sequences given backbones](#design), packing side-chains, and scoring designs.
12
+
13
+ We provide protein conditioners for a variety of constraints, including substructure, symmetry, shape, and neural-network predictions of some protein classes and annotations. We also provide an API for [creating your own conditioners](#conditioners-api) in a few lines of code.
14
+
15
+ Internally, Chroma uses diffusion modeling, equivariant graph neural networks, and conditional random fields to efficiently sample all-atom structures with a complexity that is sub-quadratic in the number of residues. It can generate large complexes in a few minutes on a commodity GPU. You can read more about Chroma, including biophysical and crystallographic validation of some early designs, in our paper, [*Illuminating protein space with a programmable generative model*. Nature 2023](https://doi.org/10.1038/s41586-023-06728-8).
16
+
17
+ <div align="center">
18
+ <img src="assets/proteins.png" alt="Generated protein examples" width="700px" align="middle"/>
19
+ </div>
20
+
21
+ ## Get Started
22
+ > **Note:** An API key is required to download and use the pretrained model weights. It can be obtained [here](https://chroma-weights.generatebiomedicines.com/).
23
+
24
+
25
+ **Colab Notebooks**. The quickest way to get started with Chroma is our Colab notebooks, which provide starting points for a variety of use cases in a preconfigured, in-browser environment
26
+
27
+ * [Chroma Quickstart](https://colab.research.google.com/github/generatebio/chroma/blob/main/notebooks/ChromaDemo.ipynb): GUI notebook demonstrating unconditional and conditional generation of proteins with Chroma.
28
+ * [Chroma API Tutorial](https://colab.research.google.com/github/generatebio/chroma/blob/main/notebooks/ChromaAPI.ipynb): Code notebook demonstrating protein I/O, sampling, and design configuration directly in `python`.
29
+ * [Chroma Conditioner API Tutorial](https://colab.research.google.com/github/generatebio/chroma/blob/main/notebooks/ChromaConditioners.ipynb): A deeper dive under the hood for implementing new Chroma [Conditioners](#conditioner-api).
30
+
31
+ **PyPI package**. You can install the latest release of Chroma with:
32
+ ```
33
+ pip install generate-chroma
34
+ ```
35
+
36
+ **Install latest Chroma from github**
37
+ ```
38
+ git clone https://github.com/generatebio/chroma.git
39
+ pip install -e chroma # use `-e` for it to be editable locally.
40
+ ```
41
+
42
+ ## Sampling
43
+ **Unconditional monomer**. We provide a unified entry point to both unconditional and conditional protein design with the `Chroma.sample()` method. When no conditioners are specified, we can sample a simple 200-amino acid monomeric protein with
44
+ ```python
45
+ from chroma import Chroma
46
+
47
+ chroma = Chroma()
48
+ protein = chroma.sample(chain_lengths=[200])
49
+
50
+ protein.to("sample.cif")
51
+ display(protein)
52
+ ```
53
+
54
+ Generally, `Chroma.sample()` takes as input design hyperparameters and [Conditioners](#conditioners) and outputs `Protein` objects representing the all-atom structures of protein systems which can be loaded to and from disk in PDB or mmCIF formats.
55
+
56
+ **Unconditional complex**. To sample a complex instead of a monomer, we can simply do
57
+ ```python
58
+ from chroma import Chroma
59
+
60
+ chroma = Chroma()
61
+ protein = chroma.sample(chain_lengths=[100, 200])
62
+
63
+ protein.to("sample-complex.cif")
64
+ ```
65
+
66
+ **Conditional complex**. We can further customize sampling towards design objectives via [Conditioners](#conditioners) and sampling hyperparameters. For example, to sample a C3-symmetric homo-trimer with 100 residues per monomer, we can do
67
+
68
+ ```python
69
+ from chroma import Chroma, conditioners
70
+
71
+ chroma = Chroma()
72
+ conditioner = conditioners.SymmetryConditioner(G="C_3", num_chain_neighbors=2)
73
+ protein = chroma.sample(
74
+ chain_lengths=[100],
75
+ conditioner=conditioner,
76
+ langevin_factor=8,
77
+ inverse_temperature=8,
78
+ sde_func="langevin",
79
+ potts_symmetry_order=conditioner.potts_symmetry_order)
80
+
81
+ protein.to("sample-C3.cif")
82
+ ```
83
+
84
+ Because compositions of conditioners are conditioners, even relatively complex design problems can follow this basic usage pattern. See the [demo notebooks](#get-started) and docstrings for more information on hyperparameters, conditioners, and starting points.
85
+
86
+ ## Design
87
+ **Robust design**. Chroma is a joint model of sequence and structure that uses a common graph neural network base architecture to parameterize both backbone generation and conditional sequence and sidechain generation. These sequence and sidechain decoders are *diffusion-aware* in the sense that they have been trained to predict sequence and side chain not just for natural structures at diffusion time $t=0$ but also on noisy structures at all diffusion times $t \in [0,1]$. As a result, the $t$ hyperparameter of the design network provides a kind of tunable robustness via **diffusion augmentation** in which we trade off between how much the model attempts to design the backbone *exactly* as specified (e.g. $t=0.0$) versus *robust* design within a small neighborhood of nearby backbone conformations (e.g. $t=0.5$).
88
+
89
+ While all results presented in the Chroma [publication](https://doi.org/10.1038/s41586-023-06728-8) were done with **exact design** at $t=0.0$, we have found **robust design** at times near $t=0.5$ frequently improves one-shot refolding while incurring only minor, often Ångstrom-scale, relaxation adjustments to target backbones. When we compare the performance of these two design modes on our set of 50,000 unconditional backbones that were analyzed in the paper, we see very large improvements in refolding across both [AlphaFold](https://github.com/google-deepmind/alphafold) and [ESMFold](https://github.com/facebookresearch/esm) that stratifies well across protein length, percent helicity, or similarity to a known structure (See Chroma [Supplementary Figure 14](https://doi.org/10.1038/s41586-023-06728-8) for further context).
90
+
91
+
92
+ <div align="center">
93
+ <img src="./assets/refolding.png" alt="alt text" width="700px" align="middle"/>
94
+ </div></br>
95
+
96
+ The value of diffusion time conditioning $t$ can be set via the `design_t` parameter in `Chroma.sample` and `Chroma.design`. We find that for generated structures, $t = 0.5$ produces highly robust refolding results and is, therefore, the default setting. For experimentally-precise structures, $t = 0.0$ may be more appropriate, and values in between may provide a useful tradeoff between these two regimes.
97
+
98
+ **Design *a la carte***. Chroma's design network can be accessed separately to design, redesign, and pack arbitrary protein systems. Here we load a protein from the PDB and redesign as
99
+ ```python
100
+ # Redesign a Protein
101
+ from chroma import Protein, Chroma
102
+ chroma = Chroma()
103
+
104
+ protein = Protein('1GFP')
105
+ protein = chroma.design(protein)
106
+
107
+ protein.to("1GFP-redesign.cif")
108
+ ```
109
+
110
+ Clamped sub-sequence redesign is also available and compatible with a built-in selection algebra, along with position- and mutation-specific mask constraints as
111
+ ```python
112
+ # Redesign a Protein
113
+ from chroma import Protein, Chroma
114
+ chroma = Chroma()
115
+
116
+ protein = Protein('my_favorite_protein.cif') # PDB is fine too
117
+ protein = chroma.design(protein, design_selection="resid 20-50 around 5.0") # 5 angstrom bubble around indices 20-50
118
+
119
+ protein.to("my_favorite_protein_redesign.cif")
120
+ ```
121
+
122
+ We provide more examples of design in the [demo notebooks](#get-started).
123
+
124
+ ## Conditioners
125
+ Protein design with Chroma is **programmable**. Our `Conditioner` framework allows for automatic conditional sampling under arbitrary compositions of protein specifications, which can come in the forms of restraints (biasing the distribution of states) or constraints (directly restrict the domain of underlying sampling process); see Supplementary Appendix M in [our paper](https://doi.org/10.1038/s41586-023-06728-8). We have pre-defined multiple conditioners, including for controlling substructure, symmetry, shape, semantics, and natural-language prompts (see `chroma.layers.structure.conditioners`), which can be used in arbitrary combinations.
126
+
127
+ <div align="center">
128
+
129
+ | Conditioner | Class(es) in [`chroma.conditioners`](chroma/layers/structure/conditioners.py) | Example applications |
130
+ |----------|----------|----------|
131
+ | Symmetry constraint | `SymmetryConditioner`, `ScrewConditioner` | Large symmetric assemblies |
132
+ | Substructure constraint | `SubstructureConditioner` | Substructure grafting, scaffold enforcement |
133
+ | Shape restraint | `ShapeConditioner` | Molecular shape control |
134
+ | Secondary structure | `ProClassConditioner` | Secondary-structure specification |
135
+ | Domain classification | `ProClassConditioner` | Specification of class, such as Pfam, CATH, or Taxonomy |
136
+ | Text caption | `ProCapConditioner` | Natural language prompting |
137
+ | Sequence | `SubsequenceConditioner` | Subsequence constraints. |
138
+
139
+ </div>
140
+
141
+ **How it works**. The central idea of Conditioners is *composable state transformations*, where each Conditioner is a function that modifies the state and/or energy of a protein system in a differentiable way ([Supplementary Appendix M](https://doi.org/10.1038/s41586-023-06728-8)). For example, to encode symmetry as a *constraint* we can take as input the asymmetric unit and tessellate it according to the desired symmetry group to output a protein system that is symmetric by construction. To encode something like a neural network restraint, we can adjust the total system energy by the negative log probability of the target condition. For both of these, we add on the diffusion energy to the output of the Conditioner(s) and then backpropagate the total energy through all intermediate transformations to compute the unconstrained forces that are compatible with a generic sampling SDE such as annealed Langevin Dynamics.
142
+
143
+ We schematize this overall Conditioners framework below.
144
+ <div align="center">
145
+ <img src="./assets/conditioners.png" alt="alt text" width="600px" align="middle"/><br>
146
+ <figcaption><i>The <code>Conditioner</code> class is the composable building block of protein design with Chroma.</i></figcaption>
147
+ </div>
148
+
149
+ #### Conditioner API
150
+ It is simple to develop new conditioners. A `Conditioner` is a Pytorch `nn.Module` which takes in the system state - i.e. the structure, energy, and diffusion time - and outputs potentially updated structures and energies as
151
+
152
+ ```python
153
+
154
+ class Conditioner(torch.nn.Module):
155
+ """A composable function for parameterizing protein design problems.
156
+ """
157
+ def __init__(self, *args, **kwargs):
158
+ super().__init__()
159
+ # Setup your conditioner's hyperparameters
160
+
161
+ def forward(
162
+ self,
163
+ X: torch.Tensor, # Input coordinates
164
+ C: torch.LongTensor, # Input chain map (for complexes)
165
+ O: torch.Tensor, # Input sequence (one-hot, not used)
166
+ U: torch.Tensor, # Input energy (one-hot, not used)
167
+ t: Union[torch.Tensor, float], # Diffusion time
168
+ ):
169
+ # Update the state, e.g. map from an unconstrained to constrained manifold
170
+ X_update, C_update = update_state(X, C, t)
171
+
172
+ # Update the energy, e.g. add a restraint potential
173
+ U_update = U + update_energy(X, C, t)
174
+ return X_update, C_update, O, U_update, t
175
+ ```
176
+ Roughly speaking, `Conditioner`s are composable by construction because their input and output type signatures are matched (i.e. they are an endomorphism). So we can simply build conditioners from conditioners by "stacking" them much as we would with traditional neural network layer development. With the final `Conditioner` as an input, `Chroma.sample()` will then leverage Pytorch's automatic differentiation facilities to automatically furnish a diffusion-annealed MCMC sampling algorithm to sample with this conditioner (We note this isn't magic and taking care to scale and parameterize appropriately is [important](#note-on-conditioners)).
177
+
178
+ ##### A minimal Conditioner: 2D lattice symmetry
179
+ The code snippet below shows how in a few lines of code we can add a conditioner that stipulates the generation of a 2D crystal-like object, where generated proteins are arrayed in an `M x N` rectangular lattice.
180
+
181
+ ```python
182
+ import torch
183
+ from chroma.models import Chroma
184
+ from chroma.layers.structure import conditioners
185
+
186
+ class Lattice2DConditioner(conditioners.Conditioner):
187
+ def __init__(self, M, N, cell):
188
+ super().__init__()
189
+ # Setup the coordinates of a 2D lattice
190
+ self.order = M*N
191
+ x = torch.arange(M) * cell[0]
192
+ y = torch.arange(N) * cell[1]
193
+ xx, yy = torch.meshgrid(x, y, indexing="ij")
194
+ dX = torch.stack([xx.flatten(), yy.flatten(), torch.zeros(M * N)], dim=1)
195
+ self.register_buffer("dX", dX)
196
+
197
+ def forward(self, X, C, O, U, t):
198
+ # Tessellate the unit cell on the lattice
199
+ X = (X[:,None,...] + self.dX[None,:,None,None]).reshape(1, -1, 4, 3)
200
+ C = torch.cat([C + C.unique().max() * i for i in range(self.dX.shape[0])], dim=1)
201
+ # Average the gradient across the group (simplifies force scaling)
202
+ X.register_hook(lambda gradX: gradX / self.order)
203
+ return X, C, O, U, t
204
+
205
+ chroma = Chroma().cuda()
206
+ conditioner = Lattice2DConditioner(M=3, N=4, cell=[20., 15.]).cuda()
207
+ protein = chroma.sample(
208
+ chain_lengths=[70], conditioner=conditioner, sde_func='langevin',
209
+ potts_symmetry_order=conditioner.order
210
+ )
211
+
212
+ protein.to_CIF("lattice_protein.cif")
213
+ ```
214
+
215
+ <div align="center">
216
+ <img src="./assets/lattice.png" alt="alt text" width="700px" align="middle"/>
217
+ </div>
218
+
219
+ #### Note on Conditioners
220
+
221
+ An attractive aspect of this conditioner framework is that it is very general, enabling both constraints (which involve operations on $x$) and restraints (which amount to changes to $U$). At the same time, generation under restraints can still be (and often is) challenging, as the resulting effective energy landscape can become arbitrarily rugged and difficult to integrate. We therefore advise caution when using and developing new conditioners or conditioner combinations. We find that inspecting diffusion trajectories (including unconstrained and denoised trajectories, $\hat{x}_t$ and $\tilde{x}_t$) can be a good tool for identifying integration challenges and defining either better conditioner forms or better sampling regimes.
222
+
223
+ ## Citing Chroma
224
+
225
+ If you use Chroma in your research, please cite:
226
+
227
+ J. B. Ingraham, M. Baranov, Z. Costello, K. W. Barber, W. Wang, A. Ismail, V. Frappier, D. M. Lord, C. Ng-Thow-Hing, E. R. Van Vlack, S. Tie, V. Xue, S. C. Cowles, A. Leung, J. V. Rodrigues, C. L. Morales-Perez, A. M. Ayoub, R. Green, K. Puentes, F. Oplinger, N. V. Panwar, F. Obermeyer, A. R. Root, A. L. Beam, F. J. Poelwijk, and G. Grigoryan, "Illuminating protein space with a programmable generative model", *Nature*, 2023 (10.1038/s41586-023-06728-8).
228
+
229
+ ```bibtex
230
+ @Article{Chroma2023,
231
+ author = {Ingraham, John B. and Baranov, Max and Costello, Zak and Barber, Karl W. and Wang, Wujie and Ismail, Ahmed and Frappier, Vincent and Lord, Dana M. and Ng-Thow-Hing, Christopher and Van Vlack, Erik R. and Tie, Shan and Xue, Vincent and Cowles, Sarah C. and Leung, Alan and Rodrigues, Jo\~{a}o V. and Morales-Perez, Claudio L. and Ayoub, Alex M. and Green, Robin and Puentes, Katherine and Oplinger, Frank and Panwar, Nishant V. and Obermeyer, Fritz and Root, Adam R. and Beam, Andrew L. and Poelwijk, Frank J. and Grigoryan, Gevorg},
232
+ journal = {Nature},
233
+ title = {Illuminating protein space with a programmable generative model},
234
+ year = {2023},
235
+ volume = {},
236
+ number = {},
237
+ pages = {},
238
+ doi = {10.1038/s41586-023-06728-8}
239
+ }
240
+ ```
241
+
242
+ ## Acknowledgements
243
+ The Chroma codebase is the work of many contributors at Generate Biomedicines. We would like to acknowledge: Ahmed Ismail, Alan Witmer, Alex Ramos, Alexander Bock, Ameya Harmalkar, Brinda Monian, Craig Mackenzie, Dan Luu, David Moore, Frank Oplinger, Fritz Obermeyer, George Kent-Scheller, Gevorg Grigoryan, Jacob Feala, James Lucas, Jenhan Tao, John Ingraham, Martin Jankowiak, Max Baranov, Meghan Franklin, Mick Ward, Rudraksh Tuwani, Ryan Nelson, Shan Tie, Vincent Frappier, Vincent Xue, William Wolfe-McGuire, Wujie Wang, Zak Costello, Zander Harteveld.
244
+
245
+ ## License
246
+
247
+ Copyright Generate Biomedicines, Inc.
248
+
249
+ ### Chroma Code License
250
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this code except in compliance with the License. You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0.
251
+
252
+ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. See the License for the specific language governing permissions and limitations under the License.
253
+
254
+ ### Model Weights License
255
+ Chroma weights are freely available to academic researchers and non-profit entities who accept and agree to be bound under the terms of the Chroma Parameters License. Please visit the [weights download page](https://chroma-weights.generatebiomedicines.com/) for more information. If you are not eligible to use the Chroma Parameters under the terms of the provided License or if you would like to share the Chroma Parameters and/or otherwise use the Chroma Parameters beyond the scope of the rights granted in the License (including for commercial purposes), you may contact the Licensor at: [email protected].
chroma/Untitled.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
chroma/assets/LiberationSans-Regular.ttf ADDED
Binary file (139 kB). View file
 
chroma/assets/chroma_logo.svg ADDED
chroma/assets/chroma_logo_outline.svg ADDED
chroma/assets/conditioners.png ADDED
chroma/assets/lattice.png ADDED

Git LFS Details

  • SHA256: 6f19bae6a7d8c38dece6bdb8eab384bf319264957a1a8ce85f0eb90e21e2b7b7
  • Pointer size: 132 Bytes
  • Size of remote file: 3.47 MB
chroma/assets/logo.png ADDED
chroma/assets/proteins.png ADDED

Git LFS Details

  • SHA256: 9714927ed591ba22d5815ef24219b493dd40389be3c4c4cda8f830e89de48fe3
  • Pointer size: 132 Bytes
  • Size of remote file: 2.89 MB
chroma/assets/refolding.png ADDED

Git LFS Details

  • SHA256: 2b1db27f48d31963e8ff422ea47b8bfbb2a7d2cd6ab1c344896be3d672840bb3
  • Pointer size: 132 Bytes
  • Size of remote file: 4.18 MB
chroma/chroma/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ __version__ = "1.0.0"
16
+ from chroma.data.protein import Protein
17
+ from chroma.layers.structure import conditioners
18
+ from chroma.models.chroma import Chroma
19
+ from chroma.utility import api
chroma/chroma/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (365 Bytes). View file
 
chroma/chroma/constants/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from chroma.constants.geometry import AA_GEOMETRY
16
+ from chroma.constants.sequence import *
chroma/chroma/constants/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (248 Bytes). View file
 
chroma/chroma/constants/__pycache__/geometry.cpython-38.pyc ADDED
Binary file (6.97 kB). View file
 
chroma/chroma/constants/__pycache__/named_models.cpython-38.pyc ADDED
Binary file (1.24 kB). View file
 
chroma/chroma/constants/__pycache__/sequence.cpython-38.pyc ADDED
Binary file (2.06 kB). View file
 
chroma/chroma/constants/geometry.py ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Dictionary containing ideal internal coordinates and chi angle assignments
16
+ for building amino acid 3D coordinates"""
17
+ from typing import Dict
18
+
19
+ AA_GEOMETRY: Dict[str, dict] = {
20
+ "ALA": {
21
+ "atoms": ["CB"],
22
+ "chi_indices": [],
23
+ "parents": [["N", "C", "CA"]],
24
+ "types": {"C": "C", "CA": "CT1", "CB": "CT3", "N": "NH1", "O": "O"},
25
+ "z-angles": [111.09],
26
+ "z-dihedrals": [123.23],
27
+ "z-lengths": [1.55],
28
+ },
29
+ "ARG": {
30
+ "atoms": ["CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
31
+ "chi_indices": [1, 2, 3, 4],
32
+ "parents": [
33
+ ["N", "C", "CA"],
34
+ ["N", "CA", "CB"],
35
+ ["CA", "CB", "CG"],
36
+ ["CB", "CG", "CD"],
37
+ ["CG", "CD", "NE"],
38
+ ["CD", "NE", "CZ"],
39
+ ["NH1", "NE", "CZ"],
40
+ ],
41
+ "types": {
42
+ "C": "C",
43
+ "CA": "CT1",
44
+ "CB": "CT2",
45
+ "CD": "CT2",
46
+ "CG": "CT2",
47
+ "CZ": "C",
48
+ "N": "NH1",
49
+ "NE": "NC2",
50
+ "NH1": "NC2",
51
+ "NH2": "NC2",
52
+ "O": "O",
53
+ },
54
+ "z-angles": [112.26, 115.95, 114.01, 107.09, 123.05, 118.06, 122.14],
55
+ "z-dihedrals": [123.64, 180.0, 180.0, 180.0, 180.0, 180.0, 178.64],
56
+ "z-lengths": [1.56, 1.55, 1.54, 1.5, 1.34, 1.33, 1.33],
57
+ },
58
+ "ASN": {
59
+ "atoms": ["CB", "CG", "OD1", "ND2"],
60
+ "chi_indices": [1, 2],
61
+ "parents": [
62
+ ["N", "C", "CA"],
63
+ ["N", "CA", "CB"],
64
+ ["CA", "CB", "CG"],
65
+ ["OD1", "CB", "CG"],
66
+ ],
67
+ "types": {
68
+ "C": "C",
69
+ "CA": "CT1",
70
+ "CB": "CT2",
71
+ "CG": "CC",
72
+ "N": "NH1",
73
+ "ND2": "NH2",
74
+ "O": "O",
75
+ "OD1": "O",
76
+ },
77
+ "z-angles": [113.04, 114.3, 122.56, 116.15],
78
+ "z-dihedrals": [121.18, 180.0, 180.0, -179.19],
79
+ "z-lengths": [1.56, 1.53, 1.23, 1.35],
80
+ },
81
+ "ASP": {
82
+ "atoms": ["CB", "CG", "OD1", "OD2"],
83
+ "chi_indices": [1, 2],
84
+ "parents": [
85
+ ["N", "C", "CA"],
86
+ ["N", "CA", "CB"],
87
+ ["CA", "CB", "CG"],
88
+ ["OD1", "CB", "CG"],
89
+ ],
90
+ "types": {
91
+ "C": "C",
92
+ "CA": "CT1",
93
+ "CB": "CT2A",
94
+ "CG": "CC",
95
+ "N": "NH1",
96
+ "O": "O",
97
+ "OD1": "OC",
98
+ "OD2": "OC",
99
+ },
100
+ "z-angles": [114.1, 112.6, 117.99, 117.7],
101
+ "z-dihedrals": [122.33, 180.0, 180.0, -170.23],
102
+ "z-lengths": [1.56, 1.52, 1.26, 1.25],
103
+ },
104
+ "CYS": {
105
+ "atoms": ["CB", "SG"],
106
+ "chi_indices": [1],
107
+ "parents": [["N", "C", "CA"], ["N", "CA", "CB"]],
108
+ "types": {"C": "C", "CA": "CT1", "CB": "CT2", "N": "NH1", "O": "O", "SG": "S"},
109
+ "z-angles": [111.98, 113.87],
110
+ "z-dihedrals": [121.79, 180.0],
111
+ "z-lengths": [1.56, 1.84],
112
+ },
113
+ "GLN": {
114
+ "atoms": ["CB", "CG", "CD", "OE1", "NE2"],
115
+ "chi_indices": [1, 2, 3],
116
+ "parents": [
117
+ ["N", "C", "CA"],
118
+ ["N", "CA", "CB"],
119
+ ["CA", "CB", "CG"],
120
+ ["CB", "CG", "CD"],
121
+ ["OE1", "CG", "CD"],
122
+ ],
123
+ "types": {
124
+ "C": "C",
125
+ "CA": "CT1",
126
+ "CB": "CT2",
127
+ "CD": "CC",
128
+ "CG": "CT2",
129
+ "N": "NH1",
130
+ "NE2": "NH2",
131
+ "O": "O",
132
+ "OE1": "O",
133
+ },
134
+ "z-angles": [111.68, 115.52, 112.5, 121.52, 116.84],
135
+ "z-dihedrals": [121.91, 180.0, 180.0, 180.0, 179.57],
136
+ "z-lengths": [1.55, 1.55, 1.53, 1.23, 1.35],
137
+ },
138
+ "GLU": {
139
+ "atoms": ["CB", "CG", "CD", "OE1", "OE2"],
140
+ "chi_indices": [1, 2, 3],
141
+ "parents": [
142
+ ["N", "C", "CA"],
143
+ ["N", "CA", "CB"],
144
+ ["CA", "CB", "CG"],
145
+ ["CB", "CG", "CD"],
146
+ ["OE1", "CG", "CD"],
147
+ ],
148
+ "types": {
149
+ "C": "C",
150
+ "CA": "CT1",
151
+ "CB": "CT2A",
152
+ "CD": "CC",
153
+ "CG": "CT2",
154
+ "N": "NH1",
155
+ "O": "O",
156
+ "OE1": "OC",
157
+ "OE2": "OC",
158
+ },
159
+ "z-angles": [111.71, 115.69, 115.73, 114.99, 120.08],
160
+ "z-dihedrals": [121.9, 180.0, 180.0, 180.0, -179.1],
161
+ "z-lengths": [1.55, 1.56, 1.53, 1.26, 1.25],
162
+ },
163
+ "GLY": {
164
+ "atoms": [],
165
+ "chi_indices": [],
166
+ "parents": [],
167
+ "types": {"C": "C", "CA": "CT2", "N": "NH1", "O": "O"},
168
+ "z-angles": [],
169
+ "z-dihedrals": [],
170
+ "z-lengths": [],
171
+ },
172
+ "HIS": {
173
+ "atoms": ["CB", "CG", "ND1", "CD2", "CE1", "NE2"],
174
+ "chi_indices": [1, 2],
175
+ "parents": [
176
+ ["N", "C", "CA"],
177
+ ["N", "CA", "CB"],
178
+ ["CA", "CB", "CG"],
179
+ ["ND1", "CB", "CG"],
180
+ ["CB", "CG", "ND1"],
181
+ ["CB", "CG", "CD2"],
182
+ ],
183
+ "types": {
184
+ "C": "C",
185
+ "CA": "CT1",
186
+ "CB": "CT2",
187
+ "CD2": "CPH1",
188
+ "CE1": "CPH2",
189
+ "CG": "CPH1",
190
+ "N": "NH1",
191
+ "ND1": "NR1",
192
+ "NE2": "NR2",
193
+ "O": "O",
194
+ },
195
+ "z-angles": [109.99, 114.05, 124.1, 129.6, 107.03, 110.03],
196
+ "z-dihedrals": [122.46, 180.0, 90.0, -171.29, -173.21, 171.99],
197
+ "z-lengths": [1.55, 1.5, 1.38, 1.36, 1.35, 1.38],
198
+ },
199
+ "HSD": {
200
+ "atoms": ["CB", "CG", "ND1", "CD2", "CE1", "NE2"],
201
+ "chi_indices": [1, 2],
202
+ "parents": [
203
+ ["N", "C", "CA"],
204
+ ["N", "CA", "CB"],
205
+ ["CA", "CB", "CG"],
206
+ ["ND1", "CB", "CG"],
207
+ ["CB", "CG", "ND1"],
208
+ ["CB", "CG", "CD2"],
209
+ ],
210
+ "types": {
211
+ "C": "C",
212
+ "CA": "CT1",
213
+ "CB": "CT2",
214
+ "CD2": "CPH1",
215
+ "CE1": "CPH2",
216
+ "CG": "CPH1",
217
+ "N": "NH1",
218
+ "ND1": "NR1",
219
+ "NE2": "NR2",
220
+ "O": "O",
221
+ },
222
+ "z-angles": [109.99, 114.05, 124.1, 129.6, 107.03, 110.03],
223
+ "z-dihedrals": [122.46, 180.0, 90.0, -171.29, -173.21, 171.99],
224
+ "z-lengths": [1.55, 1.5, 1.38, 1.36, 1.35, 1.38],
225
+ },
226
+ "HSE": {
227
+ "atoms": ["CB", "CG", "ND1", "CD2", "CE1", "NE2"],
228
+ "chi_indices": [],
229
+ "parents": [
230
+ ["N", "C", "CA"],
231
+ ["N", "CA", "CB"],
232
+ ["CA", "CB", "CG"],
233
+ ["ND1", "CB", "CG"],
234
+ ["CB", "CG", "ND1"],
235
+ ["CB", "CG", "CD2"],
236
+ ],
237
+ "types": {
238
+ "C": "C",
239
+ "CA": "CT1",
240
+ "CB": "CT2",
241
+ "CD2": "CPH1",
242
+ "CE1": "CPH2",
243
+ "CG": "CPH1",
244
+ "N": "NH1",
245
+ "ND1": "NR2",
246
+ "NE2": "NR1",
247
+ "O": "O",
248
+ },
249
+ "z-angles": [111.67, 116.94, 120.17, 129.71, 105.2, 105.8],
250
+ "z-dihedrals": [123.52, 180.0, 90.0, -178.26, -179.2, 178.66],
251
+ "z-lengths": [1.56, 1.51, 1.39, 1.36, 1.32, 1.38],
252
+ },
253
+ "HSP": {
254
+ "atoms": ["CB", "CG", "ND1", "CD2", "CE1", "NE2"],
255
+ "chi_indices": [],
256
+ "parents": [
257
+ ["N", "C", "CA"],
258
+ ["N", "CA", "CB"],
259
+ ["CA", "CB", "CG"],
260
+ ["ND1", "CB", "CG"],
261
+ ["CB", "CG", "ND1"],
262
+ ["CB", "CG", "CD2"],
263
+ ],
264
+ "types": {
265
+ "C": "C",
266
+ "CA": "CT1",
267
+ "CB": "CT2A",
268
+ "CD2": "CPH1",
269
+ "CE1": "CPH2",
270
+ "CG": "CPH1",
271
+ "N": "NH1",
272
+ "ND1": "NR3",
273
+ "NE2": "NR3",
274
+ "O": "O",
275
+ },
276
+ "z-angles": [109.38, 114.18, 122.94, 128.93, 108.9, 106.93],
277
+ "z-dihedrals": [125.13, 180.0, 90.0, -165.26, -167.62, 167.13],
278
+ "z-lengths": [1.55, 1.52, 1.37, 1.35, 1.33, 1.37],
279
+ },
280
+ "ILE": {
281
+ "atoms": ["CB", "CG1", "CG2", "CD1"],
282
+ "chi_indices": [1, 3],
283
+ "parents": [
284
+ ["N", "C", "CA"],
285
+ ["N", "CA", "CB"],
286
+ ["CG1", "CA", "CB"],
287
+ ["CA", "CB", "CG1"],
288
+ ],
289
+ "types": {
290
+ "C": "C",
291
+ "CA": "CT1",
292
+ "CB": "CT1",
293
+ "CD": "CT3",
294
+ "CG1": "CT2",
295
+ "CG2": "CT3",
296
+ "N": "NH1",
297
+ "O": "O",
298
+ },
299
+ "z-angles": [112.93, 113.63, 113.93, 114.09],
300
+ "z-dihedrals": [124.22, 180.0, -130.04, 180.0],
301
+ "z-lengths": [1.57, 1.55, 1.55, 1.54],
302
+ },
303
+ "LEU": {
304
+ "atoms": ["CB", "CG", "CD1", "CD2"],
305
+ "chi_indices": [1, 2],
306
+ "parents": [
307
+ ["N", "C", "CA"],
308
+ ["N", "CA", "CB"],
309
+ ["CA", "CB", "CG"],
310
+ ["CD1", "CB", "CG"],
311
+ ],
312
+ "types": {
313
+ "C": "C",
314
+ "CA": "CT1",
315
+ "CB": "CT2",
316
+ "CD1": "CT3",
317
+ "CD2": "CT3",
318
+ "CG": "CT1",
319
+ "N": "NH1",
320
+ "O": "O",
321
+ },
322
+ "z-angles": [112.12, 117.46, 110.48, 112.57],
323
+ "z-dihedrals": [121.52, 180.0, 180.0, 120.0],
324
+ "z-lengths": [1.55, 1.55, 1.54, 1.54],
325
+ },
326
+ "LYS": {
327
+ "atoms": ["CB", "CG", "CD", "CE", "NZ"],
328
+ "chi_indices": [1, 2, 3, 4],
329
+ "parents": [
330
+ ["N", "C", "CA"],
331
+ ["N", "CA", "CB"],
332
+ ["CA", "CB", "CG"],
333
+ ["CB", "CG", "CD"],
334
+ ["CG", "CD", "CE"],
335
+ ],
336
+ "types": {
337
+ "C": "C",
338
+ "CA": "CT1",
339
+ "CB": "CT2",
340
+ "CD": "CT2",
341
+ "CE": "CT2",
342
+ "CG": "CT2",
343
+ "N": "NH1",
344
+ "NZ": "NH3",
345
+ "O": "O",
346
+ },
347
+ "z-angles": [111.36, 115.76, 113.28, 112.33, 110.46],
348
+ "z-dihedrals": [122.23, 180.0, 180.0, 180.0, 180.0],
349
+ "z-lengths": [1.56, 1.54, 1.54, 1.53, 1.46],
350
+ },
351
+ "MET": {
352
+ "atoms": ["CB", "CG", "SD", "CE"],
353
+ "chi_indices": [1, 2, 3],
354
+ "parents": [
355
+ ["N", "C", "CA"],
356
+ ["N", "CA", "CB"],
357
+ ["CA", "CB", "CG"],
358
+ ["CB", "CG", "SD"],
359
+ ],
360
+ "types": {
361
+ "C": "C",
362
+ "CA": "CT1",
363
+ "CB": "CT2",
364
+ "CE": "CT3",
365
+ "CG": "CT2",
366
+ "N": "NH1",
367
+ "O": "O",
368
+ "SD": "S",
369
+ },
370
+ "z-angles": [111.88, 115.92, 110.28, 98.94],
371
+ "z-dihedrals": [121.62, 180.0, 180.0, 180.0],
372
+ "z-lengths": [1.55, 1.55, 1.82, 1.82],
373
+ },
374
+ "PHE": {
375
+ "atoms": ["CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
376
+ "chi_indices": [1, 2],
377
+ "parents": [
378
+ ["N", "C", "CA"],
379
+ ["N", "CA", "CB"],
380
+ ["CA", "CB", "CG"],
381
+ ["CD1", "CB", "CG"],
382
+ ["CB", "CG", "CD1"],
383
+ ["CB", "CG", "CD2"],
384
+ ["CG", "CD1", "CE1"],
385
+ ],
386
+ "types": {
387
+ "C": "C",
388
+ "CA": "CT1",
389
+ "CB": "CT2",
390
+ "CD1": "CA",
391
+ "CD2": "CA",
392
+ "CE1": "CA",
393
+ "CE2": "CA",
394
+ "CG": "CA",
395
+ "CZ": "CA",
396
+ "N": "NH1",
397
+ "O": "O",
398
+ },
399
+ "z-angles": [112.45, 112.76, 120.32, 120.76, 120.63, 120.62, 119.93],
400
+ "z-dihedrals": [122.49, 180.0, 90.0, -177.96, -177.37, 177.2, -0.12],
401
+ "z-lengths": [1.56, 1.51, 1.41, 1.41, 1.4, 1.4, 1.4],
402
+ },
403
+ "PRO": {
404
+ "atoms": ["CB", "CG", "CD"],
405
+ "chi_indices": [1, 2],
406
+ "parents": [["N", "C", "CA"], ["N", "CA", "CB"], ["CA", "CB", "CG"]],
407
+ "types": {
408
+ "C": "C",
409
+ "CA": "CP1",
410
+ "CB": "CP2",
411
+ "CD": "CP3",
412
+ "CG": "CP2",
413
+ "N": "N",
414
+ "O": "O",
415
+ },
416
+ "z-angles": [111.74, 104.39, 103.21],
417
+ "z-dihedrals": [113.74, 31.61, -34.59],
418
+ "z-lengths": [1.54, 1.53, 1.53],
419
+ },
420
+ "SER": {
421
+ "atoms": ["CB", "OG"],
422
+ "chi_indices": [1],
423
+ "parents": [["N", "C", "CA"], ["N", "CA", "CB"]],
424
+ "types": {
425
+ "C": "C",
426
+ "CA": "CT1",
427
+ "CB": "CT2",
428
+ "N": "NH1",
429
+ "O": "O",
430
+ "OG": "OH1",
431
+ },
432
+ "z-angles": [111.4, 112.45],
433
+ "z-dihedrals": [124.75, 180.0],
434
+ "z-lengths": [1.56, 1.43],
435
+ },
436
+ "THR": {
437
+ "atoms": ["CB", "OG1", "CG2"],
438
+ "chi_indices": [1],
439
+ "parents": [["N", "C", "CA"], ["N", "CA", "CB"], ["OG1", "CA", "CB"]],
440
+ "types": {
441
+ "C": "C",
442
+ "CA": "CT1",
443
+ "CB": "CT1",
444
+ "CG2": "CT3",
445
+ "N": "NH1",
446
+ "O": "O",
447
+ "OG1": "OH1",
448
+ },
449
+ "z-angles": [112.74, 112.16, 115.91],
450
+ "z-dihedrals": [126.46, 180.0, -124.13],
451
+ "z-lengths": [1.57, 1.43, 1.53],
452
+ },
453
+ "TRP": {
454
+ "atoms": ["CB", "CG", "CD2", "CD1", "CE2", "NE1", "CE3", "CZ3", "CH2", "CZ2"],
455
+ "chi_indices": [1, 2],
456
+ "parents": [
457
+ ["N", "C", "CA"],
458
+ ["N", "CA", "CB"],
459
+ ["CA", "CB", "CG"],
460
+ ["CD2", "CB", "CG"],
461
+ ["CD1", "CG", "CD2"],
462
+ ["CG", "CD2", "CE2"],
463
+ ["CE2", "CG", "CD2"],
464
+ ["CE2", "CD2", "CE3"],
465
+ ["CD2", "CE3", "CZ3"],
466
+ ["CE3", "CZ3", "CH2"],
467
+ ],
468
+ "types": {
469
+ "C": "C",
470
+ "CA": "CT1",
471
+ "CB": "CT2",
472
+ "CD1": "CA",
473
+ "CD2": "CPT",
474
+ "CE2": "CPT",
475
+ "CE3": "CAI",
476
+ "CG": "CY",
477
+ "CH2": "CA",
478
+ "CZ2": "CAI",
479
+ "CZ3": "CA",
480
+ "N": "NH1",
481
+ "NE1": "NY",
482
+ "O": "O",
483
+ },
484
+ "z-angles": [
485
+ 111.23,
486
+ 115.14,
487
+ 123.95,
488
+ 129.18,
489
+ 106.65,
490
+ 107.87,
491
+ 132.54,
492
+ 118.16,
493
+ 120.97,
494
+ 120.87,
495
+ ],
496
+ "z-dihedrals": [
497
+ 122.68,
498
+ 180.0,
499
+ 90.0,
500
+ -172.81,
501
+ -0.08,
502
+ 0.14,
503
+ 179.21,
504
+ -0.2,
505
+ 0.1,
506
+ 0.01,
507
+ ],
508
+ "z-lengths": [1.56, 1.52, 1.44, 1.37, 1.41, 1.37, 1.4, 1.4, 1.4, 1.4],
509
+ },
510
+ "TYR": {
511
+ "atoms": ["CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"],
512
+ "chi_indices": [1, 2],
513
+ "parents": [
514
+ ["N", "C", "CA"],
515
+ ["N", "CA", "CB"],
516
+ ["CA", "CB", "CG"],
517
+ ["CD1", "CB", "CG"],
518
+ ["CB", "CG", "CD1"],
519
+ ["CB", "CG", "CD2"],
520
+ ["CG", "CD1", "CE1"],
521
+ ["CE1", "CE2", "CZ"],
522
+ ],
523
+ "types": {
524
+ "C": "C",
525
+ "CA": "CT1",
526
+ "CB": "CT2",
527
+ "CD1": "CA",
528
+ "CD2": "CA",
529
+ "CE1": "CA",
530
+ "CE2": "CA",
531
+ "CG": "CA",
532
+ "CZ": "CA",
533
+ "N": "NH1",
534
+ "O": "O",
535
+ "OH": "OH1",
536
+ },
537
+ "z-angles": [112.34, 112.94, 120.49, 120.46, 120.4, 120.56, 120.09, 120.25],
538
+ "z-dihedrals": [122.27, 180.0, 90.0, -176.46, -175.49, 175.32, -0.19, -178.98],
539
+ "z-lengths": [1.56, 1.51, 1.41, 1.41, 1.4, 1.4, 1.4, 1.41],
540
+ },
541
+ "VAL": {
542
+ "atoms": ["CB", "CG1", "CG2"],
543
+ "chi_indices": [1],
544
+ "parents": [["N", "C", "CA"], ["N", "CA", "CB"], ["CG1", "CA", "CB"]],
545
+ "types": {
546
+ "C": "C",
547
+ "CA": "CT1",
548
+ "CB": "CT1",
549
+ "CG1": "CT3",
550
+ "CG2": "CT3",
551
+ "N": "NH1",
552
+ "O": "O",
553
+ },
554
+ "z-angles": [111.23, 113.97, 112.17],
555
+ "z-dihedrals": [122.95, 180.0, 123.99],
556
+ "z-lengths": [1.57, 1.54, 1.54],
557
+ },
558
+ }
chroma/chroma/constants/named_models.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """ Paths for named models in the zoo """
16
+
17
+ GRAPH_BACKBONE_MODELS = {
18
+ "public": {
19
+ "s3_uri": "https://chroma-weights.generatebiomedicines.com/downloads?weights=chroma_backbone_v1.0.pt",
20
+ "data": "Generate Structure ETL: July 25 2022",
21
+ "task": "BLNL backbone model training with EMA, trained July 2023",
22
+ },
23
+ }
24
+
25
+ GRAPH_CLASSIFIER_MODELS = {
26
+ "public": {
27
+ "s3_uri": "https://chroma-weights.generatebiomedicines.com/downloads?weights=chroma_proclass_v1.0.pt",
28
+ "data": "Generate Structure ETL: June 2022",
29
+ "task": "Backbone classification model training with cross-entropy loss",
30
+ },
31
+ }
32
+
33
+ GRAPH_DESIGN_MODELS = {
34
+ "public": {
35
+ "s3_uri": "https://chroma-weights.generatebiomedicines.com/downloads?weights=chroma_design_v1.0.pt",
36
+ "data": "Generate Structure ETL: July 25 2022",
37
+ "task": "Autoregressive joint prediction of sequence and chi angles, two-stage",
38
+ },
39
+ }
40
+
41
+ PROCAP_MODELS = {
42
+ "public": {
43
+ "s3_uri": "https://chroma-weights.generatebiomedicines.com/downloads?weights=chroma_procap_v1.0.pt",
44
+ "data": "Generate Structure ETL: June 2022",
45
+ "task": "Backbone caption model training with cross-entropy loss, using M5 ProClass GNN embeddings",
46
+ },
47
+ }
48
+
49
+ NAMED_MODELS = {
50
+ "GraphBackbone": GRAPH_BACKBONE_MODELS,
51
+ "GraphDesign": GRAPH_DESIGN_MODELS,
52
+ "GraphClassifier": GRAPH_CLASSIFIER_MODELS,
53
+ "ProteinCaption": PROCAP_MODELS,
54
+ }
chroma/chroma/constants/sequence.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Constants used across protein representations.
16
+
17
+ These constants standardize protein tokenization alphabets, ideal structure
18
+ geometries and topologies, etc.
19
+ """
20
+ from chroma.constants.geometry import AA_GEOMETRY
21
+
22
+ # Standard tokenization for Omniprot and Omniprot-interacting models
23
+ OMNIPROT_TOKENS = "ABCDEFGHIKLMNOPQRSTUVWYXZ*-#"
24
+ POTTS_EXTENDED_TOKENS = "ACDEFGHIKLMNPQRSTVWY-*#"
25
+ PAD = "-"
26
+ START = "@"
27
+ STOP = "*"
28
+ MASK = "#"
29
+ DNA_TOKENS = "ACGT"
30
+ RNA_TOKENS = "AGCU"
31
+ PROTEIN_TOKENS = "ACDEFGHIKLMNPQRSTVWY"
32
+
33
+ # Minimal 20-letter alphabet and corresponding triplet codes
34
+ AA20 = "ACDEFGHIKLMNPQRSTVWY"
35
+ AA20_3_TO_1 = {
36
+ "ALA": "A",
37
+ "ARG": "R",
38
+ "ASN": "N",
39
+ "ASP": "D",
40
+ "CYS": "C",
41
+ "GLN": "Q",
42
+ "GLU": "E",
43
+ "GLY": "G",
44
+ "HIS": "H",
45
+ "ILE": "I",
46
+ "LEU": "L",
47
+ "LYS": "K",
48
+ "MET": "M",
49
+ "PHE": "F",
50
+ "PRO": "P",
51
+ "SER": "S",
52
+ "THR": "T",
53
+ "TRP": "W",
54
+ "TYR": "Y",
55
+ "VAL": "V",
56
+ }
57
+ AA20_1_TO_3 = {
58
+ "A": "ALA",
59
+ "R": "ARG",
60
+ "N": "ASN",
61
+ "D": "ASP",
62
+ "C": "CYS",
63
+ "Q": "GLN",
64
+ "E": "GLU",
65
+ "G": "GLY",
66
+ "H": "HIS",
67
+ "I": "ILE",
68
+ "L": "LEU",
69
+ "K": "LYS",
70
+ "M": "MET",
71
+ "F": "PHE",
72
+ "P": "PRO",
73
+ "S": "SER",
74
+ "T": "THR",
75
+ "W": "TRP",
76
+ "Y": "TYR",
77
+ "V": "VAL",
78
+ }
79
+ AA20_3 = [AA20_1_TO_3[aa] for aa in AA20]
80
+
81
+ # Adding noncanonical amino acids
82
+ NONCANON_AA = [
83
+ "HSD",
84
+ "HSE",
85
+ "HSC",
86
+ "HSP",
87
+ "MSE",
88
+ "CSO",
89
+ "SEC",
90
+ "CSX",
91
+ "HIP",
92
+ "SEP",
93
+ "TPO",
94
+ ]
95
+ AA31_3 = AA20_3 + NONCANON_AA
96
+
97
+ # Chain alphabet for PDB chain naming
98
+ CHAIN_ALPHABET = "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
99
+
100
+ # Standard atom indexing
101
+ ATOMS_BB = ["N", "CA", "C", "O"]
102
+
103
+ ATOM_SYMMETRIES = {
104
+ "ARG": [("NH1", "NH2")], # Correct handling of NH1 and NH2 is relabeling
105
+ "ASP": [("OD1", "OD2")],
106
+ "GLU": [("OE1", "OE2")],
107
+ "PHE": [("CD1", "CD2"), ("CE1", "CE2")],
108
+ "TYR": [("CD1", "CD2"), ("CE1", "CE2")],
109
+ }
110
+
111
+ AA20_NUM_ATOMS = [4 + len(AA_GEOMETRY[aa]["atoms"]) for aa in AA20_3]
112
+ AA20_NUM_CHI = [len(AA_GEOMETRY[aa]["chi_indices"]) for aa in AA20_3]
chroma/chroma/data/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This package includes io formats and tools for a few common datatypes,
17
+ including antibodies, proteins, sequences, and structures.
18
+ """
19
+ from chroma.data.protein import Protein
chroma/chroma/data/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (339 Bytes). View file
 
chroma/chroma/data/__pycache__/protein.cpython-38.pyc ADDED
Binary file (19.3 kB). View file
 
chroma/chroma/data/__pycache__/system.cpython-38.pyc ADDED
Binary file (136 kB). View file
 
chroma/chroma/data/__pycache__/xcs.cpython-38.pyc ADDED
Binary file (3.83 kB). View file
 
chroma/chroma/data/protein.py ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
from __future__ import annotations

import copy
import os
import tempfile
import warnings
from typing import List, Optional, Tuple, Union

import nglview as nv
import torch

import chroma.utility.polyseq as polyseq
from chroma.constants import CHAIN_ALPHABET, PROTEIN_TOKENS
from chroma.data.system import System, SystemEntity
28
+
29
+
30
class Protein:
    """Protein: A utility class for managing proteins within the Chroma ecosystem.

    The Protein class offers a suite of methods for loading, saving,
    transforming, and viewing protein structures and trajectories from a
    variety of input sources such as PDB IDs, CIF/PDB files, raw sequences,
    and XCS tensor representations.

    Attributes:
        sys (System): A protein system object used for various molecular operations.
        device (str): Specifies the device on which tensors are managed. Defaults to `cpu`.
    """

    sys: System
    device: str = "cpu"

    def __new__(cls, *args, **kwargs):
        """Dispatch construction based on the input type.

        Accepted inputs:
            * a single ``System`` instance;
            * three tensors ``(X, C, S)``, where ``X`` may also be a list of
              tensors (interpreted as a trajectory);
            * a single string: a ``*.cif`` path, a ``*.pdb`` path, a PDB ID,
              or a one-letter sequence string.

        Raises:
            TypeError: if three arguments are given but are not XCS tensors.
            NotImplementedError: for s3 paths or unrecognized inputs.
        """
        if len(args) == 1 and isinstance(args[0], System):
            return cls.from_system(*args, **kwargs)

        elif len(args) == 3:  # 3 tensor arguments, interpreted as (X, C, S)
            X, C, S = args
            assert isinstance(
                C, torch.Tensor
            ), f"arg[1] must be a chain (C) torch.Tensor, but get {type(C)}"
            assert isinstance(
                S, torch.Tensor
            ), f"arg[2] must be a sequence (S) torch.Tensor, but get {type(S)}"
            if isinstance(X, list):
                assert all(
                    isinstance(x, torch.Tensor) for x in X
                ), "arg[0] must be an X torch.Tensor or a list of X torch.Tensors"
                return cls.from_XCS_trajectory(X, C, S)
            elif isinstance(X, torch.Tensor):
                return cls.from_XCS(X, C, S)
            else:
                raise TypeError(
                    f"X must be a list of torch.Tensor that respects XCS format, but get {type(X), type(C), type(S)}"
                )

        elif len(args) == 1 and isinstance(args[0], str):
            if args[0].lower().startswith("s3:"):
                raise NotImplementedError(
                    "download of cifs or pdbs from s3 not supported."
                )

            if args[0].endswith(".cif"):
                return cls.from_CIF(*args, **kwargs)

            elif args[0].endswith(".pdb"):
                return cls.from_PDB(*args, **kwargs)

            else:  # PDB ID or sequence string
                # Check whether the string names a valid PDB entry.
                # NOTE: this requires internet access; offline callers should
                # use Protein.from_sequence / Protein.from_PDBID explicitly.
                import requests

                url = f"https://data.rcsb.org/rest/v1/core/entry/{args[0]}"
                VALID_PDBID = requests.get(url).status_code == 200
                VALID_SEQUENCE = all([s in PROTEIN_TOKENS for s in args[0]])

                if VALID_PDBID:
                    if VALID_SEQUENCE:
                        # BUGFIX: this previously did `raise Warning(...)`,
                        # which aborted construction even though the message
                        # (and docstring) promise to interpret the input as a
                        # PDB ID. Emit a real warning and proceed instead.
                        warnings.warn(
                            "Ambiguous input, this is both a valid Sequence string"
                            " and a valid PDBID. Interpreting as a PDBID, if you"
                            " wish to initialize as a sequence string please"
                            " explicitly initialize as"
                            " Protein.from_sequence(MY_SEQUENCE)."
                        )
                    return cls.from_PDBID(*args, **kwargs)
                elif VALID_SEQUENCE:
                    return cls.from_sequence(*args, **kwargs)
                else:
                    raise NotImplementedError(
                        "Could Not Identify a valid input Type. See docstring for"
                        " details."
                    )
        else:
            raise NotImplementedError(
                "Inputs must either be a 3-tuple of XCS tensors, or a single string"
            )

    @classmethod
    def from_system(cls, system: System, device: str = "cpu") -> Protein:
        """Wrap an existing System object in a Protein (no copying)."""
        protein = super(Protein, cls).__new__(cls)
        protein.sys = system
        protein.device = device
        return protein

    @classmethod
    def from_XCS(cls, X: torch.Tensor, C: torch.Tensor, S: torch.Tensor) -> Protein:
        """
        Create a Protein object from XCS representations.

        Args:
            X (torch.Tensor): A 4D tensor of atomic coordinates with dimensions
                `(batch, residues, atoms (4 or 14), coordinates (3))`.
            C (torch.Tensor): A chain label tensor of shape `(batch, residues)`.
                Sign indicates presence (+) or absence (-) of structural
                information; magnitude indicates the chain index.
            S (torch.Tensor): A sequence tensor of shape `(batch, residues)` of
                non-negative residue-type integers.

        Returns:
            Protein: Initialized Protein object from the given XCS representation.
        """
        protein = super(Protein, cls).__new__(cls)
        protein.sys = System.from_XCS(X, C, S)
        protein.device = X.device
        return protein

    @classmethod
    def from_XCS_trajectory(
        cls, X_traj: List[torch.Tensor], C: torch.Tensor, S: torch.Tensor
    ) -> Protein:
        """
        Initialize a Protein object from a trajectory of XCS representations.

        Args:
            X_traj (List[torch.Tensor]): List of coordinate tensors over time,
                each of shape `(batch, residues, atoms (4 or 14), 3)`.
            C (torch.Tensor): Chain label tensor of shape `(batch, residues)`.
            S (torch.Tensor): Sequence tensor of shape `(batch, residues)`.

        Returns:
            Protein: Protein object initialized from the XCS trajectory.
        """
        protein = super(Protein, cls).__new__(cls)
        protein.sys = System.from_XCS(X_traj[0], C, S)
        protein.device = C.device
        # Remaining frames become additional models; only structured residues
        # (C > 0) carry coordinates.
        for X in X_traj[1:]:
            protein.sys.add_model_from_X(X[C > 0])
        return protein

    @classmethod
    def from_PDB(cls, input_file: str, device: str = "cpu") -> Protein:
        """
        Load a Protein object from a provided PDB file.

        Args:
            input_file (str): Path to the PDB file to be loaded.
            device (str, optional): The device for tensor operations. Defaults to 'cpu'.

        Returns:
            Protein: Initialized Protein object from the provided PDB file.
        """
        protein = super(Protein, cls).__new__(cls)
        protein.sys = System.from_PDB(input_file)
        protein.device = device
        return protein

    @classmethod
    def from_CIF(
        cls, input_file: str, canonicalize: bool = True, device: str = "cpu"
    ) -> Protein:
        """
        Load a Protein object from a provided CIF file.

        Args:
            input_file (str): Path to the CIF file to be loaded.
            canonicalize (bool, optional): If True, canonicalize the protein's
                backbone geometry after loading. Defaults to True.
            device (str, optional): The device for tensor operations. Defaults to 'cpu'.

        Returns:
            Protein: Initialized Protein object from the provided CIF file.
        """
        protein = super(Protein, cls).__new__(cls)
        protein.sys = System.from_CIF(input_file)
        protein.device = device
        if canonicalize:
            protein.canonicalize()
        return protein

    @classmethod
    def from_PDBID(
        cls, pdb_id: str, canonicalize: bool = True, device: str = "cpu"
    ) -> Protein:
        """
        Load a Protein object by PDB ID, fetching the CIF file from the RCSB.

        Downloads the CIF file for the given PDB ID into a temporary location,
        builds the Protein, then deletes the temporary file.

        Args:
            pdb_id (str): The PDB ID of the protein to fetch.
            canonicalize (bool, optional): Canonicalize post-loading. Defaults to True.
            device (str, optional): The device for tensor operations. Defaults to 'cpu'.

        Returns:
            Protein: An instance initialized from the fetched CIF file.
        """
        from os import unlink

        from chroma.utility.fetchdb import RCSB_file_download

        file_cif = os.path.join(tempfile.gettempdir(), f"{pdb_id}.cif")
        RCSB_file_download(pdb_id, ".cif", file_cif)
        protein = cls.from_CIF(file_cif, canonicalize=canonicalize, device=device)
        unlink(file_cif)
        return protein

    @classmethod
    def from_sequence(
        cls, chains: Union[List[str], str], device: str = "cpu"
    ) -> Protein:
        """
        Load a Protein purely from sequence, with no structural content.

        Args:
            chains (Union[List[str], str]): a sequence string, or a list of
                sequence strings (one per chain).
            device (str, optional): device for torch outputs. Defaults to "cpu".

        Returns:
            Protein: An instance initialized from the sequence(s).
        """
        if isinstance(chains, str):
            chains = [chains]

        system = System("system")
        for c_ix, seq in enumerate(chains):
            # CHAIN_ALPHABET[0] is the placeholder "_", so chains start at "A".
            chain_id = CHAIN_ALPHABET[c_ix + 1]
            chain = system.add_chain(chain_id)

            # Populate the chain residue-by-residue (1-based residue numbers).
            three_letter_sequence = []
            for s_ix, s in enumerate(seq):
                resname = polyseq.to_triple(s)
                three_letter_sequence.append(resname)
                chain.add_residue(resname, s_ix + 1, "")

            # Register the chain as a polymer entity with no coordinates.
            sys_entity = SystemEntity(
                "polymer",
                f"Sequence Chain {chain_id}",
                "polypeptide(L)",
                three_letter_sequence,
                [False] * len(three_letter_sequence),
            )
            system.add_new_entity(sys_entity, [c_ix])

        protein = super(Protein, cls).__new__(cls)
        protein.sys = system
        protein.device = device
        return protein

    def to_CIF(self, output_file: str, force: bool = False) -> None:
        """
        Save the current Protein object to a file in CIF format.

        Args:
            output_file (str): The path where the CIF file should be saved.
            force (bool, optional): Accepted for interface symmetry; not used
                by the local-file export path.
        """
        if output_file.lower().startswith("s3:"):
            raise NotImplementedError("cif output to an s3 bucket not supported.")
        else:
            self.sys.to_CIF(output_file)

    def to_PDB(self, output_file: str, force: bool = False) -> None:
        """
        Save the current Protein object to a file in PDB format.

        Args:
            output_file (str): The path where the PDB file should be saved.
            force (bool, optional): Accepted for interface symmetry; not used
                by the local-file export path.
        """
        if output_file.lower().startswith("s3:"):
            raise NotImplementedError("pdb output to an s3 bucket not supported.")
        else:
            self.sys.to_PDB(output_file)

    def to_XCS(
        self, all_atom: bool = False, device: Optional[str] = None
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Convert the current Protein object to its XCS tensor representations.

        Args:
            all_atom (bool, optional): If True, include side-chain atoms
                (14-atom representation). Defaults to False.
            device (str, optional): device to export XCS tensors to. If not
                specified, uses the instance's `device` attribute. Default None.

        Returns:
            X (torch.Tensor): Coordinates, `(batch, residues, atoms (4 or 14), 3)`.
            C (torch.Tensor): Chain labels, `(batch, residues)`.
            S (torch.Tensor): Sequence indices, `(batch, residues)`.
        """
        if device is None:
            device = self.device

        X, C, S = [tensor.to(device) for tensor in self.sys.to_XCS(all_atom=all_atom)]

        return X, C, S

    def to_XCS_trajectory(
        self,
        device: Optional[str] = None,
    ) -> Tuple[List[torch.Tensor], torch.Tensor, torch.Tensor]:
        """
        Convert the Protein's models to XCS tensors, one X frame per model.

        Args:
            device (str, optional): device to export XCS tensors to. If not
                specified, uses the instance's `device` attribute. Default None.

        Returns:
            X_traj (List[torch.Tensor]): per-model coordinate tensors, each
                `(1, residues, atoms, 3)`.
            C (torch.Tensor): Chain labels, `(batch, residues)`.
            S (torch.Tensor): Sequence indices, `(batch, residues)`.
        """
        X, C, S = [], None, None
        for i in range(self.sys.num_models()):
            self.sys.swap_model(i)
            if i == 0:
                # First frame also yields the location indices used to read
                # coordinates for subsequent frames directly from storage.
                X_frame, C, S, loc_indices = self.sys.to_XCS(get_indices=True)
            else:
                X_frame.flatten(0, 2)[:] = torch.from_numpy(
                    self.sys._locations["coor"][loc_indices, 0:3]
                )
            X.append(X_frame.clone())
            # Swap back so model order is restored for the next iteration.
            self.sys.swap_model(i)
        X = torch.cat(X)

        if device is None:
            device = self.device

        Xtraj, C, S = [tensor.to(device) for tensor in [X, C, S]]
        return [each.unsqueeze(0) for each in Xtraj], C, S

    def to(self, file_path: str, force: bool = False) -> None:
        """
        General export for the Protein class.

        Dispatches to PDB or CIF export based on the file extension; explicit
        saving is still available via the respective export methods.

        Args:
            file_path (str): Output path; must end in `.pdb` or `.cif`.
            force (bool, optional): Passed through to the underlying exporter.
                Defaults to False.

        Raises:
            NotImplementedError: if the extension is neither `.pdb` nor `.cif`.
        """
        if file_path.lower().endswith(".pdb"):
            self.to_PDB(file_path, force=force)
        elif file_path.lower().endswith(".cif"):
            self.to_CIF(file_path, force=force)
        else:
            raise NotImplementedError(
                "file path must end with either *.cif or *.pdb for export."
            )

    def length(self, structured: bool = False) -> int:
        """
        Retrieve the length of the protein.

        Args:
            structured (bool, optional): If True, return the residue count of
                the structured part only. Defaults to False.

        Returns:
            int: Length of the protein or its structured part.
        """
        # BUGFIX: return annotation corrected from `-> None` to `-> int`.
        if structured:
            return self.sys.num_structured_residues()
        return self.sys.num_residues()

    __len__ = length

    def canonicalize(self) -> None:
        """
        Canonicalize the protein's backbone geometry.

        Drops unknown residues and residues with missing backbone coordinates.
        """
        self.sys.canonicalize_protein(
            level=2,
            drop_coors_unknowns=True,
            drop_coors_missing_backbone=True,
        )

    def sequence(self, format: str = "one-letter-string") -> Union[List[str], str]:
        """
        Retrieve the sequence of the protein in the specified format.

        Args:
            format (str, optional): 'three-letter-list' or 'one-letter-string'.
                Defaults to 'one-letter-string'.

        Returns:
            Union[List[str], str]: The protein sequence in the desired format.

        Raises:
            ValueError: If an unknown sequence format is provided.
        """
        if format == "three-letter-list":
            return list(self.sys.sequence())
        elif format == "one-letter-string":
            return self.sys.sequence("one-letter-string")
        else:
            # Narrowed from bare Exception; ValueError still satisfies any
            # caller catching Exception.
            raise ValueError(f"unknown sequence format {format}")

    def display(self, representations: list = []) -> None:
        """
        Display the protein using the provided representations in NGL view.

        Args:
            representations (list, optional): Visual representations to add.
                Defaults to an empty list (note: representations are only
                applied in the single-model case, mirroring prior behavior).

        Returns:
            viewer: A viewer object for interactive visualization.
        """
        from chroma.utility.ngl import SystemTrajectory, view_gsystem

        if self.sys.num_models() == 1:
            viewer = view_gsystem(self.sys)
            for rep in representations:
                viewer.add_representation(rep)
        else:
            t = SystemTrajectory(self)
            viewer = nv.NGLWidget(t)
        return viewer

    def _ipython_display_(self):
        # `display` is injected by IPython at runtime; this hook is only
        # invoked inside notebook environments.
        display(self.display())

    def __str__(self):
        """Return the protein name and per-chain sequences in FASTA-like form."""
        protein_string = f"Protein: {self.sys.name}\n"
        for chain in self.sys.chains():
            # NOTE(review): `chain.sequence` looks like a bound method, so this
            # check is always truthy — confirm against the System chain API.
            if chain.sequence is not None:
                protein_string += (
                    f"> Chain {chain.cid} ({len(chain.sequence())} residues)\n"
                )
                protein_string += "".join(
                    [polyseq.to_single(s) for s in chain.sequence()]
                )
                protein_string += "\n\n"

        return protein_string

    def get_mask(self, selection: str) -> torch.Tensor:
        """
        Generate a mask tensor based on the provided residue selection.

        Args:
            selection (str): A selection string specifying which residues to include.

        Returns:
            torch.Tensor: Mask of shape `(1, protein length)`; selected
                positions have value 1, all others 0.
        """
        residue_gtis = self.sys.select_residues(selection, gti=True)
        D = torch.zeros(1, self.sys.num_residues(), device=self.device)
        for gti in residue_gtis:
            D[0, gti] = 1
        return D

    def __copy__(self):
        new_system = copy.copy(self.sys)
        device = self.device
        return Protein(new_system, device=device)

    def __deepcopy__(self, memo):
        new_system = copy.deepcopy(self.sys)
        device = self.device
        return Protein(new_system, device=device)
chroma/chroma/data/system.py ADDED
The diff for this file is too large to render. See raw diff
 
chroma/chroma/data/xcs.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """XCS represents protein structure as a tuple of PyTorch tensors.
16
+
17
+ The tensors in an XCS representation are:
18
+
19
+ `X` (FloatTensor), the Cartesian coordinates representing the protein
20
+ structure with shape `(num_batch, num_residues, num_atoms, 3)`. The
21
+ `num_atoms` dimension can be one of two sizes: `num_atoms=4` for
22
+ backbone-only structures or `num_atoms=14` for all-atom structures
23
+ (excluding hydrogens). The first four atoms will always be
24
+ `N, CA, C, O`, and the meaning of the optional 10 additional atom
25
+ positions will vary based on the residue identity at
26
+ a given position. Atom orders for each amino acid are defined in
27
+ `constants.AA_GEOMETRY[TRIPLET_CODE]["atoms"]`.
28
+
29
`C` (LongTensor), the chain map encoding per-residue chain assignments with
    shape `(num_batch, num_residues)`. The chain map codes positions as `0`
    when masked, positive integers for chain indices, and negative integers
    to represent missing residues (of the corresponding positive integers).
33
+
34
+ `S` (LongTensor), the sequence of the protein as alphabet indices with
35
+ shape `(num_batch, num_residues)`. The standard alphabet is
36
+ `ACDEFGHIKLMNPQRSTVWY`, also defined in `constants.AA20`.
37
+ """
38
+
39
+
40
+ from functools import partial, wraps
41
+ from inspect import getfullargspec
42
+
43
+ import torch
44
+ from torch.nn import functional as F
45
+
46
# NOTE(review): removed a vestigial `try: pass / except ImportError:
# print("MST not installed!")` guard — `pass` can never raise ImportError,
# so the except branch was unreachable dead code (likely left behind after
# an import was deleted).
50
+
51
+
52
def validate_XCS(all_atom=None, sequence=True):
    """Decorator factory that adds XCS validation to any function.

    Args:
        all_atom (bool, optional): If True, requires that input structure
            tensors have 14 residues per atom. If False, reduces to 4 residues
            per atom. If None, applies no transformation on input structures.
        sequence (bool, optional): If True, makes sure that if S and O are both
            provided, that they match, i.e. that O is a one-hot version of S.
            If only one of S or O is provided, the other is generated, and both
            are passed.
    """

    def decorator(func):
        @wraps(func)
        def new_func(*args, **kwargs):
            args = list(args)
            # Positional-parameter names of the wrapped function, used to map
            # X/C/S/O names to their positions inside *args*.
            arg_list = getfullargspec(func)[0]
            tensors = {}
            # Collect each tensor from kwargs first, then positionally.
            for var in ["X", "C", "S", "O"]:
                try:
                    if var in kwargs:
                        tensors[var] = kwargs[var]
                    else:
                        tensors[var] = args[arg_list.index(var)]
                except IndexError:  # empty args_list
                    tensors[var] = None
                except ValueError:  # variable not an argument of function
                    if not sequence and var in ["S", "O"]:
                        pass
                    else:
                        raise Exception(
                            f"Variable {var} is required by validation but not defined!"
                        )
            # X and C must agree on the (num_batch, num_residues) prefix.
            if tensors["X"] is not None and tensors["C"] is not None:
                if tensors["X"].shape[:2] != tensors["C"].shape[:2]:
                    raise ValueError(
                        f"X shape {tensors['X'].shape} does not match C shape"
                        f" {tensors['C'].shape}"
                    )
            if all_atom is not None and tensors["X"] is not None:
                if all_atom and tensors["X"].shape[2] != 14:
                    raise ValueError("Side chain atoms missing!")
                elif not all_atom:
                    # Truncate to the 4 backbone atoms, writing back into
                    # whichever container (kwargs or args) X came from.
                    if "X" in kwargs:
                        kwargs["X"] = tensors["X"][:, :, :4]
                    else:
                        args[arg_list.index("X")] = tensors["X"][:, :, :4]
            # Reconcile S (indices) and O (one-hot): derive the missing one,
            # or verify consistency when both are supplied.
            if sequence and (tensors["S"] is not None or tensors["O"] is not None):
                if tensors["O"] is None:
                    if "O" in kwargs:
                        kwargs["O"] = F.one_hot(tensors["S"], 20).float()
                    else:
                        args[arg_list.index("O")] = F.one_hot(tensors["S"], 20).float()
                elif tensors["S"] is None:
                    if "S" in kwargs:
                        kwargs["S"] = tensors["O"].argmax(dim=2)
                    else:
                        args[arg_list.index("S")] = tensors["O"].argmax(dim=2)
                else:
                    if not torch.allclose(tensors["O"].argmax(dim=2), tensors["S"]):
                        raise ValueError("S and O are both provided but don't match!")
            return func(*args, **kwargs)

        return new_func

    return decorator


# Structure-only variant: validates X/C but skips all S/O sequence handling.
validate_XC = partial(validate_XCS, sequence=False)
chroma/chroma/layers/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ This package contains low-level PyTorch layers, including ``nn.Module`` s and ops.
17
+ These layers are often used in :mod:`chroma.models`.
18
+ """
chroma/chroma/layers/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (291 Bytes). View file
 
chroma/chroma/layers/__pycache__/attention.cpython-38.pyc ADDED
Binary file (12.8 kB). View file
 
chroma/chroma/layers/__pycache__/basic.cpython-38.pyc ADDED
Binary file (18.6 kB). View file
 
chroma/chroma/layers/__pycache__/complexity.cpython-38.pyc ADDED
Binary file (5.45 kB). View file
 
chroma/chroma/layers/__pycache__/conv.cpython-38.pyc ADDED
Binary file (1.14 kB). View file
 
chroma/chroma/layers/__pycache__/graph.cpython-38.pyc ADDED
Binary file (34.6 kB). View file
 
chroma/chroma/layers/__pycache__/linalg.cpython-38.pyc ADDED
Binary file (3.2 kB). View file
 
chroma/chroma/layers/__pycache__/norm.cpython-38.pyc ADDED
Binary file (7.03 kB). View file
 
chroma/chroma/layers/__pycache__/sde.cpython-38.pyc ADDED
Binary file (2.83 kB). View file
 
chroma/chroma/layers/attention.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+
18
+ """
19
+ 们实现了Transformer模型中的关键组件:缩放点积注意力(Scaled Dot Product Attention)和多头注意力(Multi-Head Attention)。
20
+ """
21
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention, Eqn. 1 of Vaswani et al. 2017
    [https://arxiv.org/abs/1706.03762].

    Attention(Q, K, V) = softmax(QK^T / sqrt(d_k)) V

    The query feature dimension must match the key feature dimension
    (``d_k``), and the key length must match the value length. See
    'The Illustrated Transformer'
    [http://jalammar.github.io/illustrated-transformer/] for a pictorial
    depiction of attention.

    Inputs:
        Q (torch.Tensor): shape (batch_size, sequence_length_q, d_k)
        K (torch.Tensor): shape (batch_size, sequence_length_k, d_k)
        V (torch.Tensor): shape (batch_size, sequence_length_k, d_v)
        mask (torch.Tensor, optional): bool/byte tensor of shape
            (batch_size, 1, sequence_length_k); zero (False) entries mark
            positions that cannot contribute to attention.

    Outputs:
        output (torch.Tensor): shape (batch_size, sequence_length_q, d_v);
            each output row is a convex combination of value rows.
        attentions (torch.Tensor): shape
            (batch_size, sequence_length_q, sequence_length_k); entry
            [b, i, j] is the relative contribution of key position j to
            query position i in batch element b.
    """

    def __init__(self):
        super(ScaledDotProductAttention, self).__init__()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, Q, K, V, mask=None):
        # Scale raw dot products by sqrt of the key feature dimension.
        d_k = K.size(-1)
        scores = torch.bmm(Q, K.transpose(1, 2)) / d_k ** 0.5

        # Masked positions get a large negative score so softmax ~zeroes them.
        if mask is not None:
            scores = scores.float().masked_fill(mask == 0, -1e9)

        weights = self.softmax(scores)

        # Explicitly zero masked positions after normalization.
        if mask is not None:
            weights = weights.float().masked_fill(mask == 0, 0)

        # Match half precision of V when needed before the value mixdown.
        if V.dtype == torch.float16:
            weights = weights.half()

        return torch.bmm(weights, V), weights
61
+
62
+
63
+ class MultiHeadAttention(nn.Module):
64
+ """Multi-head attention with scaled dot product attention. See 'The Annotated Transformer'
65
+ http://nlp.seas.harvard.edu/2018/04/03/attention.html or 'The Illustrated Transformer' http://jalammar.github.io/illustrated-transformer/
66
+ for details and intuition.
67
+
68
+ Args:
69
+ n_head (int): number of attention heads
70
+ d_k (int): dimension of the keys and queries in each attention head
71
+ d_v (int): dimension of the values in each attention head
72
+ d_model (int): input and output dimension for the layer
73
+ dropout (float): dropout rate, default is 0.1
74
+
75
+ Inputs:
76
+ Q (torch.tensor): query tensor of shape ```(batch_size, sequence_length_q, d_model)```
77
+ K (torch.tensor): key tensor of shape ```(batch_size, sequence_length_k, d_model)```
78
+ V (torch.tensor): value tensor of shape ```(batch_size, sequence_length_k, d_model)```
79
+ mask (torch.tensor): (optional) of dtype ```bool`` or ```byte``` and size (batch_size, 1, sequence_length_k),
80
+ zeroes (or False) indicate positions that cannot contribute to attention
81
+
82
+ Outputs:
83
+ output (torch.tensor) : of shape ```(batch_size, sequence_length_q, d_model)```
84
+ attentions (torch.tensor): of shape ```(batch_size * n_head, sequence_length_q, sequence_length_k) where
85
+ ```attentions[batch_size*(i):batch_size*(i+1),:,:]``` corresponds to the batch of attention blocks for i'th head. See
86
+ ```chroma.layers.attention.ScaledDotProductAttention``` for more details
87
+ """
88
+
89
    def __init__(self, n_head, d_k, d_v, d_model, dropout=0.1):
        """Build per-head projection parameters and the output projection.

        Args:
            n_head (int): number of attention heads.
            d_k (int): per-head dimension of keys and queries.
            d_v (int): per-head dimension of values.
            d_model (int): input and output feature dimension of the layer.
            dropout (float): dropout rate applied to the output projection.
        """
        super(MultiHeadAttention, self).__init__()
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        # Per-head projection weights, stacked along the leading head axis.
        self.Wq = nn.Parameter(torch.Tensor(n_head, d_model, d_k))
        self.Wk = nn.Parameter(torch.Tensor(n_head, d_model, d_k))
        self.Wv = nn.Parameter(torch.Tensor(n_head, d_model, d_v))
        # Output projection from concatenated heads back to d_model.
        self.Wo = nn.Parameter(torch.Tensor(n_head * d_v, d_model))
        self.attention = ScaledDotProductAttention()
        self.dropout = nn.Dropout(p=dropout)
        self.reset_parameters()
102
+
103
+ def reset_parameters(self):
104
+ nn.init.xavier_normal_(self.Wq)
105
+ nn.init.xavier_normal_(self.Wk)
106
+ nn.init.xavier_normal_(self.Wv)
107
+ nn.init.kaiming_uniform_(self.Wo)
108
+
109
+ def forward(self, Q, K, V, bias=None, mask=None):
110
+ mb_size, len_q, d_q_in = Q.size()
111
+ mb_size, len_k, d_k_in = K.size()
112
+ mb_size, len_v, d_v_in = V.size()
113
+ d_model = self.d_model
114
+ if d_q_in != d_model:
115
+ raise ValueError("Dimension of Q does not match d_model.")
116
+
117
+ if d_k_in != d_model:
118
+ raise ValueError("Dimension of K does not match d_model.")
119
+
120
+ if d_v_in != d_model:
121
+ raise ValueError("Dimension of V does not match d_model.")
122
+
123
+ # treat as a (n_head) size batch and project to d_k and d_v
124
+ q_s = torch.cat([Q @ W for W in self.Wq]) # (n_head*mb_size) x len_q x d_k
125
+ k_s = torch.cat([K @ W for W in self.Wk]) # (n_head*mb_size) x len_k x d_k
126
+ v_s = torch.cat([V @ W for W in self.Wv]) # (n_head*mb_size) x len_v x d_v
127
+
128
+ # Attention
129
+ if mask is not None:
130
+ mask = mask.repeat(self.n_head, 1, 1)
131
+ outputs, attns = self.attention(q_s, k_s, v_s, mask=mask)
132
+
133
+ # Back to original mb_size batch, result size = mb_size x len_q x (n_head*d_v)
134
+ outputs = torch.cat(torch.split(outputs, mb_size, dim=0), dim=-1)
135
+
136
+ # Project back to residual size
137
+ outputs = outputs @ self.Wo
138
+ outputs = self.dropout(outputs)
139
+ return outputs, attns
140
+
141
+
142
class AttentionChainPool(nn.Module):
    """Pool residue-level representations into chain-level representations
    using a chain map and a single masked attention query per chain.

    Args:
        n_head (int): number of attention heads
        d_model (int): dimension of the embeddings to be pooled

    Inputs:
        h (torch.tensor): of size (batch_size, sequence_length, d_model)
        C (torch.tensor): chain map of size (batch_size, sequence_length)

    Outputs:
        output (torch.tensor): of size (batch_size, n_chains, d_model)
        chain_mask (torch.tensor): of size (batch_size, n_chains)
    """

    def __init__(self, n_head, d_model):
        super().__init__()
        self.attention = MultiHeadAttention(
            n_head, d_model, d_model, d_model, dropout=0.0
        )

    def get_query(self, x):
        # A constant all-ones query: attention then reduces to a learned,
        # mask-weighted average over each chain's residues.
        return torch.ones(x.size(0), 1, x.size(2)).type(x.dtype).to(x.device)

    def forward(self, h, C):
        batch_size, _ = C.size()
        chain_ids = C.abs().unique()
        # One (chain_id, batch_member) pair per row of the tiled batch.
        chain_ids = (
            chain_ids[chain_ids > 0]
            .unsqueeze(-1)
            .repeat(1, batch_size)
            .reshape(-1)
            .unsqueeze(-1)
        )
        n_chains = len(chain_ids.unique())

        h_tiled = h.repeat(n_chains, 1, 1)
        C_tiled = C.repeat(n_chains, 1)
        mask = (C_tiled == chain_ids).unsqueeze(-2)

        pooled, _ = self.attention(
            self.get_query(h_tiled), h_tiled, h_tiled, mask=mask
        )
        # Regroup the chain-major tiling back into (batch, n_chains, d_model).
        pooled = torch.cat(pooled.split(batch_size), 1)
        chain_mask = torch.stack(mask.squeeze(1).any(dim=-1).split(batch_size), -1)
        return pooled, chain_mask
184
+
185
+
186
class Attention(nn.Module):
    """Multi-head attention with optional gating and bias, as implemented in
    Jumper et al. (2021).

    Args:
        n_head (int): Number of attention heads
        d_model (int): Dimension of inputs and outputs
        d_k (int): Dimension of keys/queries (defaults to d_model // n_head)
        d_v (int): Dimension of values (defaults to d_model // n_head)
        gate (bool): Whether to include a sigmoid gate on the values
            (as in Jumper et al. (2021))

    Inputs:
        Q (torch.tensor): of size (batch_size, num_queries, d_model)
        K (torch.tensor): of size (batch_size, num_keys, d_model)
        V (torch.tensor): of size (batch_size, num_keys, d_model)
        bias (torch.tensor): (optional) of size
            (n_head, num_queries, num_keys) or
            (batch_size, n_head, num_queries, num_keys)
        mask (torch.tensor): (optional) bool tensor of size
            (batch_size, n_head, num_queries, num_keys) (axes 2/3 may be 1)

    Outputs:
        output (torch.tensor): of size (batch_size, num_queries, d_model)
    """

    def __init__(self, n_head, d_model, d_k=None, d_v=None, gate=False):
        super().__init__()
        self.n_head = n_head
        self.d_model = d_model
        self.d_k = d_model // n_head if d_k is None else d_k
        self.d_v = d_model // n_head if d_v is None else d_v
        self.gate = gate
        self.q_weights = nn.Parameter(torch.Tensor(d_model, n_head, self.d_k))
        self.k_weights = nn.Parameter(torch.Tensor(d_model, n_head, self.d_k))
        self.v_weights = nn.Parameter(torch.Tensor(d_model, n_head, self.d_v))
        self.o_weights = nn.Parameter(torch.Tensor(n_head, self.d_v, d_model))
        self.o_bias = nn.Parameter(torch.Tensor(d_model))
        if self.gate:
            self.g_weights = nn.Parameter(torch.Tensor(d_model, n_head, self.d_v))
            self.g_bias = nn.Parameter(torch.Tensor(n_head, self.d_v))
        self.softmax = nn.Softmax(dim=-1)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.q_weights)
        nn.init.xavier_uniform_(self.k_weights)
        nn.init.xavier_uniform_(self.v_weights)
        nn.init.xavier_uniform_(self.o_weights)
        nn.init.zeros_(self.o_bias)
        if self.gate:
            # Zero weights + unit bias => gates start near sigmoid(1) ~ 0.73,
            # i.e. mostly open at initialization.
            nn.init.zeros_(self.g_weights)
            nn.init.ones_(self.g_bias)

    def forward(self, Q, K, V, bias=None, mask=None):
        self._check_inputs(Q, K, V, bias, mask)
        # Scale queries by 1/sqrt(d_k) up front (equivalent to scaling logits).
        q = torch.einsum("bqa,ahc->bqhc", Q, self.q_weights) * self.d_k ** (-0.5)
        k = torch.einsum("bka,ahc->bkhc", K, self.k_weights)
        v = torch.einsum("bka,ahc->bkhc", V, self.v_weights)
        logits = torch.einsum("bqhc,bkhc->bhqk", q, k)

        if bias is not None:
            logits = logits + bias

        weights = torch.nn.functional.softmax(logits, dim=-1)

        # Note: masking is applied after the softmax, so masked positions are
        # zeroed but remaining weights are not renormalized.
        if mask is not None:
            weights = weights.masked_fill(~mask, 0.0)

        weighted_avg = torch.einsum("bhqk,bkhc->bqhc", weights, v)

        if self.gate:
            gate_values = torch.einsum("bqa,ahc->bqhc", Q, self.g_weights) + self.g_bias
            # BUGFIX: torch.sigmoid takes no `dim` argument; the previous
            # call `torch.sigmoid(gate_values, dim=-1)` raised a TypeError.
            gate_values = torch.sigmoid(gate_values)
            weighted_avg = weighted_avg * gate_values

        output = (
            torch.einsum("bqhc,hco->bqo", weighted_avg, self.o_weights) + self.o_bias
        )
        return output

    def _check_inputs(self, Q, K, V, bias, mask):
        """Validate shapes/dtypes of forward() inputs, raising ValueError on mismatch."""
        batch_size_q, num_queries, d_q_in = Q.size()
        batch_size_k, num_keys, d_k_in = K.size()
        batch_size_v, num_values, d_v_in = V.size()

        if d_q_in != self.d_model:
            raise ValueError(
                f"Dimension of Q tensor needs to be (batch_size, number_queries, d_model)"
            )

        if d_k_in != self.d_model:
            raise ValueError(
                f"Dimension of K tensor needs to be (batch_size, number_keys, d_model)"
            )

        if d_v_in != self.d_model:
            raise ValueError(
                f"Dimension of V tensor needs to be (batch_size, number_values, d_model)"
            )

        if num_keys != num_values:
            raise ValueError(f"Number of keys needs to match number of values passed")

        if (batch_size_q != batch_size_k) or (batch_size_k != batch_size_v):
            raise ValueError(
                f"Found batch size mismatch among inputs, all tensors must agree in size of dimension 0"
            )

        if bias is not None:
            if (bias.dim() != 3) and (bias.dim() != 4):
                raise ValueError(
                    f"Bias specified but dimension mismatched: passed {bias.dim()}-dimensional tensor but should be 3-dimensional"
                    f"of shape (n_head, num_queries, num_keys) or 4-dimensional of shape (batch_size, n_head, num_queries, num_keys)"
                )
            if bias.dim() == 3:
                n_head_b, num_queries_b, num_keys_b = bias.size()
                if n_head_b != self.n_head:
                    raise ValueError(
                        f"Bias specified but number of heads (dim of axis=0) does not match number of heads: {self.n_head}"
                    )
                if num_queries_b != num_queries:
                    raise ValueError(
                        f"Bias specified but number of queries (dim of axis=1) does not match number of queries given in Q tensor"
                    )
                if num_keys_b != num_keys:
                    raise ValueError(
                        f"Bias specified but number of keys (dim of axis=2) does not match number of queries given in K tensor "
                        f"(dimenson of axis=1)"
                    )
            elif bias.dim() == 4:
                # BUGFIX: this branch previously re-checked `bias.dim() == 3`
                # (always False here), so 4-dimensional biases were never
                # validated. Unpack the batched shape and validate it.
                n_batch_b, n_head_b, num_queries_b, num_keys_b = bias.size()
                if n_head_b != self.n_head:
                    raise ValueError(
                        f"Bias specified but number of heads (dim of axis=1) does not match number of heads: {self.n_head}"
                    )
                if num_queries_b != num_queries:
                    raise ValueError(
                        f"Bias specified but number of queries (dim of axis=2) does not match number of queries given in Q tensor"
                    )
                if num_keys_b != num_keys:
                    raise ValueError(
                        f"Bias specified but number of keys (dim of axis=3) does not match number of queries given in K tensor "
                        f"(dimenson of axis=1)"
                    )

        if mask is not None:
            if mask.dtype != torch.bool:
                raise ValueError(
                    f"Mask specified but not given by correct dtype, should be torch.bool but found {mask.dtype}"
                )
            if mask.dim() != 4:
                raise ValueError(
                    f"Mask specified but dimension mismatched: passed {mask.dim()}-dimensional tensor but should be 4-dimensional"
                    f"of shape (batch_size, n_head, num_queries, num_keys)"
                )
            batch_size_b, _, num_queries_b, num_keys_b = mask.size()
            if (num_queries_b != num_queries) and (num_queries_b != 1):
                raise ValueError(
                    f"Bias specified but number of queries (dim of axis=2) does not match number of queries given in Q tensor"
                )
            if (num_keys_b != num_keys) and (num_keys_b != 1):
                raise ValueError(
                    f"Bias specified but number of keys (dim of axis=3) does not match number of queries given in K tensor "
                    f"(dimenson of axis=1)"
                )
chroma/chroma/layers/basic.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import math
16
+
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+
22
+ from chroma.layers.norm import MaskedBatchNorm1d
23
+
24
+
25
class NoOp(nn.Module):
    """Identity module: returns its input unchanged, ignoring extra kwargs.

    Serves as a structural placeholder where an nn.Module is required.

    Inputs:
        x (any)

    Outputs:
        x (any), unchanged
    """

    def __init__(self):
        super().__init__()

    def forward(self, x, **kwargs):
        return x
40
+
41
+
42
class Transpose(nn.Module):
    """An nn.Module wrapping ``torch.transpose``.

    Args:
        d1 (int): the first of the two dimensions to swap
        d2 (int): the second of the two dimensions to swap

    Inputs:
        x (torch.tensor)

    Outputs:
        y (torch.tensor): ``y = x.transpose(d1, d2)``
    """

    def __init__(self, d1=1, d2=2):
        super().__init__()
        self.d1 = d1
        self.d2 = d2

    def forward(self, x):
        return torch.transpose(x, self.d1, self.d2)
63
+
64
+
65
class Unsqueeze(nn.Module):
    """An nn.Module wrapping ``torch.unsqueeze``.

    Args:
        dim (int): the dimension at which to insert a size-1 axis

    Inputs:
        x (torch.tensor)

    Outputs:
        y (torch.tensor): ``y = x.unsqueeze(dim)``
    """

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        return torch.unsqueeze(x, self.dim)
84
+
85
+
86
class OneHot(nn.Module):
    """An nn.Module wrapping ``F.one_hot``.

    Args:
        n_tokens (int): alphabet size of the input sequences

    Inputs:
        x (torch.LongTensor): of size (batch_size, *)

    Outputs:
        y (torch.Tensor): of size (batch_size, *, n_tokens), on x's device
    """

    def __init__(self, n_tokens):
        super().__init__()
        self.n_tokens = n_tokens

    def forward(self, x):
        return F.one_hot(x, num_classes=self.n_tokens)
105
+
106
+
107
class MeanEmbedding(nn.Module):
    """Wrapper around ``nn.Embedding`` that also accepts one-hot-like inputs.

    A 2-D long tensor is embedded by ordinary table lookup. A 3-D tensor is
    treated as a distribution (or log-PMF) over tokens and embedded by matrix
    multiplication with the embedding weight; with ``use_softmax=True`` the
    last dimension is softmax-normalized first. For an exact one-hot input the
    two paths produce the same embedding.

    Args:
        embedding (nn.Embedding): embedding to wrap
        use_softmax (bool): whether to softmax the last dim of 3-D inputs

    Inputs:
        x (torch.tensor): (batch_size, sequence_length) long tensor, or
            (batch_size, sequence_length, number_tokens) float tensor

    Outputs:
        y (torch.tensor): (batch_size, sequence_length, embedding_dimension)
    """

    def __init__(self, embedding, use_softmax=True):
        super(MeanEmbedding, self).__init__()
        self.embedding = embedding
        self.use_softmax = use_softmax
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        rank = len(x.shape)
        if rank == 2:
            return self.embedding(x)
        if rank == 3:
            weights = self.softmax(x) if self.use_softmax else x
            return weights @ self.embedding.weight
        raise (NotImplementedError)
142
+
143
+
144
class PeriodicPositionalEncoding(nn.Module):
    """Sinusoidal positional encoding, adapted from 'The Annotated Transformer'
    http://nlp.seas.harvard.edu/2018/04/03/attention.html

    Even feature channels carry sines and odd channels carry cosines, with
    geometrically spaced periods; the encoding is added to the input.

    Args:
        d_model (int): input and output dimension for the layer
        max_seq_len (int): maximum allowed sequence length
        dropout (float): dropout rate applied to the summed output

    Inputs:
        x (torch.tensor): of size (batch_size, sequence_length, d_model)

    Outputs:
        y (torch.tensor): of size (batch_size, sequence_length, d_model)
    """

    def __init__(self, d_model, max_seq_len=4000, dropout=0.0):
        super(PeriodicPositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        positions = torch.arange(0.0, max_seq_len).unsqueeze(1)
        # Geometric frequency ladder from 1 down to 1/10000.
        inv_freq = torch.exp(
            torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model)
        )
        table = torch.zeros(max_seq_len, d_model)
        table[:, 0::2] = torch.sin(positions * inv_freq)
        table[:, 1::2] = torch.cos(positions * inv_freq)
        # Stored as a buffer: moves with .to()/.cuda() but is not trained.
        self.register_buffer("pe", table.unsqueeze(0))

    def forward(self, x):
        return self.dropout(x + self.pe[:, : x.size(1)])
179
+
180
+
181
class PositionWiseFeedForward(nn.Module):
    """Position-wise feed-forward block built from 1x1 convolutions; a legacy
    Transformer building block (not performance-optimized).

    Args:
        d_model (int): input and output dimension for the layer
        d_hidden (int): hidden dimension of the feed-forward sublayer
        dropout (float): dropout rate applied to the output

    Inputs:
        x (torch.tensor): of size (batch_size, sequence_length, d_model)

    Outputs:
        y (torch.tensor): of size (batch_size, sequence_length, d_model)
    """

    def __init__(self, d_model, d_hidden, dropout=0.1):
        super(PositionWiseFeedForward, self).__init__()
        self.activation = nn.ReLU()
        self.linear1 = nn.Conv1d(d_model, d_hidden, 1)
        self.linear2 = nn.Conv1d(d_hidden, d_model, 1)
        self.dropout = nn.Dropout(p=dropout)

    def reset_parameters(self):
        self.linear1.reset_parameters()
        self.linear2.reset_parameters()

    def forward(self, x):
        # Conv1d is channels-first, so transpose in and out of (B, C, L).
        h = self.linear1(x.transpose(1, 2))
        h = self.activation(h)
        h = self.linear2(h).transpose(1, 2)
        return self.dropout(h)
210
+
211
+
212
class DropNormLin(nn.Module):
    """Apply a linear layer, then normalization, activation, and dropout.

    Args:
        in_features (int): input dimension
        out_features (int): output dimension
        norm_type (str): ``'ln'`` for layer normalization, ``'bn'`` for masked
            batch normalization; any other value skips normalization.
        dropout (float): dropout probability
        actn (nn.Module, optional): activation module; defaults to a fresh
            ``nn.ReLU()`` per instance.

    Input:
        x (torch.tensor): of size (batch_size, sequence_length, in_features)
        input_mask (torch.tensor): of size (batch_size, 1, sequence_length),
            used only by the masked batch-norm path (optional)

    Output:
        y (torch.tensor): of size (batch_size, sequence_length, out_features)
    """

    def __init__(
        self, in_features, out_features, norm_type="ln", dropout=0.0, actn=None
    ):
        super(DropNormLin, self).__init__()
        self.linear = nn.Linear(in_features, out_features)
        # Remember whether the norm layer is the channels-first masked variant,
        # so forward() does not need an isinstance check.
        self._masked_norm = norm_type == "bn"
        if norm_type == "ln":
            self.norm_layer = nn.LayerNorm(out_features)
        elif norm_type == "bn":
            self.norm_layer = MaskedBatchNorm1d(out_features)
        else:
            self.norm_layer = NoOp()
        self.dropout = nn.Dropout(p=dropout)
        # BUGFIX: the old default `actn=nn.ReLU()` was a mutable default
        # argument — a single module instance created at class-definition time
        # and shared by every DropNormLin. Build one per instance instead.
        self.actn = nn.ReLU() if actn is None else actn

    def forward(self, x, input_mask=None):
        h = self.linear(x)
        if self._masked_norm:
            # MaskedBatchNorm1d expects channels-first input, so transpose
            # around the call.
            h = self.norm_layer(h.transpose(1, 2), input_mask=input_mask).transpose(
                1, 2
            )
        else:
            h = self.norm_layer(h)
        return self.dropout(self.actn(h))
253
+
254
+
255
class ResidualLinearLayer(nn.Module):
    """A simple residual block: ``x + norm(relu(linear(x)))``, where the layer
    norm is optional.

    Args:
        d_model (int): model dimension
        use_norm (bool, *optional*): apply a LayerNorm to the branch before
            the residual addition. Default ``True``.
    """

    def __init__(self, d_model, use_norm=True):
        super(ResidualLinearLayer, self).__init__()
        self.linear = nn.Linear(d_model, d_model)
        self.ReLU = nn.ReLU()
        self.use_norm = use_norm
        # The norm module is always constructed (keeps the parameter set
        # stable) but only applied when use_norm is True.
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        branch = self.ReLU(self.linear(x))
        if self.use_norm:
            branch = self.norm(branch)
        return x + branch
276
+
277
+
278
class TriangleMultiplication(nn.Module):
    """Triangle multiplicative update, as defined in Jumper et al. (2021).

    Args:
        d_model (int): channel dimension of the pair representation
        mode (str): must be 'outgoing' (Algorithm 11) or 'incoming'
            (Algorithm 12)

    Inputs:
        X (torch.tensor): pair representation of size (batch, nres, nres, channels)
        mask (torch.tensor): optional tensor of dtype ``torch.bool``
            broadcastable to (batch, nres, nres, channels)

    Outputs:
        Y (torch.tensor): pair representation of size (batch, nres, nres, channels)
    """

    def __init__(self, d_model=512, mode="outgoing"):
        super().__init__()
        self.mode = mode
        assert self.mode in ["outgoing", "incoming"]
        # Contraction over the shared index k: rows for 'outgoing',
        # columns for 'incoming'.
        self.equation = (
            "bikc,bjkc->bijc" if self.mode == "outgoing" else "bkjc,bkic->bijc"
        )
        self.layer_norm = nn.LayerNorm(d_model)
        self.left_edge_mlp = nn.Sequential(
            nn.Linear(d_model, d_model), nn.Sigmoid(), nn.Linear(d_model, d_model)
        )
        self.right_edge_mlp = nn.Sequential(
            nn.Linear(d_model, d_model), nn.Sigmoid(), nn.Linear(d_model, d_model)
        )
        self.skip = nn.Sequential(nn.Linear(d_model, d_model), nn.Sigmoid())
        self.combine = nn.Sequential(nn.LayerNorm(d_model), nn.Linear(d_model, d_model))

    def forward(self, X, mask=None):
        normed = self.layer_norm(X)

        left = self.left_edge_mlp(normed)
        right = self.right_edge_mlp(normed)
        gate = self.skip(normed)

        # Masked edges contribute zero to the triangle contraction.
        if mask is not None:
            left = left.masked_fill(~mask, 0.0)
            right = right.masked_fill(~mask, 0.0)

        mixed = torch.einsum(self.equation, left, right)
        return self.combine(mixed) * gate
325
+
326
+
327
class NodeProduct(nn.Module):
    """Build pair (edge) features from per-node features, like Alg. 10 in
    Jumper et al. (2021) but for single-sequence inputs (no mean over an MSA
    dimension).

    Args:
        d_in (int): dimension of node embeddings (inputs)
        d_out (int): dimension of edge embeddings (outputs)

    Inputs:
        node_features (torch.tensor): of size (batch_size, nres, d_in)
        node_mask (torch.tensor): optional bool mask of size (batch_size, nres)
        edge_mask (torch.tensor): optional bool mask of size (batch_size, nres, nres)

    Outputs:
        edge_features (torch.tensor): of size (batch_size, nres, nres, d_out)
    """

    def __init__(self, d_in, d_out):
        super().__init__()
        self.layer_norm = nn.LayerNorm(d_in)
        self.left_lin = nn.Linear(d_in, d_in)
        self.right_lin = nn.Linear(d_in, d_in)
        self.edge_lin = nn.Linear(2 * d_in, d_out)

    def forward(self, node_features, node_mask=None, edge_mask=None):
        n_res = node_features.size(1)

        normed = self.layer_norm(node_features)
        left = self.left_lin(normed)
        right = self.right_lin(normed)

        # Zero out features of invalid nodes before pairing.
        if node_mask is not None:
            keep = node_mask[:, :, None]
            left = left.masked_fill(~keep, 0.0)
            right = right.masked_fill(~keep, 0.0)

        # Edge (i, j) is the concatenation [left[j], right[i]].
        left = left[:, None, :, :].repeat(1, n_res, 1, 1)
        right = right[:, :, None, :].repeat(1, 1, n_res, 1)
        edge_features = self.edge_lin(torch.cat([left, right], dim=-1))

        if edge_mask is not None:
            edge_features = edge_features.masked_fill(~edge_mask[:, :, :, None], 0.0)

        return edge_features
374
+
375
+
376
class FourierFeaturization(nn.Module):
    """Fourier featurization of low-dimensional (usually spatial) inputs, as
    described in https://arxiv.org/abs/2006.10739, optionally with trainable
    frequencies as in https://arxiv.org/abs/2106.02795.

    Args:
        d_input (int): dimension of inputs
        d_model (int): dimension of outputs (must be even)
        trainable (bool): learn the frequency matrix instead of freezing it
        scale (float): when not trainable, sets the scale of the random
            feature periods (this matters — tune it; see the reference)

    Inputs:
        input (torch.tensor): of size (batch_size, *, d_input)

    Outputs:
        output (torch.tensor): of size (batch_size, *, d_model)
    """

    def __init__(self, d_input, d_model, trainable=False, scale=1.0):
        super().__init__()
        self.scale = scale

        # Half the output channels carry cosines and half sines, so d_model
        # must split evenly in two.
        if d_model % 2 != 0:
            raise ValueError(
                "d_model needs to be even for this featurization, try again!"
            )

        freqs = 2 * math.pi * scale * torch.randn(d_input, d_model // 2)
        self.trainable = trainable
        if trainable:
            self.register_parameter("B", torch.nn.Parameter(freqs))
        else:
            self.register_buffer("B", freqs)

    def forward(self, inputs):
        projected = inputs @ self.B
        return torch.cat([projected.cos(), projected.sin()], -1)
413
+
414
+
415
class PositionalEncoding(nn.Module):
    """Axis-aligned positional encodings with log-linearly spaced periods.

    Each input channel is expanded into cos/sin pairs at frequencies whose
    periods are log-linearly spaced between ``period_range`` bounds
    (inclusive).

    Args:
        d_model (int): dimension of outputs (must be divisible by 2*d_input)
        d_input (int): dimension of inputs
        period_range (tuple of floats): min and max periods for the
            frequency components

    Inputs:
        input (torch.tensor): of size (..., d_input)

    Outputs:
        output (torch.tensor): of size (..., d_model)
    """

    def __init__(self, d_model, d_input=1, period_range=(1.0, 1000.0)):
        super().__init__()

        if d_model % (2 * d_input) != 0:
            raise ValueError(
                "d_model needs to be divisible by 2*d_input for this featurization, "
                f"but got {d_model} versus {d_input}"
            )

        n_freqs = d_model // (2 * d_input)
        periods = torch.logspace(
            math.log10(period_range[0]),
            math.log10(period_range[1]),
            n_freqs,
            base=10.0,
        )
        # Angular frequencies, fixed (buffer, not trained).
        self.register_buffer("w", 2 * math.pi / periods)

    def forward(self, inputs):
        lead_shape = list(inputs.shape)[:-1]
        # Broadcast: (..., d_input, 1) * (1, ..., 1, n_freqs)
        w = self.w.reshape(len(lead_shape) * [1] + [1, -1])
        phases = w * inputs[..., None]
        return torch.cat([phases.cos(), phases.sin()], -1).reshape(lead_shape + [-1])
455
+
456
+
457
class MaybeOnehotEmbedding(nn.Embedding):
    """Embedding layer accepting either int-encoded LongTensors or
    one-hot/soft FloatTensors.

    Integer inputs fall through to the ordinary ``nn.Embedding`` lookup.
    Floating-point inputs are matrix-multiplied with the embedding weight,
    so an exact one-hot row reproduces the corresponding table row.
    """

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not x.dtype.is_floating_point:
            return super().forward(x)
        # One-hot / distribution input: blend embedding rows by matmul.
        return x @ self.weight
chroma/chroma/layers/complexity.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Layers for computing sequence complexities.
16
+ """
17
+
18
+ import numpy as np
19
+ import torch
20
+ import torch.nn.functional as F
21
+
22
+ from chroma.constants import AA20
23
+ from chroma.layers.graph import collect_neighbors
24
+
25
+
26
def compositions(S: torch.Tensor, C: torch.LongTensor, w: int = 30):
    """Compute local amino-acid compositions in a window around each residue.

    Windows are restricted to residues on the same chain as the center
    residue (per the chain map `C`) and clipped at sequence boundaries.

    Args:
        S (torch.Tensor): Sequence tensor with shape `(num_batch, num_residues)`
            (long) or `(num_batch, num_residues, num_alphabet)` (float).
        C (torch.LongTensor): Chain map with shape `(num_batch, num_residues)`;
            entries <= 0 mark invalid positions.
        w (int, optional): Window size.

    Returns:
        P (torch.Tensor): Local compositions (normalized counts) with shape
            `(num_batch, num_residues, num_alphabet)`.
        N (torch.Tensor): Local counts with shape
            `(num_batch, num_residues, num_alphabet)`.
        edge_idx (torch.LongTensor): Window member indices with shape
            `(1, num_residues, w)` (clamped to valid range).
        mask_i (torch.Tensor): Per-residue validity mask with shape
            `(num_batch, num_residues)`.
        mask_ij (torch.Tensor): Per-(residue, window-offset) validity mask
            with shape `(num_batch, num_residues, w)`.
    """
    device = S.device  # NOTE(review): unused; kept for interface stability
    Q = len(AA20)
    mask_i = (C > 0).float()
    # Promote index-encoded sequences to one-hot so both input forms share
    # the same code path below.
    if len(S.shape) == 2:
        S = F.one_hot(S, Q)

    # Build neighborhoods and masks: offsets kx cover a centered window.
    S_onehot = mask_i[..., None] * S
    kx = torch.arange(w, device=S.device) - w // 2
    edge_idx = (
        torch.arange(S.shape[1], device=S.device)[None, :, None] + kx[None, None, :]
    )
    # NOTE(review): `edge_idx > 0` also excludes valid position 0 from every
    # window; `edge_idx >= 0` looks intended — confirm before changing.
    mask_ij = (edge_idx > 0) & (edge_idx < S.shape[1])
    edge_idx = edge_idx.clamp(min=0, max=S.shape[1] - 1)
    # Restrict windows to same-chain, valid residues.
    C_i = C[..., None]
    C_j = collect_neighbors(C_i, edge_idx)[..., 0]
    mask_ij = (mask_ij & C_j.eq(C_i) & (C_i > 0) & (C_j > 0)).float()

    # Sum neighborhood composition over the window dimension.
    S_j = mask_ij[..., None] * collect_neighbors(S_onehot, edge_idx)
    N = S_j.sum(2)

    num_N = N.sum(-1, keepdims=True)
    P = N / (num_N + 1e-5)
    # A residue is only valid if its window captured at least one residue.
    mask_i = ((num_N[..., 0] > 0) & (C > 0)).float()
    mask_ij = mask_i[..., None] * mask_ij
    return P, N, edge_idx, mask_i, mask_ij
70
+
71
+
72
def complexity_lcp(
    S: torch.LongTensor,
    C: torch.LongTensor,
    w: int = 30,
    entropy_min: float = 2.32,
    method: str = "naive",
    differentiable=True,
    eps: float = 1e-5,
    min_coverage=0.9,
    # entropy_min: float = 2.52,
    # method = "chao-shen"
) -> torch.Tensor:
    """Compute the Local Composition Perplexity (LCP) penalty per batch member.

    For each residue, the entropy of the local window composition is
    exponentiated into a perplexity; windows whose perplexity falls below
    ``exp(entropy_min)`` are penalized quadratically, others contribute zero.

    Args:
        S (torch.Tensor): Sequence tensor with shape `(num_batch, num_residues)`
            (index tensor) or `(num_batch, num_residues, num_alphabet)`
            (one-hot or soft sequence).
        C (torch.LongTensor): Chain map with shape `(num_batch, num_residues)`.
        w (int): Window size (shrunk to the sequence length when shorter).
        entropy_min (float): Entropy threshold in nats; windows below
            perplexity ``exp(entropy_min)`` are penalized.
        method (str): Entropy estimator name passed to ``estimate_entropy``.
        differentiable (bool): If True and ``S`` is one-hot/soft, attach a
            straight-through-style gradient computed from per-mutation
            entropies (the forward value is unchanged).
        eps (float): Small number for numerical stability.
        min_coverage (float): Minimum fraction of the window that must be
            covered by valid same-chain residues for the window to count.

    Returns:
        U (torch.Tensor): Complexity penalties with shape `(num_batch,)`.
    """

    # adjust window size based on sequence length
    if S.shape[1] < w:
        w = S.shape[1]

    P, N, edge_idx, mask_i, mask_ij = compositions(S, C, w)

    # Only count windows with `min_coverage`
    # NOTE(review): min_N is unused; the comparison below recomputes it.
    min_N = int(min_coverage * w)
    mask_coverage = N.sum(-1) > int(min_coverage * w)

    H = estimate_entropy(N, method=method)
    # Squared perplexity shortfall below exp(entropy_min); clamp(max=0)
    # zeroes windows that already exceed the threshold.
    U = mask_coverage * (torch.exp(H) - np.exp(entropy_min)).clamp(max=0).square()

    # Compute entropy as a function of perturbed counts
    if differentiable and len(S.shape) == 3:
        # Compute how a mutation changes entropy for each neighbor: remove
        # the current residue's contribution and add each candidate token.
        N_neighbors = collect_neighbors(N, edge_idx)
        mask_coverage_j = collect_neighbors(mask_coverage[..., None], edge_idx)
        N_ij = (N_neighbors - S[:, :, None, :])[..., None, :] + torch.eye(
            N.shape[-1], device=N.device
        )[None, None, None, ...]
        N_ij = N_ij.clamp(min=0)
        H_ij = estimate_entropy(N_ij, method=method)
        U_ij = (torch.exp(H_ij) - np.exp(entropy_min)).clamp(max=0).square()
        U_ij = mask_ij[..., None] * mask_coverage_j * U_ij
        # Straight-through trick: the forward value stays U.detach(), while
        # gradients flow into the soft sequence S via U_differentiable.
        U_differentiable = (U_ij.detach() * S[:, :, None, :]).sum([-1, -2])
        U = U.detach() + U_differentiable - U_differentiable.detach()

    U = (mask_i * U).sum(1)
    return U
129
+
130
+
131
def complexity_scores_lcp_t(
    t,
    S: torch.LongTensor,
    C: torch.LongTensor,
    idx: torch.LongTensor,
    edge_idx_t: torch.LongTensor,
    mask_ij_t: torch.Tensor,
    w: int = 30,
    entropy_min: float = 2.515,
    eps: float = 1e-5,
    method: str = "chao-shen",
) -> torch.Tensor:
    """Compute local LCP scores at decoding step ``t`` for autoregressive decoding.

    Scores every candidate next token at position ``t`` by the windowed
    composition perplexity that would result from extending the sequence with
    that token; scores are non-positive (0 means no complexity penalty).

    Args:
        t: Current decoding position (index into the residue axis).
        S (torch.LongTensor): Sequence tokens `(num_batch, num_residues)`.
        C (torch.LongTensor): Chain map `(num_batch, num_residues)`.
        idx (torch.LongTensor): Residue position indices `(num_batch, num_residues)`.
        edge_idx_t (torch.LongTensor): Neighbor indices at step ``t``.
        mask_ij_t (torch.Tensor): Neighbor validity mask at step ``t``.
        w (int): Window size.
        entropy_min (float): Entropy threshold in nats.
        eps (float): Unused here; kept for interface symmetry with
            ``complexity_lcp``.
        method (str): Entropy estimator name passed to ``estimate_entropy``.

    Returns:
        U (torch.Tensor): Per-candidate-token scores (negative squared
            perplexity shortfall).
    """
    Q = len(AA20)
    O = F.one_hot(S, Q)
    O_j = collect_neighbors(O, edge_idx_t)
    idx_i = idx[:, t, None]
    C_i = C[:, t, None]
    idx_j = collect_neighbors(idx[..., None], edge_idx_t)[..., 0]
    C_j = collect_neighbors(C[..., None], edge_idx_t)[..., 0]

    # Sum valid neighbor counts: neighbors within w/2 positions on the same chain.
    is_near = (idx_i - idx_j).abs() <= w / 2
    same_chain = C_i == C_j
    valid_ij_t = (is_near * same_chain * (mask_ij_t > 0)).float()[..., None]
    N_k = (valid_ij_t * O_j).sum(-2)

    # Compute counts under all possible extensions (add one of each token).
    N_k = N_k[:, :, None, :] + torch.eye(Q, device=N_k.device)[None, None, ...]

    H = estimate_entropy(N_k, method=method)
    # Negative squared shortfall below exp(entropy_min); 0 above threshold.
    U = -(torch.exp(H) - np.exp(entropy_min)).clamp(max=0).square()
    return U
164
+
165
+
166
def estimate_entropy(
    N: torch.Tensor, method: str = "chao-shen", eps: float = 1e-11
) -> torch.Tensor:
    """Estimate entropy (in nats) from a tensor of counts.

    Supported methods:
        * "chao-shen": coverage-adjusted estimator of Chao & Shen (2003).
        * "miller-maddow": plug-in estimator with Miller-Madow bias correction.
        * "laplace": plug-in estimator with 1/num_bins pseudocounts.
        * anything else: naive plug-in estimator.

    Args:
        N (torch.Tensor): Tensor of counts with shape `(..., num_bins)`.
        method (str): Estimator name (see above).
        eps (float): Small number for numerical stability.

    Returns:
        H (torch.Tensor): Estimated entropy with shape `(...)`.
    """
    N = N.float()
    totals = N.sum(-1, keepdims=True)
    P = N / (totals + eps)

    if method == "chao-shen":
        # Estimate sample coverage from singleton bins, shrink frequencies,
        # and correct with Horvitz-Thompson inclusion probabilities.
        singletons = N.long().eq(1).sum(-1, keepdims=True).float()
        coverage = 1.0 - singletons / (totals + eps)
        P_adjusted = coverage * P
        P_inclusion = (1.0 - (1.0 - P_adjusted) ** totals).clamp(min=eps)
        return -(P_adjusted * torch.log(P_adjusted.clamp(min=eps)) / P_inclusion).sum(
            -1
        )

    if method == "miller-maddow":
        occupied = (N > 0).float().sum(-1)
        bias = (occupied - 1) / (2 * totals[..., 0] + eps)
        return -(P * torch.log(P + eps)).sum(-1) + bias

    if method == "laplace":
        # Add a flat 1/num_bins pseudocount, then use the plug-in estimator.
        N = N + 1 / N.shape[-1]
        totals = N.sum(-1, keepdims=True)
        P = N / (totals + eps)
        return -(P * torch.log(P)).sum(-1)

    # Default: naive plug-in estimator.
    return -(P * torch.log(P + eps)).sum(-1)
chroma/chroma/layers/conv.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import platform
16
+
17
+ import torch
18
+ import torch.nn.functional as F
19
+
20
+ MACHINE = platform.machine()
21
+ """
22
+ 一维线性衰减滤波器
23
+ """
24
+
25
+ def filter1D_linear_decay(Z, B):
26
+ """Apply a low-pass filter with batch-heterogeneous coefficients.
27
+
28
+ Computes `x_i = z_i + b * x_{i-1}` where `b` varies per batch member.
29
+
30
+ Args:
31
+ Z (torch.Tensor): Batch of one-dimensional signals with shape `(N, W)`.
32
+ B (torch.Tensor): Batch of coefficients with shape `(N)`.
33
+
34
+ Returns:
35
+ X (torch.Tensor): Result of applying linear recurrence with shape `(N, W)`.
36
+ """
37
+
38
+ # Build filter coefficients as powers of B
39
+ N, W = Z.shape
40
+ k = (W - 1) - torch.arange(W, device=Z.device)
41
+ kernel = B[:, None, None] ** k[None, None, :]
42
+
43
+ # Pad on left to convolve from backwards in time
44
+ Z_pad = F.pad(Z, (W - 1, 0))[None, ...]
45
+
46
+ # Group convolution can effectively do one filter per batch
47
+ while True:
48
+ X = F.conv1d(Z_pad, kernel, stride=1, padding=0, groups=N)[0, :, :]
49
+ # on arm64 (M1 Mac) this convolution erroneously sometimes produces NaNs
50
+ if (
51
+ (MACHINE == "arm64")
52
+ and torch.isnan(X).any()
53
+ and (not torch.isnan(Z_pad).any())
54
+ and (not torch.isnan(kernel).any())
55
+ ):
56
+ continue
57
+ break
58
+ return X
chroma/chroma/layers/graph.py ADDED
@@ -0,0 +1,1126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Layers for building graph neural networks.
16
+
17
+ This module contains layers for building neural networks that can process
18
+ graph-structured data. The internal representations of these layers
19
+ are node and edge embeddings.
20
+ """
21
+
22
+ from typing import Callable, List, Optional, Tuple
23
+
24
+ import torch
25
+ import torch.nn as nn
26
+ from torch.utils.checkpoint import checkpoint
27
+ from tqdm.autonotebook import tqdm
28
+
29
+ from chroma.layers.attention import Attention
30
+
31
+
32
class GraphNN(nn.Module):
    """Graph neural network with optional edge updates.

    Args:
        num_layers (int): Number of layers.
        dim_nodes (int): Hidden dimension of node tensor.
        dim_edges (int): Hidden dimension of edge tensor.
        dropout (float): Dropout rate.
        node_mlp_layers (int): Node update function, number of hidden layers.
            Default is 1.
        node_mlp_dim (int): Node update function, hidden dimension.
            Default is to match MLP output dimension.
        update_edge (Boolean): Include an edge-update step. Default: True
        edge_mlp_layers (int): Edge update function, number of hidden layers.
            Default is 1.
        edge_mlp_dim (int): Edge update function, hidden dimension.
            Default is to match MLP output dimension.
        mlp_activation (str): MLP nonlinearity.
            `'relu'`: Rectified linear unit.
            `'softplus'`: Softplus.
        norm (str): Which normalization function to apply between layers.
            `'transformer'`: Default layernorm
            `'layer'`: Masked Layer norm with shape (input.shape[1:])
            `'instance'`: Masked Instance norm
        scale (float): Scaling factor of edge input when updating node (default=1.0)
        skip_connect_input (bool): If True, add the initial node/edge embeddings
            before every layer and subtract them back out afterwards (a long
            skip connection around each layer). Default is False.
        attentional (bool): If True, use attention for message aggregation function
            instead of a sum. Default is False.
        num_attention_heads (int): Number of attention heads (if attentional) to use.
            Default is 4.
        checkpoint_gradients (bool): If True, wrap each layer in gradient
            checkpointing to save memory during training. Default is False.

    Inputs:
        node_h (torch.Tensor): Node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_i (tensor, optional): Node mask with shape `(num_batch, num_nodes)`
        mask_ij (tensor, optional): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`

    Outputs:
        node_h_out (torch.Tensor): Updated node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h_out (torch.Tensor): Updated edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
    """

    def __init__(
        self,
        num_layers: int,
        dim_nodes: int,
        dim_edges: int,
        node_mlp_layers: int = 1,
        node_mlp_dim: Optional[int] = None,
        edge_update: bool = True,
        edge_mlp_layers: int = 1,
        edge_mlp_dim: Optional[int] = None,
        mlp_activation: str = "relu",
        dropout: float = 0.0,
        norm: str = "transformer",
        scale: float = 1.0,
        skip_connect_input: bool = False,
        attentional: bool = False,
        num_attention_heads: int = 4,
        checkpoint_gradients: bool = False,
    ):
        super(GraphNN, self).__init__()
        # Long skip connection: inject the initial embeddings around each layer
        self.skip_connect_input = skip_connect_input
        # Gradient checkpointing trades memory for compute: instead of storing
        # every intermediate activation for the backward pass, activations are
        # kept only at checkpoint boundaries and recomputed when needed.
        self.checkpoint_gradients = checkpoint_gradients
        self.layers = nn.ModuleList(
            [
                GraphLayer(
                    dim_nodes=dim_nodes,
                    dim_edges=dim_edges,
                    node_mlp_layers=node_mlp_layers,
                    node_mlp_dim=node_mlp_dim,
                    edge_update=edge_update,
                    edge_mlp_layers=edge_mlp_layers,
                    edge_mlp_dim=edge_mlp_dim,
                    mlp_activation=mlp_activation,
                    dropout=dropout,
                    norm=norm,
                    scale=scale,
                    attentional=attentional,
                    num_attention_heads=num_attention_heads,
                )
                for _ in range(num_layers)
            ]
        )

    def forward(
        self,
        node_h: torch.Tensor,
        edge_h: torch.Tensor,
        edge_idx: torch.LongTensor,
        mask_i: Optional[torch.Tensor] = None,
        mask_ij: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Run every layer sequentially
        node_h_init = node_h
        edge_h_init = edge_h
        for i, layer in enumerate(self.layers):
            # Optionally add the initial embeddings before each layer ...
            if self.skip_connect_input:
                node_h = node_h + node_h_init
                edge_h = edge_h + edge_h_init

            # Update edge and node
            node_h, edge_h = self.checkpoint(
                layer, node_h, edge_h, edge_idx, mask_i, mask_ij
            )

            # ... and subtract them back out afterwards
            if self.skip_connect_input:
                node_h = node_h - node_h_init
                edge_h = edge_h - edge_h_init

        # If mask was provided, apply it
        if mask_i is not None:
            node_h = node_h * (mask_i.unsqueeze(-1) != 0).type(torch.float32)
        if mask_ij is not None:
            edge_h = edge_h * (mask_ij.unsqueeze(-1) != 0).type(torch.float32)
        return node_h, edge_h

    def checkpoint(self, layer, *args):
        # Route through torch gradient checkpointing only when enabled
        if self.checkpoint_gradients:
            return checkpoint(layer, *args)
        else:
            return layer(*args)

    def sequential(
        self,
        tensors: dict,
        pre_step_function: Callable = None,
        post_step_function: Callable = None,
    ) -> dict:
        """Decode the GNN sequentially along the node index `t`, with callbacks.

        Args:
            tensors (dict): Initial set of state tensors. At minimum this should
                include the arguments to `forward`, namely `node_h`, `edge_h`,
                `edge_idx`, `mask_i`, and `mask_ij`.
            pre_step_function (function, optional): Callback function that is
                optionally applied to `tensors` before each sequential GNN step as
                `tensors_new = pre_step_function(t, pre_step_function)` where `t` is
                the node index being updated. It should update elements of the
                `tensors` dictionary, and it can access and update the intermediate
                GNN state cache via the keyed lists of tensors in `node_h_cache` and
                `edge_h_cache`.
            post_step_function (function, optional): Same as `pre_step_function`, but
                optionally applied after each sequential GNN step.

        Returns:
            tensors (dict): Processed set of tensors.
        """

        # Initialize the state cache
        tensors["node_h_cache"], tensors["edge_h_cache"] = self.init_steps(
            tensors["node_h"], tensors["edge_h"]
        )

        # Sequential iteration over node positions
        num_steps = tensors["node_h"].size(1)
        for t in tqdm(range(num_steps), desc="Sequential decoding"):
            if pre_step_function is not None:
                tensors = pre_step_function(t, tensors)

            tensors["node_h_cache"], tensors["edge_h_cache"] = self.step(
                t,
                tensors["node_h_cache"],
                tensors["edge_h_cache"],
                tensors["edge_idx"],
                tensors["mask_i"],
                tensors["mask_ij"],
            )

            if post_step_function is not None:
                tensors = post_step_function(t, tensors)

        return tensors

    def init_steps(
        self, node_h: torch.Tensor, edge_h: torch.Tensor
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        """Initialize cached node and edge features.

        Args:
            node_h (torch.Tensor): Node features with shape
                `(num_batch, num_nodes, dim_nodes)`.
            edge_h (torch.Tensor): Edge features with shape
                `(num_batch, num_nodes, num_neighbors, dim_edges)`.

        Returns:
            node_h_cache (torch.Tensor): List of cached node features with `num_layers + 1`
                tensors of shape `(num_batch, num_nodes, dim_nodes)`.
            edge_h_cache (torch.Tensor): List of cached edge features with `num_layers + 1`
                tensors of shape `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        """
        # One cache slot per layer boundary (inputs of each layer + final output)
        num_layers = len(self.layers)
        node_h_cache = [node_h.clone() for _ in range(num_layers + 1)]
        edge_h_cache = [edge_h.clone() for _ in range(num_layers + 1)]
        return node_h_cache, edge_h_cache

    def step(
        self,
        t: int,
        node_h_cache: List[torch.Tensor],
        edge_h_cache: List[torch.Tensor],
        edge_idx: torch.LongTensor,
        mask_i: Optional[torch.Tensor] = None,
        mask_ij: Optional[torch.Tensor] = None,
    ) -> Tuple[List[torch.Tensor], List[torch.Tensor]]:
        """Process GNN update for a specific node index t from cached intermediates.

        Inputs:
            t (int): Node index to decode.
            node_h_cache (List[torch.Tensor]): List of cached node features with
                `num_layers + 1` tensors of shape `(num_batch, num_nodes, dim_nodes)`.
            edge_h_cache (List[torch.Tensor]): List of cached edge features with
                `num_layers + 1` tensors of shape
                `(num_batch, num_nodes, num_neighbors, dim_edges)`.
            edge_idx (torch.LongTensor): Edge indices for neighbors with shape
                `(num_batch, num_nodes, num_neighbors)`.
            mask_i (torch.Tensor, optional): Node mask with shape
                `(num_batch, num_nodes)`.
            mask_ij (torch.Tensor, optional): Edge mask with shape
                `(num_batch, num_nodes, num_neighbors)`.

        Outputs:
            node_h_cache (List[torch.Tensor]): Updated list of cached node features
                with `num_layers + 1` tensors of shape
                `(num_batch, num_nodes, dim_nodes)`. This method updates the tensors
                in place for memory.
            edge_h_cache (List[torch.Tensor]): Updated list of cached edge features
                with `num_layers + 1` tensors of shape
                `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        """
        # Long skip connections are not supported in sequential decoding
        if self.skip_connect_input:
            raise NotImplementedError

        for i, layer in enumerate(self.layers):
            # Because the edge updates depend on the updated nodes,
            # we need both the input node features node_h and also
            # the previous output node states node_h
            node_h = node_h_cache[i]
            node_h_out = node_h_cache[i + 1]
            edge_h = edge_h_cache[i]
            # Update edge and node
            # NOTE(review): calls torch checkpoint unconditionally here, unlike
            # forward() which respects self.checkpoint_gradients — confirm intended
            node_h_t, edge_h_t = checkpoint(
                layer.step, t, node_h, node_h_out, edge_h, edge_idx, mask_i, mask_ij
            )

            # Scatter them in place (write the step-t slices back into the cache)
            node_h_cache[i + 1].scatter_(
                1, (t * torch.ones_like(node_h_t)).long(), node_h_t
            )
            edge_h_cache[i + 1].scatter_(
                1, (t * torch.ones_like(edge_h_t)).long(), edge_h_t
            )

        return node_h_cache, edge_h_cache
297
+
298
+ ## GraphLayer: a single message-passing layer used by GraphNN
299
class GraphLayer(nn.Module):
    """Graph layer that updates each node i given adjacent nodes and edges.

    Args:
        dim_nodes (int): Hidden dimension of node tensor.
        dim_edges (int): Hidden dimension of edge tensor.
        node_mlp_layers (int): Node update function, number of hidden layers.
            Default: 1.
        node_mlp_dim (int): Node update function, hidden dimension.
            Default: Matches MLP output dimension.
        update_edge (Boolean): Include an edge-update step. Default: True
        edge_mlp_layers (int): Edge update function, number of hidden layers.
            Default: 1.
        edge_mlp_dim (int): Edge update function, hidden dimension.
            Default: Matches MLP output dimension.
        mlp_activation (str): MLP nonlinearity.
            `'relu'`: Rectified linear unit.
            `'softplus'`: Softplus.
        dropout (float): Dropout rate.
        norm (str): Which normalization function to apply between layers.
            `'transformer'`: Default layernorm
            `'layer'`: Masked Layer norm with shape (input.shape[1:])
            `'instance'`: Masked Instance norm
        scale (float): Scaling factor of edge input when updating node (default=1.0)

    Inputs:
        node_h (torch.Tensor): Node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_i (tensor, optional): Node mask with shape `(num_batch, num_nodes)`
        mask_ij (tensor, optional): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`

    Outputs:
        node_h_out (torch.Tensor): Updated node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h_out (torch.Tensor): Updated edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
    """

    def __init__(
        self,
        dim_nodes: int,
        dim_edges: int,
        node_mlp_layers: int = 1,
        node_mlp_dim: Optional[int] = None,
        edge_update: bool = True,
        edge_mlp_layers: int = 1,
        edge_mlp_dim: Optional[int] = None,
        mlp_activation: str = "relu",
        dropout: float = 0.0,
        norm: str = "transformer",
        scale: float = 1.0,
        attentional: bool = False,
        num_attention_heads: int = 4,
    ):
        super(GraphLayer, self).__init__()

        # Store scale
        self.scale = scale
        self.dim_nodes = dim_nodes
        self.dim_edges = dim_edges
        self.attentional = attentional

        self.node_norm_layer = MaskedNorm(
            dim=1, num_features=dim_nodes, affine=True, norm=norm
        )

        # Message function: maps packed (node_i, node_j, edge_ij) to a message
        self.message_mlp = MLP(
            dim_in=2 * dim_nodes + dim_edges,
            dim_out=dim_nodes,
            num_layers_hidden=edge_mlp_layers,
            dim_hidden=edge_mlp_dim,
            activation=mlp_activation,
            dropout=dropout,
        )
        # Node update function: maps (node_i, aggregated message) to a residual
        self.update_mlp = MLP(
            dim_in=2 * dim_nodes,
            dim_out=dim_nodes,
            num_layers_hidden=node_mlp_layers,
            dim_hidden=node_mlp_dim,
            activation=mlp_activation,
            dropout=dropout,
        )
        self.edge_update = edge_update
        self.edge_norm_layer = MaskedNorm(
            dim=2, num_features=dim_edges, affine=True, norm=norm
        )
        if self.edge_update:
            self.edge_mlp = MLP(
                dim_in=2 * dim_nodes + dim_edges,
                dim_out=dim_edges,
                num_layers_hidden=edge_mlp_layers,
                dim_hidden=edge_mlp_dim,
                activation=mlp_activation,
                dropout=dropout,
            )

        if self.attentional:
            self.attention = Attention(n_head=num_attention_heads, d_model=dim_nodes)

    ## Attention-based message aggregation (node i attends over its messages)
    def attend(
        self, node_h: torch.Tensor, messages: torch.Tensor, mask_ij: torch.Tensor
    ) -> torch.Tensor:
        # Flatten (batch, nodes) so each node attends over its K messages
        B, L, K, D = messages.size()
        queries = node_h.reshape(-1, 1, D)
        keys = messages.reshape(-1, K, D)
        values = messages.reshape(-1, K, D)
        mask = mask_ij.reshape(-1, 1, 1, K).bool() if mask_ij is not None else None
        return self.attention(queries, keys, values, mask=mask).reshape(B, L, D)

    ## Normalize node and edge embeddings
    def _normalize(self, node_h, edge_h, mask_i=None, mask_ij=None):
        # Normalize node and edge embeddings
        node_h_norm = self.node_norm_layer(node_h, mask_i)
        edge_h_norm = self.edge_norm_layer(edge_h, mask_ij)
        return node_h_norm, edge_h_norm

    ## Normalize a packed (node_i, node_j, edge_ij) tensor for a single step t,
    ## since gathered neighbor features have not yet been normalized
    def _normalize_t(
        self, edge_node_stack_t, mask_ij_t, include_nodes=True, include_edges=True
    ):
        # Apply normalization (since we have only normalized time t information)
        node_i_t = edge_node_stack_t[:, :, :, : self.dim_nodes]
        node_j_t = edge_node_stack_t[:, :, :, self.dim_nodes : 2 * self.dim_nodes]
        edge_h_t = edge_node_stack_t[:, :, :, 2 * self.dim_nodes :]
        if include_nodes:
            node_i_t = self.node_norm_layer(node_i_t, mask_ij_t)
            node_j_t = self.node_norm_layer(node_j_t, mask_ij_t)
        if include_edges:
            edge_h_t = self.edge_norm_layer(edge_h_t, mask_ij_t)
        edge_node_stack_t = torch.cat([node_i_t, node_j_t, edge_h_t], -1)
        return edge_node_stack_t

    def _update_nodes(
        self, node_h, node_h_norm, edge_h_norm, edge_idx, mask_i=None, mask_ij=None
    ):
        """Update nodes given adjacent nodes and edges"""
        # Compute messages at each ij
        edge_node_stack = pack_edges(node_h_norm, edge_h_norm, edge_idx)
        messages = self.message_mlp(edge_node_stack)
        if mask_ij is not None:
            messages = messages * mask_ij.unsqueeze(-1)

        # Aggregate messages (attention-weighted or scaled sum)
        if self.attentional:
            message = self.attend(node_h_norm, messages, mask_ij)
        else:
            message = messages.sum(2) / self.scale

        node_stack = torch.cat([node_h_norm, message], -1)

        # Update nodes given aggregated messages (residual update)
        node_h_out = node_h + self.update_mlp(node_stack)
        if mask_i is not None:
            node_h_out = node_h_out * mask_i.unsqueeze(-1)
        return node_h_out

    def _update_nodes_t(
        self,
        t,
        node_h,
        node_h_norm_t,
        edge_h_norm_t,
        edge_idx_t,
        mask_i_t=None,
        mask_ij_t=None,
    ):
        """Update nodes at index t given adjacent nodes and edges"""
        # Compute messages at each ij
        edge_node_stack_t = mask_ij_t.unsqueeze(-1) * pack_edges_step(
            t, node_h, edge_h_norm_t, edge_idx_t
        )

        # Apply normalization of gathered tensors
        # (edges are already normalized; only node slices need it here)
        edge_node_stack_t = self._normalize_t(
            edge_node_stack_t, mask_ij_t, include_edges=False
        )

        messages_t = self.message_mlp(edge_node_stack_t)
        if mask_ij_t is not None:
            messages_t = messages_t * mask_ij_t.unsqueeze(-1)

        # Aggregate messages
        if self.attentional:
            message_t = self.attend(node_h_norm_t, messages_t, mask_ij_t)
        else:
            message_t = messages_t.sum(2) / self.scale

        node_stack_t = torch.cat([node_h_norm_t, message_t], -1)
        # Update nodes given aggregated messages (residual update at index t)
        node_h_t = node_h[:, t, :].unsqueeze(1)
        node_h_out_t = node_h_t + self.update_mlp(node_stack_t)
        if mask_i_t is not None:
            node_h_out_t = node_h_out_t * mask_i_t.unsqueeze(-1)
        return node_h_out_t

    def _update_edges(self, edge_h, node_h_out, edge_h_norm, edge_idx, mask_ij):
        """Update edges given adjacent nodes and edges"""
        edge_node_stack = pack_edges(node_h_out, edge_h_norm, edge_idx)

        # Residual edge update
        edge_h_out = edge_h + self.edge_mlp(edge_node_stack)
        if mask_ij is not None:
            edge_h_out = edge_h_out * mask_ij.unsqueeze(-1)
        return edge_h_out

    def _update_edges_t(
        self, t, edge_h_t, node_h_out, edge_h_t_norm, edge_idx_t, mask_ij_t
    ):
        """Update edges given adjacent nodes and edges"""
        edge_node_stack_t = pack_edges_step(t, node_h_out, edge_h_t_norm, edge_idx_t)

        # Residual edge update at index t
        edge_h_out_t = edge_h_t + self.edge_mlp(edge_node_stack_t)
        if mask_ij_t is not None:
            edge_h_out_t = edge_h_out_t * mask_ij_t.unsqueeze(-1)
        return edge_h_out_t

    def forward(
        self,
        node_h: torch.Tensor,
        edge_h: torch.Tensor,
        edge_idx: torch.LongTensor,
        mask_i: Optional[torch.Tensor] = None,
        mask_ij: Optional[torch.Tensor] = None,
    ):
        # Pre-norm, then masked residual node update and optional edge update
        node_h_norm, edge_h_norm = self._normalize(node_h, edge_h, mask_i, mask_ij)
        if mask_i is not None:
            mask_i = (mask_i != 0).type(torch.float32)
        if mask_ij is not None:
            mask_ij = (mask_ij != 0).type(torch.float32)
        node_h_out = self._update_nodes(
            node_h, node_h_norm, edge_h_norm, edge_idx, mask_i, mask_ij
        )
        edge_h_out = None
        if self.edge_update:
            edge_h_out = self._update_edges(
                edge_h, node_h_out, edge_h_norm, edge_idx, mask_ij
            )
        return node_h_out, edge_h_out

    def step(
        self,
        t: int,
        node_h: torch.Tensor,
        node_h_out: torch.Tensor,
        edge_h: torch.Tensor,
        edge_idx: torch.LongTensor,
        mask_i: Optional[torch.Tensor] = None,
        mask_ij: Optional[torch.Tensor] = None,
    ):
        """Compute update for a single node index `t`.

        This function can be useful for sequential computation of graph
        updates, for example with autoregressive architectures.

        Args:
            t (int): Index of node dimension to update
            node_h (torch.Tensor): Node features with shape
                `(num_batch, num_nodes, dim_nodes)`.
            node_h_out (torch.Tensor): Cached outputs of preceding steps with shape
                `(num_batch, num_nodes, dim_nodes)`.
            edge_h (torch.Tensor): Edge features with shape
                `(num_batch, num_nodes, num_neighbors, dim_edges)`.
            edge_idx (torch.LongTensor): Edge indices for neighbors with shape
                `(num_batch, num_nodes, num_neighbors)`.
            mask_i (tensor, optional): Node mask with shape `(num_batch, num_nodes)`
            mask_ij (tensor, optional): Edge mask with shape
                `(num_batch, num_nodes, num_neighbors)`

        Returns:
            node_h_t (torch.Tensor): Updated node features with shape
                `(num_batch, 1, dim_nodes)`.
            edge_h_t (torch.Tensor): Updated edge features with shape
                `(num_batch, 1, num_neighbors, dim_edges)`.
        """
        # Slice out the features and masks at node index t
        # NOTE(review): indexes mask_i/mask_ij directly, so unlike forward()
        # this assumes both masks are provided (not None) — confirm with callers
        node_h_t = node_h[:, t, :].unsqueeze(1)
        edge_h_t = edge_h[:, t, :, :].unsqueeze(1)
        edge_idx_t = edge_idx[:, t, :].unsqueeze(1)
        mask_i_t = mask_i[:, t].unsqueeze(1)
        mask_ij_t = mask_ij[:, t, :].unsqueeze(1)

        """ For a single step we need to apply the normalization both at node t and
            also for all of the neighborhood tensors that feed in at t.
        """
        node_h_t_norm, edge_h_t_norm = self._normalize(
            node_h_t, edge_h_t, mask_i_t, mask_ij_t
        )
        node_h_t = self._update_nodes_t(
            t, node_h, node_h_t_norm, edge_h_t_norm, edge_idx_t, mask_i_t, mask_ij_t
        )

        if self.edge_update:
            # Write the freshly updated node t into the output cache before
            # computing the edge update, which depends on updated nodes
            node_h_out = node_h_out.scatter(
                1, (t * torch.ones_like(node_h_t)).long(), node_h_t
            )
            edge_h_t = self._update_edges_t(
                t, edge_h_t, node_h_out, edge_h_t_norm, edge_idx_t, mask_ij_t
            )
        return node_h_t, edge_h_t
599
+
600
+ ## MLP: plain feed-forward stack of linear transformations
601
class MLP(nn.Module):
    """Multilayer perceptron with variable input, hidden, and output dims.

    Args:
        dim_in (int): Feature dimension of input tensor.
        dim_hidden (int or None): Feature dimension of intermediate layers.
            Defaults to matching output dimension.
        dim_out (int or None): Feature dimension of output tensor.
            Defaults to matching input dimension.
        num_layers_hidden (int): Number of hidden MLP layers. With 0 hidden
            layers the module reduces to a single linear map.
        activation (str): MLP nonlinearity.
            `'relu'`: Rectified linear unit.
            `'softplus'`: Softplus.
        dropout (float): Dropout rate. Default is 0.

    Inputs:
        h (torch.Tensor): Input tensor with shape `(..., dim_in)`

    Outputs:
        h (torch.Tensor): Output tensor with shape `(..., dim_out)`
    """

    def __init__(
        self,
        dim_in: int,
        dim_hidden: Optional[int] = None,
        dim_out: Optional[int] = None,
        num_layers_hidden: int = 1,
        activation: str = "relu",
        dropout: float = 0.0,
    ):
        super(MLP, self).__init__()

        # Default is dimension preserving
        dim_out = dim_in if dim_out is None else dim_out
        dim_hidden = dim_out if dim_hidden is None else dim_hidden

        activation_cls = {"relu": nn.ReLU, "softplus": nn.Softplus}[activation]

        if num_layers_hidden == 0:
            # Degenerate case: a single linear projection
            modules = [nn.Linear(dim_in, dim_out)]
        else:
            modules = []
            for layer_ix in range(num_layers_hidden):
                dim_src = dim_in if layer_ix == 0 else dim_hidden
                modules.extend(
                    [
                        nn.Linear(dim_src, dim_hidden),
                        activation_cls(),
                        nn.Dropout(dropout),
                    ]
                )
            modules.append(nn.Linear(dim_hidden, dim_out))
        self.layers = nn.Sequential(*modules)

    def forward(self, h: torch.Tensor) -> torch.Tensor:
        return self.layers(h)
657
+
658
+
659
def collect_neighbors(node_h: torch.Tensor, edge_idx: torch.Tensor) -> torch.Tensor:
    """Collect neighbor node features as edge features.

    For each node i, gathers the embeddings of its neighbors {j in N(i)}
    into a per-edge tensor neighbor_ij.

    Args:
        node_h (torch.Tensor): Node features with shape
            `(num_batch, num_nodes, num_features)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        neighbor_h (torch.Tensor): Edge features containing neighbor node information
            with shape `(num_batch, num_nodes, num_neighbors, num_features)`.
    """
    B, L, K = edge_idx.shape
    D = node_h.shape[2]

    # Collapse (node, neighbor) into one gather axis, then restore the shape
    gather_idx = edge_idx.reshape([B, L * K, 1]).expand(-1, -1, D)
    gathered = torch.gather(node_h, 1, gather_idx)
    return gathered.reshape((B, L, K, D))
684
+
685
+
686
def collect_edges(
    edge_h_dense: torch.Tensor, edge_idx: torch.LongTensor
) -> torch.Tensor:
    """Collect sparse edge features from a dense pairwise tensor.

    Args:
        edge_h_dense (torch.Tensor): Dense edges features with shape
            `(num_batch, num_nodes, num_nodes, num_features)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, num_features)`.
    """
    num_features = edge_h_dense.size(-1)
    # Broadcast the neighbor indices across the feature axis, then gather
    # each (i, j) pair out of the dense num_nodes x num_nodes grid
    gather_idx = edge_idx.unsqueeze(-1).expand(-1, -1, -1, num_features)
    return torch.gather(edge_h_dense, 2, gather_idx)
704
+
705
+
706
def collect_edges_transpose(
    edge_h: torch.Tensor, edge_idx: torch.LongTensor, mask_ij: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Collect edge embeddings of reversed (transposed) edges in-place.

    Args:
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, num_features_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_ij (torch.Tensor): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`

    Returns:
        edge_h_transpose (torch.Tensor): Edge features of transpose with shape
            `(num_batch, num_nodes, num_neighbors, num_features_edges)`.
        mask_ji (torch.Tensor): Mask indicating presence of reversed edge with shape
            `(num_batch, num_nodes, num_neighbors)`.
    """
    B, L, K, D = list(edge_h.size())

    # Flat indices mapping each edge (i, j) to its reverse (j, i), together
    # with a mask for reverse edges that actually exist in the k-NN graph
    ij_to_ji, mask_ji = transpose_edge_idx(edge_idx, mask_ij)

    # Gather the reversed-edge features from a flattened (node, neighbor) axis
    flat = edge_h.reshape(B, L * K, -1)
    gather_idx = ij_to_ji.unsqueeze(-1).expand(-1, -1, D)
    transposed = torch.gather(flat, 1, gather_idx).reshape(B, L, K, D)

    # Zero out entries whose reverse edge is absent
    transposed = mask_ji.unsqueeze(-1) * transposed
    return transposed, mask_ji
739
+
740
+
741
def scatter_edges(edge_h: torch.Tensor, edge_idx: torch.LongTensor) -> torch.Tensor:
    """Scatter sparse edge features into a dense pairwise tensor.

    Args:
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, num_features_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        edge_h_dense (torch.Tensor): Dense edge features with shape
            `(batch_size, num_nodes, num_nodes, dimensions)`.
    """
    assert edge_h.dim() == 4
    assert edge_idx.dim() == 3
    num_batch, num_nodes, _, num_dims = edge_h.shape

    # Expand the neighbor indices over the feature channel for scatter.
    index = edge_idx.unsqueeze(-1).repeat(1, 1, 1, num_dims)

    # Unreferenced (i, j) pairs stay zero in the dense output.
    dense = edge_h.new_zeros(num_batch, num_nodes, num_nodes, num_dims)
    return dense.scatter(dim=2, index=index, src=edge_h)
761
+
762
+
763
def pack_edges(
    node_h: torch.Tensor, edge_h: torch.Tensor, edge_idx: torch.LongTensor
) -> torch.Tensor:
    """Pack node and edge features into edge features.

    Expands each edge_ij by packing node i, node j, and edge ij into
    {node,node,edge}_ij.

    Args:
        node_h (torch.Tensor): Node features with shape
            `(num_batch, num_nodes, num_features_nodes)`.
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, num_features_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        edge_packed (torch.Tensor): Concatenated node and edge features with
            shape `(num_batch, num_nodes, num_neighbors,
            num_features_nodes + 2*num_features_edges)`.
    """
    num_neighbors = edge_h.shape[2]
    # Broadcast node i's features across each of its neighbor slots.
    h_i = node_h.unsqueeze(2).expand(-1, -1, num_neighbors, -1)
    # Gather node j's features at every neighbor index.
    h_j = collect_neighbors(node_h, edge_idx)
    return torch.cat([h_i, h_j, edge_h], -1)
789
+
790
+
791
def pack_edges_step(
    t: int, node_h: torch.Tensor, edge_h_t: torch.Tensor, edge_idx_t: torch.LongTensor
) -> torch.Tensor:
    """Pack node and edge features into edge features for a single node index t.

    Expands each edge_ij by packing node i, node j, and edge ij into
    {node,node,edge}_ij.

    Args:
        t (int): Node index to decode.
        node_h (torch.Tensor): Node features at all positions with shape
            `(num_batch, num_nodes, num_features_nodes)`.
        edge_h_t (torch.Tensor): Edge features at index `t` with shape
            `(num_batch, 1, num_neighbors, num_features_edges)`.
        edge_idx_t (torch.LongTensor): Edge indices at index `t` for neighbors
            with shape `(num_batch, 1, num_neighbors)`.

    Returns:
        edge_packed (torch.Tensor): Concatenated node and edge features
            for index `t` with shape
            `(num_batch, 1, num_neighbors, num_features_nodes
            + 2*num_features_edges)`.
    """
    # (Removed an unused `num_nodes_i = node_h.shape[1]` local from the
    # original implementation; it was never read.)
    num_neighbors = edge_h_t.shape[2]
    # Features of the node being decoded, broadcast over its neighbor slots.
    node_h_t = node_h[:, t, :].unsqueeze(1)
    node_i = node_h_t.unsqueeze(2).expand(-1, -1, num_neighbors, -1)
    # Gather neighbor-node features from the full node table.
    node_j = collect_neighbors(node_h, edge_idx_t)
    edge_packed = torch.cat([node_i, node_j, edge_h_t], -1)
    return edge_packed
821
+
822
+
823
def transpose_edge_idx(
    edge_idx: torch.LongTensor, mask_ij: torch.Tensor
) -> Tuple[torch.LongTensor, torch.Tensor]:
    """Collect edge indices of reverse edges in-place at each edge.

    The tensor `edge_idx` stores a directed graph topology as a tensor of
    neighbor indices, where an element `edge_idx[b,i,k]` corresponds to the
    node index of neighbor `k` of node `i` in batch member `b`.

    This function takes a directed graph topology and returns an index tensor
    that maps, in-place, to the reversed edges (if they exist). The indices
    correspond to the contracted dimension of `edge_index` when it is viewed as
    `(num_batch, num_nodes * num_neighbors)`. These indices can be used in
    conjunction with `torch.gather` to collect edge embeddings of `j->i` at
    `i->j`. See `collect_edges_transpose` for an example.

    For reverse `j->i` edges that do not exist in the directed graph, the
    function also returns a binary mask `mask_ji` indicating which edges
    have both `i->j` and `j->i` present in the graph.

    Args:
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_ij (torch.Tensor): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        ij_to_ji (torch.LongTensor): Flat indices for indexing ji in-place at
            ij with shape `(num_batch, num_nodes * num_neighbors)`.
        mask_ji (torch.Tensor): Mask indicating presence of reversed edge with
            shape `(num_batch, num_nodes, num_neighbors)`.
    """
    num_batch, num_nodes, num_k = edge_idx.shape

    # 1. For every edge (i, j), gather all num_k neighbor indices of node j.
    flat_j = edge_idx.reshape([num_batch, num_nodes * num_k, 1]).expand(
        -1, -1, num_k
    )
    # (b, i, j, k) gives the kth neighbor of the jth neighbor of i.
    nbrs_of_nbrs = torch.gather(edge_idx, 1, flat_j).reshape(
        [num_batch, num_nodes, num_k, num_k]
    )

    # 2. Find which neighbor slot of j (if any) points back at i.
    node_i = torch.arange(num_nodes, device=edge_idx.device).reshape(
        (1, -1, 1, 1)
    )
    is_reverse = (nbrs_of_nbrs == node_i).type(torch.float32)
    return_mask, return_idx = torch.max(is_reverse, -1)

    # 3. Flat index of edge (j -> i) in the (num_nodes * num_k) edge table.
    ij_to_ji = (edge_idx * num_k + return_idx).reshape(num_batch, -1)

    # 4. The reverse edge counts only if ij, the match, and ji are all unmasked.
    mask_ji = torch.gather(mask_ij.reshape(num_batch, -1), -1, ij_to_ji)
    mask_ji = mask_ij * return_mask * mask_ji.reshape(num_batch, num_nodes, num_k)
    return ij_to_ji, mask_ji
883
+
884
+
885
def permute_tensor(
    tensor: torch.Tensor, dim: int, permute_idx: torch.LongTensor
) -> torch.Tensor:
    """Permute a tensor along a dimension given a permutation vector.

    Args:
        tensor (torch.Tensor): Input tensor with shape
            `([batch_dims], permutation_length, [content_dims])`.
        dim (int): Dimension to permute along.
        permute_idx (torch.LongTensor): Permutation index tensor with shape
            `([batch_dims], permutation_length)`.

    Returns:
        tensor_permute (torch.Tensor): Permuted tensor with shape
            `([batch_dims], permutation_length, [content_dims])`.
    """
    # Resolve a possibly-negative `dim` to its absolute position.
    dim = range(tensor.dim())[dim]

    # Collapse everything after the permuted axis into one content axis.
    shape = list(tensor.shape)
    lead_dims, length = shape[:dim], shape[dim]
    flat = tensor.reshape(lead_dims + [length, -1])

    # Broadcast the permutation over the content axis and gather.
    idx = permute_idx.unsqueeze(-1).expand(flat.shape)
    permuted_flat = torch.gather(flat, dim, idx)

    return permuted_flat.reshape(tensor.shape)
915
+
916
+
917
def permute_graph_embeddings(
    node_h: torch.Tensor,
    edge_h: torch.Tensor,
    edge_idx: torch.LongTensor,
    mask_i: torch.Tensor,
    mask_ij: torch.Tensor,
    permute_idx: torch.LongTensor,
) -> Tuple[torch.Tensor, torch.Tensor, torch.LongTensor, torch.Tensor, torch.Tensor]:
    """Permute graph embeddings given a permutation vector.

    Args:
        node_h (torch.Tensor): Node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h (torch.Tensor): Edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_i (torch.Tensor): Node mask with shape `(num_batch, num_nodes)`.
        mask_ij (torch.Tensor): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`.
        permute_idx (torch.LongTensor): Permutation vector with shape
            `(num_batch, num_nodes)`.

    Returns:
        node_h_permute (torch.Tensor): Permuted node features with shape
            `(num_batch, num_nodes, dim_nodes)`.
        edge_h_permute (torch.Tensor): Permuted edge features with shape
            `(num_batch, num_nodes, num_neighbors, dim_edges)`.
        edge_idx_permute (torch.LongTensor): Permuted edge indices with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_i_permute (torch.Tensor): Permuted node mask with shape
            `(num_batch, num_nodes)`.
        mask_ij_permute (torch.Tensor): Permuted edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`.
    """
    # Tensors indexed only by node position permute by a plain gather on dim 1.
    node_h_permute = permute_tensor(node_h, 1, permute_idx)
    edge_h_permute = permute_tensor(edge_h, 1, permute_idx)
    mask_i_permute = permute_tensor(mask_i, 1, permute_idx)
    mask_ij_permute = permute_tensor(mask_ij, 1, permute_idx)

    # edge_idx involves node indices on both axes: permute the i axis, then
    # remap each stored neighbor j to its new location p^(-1)[j]:
    #   edge^(p)[i, k] = p^(-1)[edge[p(i), k]]
    edge_idx_i_permuted = permute_tensor(edge_idx, 1, permute_idx)
    permute_idx_inverse = torch.argsort(permute_idx, dim=-1)
    flat = edge_idx_i_permuted.reshape([edge_idx.shape[0], -1])
    edge_idx_permute = torch.gather(permute_idx_inverse, 1, flat).reshape(
        edge_idx.shape
    )

    return (
        node_h_permute,
        edge_h_permute,
        edge_idx_permute,
        mask_i_permute,
        mask_ij_permute,
    )
979
+
980
+
981
def edge_mask_causal(edge_idx: torch.LongTensor, mask_ij: torch.Tensor) -> torch.Tensor:
    """Make an edge mask causal with mask_ij = 0 for j >= i.

    Args:
        edge_idx (torch.LongTensor): Edge indices for neighbors with shape
            `(num_batch, num_nodes, num_neighbors)`.
        mask_ij (torch.Tensor): Edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`.

    Returns:
        mask_ij_causal (torch.Tensor): Causal edge mask with shape
            `(num_batch, num_nodes, num_neighbors)`.
    """
    # Position index i, shaped (1, num_nodes, 1) to broadcast against (b, i, k).
    positions = torch.arange(edge_idx.size(1), device=edge_idx.device).reshape(
        [1, -1, 1]
    )
    # Keep only strictly-earlier neighbors (j < i) that were already unmasked.
    return mask_ij * (edge_idx < positions).float()
998
+
999
+
1000
class MaskedNorm(nn.Module):
    """Masked normalization layer.

    Computes mean/variance normalization while respecting a mask: masked-out
    positions (mask value 0) contribute nothing to the statistics and are
    zeroed in the output. Distinct nonzero mask values partition positions
    into groups that are normalized independently of one another.

    Args:
        dim (int): Dimensionality of the normalization. Can be 1 for 1D
            normalization along dimension 1 or 2 for 2D normalization along
            dimensions 1 and 2.
        num_features (int): Channel dimension; only needed if `affine` is True.
        affine (bool): If True, include a learnable affine transformation
            post-normalization. Default is False.
        norm (str): Type of normalization, can be `instance`, `layer`, or
            `transformer`.
        eps (float): Small number for numerical stability.

    Inputs:
        data (torch.Tensor): Input tensor with shape
            `(num_batch, num_nodes, num_channels)` (1D) or
            `(num_batch, num_nodes, num_nodes, num_channels)` (2D).
        mask (torch.Tensor): Mask tensor with shape
            `(num_batch, num_nodes)` (1D) or
            `(num_batch, num_nodes, num_nodes)` (2D).

    Outputs:
        norm_data (torch.Tensor): Mask-normalized tensor with shape
            `(num_batch, num_nodes, num_channels)` (1D) or
            `(num_batch, num_nodes, num_nodes, num_channels)` (2D).
    """

    def __init__(
        self,
        dim: int,
        num_features: int = -1,
        affine: bool = False,
        norm: str = "instance",
        eps: float = 1e-5,
    ):
        super(MaskedNorm, self).__init__()

        self.norm_type = norm
        self.dim = dim
        # Combined key, e.g. "instance" + "1" -> "instance1", used to select
        # the reduction dimensions below.
        self.norm = norm + str(dim)
        self.affine = affine
        self.eps = eps

        # Dimensions reduced over when computing mean/std.
        if self.norm == "instance1":
            self.sum_dims = [1]
        elif self.norm == "layer1":
            self.sum_dims = [1, 2]
        elif self.norm == "transformer1":
            self.sum_dims = [-1]
        elif self.norm == "instance2":
            self.sum_dims = [1, 2]
        elif self.norm == "layer2":
            self.sum_dims = [1, 2, 3]
        elif self.norm == "transformer2":
            self.sum_dims = [-1]
        else:
            raise NotImplementedError

        # Number of features, only required if affine.
        self.num_features = num_features

        # Affine transformation is an elementwise scale and shift on the
        # channel dimension.
        if self.affine:
            self.weights = nn.Parameter(torch.rand(self.num_features))
            self.bias = nn.Parameter(torch.zeros(self.num_features))

    def forward(
        self, data: torch.Tensor, mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        # Add optional trailing singleton dimension and expand the mask so it
        # matches `data` elementwise.
        if mask is not None:
            if len(mask.shape) == len(data.shape) - 1:
                mask = mask.unsqueeze(-1)
            if data.shape != mask.shape:
                mask = mask.expand(data.shape)

        # Dimensions to reduce, chosen in __init__ from the norm type.
        dims = self.sum_dims
        if (mask is None) or (self.norm_type == "transformer"):
            # Unmasked statistics. "transformer" norm reduces over the channel
            # dimension only, so the mask does not affect its statistics
            # (masked positions are still zeroed at the end).
            mask_mean = data.mean(dim=dims, keepdim=True)
            mask_std = torch.sqrt(
                (((data - mask_mean)).pow(2)).mean(dim=dims, keepdim=True) + self.eps
            )

            # Normalize
            norm_data = (data - mask_mean) / mask_std

        else:
            # Accumulator for the per-group normalized values.
            norm_data = torch.zeros_like(data).to(data.device).type(data.dtype)
            for mask_id in mask.unique():
                # Skip zero: it marks masked-out (padding) positions.
                if mask_id == 0:
                    continue

                # Binary mask selecting only the positions in this group.
                tmask = (mask == mask_id).type(torch.float32)

                # Number of selected elements, for the masked mean.
                mask_sum = tmask.sum(dim=dims, keepdim=True)

                # Masked mean/std: statistics over this group's positions only.
                mask_mean = (data * tmask).sum(dim=dims, keepdim=True) / mask_sum
                mask_std = torch.sqrt(
                    (((data - mask_mean) * tmask).pow(2)).sum(dim=dims, keepdim=True)
                    / mask_sum
                    + self.eps
                )

                # Normalize this group and zero everything outside it.
                tnorm = ((data - mask_mean) / mask_std) * tmask
                # An empty group yields NaNs (0/0); NaN != NaN, so this
                # replaces them with zeros.
                tnorm[tnorm != tnorm] = 0

                # Accumulate the group's contribution.
                norm_data += tnorm

        # Optional learned scale and shift on the channel dimension.
        if self.affine:
            norm_data = norm_data * self.weights + self.bias

        # Zero out masked positions in the output.
        if mask is not None:
            norm_data = norm_data * (mask != 0).type(data.dtype)
        return norm_data
chroma/chroma/layers/linalg.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright Generate Biomedicines, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
"""Layers for linear algebra.

Performs linear algebra computations.

This module contains additional pytorch layers for linear algebra operations,
such as a more parallelization-friendly implementation of eigenvalue estimation.
"""
21
+
22
+ import torch
23
+
24
+
25
def eig_power_iteration(A, num_iterations=50, eps=1e-5):
    """Estimate largest magnitude eigenvalue and associated eigenvector.

    This uses a simple power iteration algorithm to estimate leading
    eigenvalues, which can often be considerably faster than torch's built-in
    eigenvalue routines. All steps are differentiable and small constants are
    added to any division to preserve the stability of the gradients. For more
    information on power iteration, see
    https://en.wikipedia.org/wiki/Power_iteration.

    Args:
        A (tensor): Batch of square matrices with shape
            `(..., num_dims, num_dims)`.
        num_iterations (int, optional): Number of iterations for power
            iteration. Default: 50.
        eps (float, optional): Small number to prevent division by zero.
            Default: 1E-5.

    Returns:
        lam (tensor): Batch of estimated highest-magnitude eigenvalues with
            shape `(...)`.
        v (tensor): Associated eigenvector with shape `(..., num_dims)`.
    """

    def _safe(x):
        # Stabilize divisions against zero denominators.
        return x + eps

    # Random start vector with the same batch shape as A's columns.
    batch_shape = list(A.size())[:-1]
    v = torch.randn(batch_shape, device=A.device).unsqueeze(-1)

    # Repeatedly apply A and renormalize; converges toward the dominant
    # eigenvector.
    for _ in range(num_iterations):
        v_prev = v
        Av = torch.matmul(A, v)
        v = Av / _safe(Av.norm(p=2, dim=-2, keepdim=True))

    # Rayleigh-style quotient from the last two iterates gives the eigenvalue.
    v_prev_T = v_prev.transpose(-1, -2)
    lam = torch.matmul(v_prev_T, Av) / _safe(torch.abs(torch.matmul(v_prev_T, v)))

    # Drop the trailing singleton dimensions.
    v = v.squeeze(-1)
    lam = lam.view(list(lam.size())[:-2])
    return lam, v
65
+
66
+
67
def eig_leading(A, num_iterations=50):
    """Estimate largest positive eigenvalue and associated eigenvector.

    This estimates the *most positive* eigenvalue of each matrix in a batch of
    matrices by using two consecutive power iterations with spectral shifting.

    Args:
        A (tensor): Batch of square matrices with shape
            `(..., num_dims, num_dims)`.
        num_iterations (int, optional): Number of iterations for power
            iteration. Default: 50.

    Returns:
        lam (tensor): Estimated most positive eigenvalue with shape `(...)`.
        vec (tensor): Associated eigenvectors with shape `(..., num_dims)`.
    """
    batch_dims = list(A.size())[:-2]
    # Fix: the identity size was hard-coded to 4, which silently restricted
    # this routine to 4x4 matrices despite the documented generic shape.
    num_dims = A.size(-1)

    # First pass gets the largest-magnitude eigenvalue (possibly negative).
    lam_1, vec_1 = eig_power_iteration(A, num_iterations)

    # Shifting by |lam_1| makes the whole spectrum non-negative, so the
    # second pass is guaranteed to grab the most positive eigenvalue.
    lam_1_abs = torch.abs(lam_1)
    lam_I = lam_1_abs.reshape(batch_dims + [1, 1]) * torch.eye(
        num_dims, dtype=A.dtype, device=A.device
    ).view([1 for _ in batch_dims] + [num_dims, num_dims])
    A_shift = A + lam_I
    lam_2, vec = eig_power_iteration(A_shift, num_iterations)

    # Shift back to the original spectrum.
    lam = lam_2 - lam_1_abs
    return lam, vec