Joshua Kravitz committed on
Commit
e284167
·
0 Parent(s):

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .editorconfig +12 -0
  2. .github/workflows/ci.yml +34 -0
  3. .github/workflows/release.yml +50 -0
  4. .gitignore +6 -0
  5. CHANGELOG.md +197 -0
  6. Dockerfile +27 -0
  7. LICENSE +201 -0
  8. README.md +181 -0
  9. dgeb/__init__.py +28 -0
  10. dgeb/cli.py +136 -0
  11. dgeb/dgeb.py +129 -0
  12. dgeb/eval_utils.py +394 -0
  13. dgeb/evaluators.py +839 -0
  14. dgeb/modality.py +8 -0
  15. dgeb/models.py +481 -0
  16. dgeb/tasks/__init__.py +16 -0
  17. dgeb/tasks/bigene_mining_tasks.py +77 -0
  18. dgeb/tasks/classification_tasks.py +213 -0
  19. dgeb/tasks/clustering_tasks.py +70 -0
  20. dgeb/tasks/eds_tasks.py +246 -0
  21. dgeb/tasks/pair_classification_tasks.py +96 -0
  22. dgeb/tasks/retrieval_tasks.py +96 -0
  23. dgeb/tasks/tasks.py +135 -0
  24. docker-compose.yml +8 -0
  25. docs/images/tatta_logo.png +0 -0
  26. leaderboard/.gitignore +2 -0
  27. leaderboard/DGEB_Figure.png +0 -0
  28. leaderboard/README.md +2 -0
  29. leaderboard/__init__.py +0 -0
  30. leaderboard/app.py +260 -0
  31. leaderboard/requirements.txt +82 -0
  32. leaderboard/submissions/.DS_Store +0 -0
  33. leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json +98 -0
  34. leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json +762 -0
  35. leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json +86 -0
  36. leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json +62 -0
  37. leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json +386 -0
  38. leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json +62 -0
  39. leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json +386 -0
  40. leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json +762 -0
  41. leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json +90 -0
  42. leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json +97 -0
  43. leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json +50 -0
  44. leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json +90 -0
  45. leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json +90 -0
  46. leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json +386 -0
  47. leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json +98 -0
  48. leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json +762 -0
  49. leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json +86 -0
  50. leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json +62 -0
.editorconfig ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # top-most EditorConfig file
2
+ root = true
3
+
4
+ # Unix-style newlines with a newline ending every file
5
+ [*]
6
+ end_of_line = lf
7
+ insert_final_newline = true
8
+
9
+ [*.py]
10
+ charset = utf-8
11
+ indent_style = space
12
+ indent_size = 4
.github/workflows/ci.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI for DGEB
2
+
3
+ on:
4
+ push:
5
+ branches: ["**"]
6
+ pull_request:
7
+ branches: ["**"]
8
+
9
+ permissions:
10
+ id-token: write
11
+ contents: read
12
+ actions: write
13
+ pull-requests: read
14
+
15
+ concurrency:
16
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ ruff:
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - uses: actions/checkout@v3
24
+ - uses: actions/setup-python@v4
25
+ with:
26
+ python-version: "3.11"
27
+ - uses: yezz123/setup-uv@v4
28
+ with:
29
+ uv-venv: ".geb_venv"
30
+ - run: uv pip install ruff
31
+ - run: ruff format .
32
+ - run: ruff check .
33
+ # TODO: pytest
34
+ # TODO: pyright
.github/workflows/release.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will
2
+ # - Find the latest version tag based on the commit history
3
+ # - Create a git tag for the new version
4
+ # - Update the version number in pyproject.toml based on the commit history
5
+ # - Upload the package to PyPI
6
+ # - Create a release on GitHub
7
+
8
+ # This workflow required the following secrets to be set:
9
+ # - a GitHub personal access token with the `repo` scope called `RELEASE`
10
+ # - and that you setup trusted publishing using PyPI as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
11
+
12
+ name: Release
13
+ on:
14
+ push:
15
+ branches:
16
+ - main
17
+
18
+ jobs:
19
+ release:
20
+ runs-on: ubuntu-latest
21
+ concurrency: release
22
+ permissions:
23
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI
24
+ contents: write
25
+
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 0
30
+ token: ${{ secrets.GH_TOKEN }}
31
+
32
+ - name: Python Semantic Release
33
+ id: release
34
+ uses: python-semantic-release/[email protected]
35
+ with:
36
+ github_token: ${{ secrets.GH_TOKEN }}
37
+
38
+ - name: Publish package distributions to PyPI
39
+ uses: pypa/[email protected]
40
+ if: steps.release.outputs.released == 'true'
41
+ # This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually
42
+ # configured API token or username/password combination. To perform trusted publishing with this action, your project's
43
+ # publisher must already be configured on PyPI.
44
+
45
+ - name: Publish package distributions to GitHub Releases
46
+ uses: python-semantic-release/[email protected]
47
+ if: steps.release.outputs.released == 'true'
48
+ with:
49
+ github_token: ${{ secrets.GITHUB_TOKEN }}
50
+ tag: ${{ steps.release.outputs.tag }}
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ .vscode/
4
+ build/
5
+ dist/
6
+ *egg-info/
CHANGELOG.md ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CHANGELOG
2
+
3
+ ## v0.0.10 (2024-07-09)
4
+
5
+ ### Fix
6
+
7
+ * fix: remove noop task ([`7d5b393`](https://github.com/TattaBio/DGEB/commit/7d5b3933f48e51fb4c71945f01af2cc5a7dba3ed))
8
+
9
+ ## v0.0.9 (2024-07-09)
10
+
11
+ ### Fix
12
+
13
+ * fix: update cli script name ([`633e14d`](https://github.com/TattaBio/DGEB/commit/633e14db7e1eed0d9606ef1097e369e4f5e245f5))
14
+
15
+ ### Unknown
16
+
17
+ * 0.0.9
18
+
19
+ Automatically generated by python-semantic-release [skip ci] ([`a8c1a96`](https://github.com/TattaBio/DGEB/commit/a8c1a96d18af589795bc9532fee8ad9764cd52ed))
20
+
21
+ * Merge pull request #9 from TattaBio/andre
22
+
23
+ Update ModAC main metric ([`3c67e65`](https://github.com/TattaBio/DGEB/commit/3c67e6559d0e49d90ffe2858eb9e287abd1b6e6c))
24
+
25
+ * ruff format ([`78461ac`](https://github.com/TattaBio/DGEB/commit/78461ac901b8617821ca15e543c0dd8e2dbf6e95))
26
+
27
+ * update top_k=50 for modac ([`2c3dcd5`](https://github.com/TattaBio/DGEB/commit/2c3dcd5856b6679a80999b3c4b3512876ac0b58d))
28
+
29
+ * remove revision ([`2d587da`](https://github.com/TattaBio/DGEB/commit/2d587daa79f32c49201b419892b7f95f3dc5eedb))
30
+
31
+ * Merge pull request #8 from TattaBio/cli
32
+
33
+ Cli & cleanup ([`9698c8f`](https://github.com/TattaBio/DGEB/commit/9698c8f5ab0bab6c3c0a76d59dc29cfd964ebf15))
34
+
35
+ * Exclude leaderboard files in anticipation of merging leaderboard PR ([`58bdcba`](https://github.com/TattaBio/DGEB/commit/58bdcba11af605bdef11cfecc087c9efb0e97b72))
36
+
37
+ * Update README ([`d323905`](https://github.com/TattaBio/DGEB/commit/d3239059e29fb149f9c348b951bc4988d8b9f8dc))
38
+
39
+ * cleanup ([`1f0fe16`](https://github.com/TattaBio/DGEB/commit/1f0fe16de6910200d88c918b08cbf26067313469))
40
+
41
+ * Add cli to pyproject.toml ([`5404218`](https://github.com/TattaBio/DGEB/commit/54042181ef54c11db74ebb53c403b21a8114c02b))
42
+
43
+ * Remove Dataset 'description' which does not exist on model. ([`46b0040`](https://github.com/TattaBio/DGEB/commit/46b0040a302384fa00791bbfdd6fae24645d6a6d))
44
+
45
+ * Merge pull request #7 from TattaBio/add_dna_tasks
46
+
47
+ Add dna tasks ([`cfc5799`](https://github.com/TattaBio/DGEB/commit/cfc57995f9b1e584bb60e998f9cf68bea5ec39fa))
48
+
49
+ * ruff ([`f9fa125`](https://github.com/TattaBio/DGEB/commit/f9fa12502df9837b5381da17b17198f3667c4911))
50
+
51
+ * adding rpob datasets and updating ec revision ([`8f9cc3f`](https://github.com/TattaBio/DGEB/commit/8f9cc3f819beb70f51a5cc59f16c65bffceedbad))
52
+
53
+ * Update README.md ([`d5d7c24`](https://github.com/TattaBio/DGEB/commit/d5d7c24215d347fc17d6016ac2a3eddfb3cf2a12))
54
+
55
+ * Merge pull request #4 from TattaBio/andre
56
+
57
+ Add dataset revisions ([`95b6f11`](https://github.com/TattaBio/DGEB/commit/95b6f11ffee3dccc45ab119ac4f602066750f7ef))
58
+
59
+ * add dataset revision numbers ([`7e069a2`](https://github.com/TattaBio/DGEB/commit/7e069a237de5391e7c6b7f09c108292ac10c25af))
60
+
61
+ * Merge pull request #3 from TattaBio/andre
62
+
63
+ Update readme and task imports ([`ade30a8`](https://github.com/TattaBio/DGEB/commit/ade30a856deffe35ddf57d16705d030b6d0192c8))
64
+
65
+ * rename dgeb ([`6b1c2ee`](https://github.com/TattaBio/DGEB/commit/6b1c2ee76798d89e487386116efe23c90d2d039c))
66
+
67
+ * add intro ([`a2280dd`](https://github.com/TattaBio/DGEB/commit/a2280dd732984d58caed45b9a429038c0d81851a))
68
+
69
+ * update readme and tasks ([`00e0a79`](https://github.com/TattaBio/DGEB/commit/00e0a791f070ca37e5b92770b3363ef066e2789f))
70
+
71
+ * Merge pull request #2 from TattaBio/andre
72
+
73
+ rename dgeb imports ([`1894ba9`](https://github.com/TattaBio/DGEB/commit/1894ba9a92a8f369053ddb9d351ae48fd8e2d674))
74
+
75
+ * rename dgeb imports ([`5f1f8b8`](https://github.com/TattaBio/DGEB/commit/5f1f8b850f271cd6785291e3feb2c2d4bf979f9c))
76
+
77
+ ## v0.0.8 (2024-07-01)
78
+
79
+ ### Fix
80
+
81
+ * fix: don't run ci on release of new version ([`fa97104`](https://github.com/TattaBio/DGEB/commit/fa971049429975d06c8aca086e86b19d92383969))
82
+
83
+ ### Unknown
84
+
85
+ * 0.0.8
86
+
87
+ Automatically generated by python-semantic-release [skip ci] ([`8dc15d3`](https://github.com/TattaBio/DGEB/commit/8dc15d34c6317087253950893974d16b9f75a17c))
88
+
89
+ ## v0.0.7 (2024-07-01)
90
+
91
+ ### Fix
92
+
93
+ * fix: try again ([`e7d0ecd`](https://github.com/TattaBio/DGEB/commit/e7d0ecdcb63e909f9ab727f11fb3fd57414d2fa5))
94
+
95
+ * fix: edit readme to see if job still works with restricted permissions ([`93cd728`](https://github.com/TattaBio/DGEB/commit/93cd728c8a632b9bed611c55dace2e2ffb103410))
96
+
97
+ ### Unknown
98
+
99
+ * 0.0.7
100
+
101
+ Automatically generated by python-semantic-release ([`9808d4f`](https://github.com/TattaBio/DGEB/commit/9808d4f328a577c066affd34d408ad26eb6098d0))
102
+
103
+ * Merge pull request #1 from TattaBio/edit-readme
104
+
105
+ fix: edit readme to see if job still works with restricted permissions ([`c45599c`](https://github.com/TattaBio/DGEB/commit/c45599cf9628155603245f906c09cf6483cffce8))
106
+
107
+ ## v0.0.6 (2024-07-01)
108
+
109
+ ### Fix
110
+
111
+ * fix: nevermind that broke it ([`ec33a1c`](https://github.com/TattaBio/DGEB/commit/ec33a1c6539ac1fb2710869a2d436483a02236e0))
112
+
113
+ * fix: see if I can remove this line ([`246d4e9`](https://github.com/TattaBio/DGEB/commit/246d4e9841a83d18217506d46f211f1341c63526))
114
+
115
+ ### Unknown
116
+
117
+ * 0.0.6
118
+
119
+ Automatically generated by python-semantic-release ([`1b28df5`](https://github.com/TattaBio/DGEB/commit/1b28df559c95db0aea95111a5f27d01645d23786))
120
+
121
+ ## v0.0.5 (2024-07-01)
122
+
123
+ ### Fix
124
+
125
+ * fix: try fixing release to handle protected branch ([`5cedad3`](https://github.com/TattaBio/DGEB/commit/5cedad3e9f34d249eda9257e3c21fc8443d000cf))
126
+
127
+ * fix: another change... ([`c5ad3f0`](https://github.com/TattaBio/DGEB/commit/c5ad3f098d36e25afdf4fa9aae20967eb968568e))
128
+
129
+ * fix: update pip install command in readme ([`ff90791`](https://github.com/TattaBio/DGEB/commit/ff90791398f9a9b907c308400f88811a8f8633dc))
130
+
131
+ ### Unknown
132
+
133
+ * 0.0.5
134
+
135
+ Automatically generated by python-semantic-release ([`ec24ca3`](https://github.com/TattaBio/DGEB/commit/ec24ca343b49bee85c72907554772976f02eab1a))
136
+
137
+ ## v0.0.4 (2024-07-01)
138
+
139
+ ### Fix
140
+
141
+ * fix: move gh token to env ([`95e292c`](https://github.com/TattaBio/DGEB/commit/95e292c46f7908659d46bc093ef4903609f1edc5))
142
+
143
+ ### Unknown
144
+
145
+ * 0.0.4
146
+
147
+ Automatically generated by python-semantic-release ([`03f3004`](https://github.com/TattaBio/DGEB/commit/03f300476b0aeca2796b780139fce0be037ae636))
148
+
149
+ ## v0.0.3 (2024-07-01)
150
+
151
+ ### Fix
152
+
153
+ * fix: remove persist credentials ([`2ae683e`](https://github.com/TattaBio/DGEB/commit/2ae683ed7a68b0559b81b1b7f5716636beef1415))
154
+
155
+ * fix: try to fix release CI ([`1dfc938`](https://github.com/TattaBio/DGEB/commit/1dfc9383b2dab8bba444b09c6b85500dadee7203))
156
+
157
+ ### Unknown
158
+
159
+ * 0.0.3
160
+
161
+ Automatically generated by python-semantic-release ([`7cbfc8d`](https://github.com/TattaBio/DGEB/commit/7cbfc8d0acef975d046ff485001ed289800d143f))
162
+
163
+ ## v0.0.2 (2024-07-01)
164
+
165
+ ### Fix
166
+
167
+ * fix: new repository name ([`8fc1145`](https://github.com/TattaBio/DGEB/commit/8fc1145985eab8aa97562f697edab45a30b189ba))
168
+
169
+ * fix: addl geb references ([`86a5af8`](https://github.com/TattaBio/DGEB/commit/86a5af8c24244ac8f2670801468e1a25b8e3e9df))
170
+
171
+ ### Unknown
172
+
173
+ * 0.0.2
174
+
175
+ Automatically generated by python-semantic-release ([`1c7b19b`](https://github.com/TattaBio/DGEB/commit/1c7b19b50597e9dabe07fbf7cb7d3c589438917a))
176
+
177
+ ## v0.0.1 (2024-07-01)
178
+
179
+ ### Fix
180
+
181
+ * fix: rename geb to dgeb ([`be712f8`](https://github.com/TattaBio/DGEB/commit/be712f8d19678801b9148ac8397f13afe826871b))
182
+
183
+ ### Unknown
184
+
185
+ * 0.0.1
186
+
187
+ Automatically generated by python-semantic-release ([`1503e03`](https://github.com/TattaBio/DGEB/commit/1503e030bb1277e1a2dcad7b99c9ed3472243f5d))
188
+
189
+ ## v0.0.0 (2024-07-01)
190
+
191
+ ### Unknown
192
+
193
+ * 0.0.0
194
+
195
+ Automatically generated by python-semantic-release ([`4b791ee`](https://github.com/TattaBio/DGEB/commit/4b791ee07085647427afec31a1adf61977e6bd4c))
196
+
197
+ * Initial commit ([`36fe62c`](https://github.com/TattaBio/DGEB/commit/36fe62c234331de97f2827a49bf62d5c35b92a1f))
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker file for leaderboard
2
+ FROM python:3.11-slim
3
+
4
+ WORKDIR /app
5
+
6
+ # install curl
7
+ RUN apt-get update && apt-get install -y curl
8
+ ADD https://astral.sh/uv/install.sh /install.sh
9
+ RUN chmod +x /install.sh
10
+ RUN /install.sh && rm /install.sh
11
+
12
+ # install deps
13
+ COPY leaderboard/requirements.txt ./
14
+ RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
15
+
16
+ # copy src
17
+ COPY dgeb dgeb
18
+ COPY leaderboard/ leaderboard/
19
+
20
+ # Run gradio when the container launches
21
+ EXPOSE 7860
22
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
23
+ ENV GRADIO_TEMP_DIR="/app"
24
+ WORKDIR /app/leaderboard
25
+ CMD ["python", "app.py"]
26
+
27
+
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DGEB
3
+ app_file: leaderboard/app.py
4
+ sdk: docker
5
+ sdk_version: 4.36.1
6
+ ---
7
+ <h1 align="center">Diverse Genomic Embedding Benchmark</h1>
8
+
9
+ <p align="center">
10
+ <a href="https://github.com/tattabio/dgeb/releases">
11
+ <img alt="GitHub release" src="https://img.shields.io/github/v/release/tattabio/dgeb.svg">
12
+ </a>
13
+ <a href="">
14
+ <img alt="arXiv URL" src="">
15
+ </a>
16
+ <a href="https://github.com/tattabio/dgeb/blob/main/LICENSE">
17
+ <img alt="License" src="https://img.shields.io/github/license/tattabio/dgeb.svg">
18
+ </a>
19
+ <a href="https://pepy.tech/project/dgeb">
20
+ <img alt="Downloads" src="https://static.pepy.tech/personalized-badge/dgeb?period=total&units=international_system&left_color=grey&right_color=orange&left_text=Downloads">
21
+ </a>
22
+ </p>
23
+
24
+ <h4 align="center">
25
+ <p>
26
+ <a href="#installation">Installation</a> |
27
+ <a href="#usage">Usage</a> |
28
+ <a href="https://huggingface.co/spaces/tattabio/DGEB">Leaderboard</a> |
29
+ <a href="#citing">Citing</a>
30
+ <p>
31
+ </h4>
32
+
33
+ <h3 align="center">
34
+ <a href="https://huggingface.co/spaces/dgeb"><img style="float: middle; padding: 10px 10px 10px 10px;" width="100" height="100" src="./docs/images/tatta_logo.png" /></a>
35
+ </h3>
36
+
37
+ DGEB is a benchmark for evaluating biological sequence models on functional and evolutionary information.
38
+
39
+ DGEB is designed to evaluate model embeddings using:
40
+
41
+ - Diverse sequences across the tree of life.
42
+ - Diverse tasks that capture different aspects of biological function.
43
+ - Both amino acid and nucleotide sequences.
44
+
45
+ The current version of DGEB consists of 18 datasets covering all three domains of life (Bacteria, Archaea and Eukarya). DGEB evaluates embeddings using six different embedding tasks: Classification, BiGene mining, Evolutionary Distance Similarity (EDS), Pair Classification, Clustering, and Retrieval.
46
+
47
+ We welcome contributions of new tasks and datasets.
48
+
49
+ ## Installation
50
+
51
+ Install DGEB using pip.
52
+
53
+ ```bash
54
+ pip install dgeb
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ - Launch evaluation using the python script (see [cli.py](https://github.com/tattabio/dgeb/blob/main/dgeb/cli.py)):
60
+
61
+ ```bash
62
+ dgeb --model facebook/esm2_t6_8M_UR50D
63
+ ```
64
+
65
+ - To see all supported models and tasks:
66
+
67
+ ```bash
68
+ dgeb --help
69
+ ```
70
+
71
+ - Using the python API:
72
+
73
+ ```py
74
+ import dgeb
75
+
76
+ model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
77
+ tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
78
+ evaluation = dgeb.DGEB(tasks=tasks)
79
+ evaluation.run(model, output_folder="results")
80
+ ```
81
+
82
+ ### Using a custom model
83
+
84
+ Custom models should be wrapped with the `dgeb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/dgeb/blob/main/dgeb/models.py) for additional examples on custom model loading and inference.
85
+
86
+ ```python
87
+ import dgeb
88
+ from dgeb.models import BioSeqTransformer
89
+ from dgeb.tasks.tasks import Modality
90
+
91
+ class MyModel(BioSeqTransformer):
92
+
93
+ @property
94
+ def modality(self) -> Modality:
95
+ return Modality.PROTEIN
96
+
97
+ @property
98
+ def num_layers(self) -> int:
99
+ return self.config.num_hidden_layers
100
+
101
+ @property
102
+ def embed_dim(self) -> int:
103
+ return self.config.hidden_size
104
+
105
+
106
+ model = MyModel(model_name='path_to/huggingface_model')
107
+ tasks = dgeb.get_tasks_by_modality(model.modality)
108
+ evaluation = dgeb.DGEB(tasks=tasks)
109
+ evaluation.run(model)
110
+ ```
111
+
112
+ ### Evaluating on a custom dataset
113
+
114
+ **We strongly encourage users to contribute their custom datasets to DGEB. Please open a PR adding your dataset so that the community can benefit!**
115
+
116
+ To evaluate on a custom dataset, first upload your dataset to the [Huggingface Hub](https://huggingface.co/docs/hub/en/datasets-adding). Then define a `Task` subclass with `TaskMetadata` that points to your huggingface dataset. For example, a classification task on a custom dataset can be defined as follows:
117
+
118
+ ```python
119
+ import dgeb
120
+ from dgeb.models import BioSeqTransformer
121
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
122
+ from dgeb.tasks.classification_tasks import run_classification_task
123
+
124
+ class MyCustomTask(Task):
125
+ metadata = TaskMetadata(
126
+ id="my_custom_classification",
127
+ display_name="...",
128
+ description="...",
129
+ type="classification",
130
+ modality=Modality.PROTEIN,
131
+ datasets=[
132
+ Dataset(
133
+ path="path_to/huggingface_dataset",
134
+ revision="...",
135
+ )
136
+ ],
137
+ primary_metric_id="f1",
138
+ )
139
+
140
+ def run(self, model: BioSeqTransformer) -> TaskResult:
141
+ return run_classification_task(model, self.metadata)
142
+
143
+ model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
144
+ evaluation = dgeb.DGEB(tasks=[MyCustomTask])
145
+ evaluation.run(model)
146
+ ```
147
+
148
+ ## Leaderboard
149
+
150
+ To add your submission to the DGEB leaderboard, proceed through the following instructions.
151
+
152
+ 1. Fork the DGEB repository by following GitHub's instruction [Forking Workflow](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
153
+
154
+ 2. Add your submission .json file to the leaderboard/submissions/<HF_MODEL_NAME>/ directory.
155
+
156
+ ```bash
157
+ mv /path/to/<SUBMISSION_FILE>.json /path/to/DGEB/leaderboard/submissions/<HF_MODEL_NAME>/
158
+ ```
159
+
160
+ 3. Update your fork with the new submission:
161
+
162
+ ```bash
163
+ git add leaderboard/submissions/<HF_MODEL_NAME>/<SUBMISSION_FILE>.json
164
+ git commit -m "Add submission for <HF_MODEL_NAME>"
165
+ git push
166
+ ```
167
+
168
+ 4. Open a pull request to the main branch of the repository via the GitHub interface.
169
+
170
+ 5. Once the PR is reviewed and merged, your submission will be added to the leaderboard!
171
+
172
+
173
+ ## Acknowledgements
174
+
175
+ DGEB follows the design of the text embedding benchmark [MTEB](https://github.com/embeddings-benchmark/mteb) developed by Huggingface 🤗. The evaluation code is adapted from the MTEB codebase.
176
+
177
+ ## Citing
178
+
179
+ DGEB was introduced in "[Diverse Genomic Embedding Benchmark for Functional Evaluation Across the Tree of Life]()", feel free to cite:
180
+
181
+ TODO
dgeb/__init__.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dgeb.dgeb import (
2
+ DGEB,
3
+ get_all_model_names,
4
+ get_all_task_names,
5
+ get_all_tasks,
6
+ get_model,
7
+ get_output_folder,
8
+ get_tasks_by_modality,
9
+ get_tasks_by_name,
10
+ )
11
+ from dgeb.modality import Modality
12
+ from dgeb.tasks.tasks import TaskResult
13
+
14
+ # importing without setting `__all__` produces a Ruff error:
15
+ # "imported but unused; consider removing, adding to __all__, or using a redundant alias RuffF401"
16
+ # See https://docs.astral.sh/ruff/rules/unused-import/#why-is-this-bad
17
+ __all__ = [
18
+ "DGEB",
19
+ "get_all_tasks",
20
+ "get_all_task_names",
21
+ "get_tasks_by_name",
22
+ "get_tasks_by_modality",
23
+ "get_all_model_names",
24
+ "get_model",
25
+ "get_output_folder",
26
+ "TaskResult",
27
+ "Modality",
28
+ ]
dgeb/cli.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main command to run diverse genomic embedding benchmarks (DGEB) on a model.
3
+ example command to run DGEB:
4
+ python run_dgeb.py -m facebook/esm2_t6_8M_UR50D
5
+ """
6
+
7
+ import argparse
8
+ import logging
9
+ import os
10
+
11
+ import dgeb
12
+
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ ALL_TASK_NAMES = dgeb.get_all_task_names()
17
+ ALL_MODEL_NAMES = dgeb.get_all_model_names()
18
+
19
+
20
def main():
    """Parse CLI arguments, load the requested model, and run DGEB on it.

    Raises:
        ValueError: If no model is given, or if --devices/--layers are not
            comma-separated integers (or 'mid'/'last' for layers).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default=None,
        help=f"Model to evaluate. Choose from {ALL_MODEL_NAMES}",
    )
    parser.add_argument(
        "-t",
        "--tasks",
        type=lambda s: [item for item in s.split(",")],
        default=None,
        help=f"Comma separated tasks to evaluate on. Choose from {ALL_TASK_NAMES} or do not specify to evaluate on all tasks",
    )
    parser.add_argument(
        "-l",
        "--layers",
        type=str,
        default=None,
        help="Layer to evaluate. Comma separated list of integers or 'mid' and 'last'. Default is 'mid,last'",
    )
    parser.add_argument(
        "--devices",
        type=str,
        default="0",
        help="Comma separated list of GPU device ids to use. Default is 0 (if GPUs are detected).",
    )
    parser.add_argument(
        "--output_folder",
        type=str,
        default=None,
        help="Output directory for results. Will default to results/model_name if not set.",
    )
    parser.add_argument(
        "-v", "--verbosity", type=int, default=2, help="Verbosity level"
    )
    parser.add_argument(
        "-b", "--batch_size", type=int, default=64, help="Batch size for evaluation"
    )
    parser.add_argument(
        "--max_seq_len",
        type=int,
        default=1024,
        help="Maximum sequence length for model, default is 1024.",
    )
    parser.add_argument(
        "--pool_type",
        type=str,
        default="mean",
        help="Pooling type for model, choose from mean, max, cls, last. Default is mean.",
    )

    args = parser.parse_args()

    # Bug fix: configure the "dgeb" package root logger so every dgeb.* module
    # logger (created via logging.getLogger(__name__)) inherits the level.
    # The original configured an unused "geb" logger, so -v had no effect.
    verbosity_to_level = {
        0: logging.CRITICAL,
        1: logging.WARNING,
        2: logging.INFO,
        3: logging.DEBUG,
    }
    if args.verbosity in verbosity_to_level:
        logging.getLogger("dgeb").setLevel(verbosity_to_level[args.verbosity])

    if args.model is None:
        raise ValueError("Please specify a model using the -m or --model argument")

    # Devices must be a comma separated list of integers.
    try:
        devices = [int(device) for device in args.devices.split(",")]
    except ValueError as e:
        raise ValueError("Devices must be comma separated list of integers") from e

    layers = args.layers
    if layers:
        if layers not in ["mid", "last"]:
            # Layers should be a list of integers.
            try:
                layers = [int(layer) for layer in layers.split(",")]
            except ValueError as e:
                raise ValueError("Layers must be a list of integers.") from e

    model_name = args.model.split("/")[-1]
    # Base directory under which results are written; DGEB.run() appends
    # the model name and "<task_id>.json" to this path.
    base_output = args.output_folder if args.output_folder is not None else "results"
    results_dir = os.path.join(base_output, model_name)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    logger.info(f"Results will be saved to {results_dir}")

    # Load the model by name.
    model = dgeb.get_model(
        model_name=args.model,
        layers=layers,
        devices=devices,
        max_seq_length=args.max_seq_len,
        batch_size=args.batch_size,
        pool_type=args.pool_type,
    )

    all_tasks_for_modality = dgeb.get_tasks_by_modality(model.modality)

    if args.tasks:
        task_list = dgeb.get_tasks_by_name(args.tasks)
        if not all(task.metadata.modality == model.modality for task in task_list):
            raise ValueError(f"Tasks must be one of {all_tasks_for_modality}")
    else:
        task_list = all_tasks_for_modality
    evaluation = dgeb.DGEB(tasks=task_list)
    # Bug fix: forward the chosen base output folder; previously --output_folder
    # was ignored and results always went to the default "results" directory.
    _ = evaluation.run(model, output_folder=base_output)


if __name__ == "__main__":
    main()
dgeb/dgeb.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import traceback
4
+ from itertools import chain
5
+ from typing import Any, List
6
+
7
+ from rich.console import Console
8
+
9
+ from .eval_utils import set_all_seeds
10
+ from .modality import Modality
11
+ from .models import BioSeqTransformer
12
+ from .tasks.tasks import Task
13
+
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class DGEB:
    """Runs the DGEB evaluation pipeline over a list of Task classes.

    Tasks are passed as classes (not instances) and instantiated lazily in
    :meth:`run`; the global RNGs are seeded once at construction time.
    """

    def __init__(self, tasks: List[type[Task]], seed: int = 42):
        # Task classes to evaluate; instantiated one at a time in run().
        self.tasks = tasks
        set_all_seeds(seed)

    def print_selected_tasks(self):
        """Print the selected tasks."""
        console = Console()
        console.rule("[bold]Selected Tasks\n", style="grey15")
        for task in self.tasks:
            prefix = " - "
            name = f"{task.metadata.display_name}"
            category = f", [italic grey39]{task.metadata.type}[/]"
            console.print(f"{prefix}{name}{category}")
        console.print("\n")

    def run(
        self,
        model,  # expected to be a BioSeqTransformer-style encoder with .hf_name
        output_folder: str = "results",
    ):
        """Run the evaluation pipeline on the selected tasks.

        Args:
            model: Model to be used for evaluation.
            output_folder: Folder where the results will be saved. Defaults to
                'results'. Each result is written to
                `{output_folder}/{model_name}/{task_id}.json`
                (see `get_output_folder` below).

        Returns:
            A list of TaskResult objects, one for each task that completed
            successfully (failed tasks are logged and skipped).
        """
        # Run selected tasks
        self.print_selected_tasks()
        results = []

        for task in self.tasks:
            logger.info(
                f"\n\n********************** Evaluating {task.metadata.display_name} **********************"
            )

            # A failing task is logged and skipped so one bad task does not
            # abort the whole benchmark run.
            try:
                result = task().run(model)
            except Exception as e:
                logger.error(e)
                logger.error(traceback.format_exc())
                logger.error(f"Error running task {task}")
                continue

            results.append(result)

            # Persist each task's result as soon as it finishes.
            save_path = get_output_folder(model.hf_name, task, output_folder)
            with open(save_path, "w") as f_out:
                f_out.write(result.model_dump_json(indent=2))
        return results
74
+
75
+
76
def get_model(model_name: str, **kwargs: Any) -> BioSeqTransformer:
    """Instantiate the model registered under `model_name`.

    Args:
        model_name: A name listed in some BioSeqTransformer subclass's MODEL_NAMES.
        **kwargs: Forwarded to the subclass constructor (layers, devices, ...).

    Returns:
        An *instance* of the matching BioSeqTransformer subclass.
        (Bug fix: the annotation previously said ``type[BioSeqTransformer]``,
        but the function returns ``cls(model_name, **kwargs)``, an instance.)

    Raises:
        ValueError: If no registered subclass supports `model_name`.
    """
    all_names = get_all_model_names()
    for cls in BioSeqTransformer.__subclasses__():
        if model_name in cls.MODEL_NAMES:
            return cls(model_name, **kwargs)
    raise ValueError(f"Model {model_name} not found in {all_names}.")
82
+
83
+
84
def get_all_model_names() -> List[str]:
    """Collect every model name supported by any BioSeqTransformer subclass."""
    names: List[str] = []
    for cls in BioSeqTransformer.__subclasses__():
        names.extend(cls.MODEL_NAMES)
    return names
90
+
91
+
92
def get_all_task_names() -> List[str]:
    """Return the metadata id of every registered task."""
    names = []
    for task in get_all_tasks():
        names.append(task.metadata.id)
    return names
94
+
95
+
96
def get_tasks_by_name(tasks: List[str]) -> List[type[Task]]:
    """Resolve a list of task id strings to their task classes."""
    return list(map(_get_task, tasks))
98
+
99
+
100
def get_tasks_by_modality(modality: Modality) -> List[type[Task]]:
    """Return all registered task classes whose metadata matches `modality`."""
    matching = []
    for task in get_all_tasks():
        if task.metadata.modality == modality:
            matching.append(task)
    return matching
102
+
103
+
104
def get_all_tasks() -> List[type[Task]]:
    """Every registered task class, i.e. all direct subclasses of Task."""
    return [*Task.__subclasses__()]
106
+
107
+
108
def _get_task(task_name: str) -> type[Task]:
    """Resolve one task id to its class; raise ValueError if unknown."""
    logger.info(f"Getting task {task_name}")
    found = next(
        (task for task in get_all_tasks() if task.metadata.id == task_name), None
    )
    if found is not None:
        return found

    raise ValueError(
        f"Task {task_name} not found, available tasks are: {[task.metadata.id for task in get_all_tasks()]}"
    )
117
+
118
+
119
def get_output_folder(
    model_hf_name: str, task: type[Task], output_folder: str, create: bool = True
):
    """Build (and optionally create) the path of a model's per-task result file.

    Returns `{output_folder}/{basename(model_hf_name)}/{task_id}.json`.
    """
    model_dir = os.path.join(output_folder, os.path.basename(model_hf_name))
    # Ensure the per-model directory exists before the caller writes into it.
    if create and not os.path.exists(model_dir):
        os.makedirs(model_dir)
    return os.path.join(model_dir, f"{task.metadata.id}.json")
dgeb/eval_utils.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for evaluation."""
2
+
3
+ from typing import Any, Dict, List, Tuple
4
+ import json
5
+ import torch
6
+ import random
7
+ import numpy as np
8
+ from sklearn.metrics import auc
9
+
10
+
11
class ForwardHook:
    """Registers a forward hook on a module and keeps its most recent output."""

    def __init__(self, module: torch.nn.Module):
        # `output` holds the latest forward result; None until the first call.
        self.output = None
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        # Invoked by PyTorch after every forward pass of the hooked module.
        self.output = output

    def close(self):
        """Detach the hook from the module."""
        self.hook.remove()
23
+
24
+
25
def pool(
    last_hidden_states: torch.Tensor, attention_mask: torch.Tensor, pool_type: str
) -> torch.Tensor:
    """Collapse per-token embeddings into one vector per sequence.

    Padded positions (attention_mask == 0) are zeroed out first so they never
    contribute to the pooled value.
    """
    assert (
        last_hidden_states.ndim == 3
    ), f"Expected hidden_states to have shape [batch, seq_len, D], got shape: {last_hidden_states.shape}"
    assert (
        attention_mask.ndim == 2
    ), f"Expected attention_mask to have shape [batch, seq_len], got shape: {attention_mask.shape}"
    mask = attention_mask[..., None].bool()
    hidden = last_hidden_states.masked_fill(~mask, 0.0)
    if pool_type == "mean":
        # Divide by the true token count, not the padded length.
        return hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
    if pool_type == "max":
        # NOTE(review): masked positions were zeroed, so max pooling is floored
        # at 0 for all-negative features — preserved from the original.
        return hidden.max(dim=1)[0]
    if pool_type == "cls":
        return hidden[:, 0]
    if pool_type == "last":
        last_token_idx = attention_mask.sum(1) - 1
        return hidden[torch.arange(hidden.size(0)), last_token_idx]
    raise ValueError(f"pool_type {pool_type} not supported")
47
+
48
+
49
def set_all_seeds(seed):
    """Seed every RNG used by the benchmark (python, numpy, torch CPU/CUDA)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    # Force deterministic cuDNN kernels as well.
    torch.backends.cudnn.deterministic = True
55
+
56
+
57
def write_results_to_json(results: Dict[str, Any], results_path: str):
    """Serialize a results dict to `results_path` as indented JSON."""
    with open(results_path, "w") as handle:
        json.dump(results, handle, indent=4)
61
+
62
+
63
def merge_split_elem_embeds(ids, embeds, preserve_order: bool = False):
    """Mean-pool embeddings that share an id, optionally keeping input order.

    Args:
        ids: Array of string ids, [batch].
        embeds: Array of embeddings, [batch, ...].
        preserve_order: If True, order the output by each id's first appearance
            in `ids` instead of sorted-unique order.

    Returns:
        ids: Unique ids, [unique_batch].
        embeds: Mean-pooled embeddings, [unique_batch, ...].
    """
    unique_ids, inverse = np.unique(ids, return_inverse=True)
    trailing = embeds.shape[1:]
    # Scatter-add each row into its id's slot, then divide by the group size.
    totals = np.zeros((unique_ids.size,) + trailing, dtype=embeds.dtype)
    np.add.at(totals, inverse, embeds)
    counts = np.bincount(inverse, minlength=unique_ids.size)
    counts = counts.reshape((-1,) + (1,) * len(trailing))
    mean_pooled = totals / counts
    if preserve_order:
        # Rank unique ids by where they first occur in the input.
        first_seen = [np.where(ids == uid)[0][0] for uid in unique_ids]
        order = np.argsort(first_seen)
        unique_ids = unique_ids[order]
        mean_pooled = mean_pooled[order]
    return unique_ids, mean_pooled
92
+
93
+
94
def paired_dataset(labels, embeds):
    """Pair each embedding with its successor for consecutive operonic gene pairs.

    The label of a pair is the label of its first element; the final element
    has no successor and is dropped from the label list.
    """
    return embeds[:-1], embeds[1:], labels[:-1]
100
+
101
+
102
def cos_sim(a, b):
    """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.

    Return:
        Matrix with res[i][j] = cos_sim(a[i], b[j])
    """  # noqa: D402
    normalized = []
    for x in (a, b):
        # Accept lists/arrays and promote 1-D inputs to a single-row matrix.
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        normalized.append(torch.nn.functional.normalize(x, p=2, dim=1))
    return torch.mm(normalized[0], normalized[1].transpose(0, 1))
123
+
124
+
125
def dot_score(a: torch.Tensor, b: torch.Tensor):
    """Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
    :return: Matrix with res[i][j] = dot_prod(a[i], b[j])
    """
    mats = []
    for x in (a, b):
        # Accept lists/arrays and promote 1-D inputs to a single-row matrix.
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        mats.append(x)
    return mats[0] @ mats[1].transpose(0, 1)
142
+
143
+
144
+ # From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4
145
def mrr(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    """Mean reciprocal rank at each cutoff in `k_values`.

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute MRR.
        output_type: "mean" averages over all queries; "all" keeps per-query lists.
    """
    MRR = {f"MRR@{k}": [] for k in k_values}
    k_max = max(k_values)

    for query_id, doc_scores in results.items():
        # Rank this query's documents by score, keeping only the top k_max.
        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
            :k_max
        ]
        relevant = {
            doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
        }
        for k in k_values:
            rr = 0
            for rank, (doc_id, _score) in enumerate(ranked[:k]):
                if doc_id in relevant:
                    # Reciprocal rank of the first relevant hit within the top k.
                    rr = 1.0 / (rank + 1)
                    break
            MRR[f"MRR@{k}"].append(rr)

    if output_type == "mean":
        for k in k_values:
            MRR[f"MRR@{k}"] = round(sum(MRR[f"MRR@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return MRR
183
+
184
+
185
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
186
def recall_cap(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    """Capped recall at k: retrieved relevant docs over min(#relevant, k).

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute capped recall.
        output_type: "mean" averages over all queries; "all" keeps per-query lists.
    """
    capped_recall = {f"R_cap@{k}": [] for k in k_values}
    k_max = max(k_values)

    for query_id, doc_scores in results.items():
        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
            :k_max
        ]
        num_relevant = sum(
            1 for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
        )
        for k in k_values:
            hits = [
                doc_id
                for doc_id, _score in ranked[:k]
                if qrels[query_id].get(doc_id, 0) > 0
            ]
            # Cap the denominator at k so a query with many relevant docs can
            # still reach a recall of 1.0 within the top k.
            denominator = min(num_relevant, k)
            capped_recall[f"R_cap@{k}"].append(len(hits) / denominator)

    if output_type == "mean":
        for k in k_values:
            capped_recall[f"R_cap@{k}"] = round(
                sum(capped_recall[f"R_cap@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return capped_recall
223
+
224
+
225
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
226
def hole(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Dict[str, float]:
    """Hole@k: fraction of top-k retrieved docs absent from all qrels annotations.

    Args:
        qrels: query_id -> {doc_id: relevance judgement}.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute the hole rate.
        output_type: "mean" averages over queries; "all" keeps per-query lists.

    Returns:
        Mapping "Hole@k" -> mean score (or per-query list for "all").
    """
    Hole = {f"Hole@{k}": [] for k in k_values}

    # Every document judged for any query (positively or negatively).
    annotated_corpus = set()
    for _, docs in qrels.items():
        for doc_id, score in docs.items():
            annotated_corpus.add(doc_id)

    k_max = max(k_values)

    for _, scores in results.items():
        top_hits = sorted(scores.items(), key=lambda item: item[1], reverse=True)[
            0:k_max
        ]
        for k in k_values:
            hole_docs = [
                row[0] for row in top_hits[0:k] if row[0] not in annotated_corpus
            ]
            Hole[f"Hole@{k}"].append(len(hole_docs) / k)

    if output_type == "mean":
        for k in k_values:
            # Bug fix: the per-query values accumulate in a list, so they must be
            # summed before dividing. The original divided the list itself by
            # len(qrels), raising a TypeError whenever output_type == "mean"
            # (compare mrr(), which correctly uses sum()).
            Hole[f"Hole@{k}"] = round(sum(Hole[f"Hole@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return Hole
262
+
263
+
264
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
265
def top_k_accuracy(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Dict[str, float]:
    """Accuracy@k: fraction of queries with >= 1 relevant doc in the top k.

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute accuracy.
        output_type: "mean" averages over queries; "all" keeps per-query lists.

    Returns:
        Mapping "Accuracy@k" -> mean score (or per-query hit list for "all").
    """
    top_k_acc = {f"Accuracy@{k}": [] for k in k_values}

    k_max, top_hits = max(k_values), {}

    # Keep only each query's k_max highest-scoring doc ids, best first.
    for query_id, doc_scores in results.items():
        top_hits[query_id] = [
            item[0]
            for item in sorted(
                doc_scores.items(), key=lambda item: item[1], reverse=True
            )[0:k_max]
        ]

    for query_id in top_hits:
        query_relevant_docs = set(
            [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0]
        )
        for k in k_values:
            # Record a hit (1.0) for this query if any relevant doc is in top k;
            # misses contribute nothing, so the mean below is the hit rate.
            for relevant_doc_id in query_relevant_docs:
                if relevant_doc_id in top_hits[query_id][0:k]:
                    top_k_acc[f"Accuracy@{k}"].append(1.0)
                    break

    if output_type == "mean":
        for k in k_values:
            # Bug fix: sum the per-query hit list before dividing. The original
            # divided the list itself by len(qrels), raising a TypeError
            # whenever output_type == "mean" (compare mrr(), which uses sum()).
            top_k_acc[f"Accuracy@{k}"] = round(
                sum(top_k_acc[f"Accuracy@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return top_k_acc
306
+
307
+
308
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
309
def confidence_scores(sim_scores: List[float]) -> Dict[str, float]:
    """Computes confidence scores for a single instance = (query, positives, negatives)

    Args:
        sim_scores: Query-documents similarity scores with length `num_pos+num_neg`
            (must be non-empty).

    Returns:
        conf_scores:
            - `max`: Maximum similarity score
            - `std`: Standard deviation of similarity scores
            - `diff1`: Difference between highest and second highest similarity
              scores (0.0 when only one score is given)
    """
    # Idiomatic descending sort (the original used sorted(...)[::-1]).
    ranked = sorted(sim_scores, reverse=True)

    cs_max = ranked[0]
    cs_std = np.std(sim_scores)
    # Bug fix: the original left cs_diff1 unbound for an empty input; a single
    # conditional expression covers both branches and keeps n == 1 -> 0.0.
    cs_diff1 = ranked[0] - ranked[1] if len(ranked) > 1 else 0.0

    return {"max": cs_max, "std": cs_std, "diff1": cs_diff1}
333
+
334
+
335
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
336
def nAUC(
    conf_scores: np.ndarray,
    metrics: np.ndarray,
    abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
) -> float:
    """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997
    1/ Computes the raw abstention curve, i.e., the average evaluation metric at different abstention rates determined by the confidence scores
    2/ Computes the oracle abstention curve, i.e., the best theoretical abstention curve (e.g.: at a 10% abstention rate, the oracle abstains on the bottom-10% instances with regard to the evaluation metric)
    3/ Computes the flat abstention curve, i.e., the one remains flat for all abstention rates (ineffective abstention)
    4/ Computes the area under the three curves
    5/ Finally scales the raw AUC between the oracle and the flat AUCs to get normalized AUC

    Args:
        conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
        metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
        abstention_rates: Target rates for the computation of the abstention curve

    Returns:
        abst_nauc: Normalized area under the abstention curve (upper-bounded by 1)
    """

    def abstention_curve(
        conf_scores: np.ndarray,
        metrics: np.ndarray,
        abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
    ) -> np.ndarray:
        """Computes the raw abstention curve for a given set of evaluated instances and corresponding confidence scores

        Args:
            conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
            metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
            abstention_rates: Target rates for the computation of the abstention curve

        Returns:
            abst_curve: Abstention curve of length `len(abstention_rates)`
        """
        # Sort instances from least to most confident; abstaining drops the
        # lowest-confidence prefix of this ordering.
        conf_scores_argsort = np.argsort(conf_scores)
        abst_curve = np.zeros(len(abstention_rates))

        for i, rate in enumerate(abstention_rates):
            # Cap the abstention count so at least one instance always remains.
            num_instances_abst = min(
                round(rate * len(conf_scores_argsort)), len(conf_scores) - 1
            )
            abst_curve[i] = metrics[conf_scores_argsort[num_instances_abst:]].mean()

        return abst_curve

    abst_curve = abstention_curve(conf_scores, metrics, abstention_rates)
    # Oracle curve: abstain using the metric itself as the confidence signal.
    or_curve = abstention_curve(metrics, metrics, abstention_rates)
    abst_auc = auc(abstention_rates, abst_curve)
    or_auc = auc(abstention_rates, or_curve)
    # Flat curve area: the no-abstention metric held constant over all rates.
    flat_auc = or_curve[0] * (abstention_rates[-1] - abstention_rates[0])

    if or_auc == flat_auc:
        # Degenerate case: the oracle cannot beat no-abstention, so the
        # normalization denominator is zero and nAUC is undefined.
        abst_nauc = np.nan
    else:
        abst_nauc = (abst_auc - flat_auc) / (or_auc - flat_auc)

    return abst_nauc
dgeb/evaluators.py ADDED
@@ -0,0 +1,839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluator objects for different evaluation types.
3
+ """
4
+
5
+ import logging
6
+ import random
7
+ from abc import ABC, abstractmethod
8
+ import heapq
9
+ from collections import defaultdict
10
+ import pytrec_eval
11
+ import numpy as np
12
+ import sklearn.cluster
13
+ import torch
14
+ from scipy.stats import pearsonr
15
+ from sklearn.linear_model import LogisticRegression
16
+ from sklearn.metrics import (
17
+ accuracy_score,
18
+ average_precision_score,
19
+ classification_report,
20
+ f1_score,
21
+ precision_score,
22
+ recall_score,
23
+ label_ranking_average_precision_score,
24
+ )
25
+ from sklearn.metrics.cluster import v_measure_score
26
+ from sklearn.metrics.pairwise import (
27
+ paired_cosine_distances,
28
+ paired_euclidean_distances,
29
+ paired_manhattan_distances,
30
+ )
31
+ from sklearn.multioutput import MultiOutputRegressor
32
+ from sklearn.preprocessing import MultiLabelBinarizer
33
+ from typing import Dict, List, Tuple
34
+
35
+ from .eval_utils import (
36
+ cos_sim,
37
+ dot_score,
38
+ mrr,
39
+ recall_cap,
40
+ hole,
41
+ confidence_scores,
42
+ nAUC,
43
+ top_k_accuracy,
44
+ )
45
+
46
+
47
class Evaluator(ABC):
    """Base class for all evaluators
    Extend this class and implement __call__ for custom evaluators.
    """

    def __init__(self, seed=42, **kwargs):
        # Seed every RNG up front so each evaluator run is reproducible.
        self.seed = seed
        for seed_fn in (
            random.seed,
            np.random.seed,
            torch.manual_seed,
            torch.cuda.manual_seed_all,
        ):
            seed_fn(self.seed)

    @abstractmethod
    def __call__(self, model):
        """This is called during training to evaluate the model.
        It returns scores.

        Parameters
        ----------
        model:
            the model to evaluate
        """
        pass
70
+
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+
75
class logRegClassificationEvaluator(Evaluator):
    """Fits a logistic-regression probe on train embeddings and scores it on test embeddings."""

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.max_iter = max_iter

    def __call__(self):
        """Train the probe and report accuracy / macro-F1 (plus AP for binary labels)."""
        classifier = LogisticRegression(
            random_state=self.seed,
            n_jobs=-1,
            max_iter=self.max_iter,
            verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0,
        )
        logger.info(f"Encoding {len(self.embeds_train)} training embeds...")
        X_train = np.asarray(self.embeds_train)

        logger.info(f"Encoding {len(self.embeds_test)} test embeds...")
        X_test = np.asarray(self.embeds_test)
        logger.info("Fitting logistic regression classifier...")
        classifier.fit(X_train, self.y_train)
        logger.info("Evaluating...")
        y_pred = classifier.predict(X_test)
        scores = {
            "accuracy": accuracy_score(self.y_test, y_pred),
            "f1": f1_score(self.y_test, y_pred, average="macro"),
        }

        # For binary problems also report average precision.
        # NOTE(review): AP is computed from hard predictions rather than
        # decision scores — preserved from the original implementation.
        if len(np.unique(self.y_train)) == 2:
            scores["ap"] = average_precision_score(self.y_test, y_pred)

        return scores
121
+
122
+
123
class ClusteringEvaluator(Evaluator):
    """Clusters embeddings with mini-batch k-means and scores V-measure against labels."""

    def __init__(
        self,
        embeds,
        labels,
        clustering_batch_size=500,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds = embeds
        self.labels = labels
        self.clustering_batch_size = clustering_batch_size

    def __call__(self):
        logger.info(f"Encoding {len(self.embeds)} embeds...")
        corpus_embeddings = np.asarray(self.embeds)

        # One cluster per distinct gold label.
        logger.info("Fitting Mini-Batch K-Means model...")
        kmeans = sklearn.cluster.MiniBatchKMeans(
            n_clusters=len(set(self.labels)),
            batch_size=self.clustering_batch_size,
            n_init="auto",
        )
        kmeans.fit(corpus_embeddings)

        logger.info("Evaluating...")
        # V-measure is invariant to the arbitrary permutation of cluster ids.
        return {"v_measure": v_measure_score(self.labels, kmeans.labels_)}
153
+
154
+
155
class PairClassificationEvaluator(Evaluator):
    """Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and
    dissimilar embeds.
    The metrics are the cosine similarity as well as euclidean and Manhattan distance
    The returned score is the accuracy with a specified metric.
    The results are written in a CSV. If a CSV already exists, then values are appended.
    The labels need to be 0 for dissimilar pairs and 1 for similar pairs.
    :param embeds1: The first column of embeds
    :param embeds2: The second column of embeds
    :param labels: labels[i] is the label for the pair (embeds1[i], embeds2[i]). Must be 0 or 1
    :param name: Name for the output
    :param write_csv: Write results to a CSV file
    """

    def __init__(self, embeds1, embeds2, labels, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.labels = labels

        # Fail fast on malformed input: the two embedding columns and the
        # label column must align, and labels must be strictly binary.
        assert len(self.embeds1) == len(self.embeds2)
        assert len(self.embeds1) == len(self.labels)
        for label in labels:
            assert label == 0 or label == 1

    def __call__(self):
        scores = self.compute_metrics()
        # Compute the max of Average Precision (AP) over all distance metrics.
        top_ap_score = max(score for k, score in scores.items() if k.endswith("_ap"))
        scores["top_ap"] = top_ap_score
        return scores

    def compute_metrics(self):
        """Compute accuracy/F1/AP for each of four pairwise similarity measures.

        Returns:
            `dict`: keys are "<measure>_<metric>" (e.g. "cos_sim_ap").
        """
        embeddings1 = np.asarray(self.embeds1)
        embeddings2 = np.asarray(self.embeds2)

        logger.info("Computing similarity distances...")
        cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2)
        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)

        # Row-wise dot products, vectorized. The previous Python-level loop
        # produced a plain list, which interacted badly with `ap_score`'s
        # sign flip (list * -1 == []).
        dot_scores = np.einsum("ij,ij->i", embeddings1, embeddings2)

        logger.info("Computing metrics...")
        labels = np.asarray(self.labels)
        output_scores = {}
        # `reverse` is True when a *higher* value means *more* similar.
        for short_name, name, scores, reverse in [
            ["cos_sim", "Cosine-Similarity", cosine_scores, True],
            ["manhattan", "Manhattan-Distance", manhattan_distances, False],
            ["euclidean", "Euclidean-Distance", euclidean_distances, False],
            ["dot", "Dot-Product", dot_scores, True],
        ]:
            metrics = self._compute_metrics(scores, labels, reverse)
            metrics = {short_name + "_" + k: v for k, v in metrics.items()}
            output_scores.update(metrics)

        return output_scores

    @staticmethod
    def _compute_metrics(scores, labels, high_score_more_similar):
        """Compute the metrics for the given scores and labels.

        Args:
            scores (`np.ndarray` of shape (n_pairs, )): The similarity/dissimilarity scores for the pairs.
            labels (`np.ndarray` of shape (n_pairs, )): The labels for the pairs.
            high_score_more_similar (`bool`): If true, then the higher the score, the more similar the pairs are.

        Returns:
            `dict`: The metrics for the given scores and labels.
        """
        acc, acc_threshold = PairClassificationEvaluator.find_best_acc_and_threshold(
            scores, labels, high_score_more_similar
        )
        f1, precision, recall, f1_threshold = (
            PairClassificationEvaluator.find_best_f1_and_threshold(
                scores, labels, high_score_more_similar
            )
        )
        ap = PairClassificationEvaluator.ap_score(
            scores, labels, high_score_more_similar
        )

        return {
            "accuracy": acc,
            "accuracy_threshold": acc_threshold,
            "f1": f1,
            "f1_threshold": f1_threshold,
            "precision": precision,
            "recall": recall,
            "ap": ap,
        }

    @staticmethod
    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
        """Sweep all candidate thresholds and return (best_accuracy, threshold).

        Pairs are sorted from most- to least-similar; each split point between
        consecutive scores is a candidate threshold, scored as
        (positives above + negatives below) / n.
        """
        assert len(scores) == len(labels)
        rows = list(zip(scores, labels))
        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        max_acc = 0
        best_threshold = -1

        positive_so_far = 0
        remaining_negatives = sum(np.array(labels) == 0)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            if label == 1:
                positive_so_far += 1
            else:
                remaining_negatives -= 1

            acc = (positive_so_far + remaining_negatives) / len(labels)
            if acc > max_acc:
                max_acc = acc
                # Threshold is the midpoint between adjacent sorted scores.
                best_threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return max_acc, best_threshold

    @staticmethod
    def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool):
        """Sweep thresholds and return (best_f1, precision, recall, threshold).

        Precision/recall returned are those at the F1-maximizing threshold.
        """
        assert len(scores) == len(labels)

        scores = np.asarray(scores)
        labels = np.asarray(labels)

        rows = list(zip(scores, labels))
        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        best_f1 = best_precision = best_recall = 0
        threshold = 0
        nextract = 0  # pairs predicted positive so far
        ncorrect = 0  # true positives so far
        total_num_duplicates = sum(labels)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            nextract += 1

            if label == 1:
                ncorrect += 1

            if ncorrect > 0:
                precision = ncorrect / nextract
                recall = ncorrect / total_num_duplicates
                f1 = 2 * precision * recall / (precision + recall)
                if f1 > best_f1:
                    best_f1 = f1
                    best_precision = precision
                    best_recall = recall
                    threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return best_f1, best_precision, best_recall, threshold

    @staticmethod
    def ap_score(scores, labels, high_score_more_similar: bool):
        """Average precision, negating scores when a smaller value means more similar.

        Bug fix: coerce `scores` to an ndarray first. If `scores` were a plain
        Python list (as the old dot-product path produced), `scores * -1`
        would silently evaluate to an empty list rather than a negation.
        """
        scores = np.asarray(scores)
        return average_precision_score(
            labels, scores * (1 if high_score_more_similar else -1)
        )
320
+
321
+
322
class MultiClassMultiOutputLogRegClassificationEvaluator(Evaluator):
    """Logistic-regression evaluator for multi-class, multi-output labels.

    Labels arrive as comma-separated strings (e.g. "a, b"); they are split and
    binarized with a MultiLabelBinarizer fitted on the union of the train and
    test label sets, then one LogisticRegression is fitted per label column.

    Returns macro-averaged precision/recall/F1 plus subset accuracy.
    """

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.max_iter = max_iter

    def __call__(self):
        # Local import: the semantically correct meta-estimator (see below).
        from sklearn.multioutput import MultiOutputClassifier

        scores = {}
        mlb = MultiLabelBinarizer()
        # Fit the binarizer on all classes in y_train and y_test so the
        # indicator matrix has a column for every class, even test-only ones.
        class_labels = list(self.y_train) + list(self.y_test)
        labels = [class_label.split(", ") for class_label in class_labels]
        mlb.fit(labels)
        train_labels = [class_label.split(", ") for class_label in self.y_train]
        test_labels = [class_label.split(", ") for class_label in self.y_test]

        y_train = mlb.transform(train_labels)
        y_test = mlb.transform(test_labels)
        # Bug fix: LogisticRegression is a classifier, so wrap it in
        # MultiOutputClassifier rather than MultiOutputRegressor. Both fit one
        # estimator per label column and delegate `.predict`, so predictions
        # are unchanged, but the classifier wrapper is the correct API.
        clf = MultiOutputClassifier(
            LogisticRegression(
                random_state=self.seed, solver="lbfgs", max_iter=self.max_iter
            )
        ).fit(self.embeds_train, y_train)
        y_pred = clf.predict(self.embeds_test)

        results_dict = classification_report(y_test, y_pred, output_dict=True)
        assert isinstance(
            results_dict, dict
        ), "Should always be true since `output_dict=True` is passed to sklearn.metric.classification_report"
        scores["precision"] = results_dict["macro avg"]["precision"]
        scores["recall"] = results_dict["macro avg"]["recall"]
        scores["f1"] = results_dict["macro avg"]["f1-score"]
        scores["accuracy"] = accuracy_score(y_test, y_pred)

        return scores
369
+
370
+
371
class MultiClassMultiOutputKNNClassificationEvaluator(Evaluator):
    """k-nearest-neighbour evaluator for multi-class, multi-output labels.

    Labels are comma-separated strings; they are binarized with a
    MultiLabelBinarizer fitted on the union of train and test labels, and a
    cosine-distance KNN classifier is fitted on the training embeddings.
    Returns macro precision/recall/F1, subset accuracy, and label-ranking
    average precision (LRAP).
    """

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        n_neighbors=5,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.n_neighbors = n_neighbors

    def __call__(self):
        # Fit the binarizer on every label seen in either split so train and
        # test share one indicator-column layout.
        binarizer = MultiLabelBinarizer()
        binarizer.fit(
            [label.split(", ") for label in list(self.y_train) + list(self.y_test)]
        )
        y_train = binarizer.transform([label.split(", ") for label in self.y_train])
        y_test = binarizer.transform([label.split(", ") for label in self.y_test])

        clf = sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=self.n_neighbors, metric="cosine"
        )
        logger.info("Fitting KNN classifier...")
        clf.fit(self.embeds_train, y_train)
        logger.info("Evaluating...")
        y_pred = clf.predict(self.embeds_test)

        return {
            "f1": f1_score(y_test, y_pred, average="macro"),
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average="macro"),
            "recall": recall_score(y_test, y_pred, average="macro"),
            "lrap": label_ranking_average_precision_score(y_test, y_pred),
        }
419
+
420
+
421
class BiGeneMiningEvaluator(Evaluator):
    """
    BiGene Mining Evaluator, analogous to Bitext Mining Evaluator https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/BitextMiningEvaluator.py.

    The gold alignment is the identity pairing: embeds1[i] is the true match
    of embeds2[i]. Reports weighted precision/recall/F1 and accuracy of the
    top-1 nearest neighbor. If top_k > 1, then recall@k is also computed.
    """

    def __init__(self, embeds1, embeds2, top_k=1, **kwargs):
        super().__init__(**kwargs)
        self.n = len(embeds1)
        self.embeds1 = np.array(embeds1)
        self.embeds2 = np.array(embeds2)
        # Identity pairing (i, i): row i of embeds1 corresponds to row i of embeds2.
        self.gold = list(zip(range(self.n), range(self.n)))
        self.top_k = top_k

    def __call__(self):
        scores = self.compute_metrics()
        return scores

    def compute_metrics(self):
        """Run nearest-neighbor search embeds1 -> embeds2 and score the matches."""
        logger.info(f"Finding nearest neighbors... with top_k={self.top_k}")
        nearest_neighbors = self._similarity_search(
            self.embeds1, self.embeds2, top_k=self.top_k
        )

        # Compute errors
        logger.info("Computing metrics...")
        labels = []
        predictions = []

        # Get predictions and labels for top_k=1.
        for i, x in enumerate(nearest_neighbors):
            # x is sorted by decreasing score, so x[0] is the best match.
            j = x[0]["corpus_id"]
            predictions.append(j)
            labels.append(self.gold[i][1])

        scores = {
            "precision": precision_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "recall": recall_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "f1": f1_score(labels, predictions, zero_division=0, average="weighted"),
            "accuracy": accuracy_score(labels, predictions),
        }

        if self.top_k > 1:
            # Compute recall@k: fraction of queries whose gold index appears
            # anywhere in the top-k retrieved candidates.
            top_k_preds = []
            for i, x in enumerate(nearest_neighbors):
                top_k_preds.append([pred["corpus_id"] for pred in x])
            top_k_recall = [
                self.gold[i][1] in top_k_pred
                for i, top_k_pred in enumerate(top_k_preds)
            ]
            scores[f"recall_at_{self.top_k}"] = sum(top_k_recall) / len(top_k_recall)
        return scores

    def _similarity_search(
        self,
        query_embeddings,
        corpus_embeddings,
        query_chunk_size=100,
        corpus_chunk_size=500000,
        top_k=1,
        score_function=cos_sim,
    ):
        """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
        It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries.
        :param query_embeddings: A 2 dimensional tensor with the query embeddings.
        :param corpus_embeddings: A 2 dimensional tensor with the corpus embeddings.
        :param query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory.
        :param corpus_chunk_size: Scans the corpus 500k entries at a time. Increasing that value increases the speed, but requires more memory.
        :param top_k: Retrieve top k matching entries.
        :param score_function: Function for computing scores. By default, cosine similarity.
        :return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
        """
        query_embeddings = torch.from_numpy(query_embeddings)
        corpus_embeddings = torch.from_numpy(corpus_embeddings)
        # Promote 1-D inputs to a batch of one row.
        if len(query_embeddings.shape) == 1:
            query_embeddings = query_embeddings.unsqueeze(0)
        if len(corpus_embeddings.shape) == 1:
            corpus_embeddings = corpus_embeddings.unsqueeze(0)

        # Check that corpus and queries are on the same device
        if corpus_embeddings.device != query_embeddings.device:
            query_embeddings = query_embeddings.to(corpus_embeddings.device)

        queries_result_list = [[] for _ in range(len(query_embeddings))]

        for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
            # Iterate over chunks of the corpus
            for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
                # Compute cosine similarities
                cos_scores = score_function(
                    query_embeddings[
                        query_start_idx : query_start_idx + query_chunk_size
                    ],
                    corpus_embeddings[
                        corpus_start_idx : corpus_start_idx + corpus_chunk_size
                    ],
                )

                # Get top-k scores within this corpus chunk (unsorted; a
                # global sort across chunks happens below).
                cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                    cos_scores,
                    min(top_k, len(cos_scores[0])),
                    dim=1,
                    largest=True,
                    sorted=False,
                )
                cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
                cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

                for query_itr in range(len(cos_scores)):
                    for sub_corpus_id, score in zip(
                        cos_scores_top_k_idx[query_itr],
                        cos_scores_top_k_values[query_itr],
                    ):
                        # Translate chunk-local index back to a global corpus id.
                        corpus_id = corpus_start_idx + sub_corpus_id
                        query_id = query_start_idx + query_itr
                        queries_result_list[query_id].append(
                            {"corpus_id": corpus_id, "score": score}
                        )

        # Sort and strip to top_k results
        for idx in range(len(queries_result_list)):
            queries_result_list[idx] = sorted(
                queries_result_list[idx], key=lambda x: x["score"], reverse=True
            )
            queries_result_list[idx] = queries_result_list[idx][0:top_k]

        return queries_result_list
555
+
556
+
557
class EDSEvaluator(Evaluator):
    """
    Evolutionary Distance Similarity Evaluator, analogous to Semantic Textual Similarity Evaluator.
    Adapted from https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/STSEvaluator.py

    Correlates gold evolutionary distances with embedding-pair distances
    (cosine / Manhattan / Euclidean) via Pearson's r and also reports the
    best of the three correlations.
    """

    def __init__(self, embeds1, embeds2, gold_scores, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.gold_scores = gold_scores

    def __call__(self):
        arr1 = np.asarray(self.embeds1)
        arr2 = np.asarray(self.embeds2)
        logger.info("Evaluating...")

        # One pairwise-distance function per reported metric key.
        distance_fns = {
            "cos_sim": paired_cosine_distances,
            "manhattan": paired_manhattan_distances,
            "euclidean": paired_euclidean_distances,
        }
        results = {}
        for key, distance_fn in distance_fns.items():
            corr, _ = pearsonr(self.gold_scores, distance_fn(arr1, arr2))
            results[key] = corr

        # Best correlation across the three distance measures.
        results["top_corr"] = max(results.values())
        return results
592
+
593
+
594
class RetrievalEvaluator(Evaluator):
    """Adapted from
    https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/RetrievalEvaluator.py

    Runs chunked nearest-neighbor search of queries against a corpus, then
    scores the rankings with pytrec_eval (nDCG/MAP/recall/precision), MRR,
    and abstention nAUC metrics.
    """

    def __init__(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        qrels: Dict[str, Dict[str, int]],
        k_values: List[int] = [5, 10, 50],
        score_function: str = "cos_sim",
        corpus_chunk_size: int = 50000,
        **kwargs,
    ):
        # corpus_embeds / query_embeds: numpy embedding matrices (converted to
        # torch tensors inside `search`). corpus_ids / query_ids are parallel
        # id lists; qrels maps query_id -> {corpus_id: relevance}.
        super().__init__(**kwargs)
        self.corpus_embeds = corpus_embeds
        self.query_embeds = query_embeds
        self.corpus_ids = corpus_ids
        self.query_ids = query_ids
        self.qrels = qrels
        self.k_values = k_values
        # Retrieve max(k_values) hits per query unless the caller passed an
        # explicit top_k through kwargs.
        self.top_k = max(k_values) if "top_k" not in kwargs else kwargs["top_k"]
        self.score_function = score_function
        self.score_functions = {
            "cos_sim": cos_sim,
            "dot": dot_score,
        }
        self.corpus_chunk_size = corpus_chunk_size

    def __call__(self):
        """Search, evaluate, and flatten all metrics into one {name: value} dict."""
        results = self.search(
            self.corpus_embeds,
            self.query_embeds,
            self.corpus_ids,
            self.query_ids,
            self.top_k,
            self.score_function,
        )
        ndcg, _map, recall, precision, naucs = self.evaluate(
            self.qrels, results, self.k_values
        )
        mrr, naucs_mrr = self.evaluate_custom(self.qrels, results, self.k_values, "mrr")
        # Normalize metric names: "NDCG@10" style keys become "ndcg_at_10".
        scores = {
            **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()},
            **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()},
            **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()},
            **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()},
            **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()},
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs.items()
            },
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs_mrr.items()
            },
        }
        return scores

    def search(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        top_k: int,
        score_function: str,
        return_sorted: bool = False,
        **kwargs,
    ) -> dict[str, dict[str, float]]:
        # Create embeddings for all queries using model.encode()
        # Runs semantic search against the corpus embeddings
        # Returns a ranked list with the corpus ids
        if score_function not in self.score_functions:
            raise ValueError(
                f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product"
            )
        # make query embeds and corpus embeds torch tensors
        query_embeds = torch.from_numpy(query_embeds)
        corpus_embeds = torch.from_numpy(corpus_embeds)
        itr = range(0, len(corpus_embeds), self.corpus_chunk_size)
        results = defaultdict(dict)
        # Keep only the top-k docs for each query
        result_heaps = defaultdict(list)  # min-heaps of (score, corpus_id)
        for batch_num, corpus_start_idx in enumerate(itr):
            logger.info("Searching Batch {}/{}...".format(batch_num + 1, len(itr)))
            corpus_end_idx = min(
                corpus_start_idx + self.corpus_chunk_size, len(corpus_ids)
            )
            sub_corpus_embeds = corpus_embeds[corpus_start_idx:corpus_end_idx]
            # Compute similarites using either cosine-similarity or dot product
            cos_scores = self.score_functions[score_function](
                query_embeds, sub_corpus_embeds
            )
            # NaN similarities (e.g. from zero vectors) are sunk below any
            # real score so they never rank.
            cos_scores[torch.isnan(cos_scores)] = -1

            # Get top-k values. top_k + 1 is requested so a query matching
            # itself can be skipped and still leave k candidates.
            # NOTE(review): both branches of the inner min() yield a row
            # length of `cos_scores`; the [1]/[-1] indexing is odd but
            # equivalent for a rectangular score matrix — kept as-is.
            cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                cos_scores,
                min(
                    top_k + 1,
                    len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]),
                ),
                dim=1,
                largest=True,
                sorted=return_sorted,
            )
            cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
            cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

            for query_itr in range(len(query_embeds)):
                query_id = query_ids[query_itr]
                for sub_corpus_id, score in zip(
                    cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]
                ):
                    corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id]
                    # Skip self-matches (query retrieved as its own document).
                    if corpus_id != query_id:
                        if len(result_heaps[query_id]) < top_k:
                            # Push item on the heap
                            heapq.heappush(result_heaps[query_id], (score, corpus_id))
                        else:
                            # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
                            heapq.heappushpop(
                                result_heaps[query_id], (score, corpus_id)
                            )

        for qid in result_heaps:
            for score, corpus_id in result_heaps[qid]:
                results[qid][corpus_id] = score

        return results

    @staticmethod
    def evaluate(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        ignore_identical_ids: bool = True,
    ) -> Tuple[
        Dict[str, float],
        dict[str, float],
        dict[str, float],
        dict[str, float],
        Dict[str, float],
    ]:
        """Score `results` against `qrels` with pytrec_eval.

        Returns (ndcg, map, recall, precision, naucs); the first four are
        per-k averages over queries, the last is the abstention nAUC dict.
        """
        if ignore_identical_ids:
            logger.info(
                "For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this."
            )
            # Drop self-retrievals in place; `popped` only records what was removed.
            popped = []
            for qid, rels in results.items():
                for pid in list(rels):
                    if qid == pid:
                        results[qid].pop(pid)
                        popped.append(pid)

        all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {}

        for k in k_values:
            all_ndcgs[f"NDCG@{k}"] = []
            all_aps[f"MAP@{k}"] = []
            all_recalls[f"Recall@{k}"] = []
            all_precisions[f"P@{k}"] = []

        # pytrec_eval measure strings, e.g. "ndcg_cut.5,10,50".
        map_string = "map_cut." + ",".join([str(k) for k in k_values])
        ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values])
        recall_string = "recall." + ",".join([str(k) for k in k_values])
        precision_string = "P." + ",".join([str(k) for k in k_values])
        evaluator = pytrec_eval.RelevanceEvaluator(
            qrels, {map_string, ndcg_string, recall_string, precision_string}
        )
        scores = evaluator.evaluate(results)

        # Collect per-query scores, then average.
        for query_id in scores.keys():
            for k in k_values:
                all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)])
                all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)])
                all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)])
                all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)])
        ndcg, _map, recall, precision = (
            all_ndcgs.copy(),
            all_aps.copy(),
            all_recalls.copy(),
            all_precisions.copy(),
        )

        for k in k_values:
            ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5)
            _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5)
            recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5)
            precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5)
        naucs = RetrievalEvaluator.evaluate_abstention(
            results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions}
        )
        return ndcg, _map, recall, precision, naucs

    @staticmethod
    def evaluate_abstention(
        results: dict[str, dict[str, float]],
        metric_scores: dict[str, list[float]],
    ) -> Dict[str, float]:
        """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997"""
        # One confidence score per query, derived from its similarity scores.
        all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())]
        all_conf_scores = [
            confidence_scores(sim_scores) for sim_scores in all_sim_scores
        ]
        # `confidence_scores` returns a dict of confidence functions; regroup
        # per function into arrays aligned with the query order.
        conf_fcts = list(all_conf_scores[0].keys())
        all_conf_scores = {
            fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts
        }
        metric_scores = {k: np.array(v) for k, v in metric_scores.items()}
        naucs = {}

        # nAUC for every (metric, confidence-function) combination.
        for metric_name, scores in metric_scores.items():
            for fct, conf_scores in all_conf_scores.items():
                naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores)

        return naucs

    @staticmethod
    def evaluate_custom(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        metric: str,
        output_type: str = "all",
    ) -> Tuple[Dict[str, float], Dict[str, float]]:
        """Evaluate a non-pytrec metric (mrr / recall_cap / hole / accuracy).

        NOTE(review): an unrecognized `metric` leaves `metric_scores` unbound
        and raises UnboundLocalError below — confirm callers only pass the
        supported names.
        """
        if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]:
            metric_scores = mrr(qrels, results, k_values, output_type)

        elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]:
            metric_scores = recall_cap(qrels, results, k_values, output_type)

        elif metric.lower() in ["hole", "hole@k"]:
            metric_scores = hole(qrels, results, k_values, output_type)

        elif metric.lower() in [
            "acc",
            "top_k_acc",
            "accuracy",
            "accuracy@k",
            "top_k_accuracy",
        ]:
            metric_scores = top_k_accuracy(qrels, results, k_values, output_type)

        naucs = RetrievalEvaluator.evaluate_abstention(results, metric_scores)
        metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()}

        return metric_scores_avg, naucs
dgeb/modality.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
+
4
class Modality(Enum):
    """Data modality, either DNA or protein sequence."""

    # Amino-acid (protein) sequences.
    PROTEIN = "protein"
    # Nucleotide (DNA) sequences.
    DNA = "dna"
dgeb/models.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
+ from abc import ABC, abstractmethod
4
+ from functools import partial
5
+ from types import SimpleNamespace
6
+ from typing import Dict, List, Literal, Optional
7
+
8
+ import numpy as np
9
+ import torch
10
+ import tqdm as tqdm
11
+ from datasets import Dataset
12
+ from torch import Tensor
13
+ from torch.nn import functional as F
14
+ from torch.utils.data import DataLoader
15
+ from transformers import (
16
+ AutoConfig,
17
+ AutoModel,
18
+ AutoModelForCausalLM,
19
+ AutoModelForMaskedLM,
20
+ AutoTokenizer,
21
+ BatchEncoding,
22
+ DefaultDataCollator,
23
+ T5EncoderModel,
24
+ T5Tokenizer,
25
+ )
26
+ from transformers.modeling_outputs import BaseModelOutput
27
+
28
+ from .modality import Modality
29
+ from .eval_utils import ForwardHook, pool
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class BioSeqTransformer(ABC):
    """
    Abstract class to wrap models which map biological sequences (DNA/Prot) to embeddings.
    Modelled after SentenceTransformer (https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py)

    Subclasses must define `num_layers`, `embed_dim`, and `modality`, and may
    override `_load_model` / `_get_tokenizer` / `_encode_single_batch`.

    Args:
        model_name: Name or path to the pretrained model.
        layers: List of model layers to probe. Can be integers or "mid" or "last".
        devices: List of device ids for inference. If cuda is not available, will use cpu.
        num_processes: Number of processes to use for data loading.
        max_seq_length: Maximum sequence length of the input sequences.
        l2_norm: If true, embeddings are L2-normalized before they are returned.
        batch_size: Batch size for encoding.
        pool_type: Pooling strategy to use. One of "mean", "max", "cls", "last".
    """

    def __init__(
        self,
        model_name: str,
        layers: Optional[List[int] | Literal["mid"] | Literal["last"]] = None,
        devices: List[int] = [0],
        num_processes: int = 16,
        max_seq_length: int = 1024,
        l2_norm: bool = False,
        batch_size: int = 128,
        pool_type: str = "mean",
    ):
        super().__init__()

        self.id = self.__class__.__name__
        self.hf_name = model_name
        self.encoder = self._load_model(model_name)
        # The config is the source of truth for num_layers/embed_dim in subclasses.
        if not hasattr(self.encoder, "config"):
            raise ValueError(
                'The model from `self._load_model()` must have a "config" attribute.'
            )
        self.config = self.encoder.config
        self.tokenizer = self._get_tokenizer(model_name)
        self.num_param = sum(p.numel() for p in self.encoder.parameters())
        self.data_collator = DefaultDataCollator()
        self.gpu_count = len(devices)
        self.l2_norm = l2_norm

        # Primary device; falls back to CPU when CUDA is unavailable.
        self.device = torch.device(
            f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu"
        )
        self.num_processes = num_processes
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.pool_type = pool_type

        # DataParallel splits each batch across the requested GPU ids.
        if self.gpu_count > 1:
            self.encoder = torch.nn.DataParallel(self.encoder, device_ids=devices)
        self.encoder.to(self.device)
        self.encoder.eval()

        # Resolve the "mid"/"last" shorthands into concrete layer indices.
        mid_layer = self.num_layers // 2
        last_layer = self.num_layers - 1
        mid_layer_label = f"mid ({mid_layer})"
        last_layer_label = f"last ({self.num_layers - 1})"

        if layers is None:
            logger.debug(f"Using default layers: {mid_layer_label}, {last_layer_label}")
            self.layers = [mid_layer, last_layer]
            self.layer_labels = [mid_layer_label, last_layer_label]
        elif layers == "mid":
            self.layers = [mid_layer]
            self.layer_labels = [mid_layer_label]
        elif layers == "last":
            self.layers = [last_layer]
            self.layer_labels = [last_layer_label]
        else:
            self.layers = layers
            self.layer_labels = [str(layer) for layer in layers]

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
        outputs = self.encoder(**batch_dict, output_hidden_states=True)
        # NOTE(review): HF `hidden_states[0]` is normally the embedding-layer
        # output, so `hidden_states[layer]` for layer == num_layers - 1 is the
        # penultimate block's output — confirm this indexing is intended.
        embeds = [outputs.hidden_states[layer] for layer in self.layers]
        # Pool over the sequence dimension, masking out padding tokens.
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        return embeds

    def _load_model(self, model_name):
        # Default loader; subclasses override for non-HF checkpoints.
        return AutoModel.from_pretrained(model_name, trust_remote_code=True)

    def _get_tokenizer(self, model_name):
        return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        """Tokenize a batch of raw sequences, padding/truncating to max_seq_length."""
        batch_dict = tokenizer(
            examples["input_seqs"],
            max_length=max_seq_length,
            padding=True,
            truncation=True,
        )
        return batch_dict

    @property
    def metadata(self) -> Dict:
        # Model facts recorded alongside benchmark results.
        return {
            "hf_name": self.hf_name,
            "num_layers": self.num_layers,
            "num_params": self.num_param,
            "embed_dim": self.embed_dim,
        }

    @property
    @abstractmethod
    def num_layers(self) -> int:
        pass

    @property
    @abstractmethod
    def embed_dim(self) -> int:
        pass

    @property
    @abstractmethod
    def modality(self) -> Modality:
        pass

    @torch.no_grad()
    def encode(self, sequences, **kwargs) -> np.ndarray:
        """Returns a list of embeddings for the given sequences.
        Args:
            sequences (`List[str]`): List of sequences to encode
        Returns:
            `np.ndarray`: Embeddings for the given sequences of shape [num_sequences, num_layers, embedding_dim].
        """
        dataset = Dataset.from_dict({"input_seqs": sequences})
        # Tokenization is applied lazily per batch via set_transform.
        dataset.set_transform(
            partial(
                self._tokenize_func, self.tokenizer, max_seq_length=self.max_seq_length
            )
        )
        # Per-device batch size times GPU count: DataParallel splits it back up.
        data_loader = DataLoader(
            dataset,
            batch_size=self.batch_size * self.gpu_count,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_processes,
            collate_fn=self.data_collator,
            pin_memory=True,
        )

        # Validate requested probe layers against the actual model depth.
        if max(self.layers) >= self.num_layers:
            raise ValueError(
                f"Layer {max(self.layers)} is not available in the model. Choose a layer between 0 and {self.num_layers - 1}"
            )

        encoded_embeds = []
        # Progress bar is suppressed for small inputs (< 128 sequences).
        for batch_dict in tqdm.tqdm(
            data_loader, desc="encoding", mininterval=10, disable=len(sequences) < 128
        ):
            batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()}

            embeds = self._encode_single_batch(batch_dict)

            if self.l2_norm:
                # Normalize along the embedding dimension only.
                embeds = F.normalize(embeds, p=2, dim=-1)
            encoded_embeds.append(embeds.cpu().numpy())

        return np.concatenate(encoded_embeds, axis=0)
204
+
205
+
206
class ESM(BioSeqTransformer):
    """Wrapper for the ESM-2 protein language models.

    Reference: https://huggingface.co/docs/transformers/en/model_doc/esm
    """

    # Supported HuggingFace checkpoints, smallest to largest.
    MODEL_NAMES = [
        "facebook/esm2_t6_8M_UR50D",
        "facebook/esm2_t12_35M_UR50D",
        "facebook/esm2_t30_150M_UR50D",
        "facebook/esm2_t33_650M_UR50D",
        "facebook/esm2_t36_3B_UR50D",
        "facebook/esm2_t48_15B_UR50D",
    ]

    @property
    def modality(self) -> Modality:
        # ESM-2 consumes amino-acid sequences.
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        cfg = self.config
        return cfg.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        cfg = self.config
        return cfg.hidden_size
229
+
230
+
231
class ESM3(BioSeqTransformer):
    """ESM3 model from https://github.com/evolutionaryscale/esm

    ESM3's forward pass does not expose per-layer hidden states, so forward
    hooks (from `eval_utils.ForwardHook`) are registered on the requested
    transformer blocks to capture their outputs during encoding.
    """

    MODEL_NAMES = ["esm3_sm_open_v1"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register forward hooks to store embeddings per layer.
        self.hooks = [
            ForwardHook(self.encoder.transformer.blocks[layer]) for layer in self.layers
        ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def _load_model(self, model_name):
        try:
            from esm.models.esm3 import ESM3 as ModelESM3
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        model = ModelESM3.from_pretrained("esm3_sm_open_v1")
        # ESM3 has no HF-style config; synthesize one so the base class can
        # read num_hidden_layers / hidden_size. Hidden size is inferred from
        # the output features of the first block's final FFN layer.
        model.config = SimpleNamespace(
            num_hidden_layers=len(model.transformer.blocks),
            hidden_size=model.transformer.blocks[0].ffn[-1].out_features,
        )
        return model

    def _get_tokenizer(self, model_name):
        try:
            from esm.tokenization.sequence_tokenizer import EsmSequenceTokenizer
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        return EsmSequenceTokenizer()

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        # The forward output is discarded; the hooks capture each selected
        # block's output as a side effect of the call.
        _ = self.encoder.forward(sequence_tokens=batch_dict["input_ids"])
        embeds = [hook.output for hook in self.hooks]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        # Cast up for numpy conversion — presumably the model may run in a
        # reduced precision (e.g. bf16); confirm against the checkpoint dtype.
        embeds = embeds.to(torch.float32)
        return embeds
289
+
290
+
291
class ProtT5(BioSeqTransformer):
    """ProtT5 model from https://github.com/agemagician/ProtTrans"""

    MODEL_NAMES = [
        "Rostlab/prot_t5_xl_uniref50",
        "Rostlab/prot_t5_xl_bfd",
        "Rostlab/prot_t5_xxl_uniref50",
        "Rostlab/prot_t5_xxl_bfd",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_layers

    @property
    def embed_dim(self) -> int:
        return self.config.d_model

    def _load_model(self, model_name):
        # Only the T5 encoder stack is needed to extract embeddings.
        return T5EncoderModel.from_pretrained(model_name)

    def _get_tokenizer(self, model_name):
        return T5Tokenizer.from_pretrained(model_name, do_lower_case=False)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        """Tokenize protein sequences in the ProtT5 input format."""
        # ProtT5 expects residues to be whitespace-separated tokens.
        spaced_seqs = [" ".join(seq) for seq in examples["input_seqs"]]
        # Map rare/ambiguous amino acids (U, Z, O, B) to the unknown residue X.
        cleaned_seqs = [re.sub(r"[UZOB]", "X", seq) for seq in spaced_seqs]
        return tokenizer(
            cleaned_seqs,
            max_length=max_seq_length,
            padding=True,
            truncation=True,
            add_special_tokens=True,
        )
337
+
338
+
339
class ProGen(BioSeqTransformer):
    """ProGen models from https://github.com/salesforce/progen."""

    MODEL_NAMES = [
        "hugohrban/progen2-small",
        "hugohrban/progen2-medium",
        "hugohrban/progen2-base",
        "hugohrban/progen2-large",
        "hugohrban/progen2-xlarge",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.n_layer

    @property
    def embed_dim(self) -> int:
        return self.config.embed_dim

    def _load_model(self, model_name):
        return AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

    def _get_tokenizer(self, model_name_or_path):
        tok = AutoTokenizer.from_pretrained(
            model_name_or_path, trust_remote_code=True
        )
        # ProGen checkpoints ship without a pad token; assign one explicitly.
        tok.pad_token = "<|pad|>"
        return tok

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
        outputs: BaseModelOutput = self.encoder(
            input_ids=batch_dict["input_ids"],
            output_hidden_states=True,
            use_cache=False,
        )
        pooled = []
        for layer in self.layers:
            hidden = outputs.hidden_states[layer]
            pooled.append(pool(hidden, batch_dict["attention_mask"], self.pool_type))
        # Stack with shape [B, num_layers, D].
        return torch.stack(pooled, dim=1)
387
+
388
+
389
class EvoModel(BioSeqTransformer):
    """https://github.com/evo-design/evo."""

    MODEL_NAMES = [
        "togethercomputer/evo-1-8k-base",
        "togethercomputer/evo-1-131k-base",
    ]

    @property
    def modality(self) -> Modality:
        # Evo is a DNA language model.
        return Modality.DNA

    @property
    def num_layers(self) -> int:
        return self.config.num_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register forward hooks to store embeddings per layer.
        # Hooks are appended in the same order as self.layers; _encode_single_batch
        # relies on that ordering when stacking per-layer outputs.
        self.hooks = []
        for layer in self.layers:
            # For the last layer, get the output of `backbone.norm`, which directly precedes `backbone.unembed`.
            # This is equivalent to the approach in https://github.com/evo-design/evo/issues/32.
            if layer == self.num_layers - 1 or layer == -1:
                self.hooks.append(ForwardHook(self.encoder.backbone.norm))
            else:
                self.hooks.append(ForwardHook(self.encoder.backbone.blocks[layer]))

    def _load_model(self, model_name):
        # Revision "1.1_fix" is pinned for both config and weights so the remote
        # code and checkpoint stay in sync.
        config = AutoConfig.from_pretrained(
            model_name, trust_remote_code=True, revision="1.1_fix"
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name, config=config, trust_remote_code=True, revision="1.1_fix"
        )
        return model

    def _get_tokenizer(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, revision="1.1_fix", trust_remote_code=True
        )
        # Evo tokenizer is missing pad_token by default.
        tokenizer.add_special_tokens({"pad_token": "N"})
        return tokenizer

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Embed one tokenized batch; returns a tensor of shape [batch, num_layers, D]."""
        # Forward pass is run only for its side effect of populating the hooks.
        _ = self.encoder(batch_dict["input_ids"], use_cache=False)
        embeds = [hook.output for hook in self.hooks]
        # The hook output for Evo middle layers is a tuple (embedding, inference_params=None).
        embeds = [x[0] if isinstance(x, tuple) else x for x in embeds]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        embeds = embeds.to(torch.float32)
        return embeds
451
+
452
+
453
class NTModel(BioSeqTransformer):
    """Nucleotide Transformer https://github.com/instadeepai/nucleotide-transformer"""

    MODEL_NAMES = [
        "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
        "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
    ]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Override max_seq_length with the tokenizer's declared maximum so
        # inputs are truncated to what the checkpoint supports.
        self.max_seq_length = self.tokenizer.model_max_length

    @property
    def modality(self) -> Modality:
        # Nucleotide Transformer embeds DNA sequences.
        return Modality.DNA

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def _load_model(self, model_name):
        # NT is distributed as a masked LM with custom modeling code on the Hub.
        return AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True)
dgeb/tasks/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ruff: noqa: F403
2
+
3
+ from .tasks import Dataset, Task, TaskMetadata, TaskResult
4
+ from .eds_tasks import *
5
+ from .pair_classification_tasks import *
6
+ from .retrieval_tasks import *
7
+ from .classification_tasks import *
8
+ from .clustering_tasks import *
9
+ from .bigene_mining_tasks import *
10
+
11
+ __all__ = [
12
+ "Dataset",
13
+ "Task",
14
+ "TaskMetadata",
15
+ "TaskResult",
16
+ ]
dgeb/tasks/bigene_mining_tasks.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bigene mining tasks are analogous to bitext matching tasks, but for genes.
3
+ Cosine similarity is used to mine genes of related functions from different organisms.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import BiGeneMiningEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def run_bigene_mining_tasks(
    model: BioSeqTransformer, metadata: TaskMetadata, top_k: int = 1
) -> TaskResult:
    """Evaluate bigene mining task. Utilizes the BiGeneMiningEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("BiGeneMining tasks require 1 dataset.")
    train_split = metadata.datasets[0].load()["train"]
    # Embed both sides of each gene pair; shape [num_seqs, num_layers, dim].
    first_embeds = model.encode(train_split["Seq1"])
    second_embeds = model.encode(train_split["Seq2"])
    results = defaultdict(dict)
    for layer_idx, layer in enumerate(model.layers):
        scorer = BiGeneMiningEvaluator(
            first_embeds[:, layer_idx], second_embeds[:, layer_idx], top_k=top_k
        )
        results["layers"][layer] = scorer()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} matching results: {results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, results, model.metadata)
34
+
35
+
36
class BacArchBiGeneMining(Task):
    """BiGene mining between E. coli K-12 and S. acidocaldarius DSM 639 proteins (primary metric: f1)."""

    metadata = TaskMetadata(
        id="bacarch_bigene",
        display_name="BacArch BiGene",
        description="Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/bac_arch_bigene",
                revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the shared bigene mining pipeline with the default top_k=1."""
        return run_bigene_mining_tasks(model, self.metadata)
54
+
55
+
56
class ModACParalogyBiGeneMining(Task):
    """BiGene mining between paralogous ModA and ModC proteins (primary metric: recall@50)."""

    # ModAC Paralogy matching with top_k=1 is too strict (most models have accuracy < 0.1%)
    # Instead use recall@50 as the main metric.
    TOP_K = 50

    metadata = TaskMetadata(
        id="modac_paralogy_bigene",
        display_name="ModAC Paralogy BiGene",
        description="Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/modac_paralogy_bigene",
                revision="241ca6397856e3360da04422d54933035b1fab87",
            )
        ],
        primary_metric_id=f"recall_at_{TOP_K}",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the shared bigene mining pipeline with the relaxed TOP_K."""
        return run_bigene_mining_tasks(model, self.metadata, top_k=self.TOP_K)
dgeb/tasks/classification_tasks.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Classification tasks take in biological sequence and functional labels.
3
+ Multi-class and/or multi-label classification tasks are supported.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ import datasets
10
+ import numpy as np
11
+
12
+ from dgeb.eval_utils import merge_split_elem_embeds
13
+ from dgeb.evaluators import (
14
+ MultiClassMultiOutputKNNClassificationEvaluator,
15
+ logRegClassificationEvaluator,
16
+ )
17
+ from dgeb.modality import Modality
18
+ from dgeb.models import BioSeqTransformer
19
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
def split_sequences(
    ds: datasets.DatasetDict, max_seq_length: int
) -> datasets.DatasetDict:
    """Split sequences into chunks of max_seq_length using datasets.Dataset.map()."""

    def _chunk_one(examples, max_seq_length):
        # Batched map with batch_size=1: every field holds exactly one value.
        assert (
            len(examples["Sequence"]) == 1
        ), "split map function should use batch size of 1."
        record = {key: values[0] for key, values in examples.items()}
        full_seq = record["Sequence"]
        # Slice the sequence into consecutive windows of max_seq_length.
        chunks = [
            full_seq[start : start + max_seq_length]
            for start in range(0, len(full_seq), max_seq_length)
        ]
        # Duplicate every non-sequence field once per chunk so row counts match.
        out = {
            key: [value] * len(chunks)
            for key, value in record.items()
            if key != "Sequence"
        }
        out["Sequence"] = chunks
        return out

    return ds.map(
        _chunk_one,
        batched=True,
        batch_size=1,
        fn_kwargs={"max_seq_length": max_seq_length},
        keep_in_memory=True,
        load_from_cache_file=False,
    )
55
+
56
+
57
def run_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate on classification tasks using logistic regression classifier."""
    dataset = metadata.datasets[0].load()
    results = defaultdict(dict)
    # Embeddings have shape [num_seqs, num_layers, dim]; slice per layer below.
    embeds_train = model.encode(dataset["train"]["Sequence"])
    embeds_test = model.encode(dataset["test"]["Sequence"])
    for idx, layer in enumerate(model.layers):
        evaluator = logRegClassificationEvaluator(
            embeds_train[:, idx],
            dataset["train"]["Label"],
            embeds_test[:, idx],
            dataset["test"]["Label"],
        )
        results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, results, model.metadata)
76
+
77
+
78
class EnzymeCommissionClassification(Task):
    """EC number classification from protein sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="ec_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/ec_classification",
                revision="ead5570168e6969a5149f6861e8a33d6b5d22498",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
96
+
97
+
98
class EnzymeCommissionDNAClassification(Task):
    """EC number classification from DNA sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="ec_dna_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task using DNA sequences.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/ec_classification_dna",
                revision="cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
116
+
117
+
118
class ConvergentEnzymesClassification(Task):
    """EC classification restricted to convergent enzymes — same EC number, no mutual blastp hits (primary metric: f1)."""

    metadata = TaskMetadata(
        id="convergent_enzymes_classification",
        display_name="Convergent Enzymes Classification",
        description="Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/convergent_enzymes",
                revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
136
+
137
+
138
def run_mibig_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """
    Evaluate on MIBIG classification tasks. Multiclass, multi-label KNN classification is used for evaluation.
    """
    ds = metadata.datasets[0].load()
    if metadata.modality == Modality.DNA:
        # MIBiG DNA sequences can be very long. Instead of truncating to max_seq_length,
        # split into multiple sequences and mean pool the resulting embeddings.
        ds = split_sequences(ds, model.max_seq_length)

    layer_results = defaultdict(dict)
    # Embeddings have shape [num_seqs, num_layers, dim] (see BioSeqTransformer.encode).
    train_embeds = model.encode(ds["train"]["Sequence"])
    test_embeds = model.encode(ds["test"]["Sequence"])

    # After split_sequences, "Entry" IDs repeat once per chunk; record each ID's
    # label before merging so labels can be re-gathered afterwards.
    train_ids = ds["train"]["Entry"]
    test_ids = ds["test"]["Entry"]
    train_labels = ds["train"]["class"]
    test_labels = ds["test"]["class"]
    train_id_to_label = {id: label for id, label in zip(train_ids, train_labels)}
    test_id_to_label = {id: label for id, label in zip(test_ids, test_labels)}
    # Mean pool embeds with the same ID.
    train_ids, train_embeds = merge_split_elem_embeds(train_ids, train_embeds)
    test_ids, test_embeds = merge_split_elem_embeds(test_ids, test_embeds)
    # Gather the labels after merging by unique ID.
    train_labels = np.array([train_id_to_label[id] for id in train_ids])
    test_labels = np.array([test_id_to_label[id] for id in test_ids])

    for i, layer in enumerate(model.layers):
        evaluator = MultiClassMultiOutputKNNClassificationEvaluator(
            train_embeds[:, i], train_labels, test_embeds[:, i], test_labels
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, MIBiG classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
174
+
175
+
176
class MIBiGProteinClassification(Task):
    """Biosynthetic gene cluster classification (MIBiG) from protein sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="MIBIG_protein_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_prot",
                revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the MIBiG KNN classification pipeline."""
        return run_mibig_task(model, self.metadata)
194
+
195
+
196
class MIBiGDNAClassification(Task):
    """Biosynthetic gene cluster classification (MIBiG) from DNA sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="MIBIG_dna_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_dna",
                revision="b5ca7a76d469e4e66c46f1b655903972571e6b61",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the MIBiG KNN classification pipeline."""
        return run_mibig_task(model, self.metadata)
dgeb/tasks/clustering_tasks.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Biological sequences are clustered and performance is determined by how well clustering matches assigned labels.
3
+ """
4
+
5
+ import logging
6
+ from collections import defaultdict
7
+
8
+ from dgeb.evaluators import ClusteringEvaluator
9
+ from dgeb.modality import Modality
10
+ from dgeb.models import BioSeqTransformer
11
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def run_clustering_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate clustering task. Utilizes the ClusteringEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("Clustering tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    # Embeddings have shape [num_seqs, num_layers, dim]; slice per layer below.
    embeds = model.encode(ds["Sequence"])
    # Hoisted out of the loop: labels are identical for every layer, so avoid
    # re-fetching the column from the dataset on each iteration.
    labels = ds["Label"]
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = ClusteringEvaluator(embeds[:, i], labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
31
+
32
+
33
class RNAclustering(Task):
    """Clustering of E. coli K-12 sRNA/tRNA/rRNA segments (primary metric: v_measure)."""

    metadata = TaskMetadata(
        id="ecoli_rna_clustering",
        display_name="E.coli RNA Clustering",
        description="Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
        type="clustering",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/e_coli_rnas",
                revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared clustering pipeline."""
        return run_clustering_task(model, self.metadata)
51
+
52
+
53
class MopBClustering(Task):
    """Clustering of MopB protein sequences (primary metric: v_measure)."""

    metadata = TaskMetadata(
        id="mopb_clustering",
        display_name="MopB Clustering",
        description="Evaluate on MopB clustering task.",
        type="clustering",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mopb_clustering",
                revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared clustering pipeline."""
        return run_clustering_task(model, self.metadata)
dgeb/tasks/eds_tasks.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evolutionary Distance Similarity (EDS) tasks compare embedding distances to continuous evolutionary distances.
3
+ The label distances are typically derived from phylogenetic trees.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from dgeb.evaluators import EDSEvaluator
13
+ from dgeb.modality import Modality
14
+ from dgeb.models import BioSeqTransformer
15
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def run_eds_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate phylogeny distance correlation task. Utilizes the Evolutionary Distance Similarity (EDS) evaluator.

    The first dataset provides sequences (with "Entry" IDs); the second provides
    pairwise phylogenetic distances between IDs ("ID1", "ID2", "distance").
    """
    if len(metadata.datasets) != 2:
        raise ValueError("Phylogeny tasks require 2 datasets: sequences and distances.")

    ds = metadata.datasets[0].load()["train"]
    distance_df = metadata.datasets[1].load()["train"].to_pandas()
    assert isinstance(
        distance_df, pd.DataFrame
    ), f"Expected DataFrame, got {type(distance_df)}"

    id_index_dict = {k: i for i, k in enumerate(ds["Entry"])}
    # Resolve each pair's IDs to embedding row indices once, up front. As
    # before, an ID missing from the sequence dataset raises KeyError.
    idx1 = np.array([id_index_dict[id1] for id1 in distance_df["ID1"]])
    idx2 = np.array([id_index_dict[id2] for id2 in distance_df["ID2"]])
    dists = distance_df["distance"].to_numpy()
    # Embeddings have shape [num_seqs, num_layers, dim].
    test_embeds = model.encode(ds["Sequence"])
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        # Vectorized gather replaces the previous per-row iterrows() loop that
        # stored embeddings in object-dtype DataFrame columns (O(layers * rows)).
        embeds1 = test_embeds[idx1, i]
        embeds2 = test_embeds[idx2, i]
        evaluator = EDSEvaluator(embeds1, embeds2, dists)
        layer_results["layers"][layer] = evaluator()
        # log results
        logger.info(
            f"Layer: {layer}, {metadata.display_name} distance correlation results: {layer_results['layers'][layer]}"
        )

    return TaskResult.from_dict(metadata, layer_results, model.metadata)
55
+
56
+
57
class RpobBacPhylogeny(Task):
    """EDS correlation against the RpoB bacterial protein phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_bac_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_phylogeny_sequences",
                revision="b833ef8d8d873ea5387540562873f41d073d3e03",
            ),
            Dataset(
                path="tattabio/rpob_bac_phylogeny_distances",
                revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
79
+
80
+
81
class RpobArchPhylogeny(Task):
    """EDS correlation against the RpoB archaeal protein phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_arch_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_phylogeny_sequences",
                revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
            ),
            Dataset(
                path="tattabio/rpob_arch_phylogeny_distances",
                revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
103
+
104
+
105
class RpobBacDNAPhylogeny(Task):
    """EDS correlation against the RpoB bacterial phylogeny, DNA sequences (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_bac_dna_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_sequences",
                revision="8e137d3fb8886d8739ce08d1918745444c7d30d6",
            ),
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_distances",
                revision="67339e271b2a1602208153d53d70d35ba6fa8876",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
127
+
128
+
129
class RpobArchDNAPhylogeny(Task):
    """EDS correlation against the RpoB archaeal phylogeny, DNA sequences (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_arch_dna_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_sequences",
                revision="4453552a0e1021fee8697c71a559f4d3f6da2408",
            ),
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_distances",
                revision="51df97684a927ec2203568e80175ef26a62db039",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
151
+
152
+
153
class FeFePhylogeny(Task):
    """EDS correlation against the FeFe hydrogenase phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="fefe_phylogeny",
        display_name="FeFeHydrogenase Phylogeny",
        description="Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/fefe_phylogeny_sequences",
                revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
            ),
            Dataset(
                path="tattabio/fefe_phylogeny_distances",
                revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
175
+
176
+
177
class Bac16SPhylogeny(Task):
    """EDS correlation against the bacterial 16S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="bac_16S_phylogeny",
        display_name="16S Bacterial Phylogeny",
        description="Evaluate on 16S Bacterial phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/bac_16S_sequences",
                revision="efde1456b86748909cbcfecb07d783756d570aa3",
            ),
            Dataset(
                path="tattabio/bac_16S_distances",
                revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
199
+
200
+
201
class Arch16SPhylogeny(Task):
    """EDS correlation against the archaeal 16S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="arch_16S_phylogeny",
        display_name="16S Archaeal Phylogeny",
        description="Evaluate on 16S Archaeal phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/arch_16S_sequences",
                revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0",
            ),
            Dataset(
                path="tattabio/arch_16S_distances",
                revision="b0356b632a954be70cefd57e3a02e7e1ccd34408",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
223
+
224
+
225
class Euk18SPhylogeny(Task):
    """EDS correlation against the eukaryotic 18S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="euk_18S_phylogeny",
        display_name="18S Eukaryotic Phylogeny",
        description="Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/euk_18S_sequences",
                revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196",
            ),
            Dataset(
                path="tattabio/euk_18S_distances",
                revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
dgeb/tasks/pair_classification_tasks.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pair classification tasks evaluating distances between functionally relevant gene pairs.
3
+ For instance, distance thresholds distinguish between co-transcribed and non-co-transcribed gene pairs.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import PairClassificationEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ from ..eval_utils import paired_dataset
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def run_pair_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate a pair classification task using PairClassificationEvaluator.

    Args:
        model: Embedding model; must expose `encode()`, `layers`, and `metadata`.
        metadata: Task metadata holding exactly one dataset whose "train" split
            provides `Sequence` and `Label` columns.

    Returns:
        TaskResult with one metrics dict per evaluated model layer.

    Raises:
        ValueError: If the task is configured with more than one dataset.
    """
    if len(metadata.datasets) != 1:
        raise ValueError("Pair classification tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    embeds = model.encode(ds["Sequence"])
    # Fix: `Label` is loop-invariant — the original re-read it from the dataset
    # on every layer iteration and then shadowed it with the paired_dataset
    # return value. Read it once and keep the paired labels under a new name.
    labels = ds["Label"]
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        embeds1, embeds2, pair_labels = paired_dataset(labels, embeds[:, i])
        evaluator = PairClassificationEvaluator(embeds1, embeds2, pair_labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
37
+
38
+
39
class EcoliOperon(Task):
    """E. coli K-12 operonic pair classification task."""

    metadata = TaskMetadata(
        id="ecoli_operonic_pair",
        display_name="E.coli Operonic Pair",
        description="Evaluate on E.coli K-12 operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/ecoli_operonic_pair", revision="a62c01143a842696fc8200b91c1acb825e8cb891"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
57
+
58
+
59
class CyanoOperonPair(Task):
    """Cyanobacteria operonic pair classification task."""

    metadata = TaskMetadata(
        id="cyano_operonic_pair",
        display_name="Cyano Operonic Pair",
        description="Evaluate on Cyano operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/cyano_operonic_pair", revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
77
+
78
+
79
class VibrioOperonPair(Task):
    """Vibrio operonic pair classification task."""

    metadata = TaskMetadata(
        id="vibrio_operonic_pair",
        display_name="Vibrio Operonic Pair",
        description="Evaluate on Vibrio operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/vibrio_operonic_pair", revision="24781b12b45bf81a079a6164ef0d2124948c1878"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
dgeb/tasks/retrieval_tasks.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retrieval tasks find functionally relevant genes in a corpus of genes based on a query gene.
3
+ Typically corpus is derived from a different phylogenetic group than the query genes.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import RetrievalEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def run_retrieval_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate a retrieval task using the RetrievalEvaluator.

    Args:
        model: Embedding model; must expose `encode()`, `layers`, and `metadata`.
        metadata: Task metadata with exactly two datasets: the first provides
            the corpus ("train") and query ("test") splits, the second the qrels.

    Returns:
        TaskResult with one metrics dict per evaluated model layer.

    Raises:
        ValueError: If the task is not configured with exactly two datasets.
    """
    if len(metadata.datasets) != 2:
        # Fix: the original message claimed 3 datasets were required, which
        # contradicted the `!= 2` check above.
        raise ValueError(
            "Retrieval tasks require 2 datasets: corpus/query splits and qrels."
        )
    # Fix: load the corpus/query dataset once instead of twice.
    seq_ds = metadata.datasets[0].load()
    corpus_ds = seq_ds["train"]
    query_ds = seq_ds["test"]
    qrels_ds = metadata.datasets[1].load()
    corpus_embeds = model.encode(corpus_ds["Sequence"])
    query_embeds = model.encode(query_ds["Sequence"])
    qrels_dict = defaultdict(dict)

    def qrels_dict_init(row):
        # One relevance judgement per (query, corpus) pair.
        qrels_dict[str(row["query_id"])][str(row["corpus_id"])] = int(row["fuzz_ratio"])

    # Populate `qrels_dict` from the dataset (map() is used for its side effect).
    # See https://github.com/cvangysel/pytrec_eval for qrels format.
    qrels_ds.map(qrels_dict_init)
    qrels = qrels_dict
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = RetrievalEvaluator(
            corpus_embeds[:, i],
            query_embeds[:, i],
            corpus_ds["Entry"],
            query_ds["Entry"],
            qrels,
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, Retrieval results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
49
+
50
+
51
class ArchRetrieval(Task):
    """Archaea-to-bacteria protein retrieval task."""

    metadata = TaskMetadata(
        id="arch_retrieval",
        display_name="Arch Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        # First dataset holds corpus/query splits, second the qrels.
        datasets=[
            Dataset(path="tattabio/arch_retrieval", revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"),
            Dataset(path="tattabio/arch_retrieval_qrels", revision="3f142f2f9a0995d56c6e77188c7251761450afcf"),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared retrieval runner."""
        return run_retrieval_task(model, self.metadata)
73
+
74
+
75
class EukRetrieval(Task):
    """Eukaryote-to-bacteria protein retrieval task."""

    metadata = TaskMetadata(
        id="euk_retrieval",
        display_name="Euk Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        # First dataset holds corpus/query splits, second the qrels.
        datasets=[
            Dataset(path="tattabio/euk_retrieval", revision="c93dc56665cedd19fbeaea9ace146f2474c895f0"),
            Dataset(path="tattabio/euk_retrieval_qrels", revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b"),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared retrieval runner."""
        return run_retrieval_task(model, self.metadata)
dgeb/tasks/tasks.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task abstract class for evaluation and results."""
2
+
3
+ import logging
4
+ from typing import List, Literal, Optional, Any
5
+ from importlib.metadata import version
6
+ from enum import Enum
7
+ import datasets
8
+ from pydantic import BaseModel, model_validator
9
+ from abc import ABC, abstractmethod
10
+
11
+
12
# HACK: if Modality is not defined, then import it from modality.py
# NOTE(review): the leaderboard (leaderboard/app.py) loads this module
# standalone via importlib, so the relative import below fails there; this
# fallback re-declares Modality inline. Keep it in sync with dgeb/modality.py.
try:
    from ..modality import Modality
except Exception:
    # if not, super hack to get the leaderboard working.
    # SHOULD MATCH the code exactly in modality.py
    # can we read the file and run that code?
    from enum import Enum

    class Modality(Enum):
        """Data modality, either DNA or protein sequence."""

        PROTEIN = "protein"
        DNA = "dna"
26
+
27
+
28
# Configure root logging at import time so task modules emit INFO-level logs.
logging.basicConfig(level=logging.INFO)

# Closed set of benchmark task categories; used by TaskMetadata.type.
TaskType = Literal[
    "classification",
    "pair_classification",
    "clustering",
    "eds",
    "bigene_mining",
    "retrieval",
]
38
+
39
+
40
class TaskMetric(BaseModel):
    """A single named metric value reported for one model layer."""

    id: str  # Machine-readable metric identifier, e.g. "f1" or "map_at_5".
    display_name: str  # Human-readable label shown in the leaderboard.
    description: Optional[str] = None
    value: float = 0.0  # Metric value; defaults to 0.0 when unset.
45
+
46
+
47
class LayerResult(BaseModel):
    """All metrics computed from the embeddings of one model layer."""

    layer_number: int  # Index of the model layer the metrics were computed from.
    layer_display_name: str  # Label for display (e.g. "mid", "last", or the index).
    metrics: List[TaskMetric]
51
+
52
+
53
class GEBModel(BaseModel):
    """Minimal description of an evaluated embedding model."""

    hf_name: str  # Hugging Face model identifier, e.g. "facebook/esm2_t12_35M_UR50D".
    num_layers: int
    num_params: int  # Total parameter count.
    embed_dim: int  # Dimensionality of the output embeddings.
58
+
59
+
60
class Dataset(BaseModel):
    """A Hugging Face dataset pinned to a specific revision."""

    path: str
    revision: str

    def load(self) -> datasets.DatasetDict:
        """Download the dataset and return it, ensuring it is a DatasetDict."""
        loaded = datasets.load_dataset(self.path, revision=self.revision)
        if isinstance(loaded, datasets.DatasetDict):
            return loaded
        raise ValueError(
            f"Dataset {self.path} is not a datasets.DatasetDict object."
        )
71
+
72
+
73
class TaskMetadata(BaseModel):
    """Declarative description of a benchmark task."""

    id: str  # Unique machine-readable task identifier.
    display_name: str  # Human-readable name shown in the leaderboard.
    description: str
    modality: Modality  # DNA or protein sequences.
    type: TaskType
    # List of datasets used by the task.
    # Each dataset is a dict of all arguments to pass to `datasets.load_dataset()`.
    datasets: List[Dataset]
    primary_metric_id: str  # Metric id used for ranking/aggregation.
83
+
84
+
85
+ # tasks.py
86
class TaskResult(BaseModel):
    """Results of running one benchmark task against one model."""

    dgeb_version: str  # Package version that produced this result.
    task: "TaskMetadata"
    # TODO: Convert model to ModelMetadata
    model: GEBModel
    results: List[LayerResult]

    @model_validator(mode="after")
    def check_valid_primary_metric(self):
        """Ensure every layer's metrics include the task's primary metric."""
        for result in self.results:
            if all(
                metric.id != self.task.primary_metric_id for metric in result.metrics
            ):
                raise ValueError(
                    f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
                )
        return self

    @staticmethod
    def from_dict(
        task_metadata: "TaskMetadata",
        # Fix: the original annotation (`LayerResult`) was wrong — the task
        # runners pass a plain `{"layers": {layer: {metric_id: value}}}` dict.
        layer_results: dict,
        model_metadata: GEBModel,
    ):
        """Build a TaskResult from the nested dict produced by task runners.

        Args:
            task_metadata: Metadata of the task that was run.
            layer_results: Mapping `{"layers": {layer: {metric_id: value}}}`.
            model_metadata: Description of the evaluated model.

        Returns:
            A validated TaskResult instance.
        """
        return TaskResult(
            dgeb_version=version("dgeb"),
            task=task_metadata,
            model=model_metadata,
            results=[
                LayerResult(
                    layer_number=int(layer),
                    layer_display_name=str(layer),
                    metrics=[
                        TaskMetric(id=metric, display_name=metric, value=value)
                        for metric, value in metrics.items()
                    ],
                )
                for layer, metrics in layer_results["layers"].items()
            ],
        )
126
+
127
+
128
+ # move to model.py?
129
class Task(ABC):
    """Abstract base class for a DGEB benchmark task.

    Subclasses declare `metadata` (id, datasets, primary metric) and implement
    `run()` to evaluate a model and return a TaskResult.
    """

    # Declarative description of the task (id, datasets, primary metric, ...).
    metadata: TaskMetadata

    # using Any instead of "BioSeqTransformer" to avoid installing all deps in leaderboard
    @abstractmethod
    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        """Run the task on `model`, optionally restricted to specific layers."""
        pass
docker-compose.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Compose setup for serving the DGEB leaderboard container.
version: "3"
services:
  dgeb-leaderboard:
    build:
      context: ./
      dockerfile: Dockerfile
    ports:
      # NOTE(review): host port 7680 maps to container port 7860 (Gradio's
      # default). 7680 looks like a transposition of 7860 — confirm the
      # intended host port.
      - "7680:7860"
docs/images/tatta_logo.png ADDED
leaderboard/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ /.projectile
2
+ **/__pycache__/
leaderboard/DGEB_Figure.png ADDED
leaderboard/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # to set up hf repo to receive origin pushes
2
+ git remote set-url --add origin [email protected]:spaces/tattabio/DGEB
leaderboard/__init__.py ADDED
File without changes
leaderboard/app.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import json
3
+ from pathlib import Path
4
+ import gradio as gr
5
+ from typing import List
6
+ import pandas as pd
7
+ import importlib.util
8
+ from pydantic import ValidationError, parse_obj_as
9
+
10
+ SIG_FIGS = 4
11
+
12
+ # HACK: very hacky way to import from parent directory, while avoiding needing all the deps of the parent package
13
+ modality_path = "../dgeb/modality.py"
14
+ spec = importlib.util.spec_from_file_location("modality", modality_path)
15
+ modality = importlib.util.module_from_spec(spec)
16
+ spec.loader.exec_module(modality)
17
+ Modality = modality.Modality
18
+
19
+
20
+ tasks_path = "../dgeb/tasks/tasks.py"
21
+
22
+ # Load the module
23
+ spec = importlib.util.spec_from_file_location("tasks", tasks_path)
24
+ tasks = importlib.util.module_from_spec(spec)
25
+ spec.loader.exec_module(tasks)
26
+ TaskResult = tasks.TaskResult
27
+ GEBModel = tasks.GEBModel
28
+
29
+
30
+ # Assuming the class definitions provided above are complete and imported here
31
+
32
+
33
def format_num_params(param: int) -> str:
    """Render a parameter count as a short human-readable string.

    Counts of one million or more are truncated to whole millions with an
    "M" suffix (e.g. 33992881 -> "33M"); smaller counts get a thousands
    separator (e.g. 1234 -> "1,234").
    """
    MILLION = 1_000_000
    if param < MILLION:
        return f"{param:,}"
    return f"{int(param / MILLION)}M"
45
+
46
+
47
def load_json_files_from_directory(directory_path: Path) -> List[dict]:
    """
    Recursively collect the parsed contents of every *.json file under a directory.

    :param directory_path: Root directory to search for JSON files.
    :return: Parsed JSON payloads, one entry per successfully-read file.
    """
    collected: List[dict] = []
    # rglob walks the whole tree, including nested submission folders.
    for json_file in directory_path.rglob("*.json"):
        try:
            with open(json_file, "r", encoding="utf-8") as file:
                collected.append(json.load(file))
        except Exception as e:
            # Best-effort: a malformed file is reported but does not abort the scan.
            print(f"Error loading {json_file}: {e}")
    return collected
63
+
64
+
65
def load_results() -> List[TaskResult]:
    """
    Recursively load JSON files in ./submissions/** and return a list of TaskResult objects.
    """
    raw_payloads = load_json_files_from_directory(Path("./submissions"))

    parsed: List[TaskResult] = []
    for payload in raw_payloads:
        try:
            # Pydantic validates each submission against the TaskResult schema.
            parsed.append(parse_obj_as(TaskResult, payload))
        except ValidationError as e:
            # Report which payload failed validation, then abort loading.
            print(f"Error parsing TaskResult object: {e}")
            raise e

    return parsed
84
+
85
+
86
def task_results_to_dgeb_score(
    model: GEBModel, model_results: List[TaskResult]
) -> dict:
    """Aggregate one model's task results into a single DGEB-score row.

    For each task, the best primary-metric value across all reported layers
    is taken; the DGEB score is the mean of those per-task bests.
    """
    best_scores_per_task = []
    modalities_seen = set()
    for task_result in model_results:
        modalities_seen.add(task_result.task.modality)
        assert (
            task_result.model.hf_name == model.hf_name
        ), f"Model names do not match, {task_result.model.hf_name} != {model.hf_name}"
        primary_metric_id = task_result.task.primary_metric_id
        # Collect the primary metric from every reported layer, keep the best.
        per_layer_scores = [
            metric.value
            for result in task_result.results
            for metric in result.metrics
            if metric.id == primary_metric_id
        ]
        best_scores_per_task.append(max(per_layer_scores))

    assert (
        len(modalities_seen) == 1
    ), f"Multiple modalities found for model {model.hf_name}"
    assert len(best_scores_per_task) > 0, f"No tasks found for model {model.hf_name}"
    # DGEB score = mean of the per-task best primary-metric scores.
    dgeb_score = sum(best_scores_per_task) / len(best_scores_per_task)
    return {
        "Task Name": "DGEB Score",
        "Task Category": "DGEB",
        "Model": model.hf_name,
        "Modality": list(modalities_seen)[0],
        "Num. Parameters (millions)": format_num_params(model.num_params),
        "Emb. Dimension": model.embed_dim,
        "Score": dgeb_score,
    }
121
+
122
+
123
def task_results_to_df(model_results: List[TaskResult]) -> pd.DataFrame:
    """Flatten TaskResult objects into one leaderboard row per (task, model, layer).

    Only the "mid" and "last" layers of each model are kept; other layers are
    skipped. One extra "DGEB Score" row per model is appended at the end.
    Note: this mutates `layer.layer_display_name` on the input objects in place.
    """
    # Initialize an empty list to hold all rows of data
    data_rows = []
    all_models = {}
    for res in model_results:
        task = res.task
        model = res.model
        all_models[model.hf_name] = model
        print(f"Processing {task.display_name} for {model.hf_name}")
        for layer in res.results:
            # Layers are zero-indexed, so the last layer index is num_layers - 1.
            total_layers = model.num_layers - 1
            mid_layer = math.ceil(total_layers / 2)
            if mid_layer == layer.layer_number:
                layer.layer_display_name = "mid"
            elif total_layers == layer.layer_number:
                layer.layer_display_name = "last"

            if layer.layer_display_name not in ["mid", "last"]:
                # calculate if the layer is mid or last
                print(
                    f"Layer {layer.layer_number} is not mid or last out of {total_layers}. Skipping"
                )
                continue
            else:
                # For each Metric in the Layer
                # pivoting the data so that each metric is a row
                metric_ids = []
                primary_metric_label = f"{task.primary_metric_id} (primary metric)"
                for metric in layer.metrics:
                    if task.primary_metric_id == metric.id:
                        metric_ids.append(primary_metric_label)
                    else:
                        metric_ids.append(metric.id)

                metric_values = [metric.value for metric in layer.metrics]
                zipped = zip(metric_ids, metric_values)
                # sort primary metric id first (stable sort: False < True)
                sorted_zip = sorted(
                    zipped,
                    key=lambda x: x[0] != primary_metric_label,
                )
                # One row per layer; metric columns are spread via **dict().
                data_rows.append(
                    {
                        "Task Name": task.display_name,
                        "Task Category": task.type,
                        "Model": model.hf_name,
                        "Num. Parameters (millions)": format_num_params(
                            model.num_params
                        ),
                        "Emb. Dimension": model.embed_dim,
                        "Modality": task.modality,
                        "Layer": layer.layer_display_name,
                        **dict(sorted_zip),
                    }
                )
    # Append one aggregate "DGEB Score" row per model.
    for model_name, model in all_models.items():
        results_for_model = [
            res for res in model_results if res.model.hf_name == model_name
        ]
        assert len(results_for_model) > 0, f"No results found for model {model_name}"
        dgeb_score_record = task_results_to_dgeb_score(model, results_for_model)
        print(f'model {model.hf_name} dgeb score: {dgeb_score_record["Score"]}')
        data_rows.append(dgeb_score_record)
    print("Finished processing all results")
    df = pd.DataFrame(data_rows)
    return df
189
+
190
+
191
# --- Module-level script: load submissions and build the Gradio leaderboard UI. ---
df = task_results_to_df(load_results())
image_path = "./DGEB_Figure.png"
with gr.Blocks() as demo:
    gr.Label("Diverse Genomic Embedding Benchmark", show_label=False, scale=2)
    # Figure is served from the local directory (see allowed_paths in launch()).
    gr.HTML(
        f"<img src='file/{image_path}' alt='DGEB Figure' style='border-radius: 0.8rem; width: 50%; margin-left: auto; margin-right: auto; margin-top:12px;'>"
    )
    gr.HTML(
        """
        <div style='width: 50%; margin-left: auto; margin-right: auto; padding-bottom: 8px;text-align: center;'>
        DGEB Leaderboard. To submit, refer to the <a href="https://github.com/TattaBio/DGEB/blob/leaderboard/README.md" target="_blank" style="text-decoration: underline">DGEB GitHub repository</a> Refer to the <a href="https://example.com" target="_blank" style="text-decoration: underline">DGEB paper</a> for details on metrics, tasks, and models.
        </div>
        """
    )

    # One top-level tab per task category, with the aggregate "DGEB" tab first.
    unique_categories = df["Task Category"].unique()
    # sort "DGEB" to the start
    unique_categories = sorted(unique_categories, key=lambda x: x != "DGEB")
    for category in unique_categories:
        with gr.Tab(label=category):
            unique_tasks_in_category = df[df["Task Category"] == category][
                "Task Name"
            ].unique()
            # sort "Overall" to the start
            unique_tasks_in_category = sorted(
                unique_tasks_in_category, key=lambda x: x != "Overall"
            )
            # One nested tab per task, each showing a ranked dataframe.
            for task in unique_tasks_in_category:
                with gr.Tab(label=task):
                    columns_to_hide = ["Task Name", "Task Category"]
                    # get rows where Task Name == task and Task Category == category
                    filtered_df = (
                        df[
                            (df["Task Name"] == task)
                            & (df["Task Category"] == category)
                        ].drop(columns=columns_to_hide)
                    ).dropna(axis=1, how="all")  # drop all NaN columns for Overall tab
                    # round all values to 4 decimal places
                    rounded_df = filtered_df.round(SIG_FIGS)

                    # calculate ranking column
                    # if in Overview tab, rank by average metric value
                    if task == "Overall":
                        # rank by average col
                        rounded_df["Rank"] = filtered_df["Average"].rank(
                            ascending=False
                        )
                    else:
                        # Rank by the sum of all metric columns (non-metric
                        # columns are excluded below).
                        avoid_cols = [
                            "Model",
                            "Emb. Dimension",
                            "Num. Parameters (millions)",
                            "Modality",
                            "Layer",
                        ]
                        rounded_df["Rank"] = (
                            rounded_df.drop(columns=avoid_cols, errors="ignore")
                            .sum(axis=1)
                            .rank(ascending=False)
                        )
                    # make Rank first column
                    cols = list(rounded_df.columns)
                    cols.insert(0, cols.pop(cols.index("Rank")))
                    rounded_df = rounded_df[cols]
                    # sort by rank
                    rounded_df = rounded_df.sort_values("Rank")
                    data_frame = gr.DataFrame(rounded_df)


demo.launch(allowed_paths=["."])
leaderboard/requirements.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ altair==5.3.0
5
+ annotated-types==0.7.0
6
+ anyio==4.4.0
7
+ attrs==23.2.0
8
+ certifi==2024.6.2
9
+ charset-normalizer==3.3.2
10
+ click==8.1.7
11
+ contourpy==1.2.1
12
+ cycler==0.12.1
13
+ datasets==2.14.4
14
+ dill==0.3.7
15
+ dnspython==2.6.1
16
+ email-validator==2.1.2
17
+ fastapi==0.111.0
18
+ fastapi-cli==0.0.4
19
+ ffmpy==0.3.2
20
+ filelock==3.15.1
21
+ fonttools==4.53.0
22
+ frozenlist==1.4.1
23
+ fsspec==2024.6.0
24
+ gradio==4.37.2
25
+ gradio-client==1.0.2
26
+ h11==0.14.0
27
+ httpcore==1.0.5
28
+ httptools==0.6.1
29
+ httpx==0.27.0
30
+ huggingface-hub==0.23.4
31
+ idna==3.7
32
+ importlib-resources==6.4.0
33
+ jinja2==3.1.4
34
+ jsonschema==4.22.0
35
+ jsonschema-specifications==2023.12.1
36
+ kiwisolver==1.4.5
37
+ markdown-it-py==3.0.0
38
+ markupsafe==2.1.5
39
+ matplotlib==3.9.0
40
+ mdurl==0.1.2
41
+ multidict==6.0.5
42
+ multiprocess==0.70.15
43
+ numpy==2.0.0
44
+ orjson==3.10.5
45
+ packaging==24.1
46
+ pandas==2.2.2
47
+ pillow==10.3.0
48
+ pyarrow==16.1.0
49
+ pydantic==2.7.4
50
+ pydantic-core==2.18.4
51
+ pydub==0.25.1
52
+ pygments==2.18.0
53
+ pyparsing==3.1.2
54
+ python-dateutil==2.9.0.post0
55
+ python-dotenv==1.0.1
56
+ python-multipart==0.0.9
57
+ pytz==2024.1
58
+ pyyaml==6.0.1
59
+ referencing==0.35.1
60
+ requests==2.32.3
61
+ rich==13.7.1
62
+ rpds-py==0.18.1
63
+ ruff==0.4.9
64
+ semantic-version==2.10.0
65
+ shellingham==1.5.4
66
+ six==1.16.0
67
+ sniffio==1.3.1
68
+ starlette==0.37.2
69
+ tomlkit==0.12.0
70
+ toolz==0.12.1
71
+ tqdm==4.66.4
72
+ typer==0.12.3
73
+ typing-extensions==4.12.2
74
+ tzdata==2024.1
75
+ ujson==5.10.0
76
+ urllib3==2.2.2
77
+ uvicorn==0.30.1
78
+ uvloop==0.19.0
79
+ watchfiles==0.22.0
80
+ websockets==11.0.3
81
+ xxhash==3.4.1
82
+ yarl==1.9.4
leaderboard/submissions/.DS_Store ADDED
Binary file (12.3 kB). View file
 
leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "MIBIG_protein_classification",
4
+ "display_name": "MIBiG Classification",
5
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mibig_classification_prot",
11
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "f1",
31
+ "display_name": "f1",
32
+ "description": null,
33
+ "value": 0.6537260383267297
34
+ },
35
+ {
36
+ "id": "accuracy",
37
+ "display_name": "accuracy",
38
+ "description": null,
39
+ "value": 0.6689342403628118
40
+ },
41
+ {
42
+ "id": "precision",
43
+ "display_name": "precision",
44
+ "description": null,
45
+ "value": 0.7853286513915045
46
+ },
47
+ {
48
+ "id": "recall",
49
+ "display_name": "recall",
50
+ "description": null,
51
+ "value": 0.6020175670931918
52
+ },
53
+ {
54
+ "id": "lrap",
55
+ "display_name": "lrap",
56
+ "description": null,
57
+ "value": 0.798563869992442
58
+ }
59
+ ]
60
+ },
61
+ {
62
+ "layer_number": 11,
63
+ "layer_display_name": "11",
64
+ "metrics": [
65
+ {
66
+ "id": "f1",
67
+ "display_name": "f1",
68
+ "description": null,
69
+ "value": 0.645844633541225
70
+ },
71
+ {
72
+ "id": "accuracy",
73
+ "display_name": "accuracy",
74
+ "description": null,
75
+ "value": 0.655328798185941
76
+ },
77
+ {
78
+ "id": "precision",
79
+ "display_name": "precision",
80
+ "description": null,
81
+ "value": 0.7407876819384401
82
+ },
83
+ {
84
+ "id": "recall",
85
+ "display_name": "recall",
86
+ "description": null,
87
+ "value": 0.5970376985838431
88
+ },
89
+ {
90
+ "id": "lrap",
91
+ "display_name": "lrap",
92
+ "description": null,
93
+ "value": 0.7849584278155715
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "arch_retrieval",
4
+ "display_name": "Arch Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/arch_retrieval",
11
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
12
+ },
13
+ {
14
+ "path": "tattabio/arch_retrieval_qrels",
15
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.84127
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.82701
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.79635
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.27329
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.37939
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.64453
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.2839
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.40033
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.70443
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.7621
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.69407
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.42452
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.8853108550291645
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.8879126611520968
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.8892435700922602
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.6178391415234327
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.27510768020625387
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.2751226626247053
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.6158935362175889
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.29490376307826244
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.3173510395378902
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6282820888186709
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.217967587602592
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.3392167130961565
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": 0.02706102865662817
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.33465305568189146
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.29252115202920864
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.1461797349288265
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.3984979781227535
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.15678893453735943
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.5443958382387585
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.3379769732428374
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.23212587702223994
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": 0.008899383756080657
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.3376357180005265
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.2949278653804833
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.11957594632298725
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.4084900248156052
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.16409679466126934
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.5478175261971683
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.3566768602643857
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.24770750166012404
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5588205820812548
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.053528426968584814
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.5895997876864452
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.5109397710788774
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": -0.0014360394688449447
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.5972188824684267
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.30493219390483955
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.35096314542920914
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.4163370977258702
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.6041064087877195
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.2995447501683336
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.1176892239839227
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.6055526314461911
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.3015594122136539
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.11951448723943421
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.6050403183375579
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.3012299482545067
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.12091114334431136
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 11,
397
+ "layer_display_name": "11",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.82819
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.81615
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.78982
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.27067
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.37321
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.63596
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.27906
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.39106
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.69746
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.7487
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.68506
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.42266
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8752382984777344
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.878253189168681
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8795454419523189
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.6238124910465183
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.3878031710482511
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.22961445620397436
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.6136556294192528
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.4027695454909326
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.23933162739820324
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.6039490411056802
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.379240829313294
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.23134380586116654
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": -0.018274861348075953
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.3153330580523699
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.31839102956934573
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.10106646301687382
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.4143687386138405
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.18923312509326384
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.5144031685310609
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.45693618989546114
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.1513413062960939
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": -0.031265621786664255
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.32028522957198785
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.32056979656535384
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.07820354892522365
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.42551786412535775
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.2040509113490322
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.5060801621108716
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.5071691349011768
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": -0.11952783139053508
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5923656191314365
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.1954332256400316
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.5508269378169939
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.5458701611463479
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.12975949111453675
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.5537528325655148
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.3549845967268747
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.26254902560124815
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.3919186481758992
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.6284613562335846
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.3609822238622607
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": -0.13691647729285375
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.6282780633119702
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.36649482857679033
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": -0.1301211341279461
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.6290574535816186
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.367920824556504
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": -0.13036774230606793
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "bacarch_bigene",
4
+ "display_name": "BacArch BiGene",
5
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/bac_arch_bigene",
11
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "precision",
31
+ "display_name": "precision",
32
+ "description": null,
33
+ "value": 0.6215094339622641
34
+ },
35
+ {
36
+ "id": "recall",
37
+ "display_name": "recall",
38
+ "description": null,
39
+ "value": 0.7056603773584905
40
+ },
41
+ {
42
+ "id": "f1",
43
+ "display_name": "f1",
44
+ "description": null,
45
+ "value": 0.6469182389937107
46
+ },
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.7056603773584905
52
+ }
53
+ ]
54
+ },
55
+ {
56
+ "layer_number": 11,
57
+ "layer_display_name": "11",
58
+ "metrics": [
59
+ {
60
+ "id": "precision",
61
+ "display_name": "precision",
62
+ "description": null,
63
+ "value": 0.6138364779874214
64
+ },
65
+ {
66
+ "id": "recall",
67
+ "display_name": "recall",
68
+ "description": null,
69
+ "value": 0.7018867924528301
70
+ },
71
+ {
72
+ "id": "f1",
73
+ "display_name": "f1",
74
+ "description": null,
75
+ "value": 0.6413836477987421
76
+ },
77
+ {
78
+ "id": "accuracy",
79
+ "display_name": "accuracy",
80
+ "description": null,
81
+ "value": 0.7018867924528301
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "convergent_enzymes_classification",
4
+ "display_name": "Convergent Enzymes Classification",
5
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/convergent_enzymes",
11
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.2475
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.20116666666666666
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 11,
45
+ "layer_display_name": "11",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.2425
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.19904761904761906
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "cyano_operonic_pair",
4
+ "display_name": "Cyano Operonic Pair",
5
+ "description": "Evaluate on Cyano operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/cyano_operonic_pair",
11
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.7203065134099617
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.990619957447052
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.44058665070338227
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.815308690071106
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.28253358925143957
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 1.0
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.32424099100055437
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.7187739463601532
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 40.061012268066406
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.43963963963963965
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 380.5898742675781
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.28218966846569005
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9945652173913043
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.3051200502841412
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.7187739463601532
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 2.2720906734466553
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.4404548174745661
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 25.41253662109375
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.28242517267843437
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 1.0
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.3117112729287826
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.7206896551724138
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1764.11328125
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.44177215189873426
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 1021.9218139648438
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.28795379537953797
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9483695652173914
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.35181607664099845
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.35181607664099845
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.7206896551724138
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9833309650421143
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.4454067429631921
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.8805520534515381
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.2883460152182619
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.9782608695652174
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.3325946475342702
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.721455938697318
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 230.74539184570312
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.4439615026389321
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 690.979248046875
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.28772635814889336
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.9714673913043478
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.33577510329678106
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.7210727969348659
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 13.784924507141113
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.44413697682462816
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 39.12321472167969
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.29791183294663576
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.8722826086956522
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.33823458280589236
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.7191570881226054
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 10542.0
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.4403230631169608
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 4913.24560546875
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.2823168392788646
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 1.0
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.28278909833025945
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.33823458280589236
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "ec_classification",
4
+ "display_name": "EC Classification",
5
+ "description": "Evaluate on Enzyme Commission number classification task.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/ec_classification",
11
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.6015625
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.55390625
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 11,
45
+ "layer_display_name": "11",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.5546875
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.5096354166666667
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "ecoli_operonic_pair",
4
+ "display_name": "E.coli Operonic Pair",
5
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/ecoli_operonic_pair",
11
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.6309689383402874
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.9664175510406494
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.5831148400629261
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.876137375831604
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.41972823351786614
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 0.954779622209502
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.5226436718954207
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.6237830319888734
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 151.0961456298828
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.5765230312035661
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 417.6656494140625
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.4051044083526682
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9994275901545506
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.5038561800803791
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.624246638850255
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 9.827131271362305
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.5778148457047539
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 23.485851287841797
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.4077212806026365
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 0.9914138523182598
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.5109707609256201
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.6200741770978211
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1509.6474609375
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.576427863981512
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 827.195556640625
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.40501043841336115
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9994275901545506
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.498147478687894
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.5226436718954207
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.6305053314789059
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9585829377174377
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.5934650455927052
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.9002124071121216
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.44412851862382713
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.8941041785918717
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.545021841060869
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.6342141863699583
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 444.21954345703125
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.6035735322992343
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 612.2872314453125
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.45935445307830247
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.8797939324556382
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.5574639922170803
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.6339823829392675
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 29.62457275390625
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.5996841689696012
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 38.6270751953125
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.45766797228080747
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.86949055523755
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.5553872058517757
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.5948076031525267
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 14395.623046875
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.577018736527939
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 5674.908203125
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.4061624649859944
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 0.9959931310818546
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.3862357442891778
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.5574639922170803
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "euk_retrieval",
4
+ "display_name": "Euk Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/euk_retrieval",
11
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
12
+ },
13
+ {
14
+ "path": "tattabio/euk_retrieval_qrels",
15
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.80067
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.79455
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.77429
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.30914
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.41095
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.60087
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.31905
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.43473
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.66233
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.7119
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.63408
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.3663
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.8471596998928188
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.8513856989741232
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.8527043294326252
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.7166495695870103
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.5383304196281262
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.38408074718110424
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.71056350273151
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.5386325626626473
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.3678412023083028
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6787542765531929
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.4678010355684318
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.3023078330221261
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": 0.17506411594869709
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.344228905317099
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.26025197550499063
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.28364735198157687
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.4946084063548821
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.13024980686869012
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.6456837506614725
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.5024354435806796
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.18849105999507082
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": 0.15537143366366737
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.3338972930408563
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.27534514133854515
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.24230061291494534
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.4763992415794819
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.17167004025145782
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.6062660448007379
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.45445564371902375
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.09621042247019258
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5420327575630611
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.37248428210075407
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.6517795575595553
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.46182346579179107
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": 0.2556997419766225
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.6371093546193429
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.22395520722060117
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.27077611986871364
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.4324048296185153
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.7966902615822546
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.5623896062382641
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.27875113624180275
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.7982850278647994
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.5623589312727257
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.27578274493030464
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.7977600079745486
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.5625363754999084
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.2708948491113527
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 11,
397
+ "layer_display_name": "11",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.79574
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.7872
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.76804
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.30344
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.40308
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.59158
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.31068
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.41808
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.64688
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.70611
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.63055
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.36862
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8521436227224009
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.8555504516919309
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8571980685347454
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.687147173549288
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.534917528750057
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.039388068191112346
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.6821413074357394
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.541004104911246
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.06613569078084217
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.6546658854714889
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.5141528362539365
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.045010206374762184
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": 0.1717014705213338
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.298486867259319
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.3158992753503486
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.29394629114728443
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.4807193931287969
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.200767704240122
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.6266013107050147
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.5400967080146492
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.06821295960747309
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": 0.15728927641821855
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.3020952193182204
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.3196038571595756
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.273851179897414
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.4822263524474807
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.1998852576547706
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.610064992339158
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.5237697244132881
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": -0.047861477876695854
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5642831983945668
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.41268016275342806
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.3902377594145758
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.4757631079174044
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.32238368240767273
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.4280345103983777
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.19318747544949869
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.2262940005534252
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.2898939009819229
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.7559907957579797
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.5232164154691852
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": 0.016325972601983724
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.7604182097391701
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.5188685708290457
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": 0.008720431706015956
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.7617325890747185
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.5213157058041827
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": 0.015621035073521741
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "fefe_phylogeny",
4
+ "display_name": "FeFeHydrogenase Phylogeny",
5
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/fefe_phylogeny_sequences",
11
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
12
+ },
13
+ {
14
+ "path": "tattabio/fefe_phylogeny_distances",
15
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.46213607103563425
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.5621218764061721
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.5442663405841599
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.5621218764061721
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.1524486344353939
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.5194125891005561
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.48868066660269227
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.5194125891005561
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "modac_paralogy_bigene",
4
+ "display_name": "ModAC Paralogy BiGene",
5
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/modac_paralogy_bigene",
11
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
12
+ }
13
+ ],
14
+ "primary_metric_id": "recall_at_50"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "num_layers": 12,
19
+ "num_params": 33992881,
20
+ "embed_dim": 480
21
+ },
22
+ "dgeb_version": "0.0.0",
23
+ "results": [
24
+ {
25
+ "layer_number": 6,
26
+ "layer_display_name": "6",
27
+ "metrics": [
28
+ {
29
+ "id": "precision",
30
+ "display_name": "precision",
31
+ "description": null,
32
+ "value": 4.4952467261118094e-7
33
+ },
34
+ {
35
+ "id": "recall",
36
+ "display_name": "recall",
37
+ "description": null,
38
+ "value": 0.0006702412868632708
39
+ },
40
+ {
41
+ "id": "f1",
42
+ "display_name": "f1",
43
+ "description": null,
44
+ "value": 8.984467652322665e-7
45
+ },
46
+ {
47
+ "id": "accuracy",
48
+ "display_name": "accuracy",
49
+ "description": null,
50
+ "value": 0.0006702412868632708
51
+ },
52
+ {
53
+ "id": "recall_at_50",
54
+ "display_name": "recall_at_50",
55
+ "description": null,
56
+ "value": 0.03485254691689008
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "layer_number": 11,
62
+ "layer_display_name": "11",
63
+ "metrics": [
64
+ {
65
+ "id": "precision",
66
+ "display_name": "precision",
67
+ "description": null,
68
+ "value": 4.4952467261118094e-7
69
+ },
70
+ {
71
+ "id": "recall",
72
+ "display_name": "recall",
73
+ "description": null,
74
+ "value": 0.0006702412868632708
75
+ },
76
+ {
77
+ "id": "f1",
78
+ "display_name": "f1",
79
+ "description": null,
80
+ "value": 8.984467652322665e-7
81
+ },
82
+ {
83
+ "id": "accuracy",
84
+ "display_name": "accuracy",
85
+ "description": null,
86
+ "value": 0.0006702412868632708
87
+ },
88
+ {
89
+ "id": "recall_at_50",
90
+ "display_name": "recall_at_50",
91
+ "description": null,
92
+ "value": 0.05361930294906166
93
+ }
94
+ ]
95
+ }
96
+ ]
97
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "mopb_clustering",
4
+ "display_name": "MopB Clustering",
5
+ "description": "Evaluate on MopB clustering task.",
6
+ "modality": "protein",
7
+ "type": "clustering",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mopb_clustering",
11
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
12
+ }
13
+ ],
14
+ "primary_metric_id": "v_measure"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "v_measure",
31
+ "display_name": "v_measure",
32
+ "description": null,
33
+ "value": 0.7366377426487285
34
+ }
35
+ ]
36
+ },
37
+ {
38
+ "layer_number": 11,
39
+ "layer_display_name": "11",
40
+ "metrics": [
41
+ {
42
+ "id": "v_measure",
43
+ "display_name": "v_measure",
44
+ "description": null,
45
+ "value": 0.7842647128962572
46
+ }
47
+ ]
48
+ }
49
+ ]
50
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "rpob_arch_phylogeny",
4
+ "display_name": "RpoB Archaeal Phylogeny",
5
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
11
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
12
+ },
13
+ {
14
+ "path": "tattabio/rpob_arch_phylogeny_distances",
15
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.2624971928673971
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.31502824152693154
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.3088945849814121
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.31502824152693154
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.34668475738519444
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.372455403853565
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.369729316093801
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.372455403853565
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "rpob_bac_phylogeny",
4
+ "display_name": "RpoB Bacterial Phylogeny",
5
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
11
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
12
+ },
13
+ {
14
+ "path": "tattabio/rpob_bac_phylogeny_distances",
15
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.12971577033648743
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.18177734472255433
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.16423413011355156
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.18177734472255433
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.10194557773024183
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.18622026845391912
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.15405389239655473
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.18622026845391912
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "vibrio_operonic_pair",
4
+ "display_name": "Vibrio Operonic Pair",
5
+ "description": "Evaluate on Vibrio operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/vibrio_operonic_pair",
11
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.6781966575981345
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.970278263092041
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.518608169440242
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.8757017254829407
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.35501242750621376
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 0.9618406285072951
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.4581544787406372
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.6731441896618733
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 137.3688507080078
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.5146164978292329
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 391.87298583984375
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.3467238689547582
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9977553310886644
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.4383109013756369
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.672755538282161
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 8.506048202514648
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.5152786099460755
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 21.124141693115234
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.35145075602778914
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 0.9652076318742986
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.4438681594614018
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.6599300427516518
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1570.195556640625
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.5147654892877822
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 898.4225463867188
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.3468591494342567
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9977553310886644
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.4179931403914694
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.4581544787406372
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.6746987951807228
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9681814312934875
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.5363604114934374
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.9120055437088013
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.3921161825726141
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.8484848484848485
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.46704651746605186
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.6746987951807228
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 360.30352783203125
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.5305821665438467
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 576.9113159179688
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.3949533735600658
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.8080808080808081
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.468990806236423
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.6758647493198601
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 22.342727661132812
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.5301837270341208
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 39.38741683959961
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.37459434399629116
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.9068462401795735
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.46775797789146023
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.6541002720559658
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 9448.685546875
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.5145827317354895
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 4854.8955078125
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.3464230171073095
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 1.0
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.3679854825040224
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.468990806236423
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "MIBIG_protein_classification",
4
+ "display_name": "MIBiG Classification",
5
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mibig_classification_prot",
11
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "f1",
31
+ "display_name": "f1",
32
+ "description": null,
33
+ "value": 0.721568117708931
34
+ },
35
+ {
36
+ "id": "accuracy",
37
+ "display_name": "accuracy",
38
+ "description": null,
39
+ "value": 0.7165532879818595
40
+ },
41
+ {
42
+ "id": "precision",
43
+ "display_name": "precision",
44
+ "description": null,
45
+ "value": 0.820388189148414
46
+ },
47
+ {
48
+ "id": "recall",
49
+ "display_name": "recall",
50
+ "description": null,
51
+ "value": 0.6689951528396479
52
+ },
53
+ {
54
+ "id": "lrap",
55
+ "display_name": "lrap",
56
+ "description": null,
57
+ "value": 0.8363567649281944
58
+ }
59
+ ]
60
+ },
61
+ {
62
+ "layer_number": 29,
63
+ "layer_display_name": "29",
64
+ "metrics": [
65
+ {
66
+ "id": "f1",
67
+ "display_name": "f1",
68
+ "description": null,
69
+ "value": 0.6298307655443518
70
+ },
71
+ {
72
+ "id": "accuracy",
73
+ "display_name": "accuracy",
74
+ "description": null,
75
+ "value": 0.6099773242630385
76
+ },
77
+ {
78
+ "id": "precision",
79
+ "display_name": "precision",
80
+ "description": null,
81
+ "value": 0.7648458169950588
82
+ },
83
+ {
84
+ "id": "recall",
85
+ "display_name": "recall",
86
+ "description": null,
87
+ "value": 0.5789820341918578
88
+ },
89
+ {
90
+ "id": "lrap",
91
+ "display_name": "lrap",
92
+ "description": null,
93
+ "value": 0.752078609221467
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "arch_retrieval",
4
+ "display_name": "Arch Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/arch_retrieval",
11
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
12
+ },
13
+ {
14
+ "path": "tattabio/arch_retrieval_qrels",
15
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 30,
24
+ "num_params": 148795481,
25
+ "embed_dim": 640
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 15,
31
+ "layer_display_name": "15",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.91537
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.90635
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.87424
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.30526
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.42635
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.72433
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.31067
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.4378
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.75859
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.82689
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.76159
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.46726
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.9422321809645754
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.9439900344829917
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.9446453591992101
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.6549640359156222
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.11037035667235007
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.41554431142868614
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.6536082943031309
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.140251553474609
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.4541965457157918
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6159871931946869
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.006651176818080506
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.39627086499203873
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": -0.047556791244411895
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.16420917659496206
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.28627326792803204
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.06426190649373154
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.23746446970773183
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.15565045001627686
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.5237897180891637
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.1865080232459892
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.2688572949738638
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": -0.054074967730710764
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.1711511016438979
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.2896050332877169
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.05005034152582497
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.24918235642253458
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.16768640965952947
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.5114754425984644
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.2173420630028766
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.2526274232326276
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5525639421444303
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.01857146637175079
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.7765476306675947
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.48362026531371466
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": -0.0051297270434755475
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.7004665714420365
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.24671476154878727
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.37006645670815747
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.36951553698605216
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.64312359548717
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.04622765419712948
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.22259410250972433
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.6385468425832173
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.058640802937365115
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.21579087208897282
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.6402042049799889
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.052782783025246006
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.21896215733129423
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 29,
397
+ "layer_display_name": "29",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.83285
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.81413
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.76701
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.25404
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.35083
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.58387
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.266
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.37545
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.66303
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.75621
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.6866
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.41047
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8947289799402471
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.895975855130784
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8970771214115124
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.6033756709037629
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.48175424620769186
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.1614695329433979
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.5820557360820439
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.48937482522317327
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.18205509390904553
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.49384788238425553
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.354953353704701
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.10767304568721194
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": 0.03598090314920231
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.11662947626949612
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.28974453988735166
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.13482748795676255
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.22360013731689057
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.19043309088480928
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.42287317105206507
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.32712992457779794
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.02056986996465222
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": 0.021824220192766298
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.11009705855814085
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.28505819859304804
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.10661440304261144
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.2092712287791401
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.19742570630860265
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.38620604109572715
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.2924386961038862
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": 0.025319280347884648
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5425386973889819
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.4063280755847313
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.43965420847555414
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.4721960038905336
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.35700671463443756
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.44652985217538876
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.2526299155090765
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.021434326602753354
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.3009002533330021
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.6726463178530804
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.49687521406966506
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": 0.05561071266486503
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.6731608376359998
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.49491217127896847
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": 0.05832429376042118
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.6735463200113443
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.495779540068593
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": 0.06154966156964915
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "bacarch_bigene",
4
+ "display_name": "BacArch BiGene",
5
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/bac_arch_bigene",
11
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "precision",
31
+ "display_name": "precision",
32
+ "description": null,
33
+ "value": 0.7591194968553459
34
+ },
35
+ {
36
+ "id": "recall",
37
+ "display_name": "recall",
38
+ "description": null,
39
+ "value": 0.8188679245283019
40
+ },
41
+ {
42
+ "id": "f1",
43
+ "display_name": "f1",
44
+ "description": null,
45
+ "value": 0.7779874213836478
46
+ },
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.8188679245283019
52
+ }
53
+ ]
54
+ },
55
+ {
56
+ "layer_number": 29,
57
+ "layer_display_name": "29",
58
+ "metrics": [
59
+ {
60
+ "id": "precision",
61
+ "display_name": "precision",
62
+ "description": null,
63
+ "value": 0.656010781671159
64
+ },
65
+ {
66
+ "id": "recall",
67
+ "display_name": "recall",
68
+ "description": null,
69
+ "value": 0.7320754716981132
70
+ },
71
+ {
72
+ "id": "f1",
73
+ "display_name": "f1",
74
+ "description": null,
75
+ "value": 0.6774213836477987
76
+ },
77
+ {
78
+ "id": "accuracy",
79
+ "display_name": "accuracy",
80
+ "description": null,
81
+ "value": 0.7320754716981132
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "convergent_enzymes_classification",
4
+ "display_name": "Convergent Enzymes Classification",
5
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/convergent_enzymes",
11
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.2975
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.24646428571428572
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 29,
45
+ "layer_display_name": "29",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.2475
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.20091666666666666
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }