Joshua Kravitz committed on
Commit
e284167
·
0 Parent(s):

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .editorconfig +12 -0
  2. .github/workflows/ci.yml +34 -0
  3. .github/workflows/release.yml +50 -0
  4. .gitignore +6 -0
  5. CHANGELOG.md +197 -0
  6. Dockerfile +27 -0
  7. LICENSE +201 -0
  8. README.md +181 -0
  9. dgeb/__init__.py +28 -0
  10. dgeb/cli.py +136 -0
  11. dgeb/dgeb.py +129 -0
  12. dgeb/eval_utils.py +394 -0
  13. dgeb/evaluators.py +839 -0
  14. dgeb/modality.py +8 -0
  15. dgeb/models.py +481 -0
  16. dgeb/tasks/__init__.py +16 -0
  17. dgeb/tasks/bigene_mining_tasks.py +77 -0
  18. dgeb/tasks/classification_tasks.py +213 -0
  19. dgeb/tasks/clustering_tasks.py +70 -0
  20. dgeb/tasks/eds_tasks.py +246 -0
  21. dgeb/tasks/pair_classification_tasks.py +96 -0
  22. dgeb/tasks/retrieval_tasks.py +96 -0
  23. dgeb/tasks/tasks.py +135 -0
  24. docker-compose.yml +8 -0
  25. docs/images/tatta_logo.png +0 -0
  26. leaderboard/.gitignore +2 -0
  27. leaderboard/DGEB_Figure.png +0 -0
  28. leaderboard/README.md +2 -0
  29. leaderboard/__init__.py +0 -0
  30. leaderboard/app.py +260 -0
  31. leaderboard/requirements.txt +82 -0
  32. leaderboard/submissions/.DS_Store +0 -0
  33. leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json +98 -0
  34. leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json +762 -0
  35. leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json +86 -0
  36. leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json +62 -0
  37. leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json +386 -0
  38. leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json +62 -0
  39. leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json +386 -0
  40. leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json +762 -0
  41. leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json +90 -0
  42. leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json +97 -0
  43. leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json +50 -0
  44. leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json +90 -0
  45. leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json +90 -0
  46. leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json +386 -0
  47. leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json +98 -0
  48. leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json +762 -0
  49. leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json +86 -0
  50. leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json +62 -0
.editorconfig ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # top-most EditorConfig file
2
+ root = true
3
+
4
+ # Unix-style newlines with a newline ending every file
5
+ [*]
6
+ end_of_line = lf
7
+ insert_final_newline = true
8
+
9
+ [*.py]
10
+ charset = utf-8
11
+ indent_style = space
12
+ indent_size = 4
.github/workflows/ci.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI for DGEB
2
+
3
+ on:
4
+ push:
5
+ branches: ["**"]
6
+ pull_request:
7
+ branches: ["**"]
8
+
9
+ permissions:
10
+ id-token: write
11
+ contents: read
12
+ actions: write
13
+ pull-requests: read
14
+
15
+ concurrency:
16
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ ruff:
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - uses: actions/checkout@v3
24
+ - uses: actions/setup-python@v4
25
+ with:
26
+ python-version: "3.11"
27
+ - uses: yezz123/setup-uv@v4
28
+ with:
29
+ uv-venv: ".geb_venv"
30
+ - run: uv pip install ruff
31
+ - run: ruff format .
32
+ - run: ruff check .
33
+ # TODO: pytest
34
+ # TODO: pyright
.github/workflows/release.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow will
2
+ # - Find the latest version tag based on the commit history
3
+ # - Create a git tag for the new version
4
+ # - Update the version number in pyproject.toml based on the commit history
5
+ # - Upload the package to PyPI
6
+ # - Create a release on GitHub
7
+
8
+ # This workflow required the following secrets to be set:
9
+ # - a GitHub personal access token with the `repo` scope called `RELEASE`
10
+ # - and that you setup trusted publishing using PyPI as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
11
+
12
+ name: Release
13
+ on:
14
+ push:
15
+ branches:
16
+ - main
17
+
18
+ jobs:
19
+ release:
20
+ runs-on: ubuntu-latest
21
+ concurrency: release
22
+ permissions:
23
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI
24
+ contents: write
25
+
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 0
30
+ token: ${{ secrets.GH_TOKEN }}
31
+
32
+ - name: Python Semantic Release
33
+ id: release
34
+ uses: python-semantic-release/[email protected]
35
+ with:
36
+ github_token: ${{ secrets.GH_TOKEN }}
37
+
38
+ - name: Publish package distributions to PyPI
39
+ uses: pypa/[email protected]
40
+ if: steps.release.outputs.released == 'true'
41
+ # This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually
42
+ # configured API token or username/password combination. To perform trusted publishing with this action, your project's
43
+ # publisher must already be configured on PyPI.
44
+
45
+ - name: Publish package distributions to GitHub Releases
46
+ uses: python-semantic-release/[email protected]
47
+ if: steps.release.outputs.released == 'true'
48
+ with:
49
+ github_token: ${{ secrets.GITHUB_TOKEN }}
50
+ tag: ${{ steps.release.outputs.tag }}
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv/
2
+ __pycache__/
3
+ .vscode/
4
+ build/
5
+ dist/
6
+ *egg-info/
CHANGELOG.md ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CHANGELOG
2
+
3
+ ## v0.0.10 (2024-07-09)
4
+
5
+ ### Fix
6
+
7
+ * fix: remove noop task ([`7d5b393`](https://github.com/TattaBio/DGEB/commit/7d5b3933f48e51fb4c71945f01af2cc5a7dba3ed))
8
+
9
+ ## v0.0.9 (2024-07-09)
10
+
11
+ ### Fix
12
+
13
+ * fix: update cli script name ([`633e14d`](https://github.com/TattaBio/DGEB/commit/633e14db7e1eed0d9606ef1097e369e4f5e245f5))
14
+
15
+ ### Unknown
16
+
17
+ * 0.0.9
18
+
19
+ Automatically generated by python-semantic-release [skip ci] ([`a8c1a96`](https://github.com/TattaBio/DGEB/commit/a8c1a96d18af589795bc9532fee8ad9764cd52ed))
20
+
21
+ * Merge pull request #9 from TattaBio/andre
22
+
23
+ Update ModAC main metric ([`3c67e65`](https://github.com/TattaBio/DGEB/commit/3c67e6559d0e49d90ffe2858eb9e287abd1b6e6c))
24
+
25
+ * ruff format ([`78461ac`](https://github.com/TattaBio/DGEB/commit/78461ac901b8617821ca15e543c0dd8e2dbf6e95))
26
+
27
+ * update top_k=50 for modac ([`2c3dcd5`](https://github.com/TattaBio/DGEB/commit/2c3dcd5856b6679a80999b3c4b3512876ac0b58d))
28
+
29
+ * remove revision ([`2d587da`](https://github.com/TattaBio/DGEB/commit/2d587daa79f32c49201b419892b7f95f3dc5eedb))
30
+
31
+ * Merge pull request #8 from TattaBio/cli
32
+
33
+ Cli & cleanup ([`9698c8f`](https://github.com/TattaBio/DGEB/commit/9698c8f5ab0bab6c3c0a76d59dc29cfd964ebf15))
34
+
35
+ * Exclude leaderboard files in anticipation of merging leaderboard PR ([`58bdcba`](https://github.com/TattaBio/DGEB/commit/58bdcba11af605bdef11cfecc087c9efb0e97b72))
36
+
37
+ * Update README ([`d323905`](https://github.com/TattaBio/DGEB/commit/d3239059e29fb149f9c348b951bc4988d8b9f8dc))
38
+
39
+ * cleanup ([`1f0fe16`](https://github.com/TattaBio/DGEB/commit/1f0fe16de6910200d88c918b08cbf26067313469))
40
+
41
+ * Add cli to pyproject.toml ([`5404218`](https://github.com/TattaBio/DGEB/commit/54042181ef54c11db74ebb53c403b21a8114c02b))
42
+
43
+ * Remove Dataset 'description' which does not exist on model. ([`46b0040`](https://github.com/TattaBio/DGEB/commit/46b0040a302384fa00791bbfdd6fae24645d6a6d))
44
+
45
+ * Merge pull request #7 from TattaBio/add_dna_tasks
46
+
47
+ Add dna tasks ([`cfc5799`](https://github.com/TattaBio/DGEB/commit/cfc57995f9b1e584bb60e998f9cf68bea5ec39fa))
48
+
49
+ * ruff ([`f9fa125`](https://github.com/TattaBio/DGEB/commit/f9fa12502df9837b5381da17b17198f3667c4911))
50
+
51
+ * adding rpob datasets and updating ec revision ([`8f9cc3f`](https://github.com/TattaBio/DGEB/commit/8f9cc3f819beb70f51a5cc59f16c65bffceedbad))
52
+
53
+ * Update README.md ([`d5d7c24`](https://github.com/TattaBio/DGEB/commit/d5d7c24215d347fc17d6016ac2a3eddfb3cf2a12))
54
+
55
+ * Merge pull request #4 from TattaBio/andre
56
+
57
+ Add dataset revisions ([`95b6f11`](https://github.com/TattaBio/DGEB/commit/95b6f11ffee3dccc45ab119ac4f602066750f7ef))
58
+
59
+ * add dataset revision numbers ([`7e069a2`](https://github.com/TattaBio/DGEB/commit/7e069a237de5391e7c6b7f09c108292ac10c25af))
60
+
61
+ * Merge pull request #3 from TattaBio/andre
62
+
63
+ Update readme and task imports ([`ade30a8`](https://github.com/TattaBio/DGEB/commit/ade30a856deffe35ddf57d16705d030b6d0192c8))
64
+
65
+ * rename dgeb ([`6b1c2ee`](https://github.com/TattaBio/DGEB/commit/6b1c2ee76798d89e487386116efe23c90d2d039c))
66
+
67
+ * add intro ([`a2280dd`](https://github.com/TattaBio/DGEB/commit/a2280dd732984d58caed45b9a429038c0d81851a))
68
+
69
+ * update readme and tasks ([`00e0a79`](https://github.com/TattaBio/DGEB/commit/00e0a791f070ca37e5b92770b3363ef066e2789f))
70
+
71
+ * Merge pull request #2 from TattaBio/andre
72
+
73
+ rename dgeb imports ([`1894ba9`](https://github.com/TattaBio/DGEB/commit/1894ba9a92a8f369053ddb9d351ae48fd8e2d674))
74
+
75
+ * rename dgeb imports ([`5f1f8b8`](https://github.com/TattaBio/DGEB/commit/5f1f8b850f271cd6785291e3feb2c2d4bf979f9c))
76
+
77
+ ## v0.0.8 (2024-07-01)
78
+
79
+ ### Fix
80
+
81
+ * fix: don't run ci on release of new version ([`fa97104`](https://github.com/TattaBio/DGEB/commit/fa971049429975d06c8aca086e86b19d92383969))
82
+
83
+ ### Unknown
84
+
85
+ * 0.0.8
86
+
87
+ Automatically generated by python-semantic-release [skip ci] ([`8dc15d3`](https://github.com/TattaBio/DGEB/commit/8dc15d34c6317087253950893974d16b9f75a17c))
88
+
89
+ ## v0.0.7 (2024-07-01)
90
+
91
+ ### Fix
92
+
93
+ * fix: try again ([`e7d0ecd`](https://github.com/TattaBio/DGEB/commit/e7d0ecdcb63e909f9ab727f11fb3fd57414d2fa5))
94
+
95
+ * fix: edit readme to see if job still works with restricted permissions ([`93cd728`](https://github.com/TattaBio/DGEB/commit/93cd728c8a632b9bed611c55dace2e2ffb103410))
96
+
97
+ ### Unknown
98
+
99
+ * 0.0.7
100
+
101
+ Automatically generated by python-semantic-release ([`9808d4f`](https://github.com/TattaBio/DGEB/commit/9808d4f328a577c066affd34d408ad26eb6098d0))
102
+
103
+ * Merge pull request #1 from TattaBio/edit-readme
104
+
105
+ fix: edit readme to see if job still works with restricted permissions ([`c45599c`](https://github.com/TattaBio/DGEB/commit/c45599cf9628155603245f906c09cf6483cffce8))
106
+
107
+ ## v0.0.6 (2024-07-01)
108
+
109
+ ### Fix
110
+
111
+ * fix: nevermind that broke it ([`ec33a1c`](https://github.com/TattaBio/DGEB/commit/ec33a1c6539ac1fb2710869a2d436483a02236e0))
112
+
113
+ * fix: see if I can remove this line ([`246d4e9`](https://github.com/TattaBio/DGEB/commit/246d4e9841a83d18217506d46f211f1341c63526))
114
+
115
+ ### Unknown
116
+
117
+ * 0.0.6
118
+
119
+ Automatically generated by python-semantic-release ([`1b28df5`](https://github.com/TattaBio/DGEB/commit/1b28df559c95db0aea95111a5f27d01645d23786))
120
+
121
+ ## v0.0.5 (2024-07-01)
122
+
123
+ ### Fix
124
+
125
+ * fix: try fixing release to handle protected branch ([`5cedad3`](https://github.com/TattaBio/DGEB/commit/5cedad3e9f34d249eda9257e3c21fc8443d000cf))
126
+
127
+ * fix: another change... ([`c5ad3f0`](https://github.com/TattaBio/DGEB/commit/c5ad3f098d36e25afdf4fa9aae20967eb968568e))
128
+
129
+ * fix: update pip install command in readme ([`ff90791`](https://github.com/TattaBio/DGEB/commit/ff90791398f9a9b907c308400f88811a8f8633dc))
130
+
131
+ ### Unknown
132
+
133
+ * 0.0.5
134
+
135
+ Automatically generated by python-semantic-release ([`ec24ca3`](https://github.com/TattaBio/DGEB/commit/ec24ca343b49bee85c72907554772976f02eab1a))
136
+
137
+ ## v0.0.4 (2024-07-01)
138
+
139
+ ### Fix
140
+
141
+ * fix: move gh token to env ([`95e292c`](https://github.com/TattaBio/DGEB/commit/95e292c46f7908659d46bc093ef4903609f1edc5))
142
+
143
+ ### Unknown
144
+
145
+ * 0.0.4
146
+
147
+ Automatically generated by python-semantic-release ([`03f3004`](https://github.com/TattaBio/DGEB/commit/03f300476b0aeca2796b780139fce0be037ae636))
148
+
149
+ ## v0.0.3 (2024-07-01)
150
+
151
+ ### Fix
152
+
153
+ * fix: remove persist credentials ([`2ae683e`](https://github.com/TattaBio/DGEB/commit/2ae683ed7a68b0559b81b1b7f5716636beef1415))
154
+
155
+ * fix: try to fix release CI ([`1dfc938`](https://github.com/TattaBio/DGEB/commit/1dfc9383b2dab8bba444b09c6b85500dadee7203))
156
+
157
+ ### Unknown
158
+
159
+ * 0.0.3
160
+
161
+ Automatically generated by python-semantic-release ([`7cbfc8d`](https://github.com/TattaBio/DGEB/commit/7cbfc8d0acef975d046ff485001ed289800d143f))
162
+
163
+ ## v0.0.2 (2024-07-01)
164
+
165
+ ### Fix
166
+
167
+ * fix: new repository name ([`8fc1145`](https://github.com/TattaBio/DGEB/commit/8fc1145985eab8aa97562f697edab45a30b189ba))
168
+
169
+ * fix: addl geb references ([`86a5af8`](https://github.com/TattaBio/DGEB/commit/86a5af8c24244ac8f2670801468e1a25b8e3e9df))
170
+
171
+ ### Unknown
172
+
173
+ * 0.0.2
174
+
175
+ Automatically generated by python-semantic-release ([`1c7b19b`](https://github.com/TattaBio/DGEB/commit/1c7b19b50597e9dabe07fbf7cb7d3c589438917a))
176
+
177
+ ## v0.0.1 (2024-07-01)
178
+
179
+ ### Fix
180
+
181
+ * fix: rename geb to dgeb ([`be712f8`](https://github.com/TattaBio/DGEB/commit/be712f8d19678801b9148ac8397f13afe826871b))
182
+
183
+ ### Unknown
184
+
185
+ * 0.0.1
186
+
187
+ Automatically generated by python-semantic-release ([`1503e03`](https://github.com/TattaBio/DGEB/commit/1503e030bb1277e1a2dcad7b99c9ed3472243f5d))
188
+
189
+ ## v0.0.0 (2024-07-01)
190
+
191
+ ### Unknown
192
+
193
+ * 0.0.0
194
+
195
+ Automatically generated by python-semantic-release ([`4b791ee`](https://github.com/TattaBio/DGEB/commit/4b791ee07085647427afec31a1adf61977e6bd4c))
196
+
197
+ * Initial commit ([`36fe62c`](https://github.com/TattaBio/DGEB/commit/36fe62c234331de97f2827a49bf62d5c35b92a1f))
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Docker file for leaderboard
2
+ FROM python:3.11-slim
3
+
4
+ WORKDIR /app
5
+
6
+ # install curl
7
+ RUN apt-get update && apt-get install -y curl
8
+ ADD https://astral.sh/uv/install.sh /install.sh
9
+ RUN chmod +x /install.sh
10
+ RUN /install.sh && rm /install.sh
11
+
12
+ # install deps
13
+ COPY leaderboard/requirements.txt ./
14
+ RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
15
+
16
+ # copy src
17
+ COPY dgeb dgeb
18
+ COPY leaderboard/ leaderboard/
19
+
20
+ # Run gradio when the container launches
21
+ EXPOSE 7860
22
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
23
+ ENV GRADIO_TEMP_DIR="/app"
24
+ WORKDIR /app/leaderboard
25
+ CMD ["python", "app.py"]
26
+
27
+
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DGEB
3
+ app_file: leaderboard/app.py
4
+ sdk: docker
5
+ sdk_version: 4.36.1
6
+ ---
7
+ <h1 align="center">Diverse Genomic Embedding Benchmark</h1>
8
+
9
+ <p align="center">
10
+ <a href="https://github.com/tattabio/dgeb/releases">
11
+ <img alt="GitHub release" src="https://img.shields.io/github/v/release/tattabio/dgeb.svg">
12
+ </a>
13
+ <a href="">
14
+ <img alt="arXiv URL" src="">
15
+ </a>
16
+ <a href="https://github.com/tattabio/dgeb/blob/main/LICENSE">
17
+ <img alt="License" src="https://img.shields.io/github/license/tattabio/dgeb.svg">
18
+ </a>
19
+ <a href="https://pepy.tech/project/dgeb">
20
+ <img alt="Downloads" src="https://static.pepy.tech/personalized-badge/dgeb?period=total&units=international_system&left_color=grey&right_color=orange&left_text=Downloads">
21
+ </a>
22
+ </p>
23
+
24
+ <h4 align="center">
25
+ <p>
26
+ <a href="#installation">Installation</a> |
27
+ <a href="#usage">Usage</a> |
28
+ <a href="https://huggingface.co/spaces/tattabio/DGEB">Leaderboard</a> |
29
+ <a href="#citing">Citing</a>
30
+ <p>
31
+ </h4>
32
+
33
+ <h3 align="center">
34
+ <a href="https://huggingface.co/spaces/dgeb"><img style="float: middle; padding: 10px 10px 10px 10px;" width="100" height="100" src="./docs/images/tatta_logo.png" /></a>
35
+ </h3>
36
+
37
+ DGEB is a benchmark for evaluating biological sequence models on functional and evolutionary information.
38
+
39
+ DGEB is designed to evaluate model embeddings using:
40
+
41
+ - Diverse sequences across the tree of life.
42
+ - Diverse tasks that capture different aspects of biological function.
43
+ - Both amino acid and nucleotide sequences.
44
+
45
+ The current version of DGEB consists of 18 datasets covering all three domains of life (Bacteria, Archaea and Eukarya). DGEB evaluates embeddings using six different embedding tasks: Classification, BiGene mining, Evolutionary Distance Similarity (EDS), Pair Classification, Clustering, and Retrieval.
46
+
47
+ We welcome contributions of new tasks and datasets.
48
+
49
+ ## Installation
50
+
51
+ Install DGEB using pip.
52
+
53
+ ```bash
54
+ pip install dgeb
55
+ ```
56
+
57
+ ## Usage
58
+
59
+ - Launch evaluation using the python script (see [cli.py](https://github.com/tattabio/dgeb/blob/main/dgeb/cli.py)):
60
+
61
+ ```bash
62
+ dgeb --model facebook/esm2_t6_8M_UR50D
63
+ ```
64
+
65
+ - To see all supported models and tasks:
66
+
67
+ ```bash
68
+ dgeb --help
69
+ ```
70
+
71
+ - Using the python API:
72
+
73
+ ```py
74
+ import dgeb
75
+
76
+ model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
77
+ tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
78
+ evaluation = dgeb.DGEB(tasks=tasks)
79
+ evaluation.run(model, output_folder="results")
80
+ ```
81
+
82
+ ### Using a custom model
83
+
84
+ Custom models should be wrapped with the `dgeb.models.BioSeqTransformer` abstract class, and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/dgeb/blob/main/dgeb/models.py) for additional examples on custom model loading and inference.
85
+
86
+ ```python
87
+ import dgeb
88
+ from dgeb.models import BioSeqTransformer
89
+ from dgeb.tasks.tasks import Modality
90
+
91
+ class MyModel(BioSeqTransformer):
92
+
93
+ @property
94
+ def modality(self) -> Modality:
95
+ return Modality.PROTEIN
96
+
97
+ @property
98
+ def num_layers(self) -> int:
99
+ return self.config.num_hidden_layers
100
+
101
+ @property
102
+ def embed_dim(self) -> int:
103
+ return self.config.hidden_size
104
+
105
+
106
+ model = MyModel(model_name='path_to/huggingface_model')
107
+ tasks = dgeb.get_tasks_by_modality(model.modality)
108
+ evaluation = dgeb.DGEB(tasks=tasks)
109
+ evaluation.run(model)
110
+ ```
111
+
112
+ ### Evaluating on a custom dataset
113
+
114
+ **We strongly encourage users to contribute their custom datasets to DGEB. Please open a PR adding your dataset so that the community can benefit!**
115
+
116
+ To evaluate on a custom dataset, first upload your dataset to the [Huggingface Hub](https://huggingface.co/docs/hub/en/datasets-adding). Then define a `Task` subclass with `TaskMetadata` that points to your huggingface dataset. For example, a classification task on a custom dataset can be defined as follows:
117
+
118
+ ```python
119
+ import dgeb
120
+ from dgeb.models import BioSeqTransformer
121
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
122
+ from dgeb.tasks.classification_tasks import run_classification_task
123
+
124
+ class MyCustomTask(Task):
125
+ metadata = TaskMetadata(
126
+ id="my_custom_classification",
127
+ display_name="...",
128
+ description="...",
129
+ type="classification",
130
+ modality=Modality.PROTEIN,
131
+ datasets=[
132
+ Dataset(
133
+ path="path_to/huggingface_dataset",
134
+ revision="...",
135
+ )
136
+ ],
137
+ primary_metric_id="f1",
138
+ )
139
+
140
+ def run(self, model: BioSeqTransformer) -> TaskResult:
141
+ return run_classification_task(model, self.metadata)
142
+
143
+ model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
144
+ evaluation = dgeb.DGEB(tasks=[MyCustomTask])
145
+ evaluation.run(model)
146
+ ```
147
+
148
+ ## Leaderboard
149
+
150
+ To add your submission to the DGEB leaderboard, proceed through the following instructions.
151
+
152
+ 1. Fork the DGEB repository by following GitHub's instruction [Forking Workflow](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
153
+
154
+ 2. Add your submission .json file to the leaderboard/submissions/<HF_MODEL_NAME>/ directory.
155
+
156
+ ```bash
157
+ mv /path/to/<SUBMISSION_FILE>.json /path/to/DGEB/leaderboard/submissions/<HF_MODEL_NAME>/
158
+ ```
159
+
160
+ 3. Update your fork with the new submission:
161
+
162
+ ```bash
163
+ git add leaderboard/submissions/<HF_MODEL_NAME>/<SUBMISSION_FILE>.json
164
+ git commit -m "Add submission for <HF_MODEL_NAME>"
165
+ git push
166
+ ```
167
+
168
+ 4. Open a pull request to the main branch of the repository via the GitHub interface.
169
+
170
+ 5. Once the PR is reviewed and merged, your submission will be added to the leaderboard!
171
+
172
+
173
+ ## Acknowledgements
174
+
175
+ DGEB follows the design of the text embedding benchmark [MTEB](https://github.com/embeddings-benchmark/mteb) developed by Huggingface 🤗. The evaluation code is adapted from the MTEB codebase.
176
+
177
+ ## Citing
178
+
179
+ DGEB was introduced in "[Diverse Genomic Embedding Benchmark for Functional Evaluation Across the Tree of Life]()", feel free to cite:
180
+
181
+ TODO
dgeb/__init__.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dgeb.dgeb import (
2
+ DGEB,
3
+ get_all_model_names,
4
+ get_all_task_names,
5
+ get_all_tasks,
6
+ get_model,
7
+ get_output_folder,
8
+ get_tasks_by_modality,
9
+ get_tasks_by_name,
10
+ )
11
+ from dgeb.modality import Modality
12
+ from dgeb.tasks.tasks import TaskResult
13
+
14
+ # importing without setting `__all__` produces a Ruff error:
15
+ # "imported but unused; consider removing, adding to __all__, or using a redundant alias RuffF401"
16
+ # See https://docs.astral.sh/ruff/rules/unused-import/#why-is-this-bad
17
+ __all__ = [
18
+ "DGEB",
19
+ "get_all_tasks",
20
+ "get_all_task_names",
21
+ "get_tasks_by_name",
22
+ "get_tasks_by_modality",
23
+ "get_all_model_names",
24
+ "get_model",
25
+ "get_output_folder",
26
+ "TaskResult",
27
+ "Modality",
28
+ ]
dgeb/cli.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main command to run diverse genomic embedding benchmarks (DGEB) on a model.
3
+ example command to run DGEB:
4
+ python run_dgeb.py -m facebook/esm2_t6_8M_UR50D
5
+ """
6
+
7
+ import argparse
8
+ import logging
9
+ import os
10
+
11
+ import dgeb
12
+
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
+ ALL_TASK_NAMES = dgeb.get_all_task_names()
17
+ ALL_MODEL_NAMES = dgeb.get_all_model_names()
18
+
19
+
20
def main():
    """Parse CLI arguments, load the requested model, and run DGEB on it.

    Raises:
        ValueError: If no model is given, or if --devices/--layers are not
            comma-separated integers (or 'mid'/'last' for layers).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        default=None,
        help=f"Model to evaluate. Choose from {ALL_MODEL_NAMES}",
    )
    parser.add_argument(
        "-t",
        "--tasks",
        type=lambda s: [item for item in s.split(",")],
        default=None,
        help=f"Comma separated tasks to evaluate on. Choose from {ALL_TASK_NAMES} or do not specify to evaluate on all tasks",
    )
    parser.add_argument(
        "-l",
        "--layers",
        type=str,
        default=None,
        help="Layer to evaluate. Comma separated list of integers or 'mid' and 'last'. Default is 'mid,last'",
    )
    parser.add_argument(
        "--devices",
        type=str,
        default="0",
        help="Comma separated list of GPU device ids to use. Default is 0 (if GPUs are detected).",
    )
    parser.add_argument(
        "--output_folder",
        type=str,
        default=None,
        help="Output directory for results. Will default to results/model_name if not set.",
    )
    parser.add_argument(
        "-v", "--verbosity", type=int, default=2, help="Verbosity level"
    )
    parser.add_argument(
        "-b", "--batch_size", type=int, default=64, help="Batch size for evaluation"
    )
    parser.add_argument(
        "--max_seq_len",
        type=int,
        default=1024,
        help="Maximum sequence length for model, default is 1024.",
    )
    parser.add_argument(
        "--pool_type",
        type=str,
        default="mean",
        help="Pooling type for model, choose from mean, max, cls, last. Default is mean.",
    )

    args = parser.parse_args()

    # Bug fix: configure the "dgeb" package root logger so every dgeb.* module
    # logger (created via logging.getLogger(__name__)) inherits the level.
    # The original configured an unused "geb" logger, so -v had no effect.
    verbosity_to_level = {
        0: logging.CRITICAL,
        1: logging.WARNING,
        2: logging.INFO,
        3: logging.DEBUG,
    }
    if args.verbosity in verbosity_to_level:
        logging.getLogger("dgeb").setLevel(verbosity_to_level[args.verbosity])

    if args.model is None:
        raise ValueError("Please specify a model using the -m or --model argument")

    # Devices must be a comma separated list of integers.
    try:
        devices = [int(device) for device in args.devices.split(",")]
    except ValueError as e:
        raise ValueError("Devices must be comma separated list of integers") from e

    layers = args.layers
    if layers:
        if layers not in ["mid", "last"]:
            # Layers should be a list of integers.
            try:
                layers = [int(layer) for layer in layers.split(",")]
            except ValueError as e:
                raise ValueError("Layers must be a list of integers.") from e

    model_name = args.model.split("/")[-1]
    # Base directory under which results are written; DGEB.run() appends
    # the model name and "<task_id>.json" to this path.
    base_output = args.output_folder if args.output_folder is not None else "results"
    results_dir = os.path.join(base_output, model_name)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    logger.info(f"Results will be saved to {results_dir}")

    # Load the model by name.
    model = dgeb.get_model(
        model_name=args.model,
        layers=layers,
        devices=devices,
        max_seq_length=args.max_seq_len,
        batch_size=args.batch_size,
        pool_type=args.pool_type,
    )

    all_tasks_for_modality = dgeb.get_tasks_by_modality(model.modality)

    if args.tasks:
        task_list = dgeb.get_tasks_by_name(args.tasks)
        if not all(task.metadata.modality == model.modality for task in task_list):
            raise ValueError(f"Tasks must be one of {all_tasks_for_modality}")
    else:
        task_list = all_tasks_for_modality
    evaluation = dgeb.DGEB(tasks=task_list)
    # Bug fix: forward the chosen base output folder; previously --output_folder
    # was ignored and results always went to the default "results" directory.
    _ = evaluation.run(model, output_folder=base_output)


if __name__ == "__main__":
    main()
dgeb/dgeb.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import traceback
4
+ from itertools import chain
5
+ from typing import Any, List
6
+
7
+ from rich.console import Console
8
+
9
+ from .eval_utils import set_all_seeds
10
+ from .modality import Modality
11
+ from .models import BioSeqTransformer
12
+ from .tasks.tasks import Task
13
+
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class DGEB:
    """Runs the DGEB evaluation pipeline over a list of Task classes.

    Tasks are passed as classes (not instances) and instantiated lazily in
    :meth:`run`; the global RNGs are seeded once at construction time.
    """

    def __init__(self, tasks: List[type[Task]], seed: int = 42):
        # Task classes to evaluate; instantiated one at a time in run().
        self.tasks = tasks
        set_all_seeds(seed)

    def print_selected_tasks(self):
        """Print the selected tasks."""
        console = Console()
        console.rule("[bold]Selected Tasks\n", style="grey15")
        for task in self.tasks:
            prefix = " - "
            name = f"{task.metadata.display_name}"
            category = f", [italic grey39]{task.metadata.type}[/]"
            console.print(f"{prefix}{name}{category}")
        console.print("\n")

    def run(
        self,
        model,  # expected to be a BioSeqTransformer-style encoder with .hf_name
        output_folder: str = "results",
    ):
        """Run the evaluation pipeline on the selected tasks.

        Args:
            model: Model to be used for evaluation.
            output_folder: Folder where the results will be saved. Defaults to
                'results'. Each result is written to
                `{output_folder}/{model_name}/{task_id}.json`
                (see `get_output_folder` below).

        Returns:
            A list of TaskResult objects, one for each task that completed
            successfully (failed tasks are logged and skipped).
        """
        # Run selected tasks
        self.print_selected_tasks()
        results = []

        for task in self.tasks:
            logger.info(
                f"\n\n********************** Evaluating {task.metadata.display_name} **********************"
            )

            # A failing task is logged and skipped so one bad task does not
            # abort the whole benchmark run.
            try:
                result = task().run(model)
            except Exception as e:
                logger.error(e)
                logger.error(traceback.format_exc())
                logger.error(f"Error running task {task}")
                continue

            results.append(result)

            # Persist each task's result as soon as it finishes.
            save_path = get_output_folder(model.hf_name, task, output_folder)
            with open(save_path, "w") as f_out:
                f_out.write(result.model_dump_json(indent=2))
        return results
74
+
75
+
76
def get_model(model_name: str, **kwargs: Any) -> BioSeqTransformer:
    """Instantiate the model registered under `model_name`.

    Args:
        model_name: A name listed in some BioSeqTransformer subclass's MODEL_NAMES.
        **kwargs: Forwarded to the subclass constructor (layers, devices, ...).

    Returns:
        An *instance* of the matching BioSeqTransformer subclass.
        (Bug fix: the annotation previously said ``type[BioSeqTransformer]``,
        but the function returns ``cls(model_name, **kwargs)``, an instance.)

    Raises:
        ValueError: If no registered subclass supports `model_name`.
    """
    all_names = get_all_model_names()
    for cls in BioSeqTransformer.__subclasses__():
        if model_name in cls.MODEL_NAMES:
            return cls(model_name, **kwargs)
    raise ValueError(f"Model {model_name} not found in {all_names}.")
82
+
83
+
84
def get_all_model_names() -> List[str]:
    """Collect every model name supported by any BioSeqTransformer subclass."""
    names: List[str] = []
    for cls in BioSeqTransformer.__subclasses__():
        names.extend(cls.MODEL_NAMES)
    return names
90
+
91
+
92
def get_all_task_names() -> List[str]:
    """Return the metadata id of every registered task."""
    names = []
    for task in get_all_tasks():
        names.append(task.metadata.id)
    return names
94
+
95
+
96
def get_tasks_by_name(tasks: List[str]) -> List[type[Task]]:
    """Resolve a list of task id strings to their task classes."""
    return list(map(_get_task, tasks))
98
+
99
+
100
def get_tasks_by_modality(modality: Modality) -> List[type[Task]]:
    """Return all registered task classes whose metadata matches `modality`."""
    matching = []
    for task in get_all_tasks():
        if task.metadata.modality == modality:
            matching.append(task)
    return matching
102
+
103
+
104
def get_all_tasks() -> List[type[Task]]:
    """Every registered task class, i.e. all direct subclasses of Task."""
    return [*Task.__subclasses__()]
106
+
107
+
108
def _get_task(task_name: str) -> type[Task]:
    """Resolve one task id to its class; raise ValueError if unknown."""
    logger.info(f"Getting task {task_name}")
    found = next(
        (task for task in get_all_tasks() if task.metadata.id == task_name), None
    )
    if found is not None:
        return found

    raise ValueError(
        f"Task {task_name} not found, available tasks are: {[task.metadata.id for task in get_all_tasks()]}"
    )
117
+
118
+
119
def get_output_folder(
    model_hf_name: str, task: type[Task], output_folder: str, create: bool = True
):
    """Build (and optionally create) the path of a model's per-task result file.

    Returns `{output_folder}/{basename(model_hf_name)}/{task_id}.json`.
    """
    model_dir = os.path.join(output_folder, os.path.basename(model_hf_name))
    # Ensure the per-model directory exists before the caller writes into it.
    if create and not os.path.exists(model_dir):
        os.makedirs(model_dir)
    return os.path.join(model_dir, f"{task.metadata.id}.json")
dgeb/eval_utils.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for evaluation."""
2
+
3
+ from typing import Any, Dict, List, Tuple
4
+ import json
5
+ import torch
6
+ import random
7
+ import numpy as np
8
+ from sklearn.metrics import auc
9
+
10
+
11
class ForwardHook:
    """Registers a forward hook on a module and keeps its most recent output."""

    def __init__(self, module: torch.nn.Module):
        # `output` holds the latest forward result; None until the first call.
        self.output = None
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        # Invoked by PyTorch after every forward pass of the hooked module.
        self.output = output

    def close(self):
        """Detach the hook from the module."""
        self.hook.remove()
23
+
24
+
25
def pool(
    last_hidden_states: torch.Tensor, attention_mask: torch.Tensor, pool_type: str
) -> torch.Tensor:
    """Collapse per-token embeddings into one vector per sequence.

    Padded positions (attention_mask == 0) are zeroed out first so they never
    contribute to the pooled value.
    """
    assert (
        last_hidden_states.ndim == 3
    ), f"Expected hidden_states to have shape [batch, seq_len, D], got shape: {last_hidden_states.shape}"
    assert (
        attention_mask.ndim == 2
    ), f"Expected attention_mask to have shape [batch, seq_len], got shape: {attention_mask.shape}"
    mask = attention_mask[..., None].bool()
    hidden = last_hidden_states.masked_fill(~mask, 0.0)
    if pool_type == "mean":
        # Divide by the true token count, not the padded length.
        return hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
    if pool_type == "max":
        # NOTE(review): masked positions were zeroed, so max pooling is floored
        # at 0 for all-negative features — preserved from the original.
        return hidden.max(dim=1)[0]
    if pool_type == "cls":
        return hidden[:, 0]
    if pool_type == "last":
        last_token_idx = attention_mask.sum(1) - 1
        return hidden[torch.arange(hidden.size(0)), last_token_idx]
    raise ValueError(f"pool_type {pool_type} not supported")
47
+
48
+
49
def set_all_seeds(seed):
    """Seed every RNG used by the benchmark (python, numpy, torch CPU/CUDA)."""
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    # Force deterministic cuDNN kernels as well.
    torch.backends.cudnn.deterministic = True
55
+
56
+
57
def write_results_to_json(results: Dict[str, Any], results_path: str):
    """Serialize a results dict to `results_path` as indented JSON."""
    with open(results_path, "w") as handle:
        json.dump(results, handle, indent=4)
61
+
62
+
63
def merge_split_elem_embeds(ids, embeds, preserve_order: bool = False):
    """Mean-pool embeddings that share an id, optionally keeping input order.

    Args:
        ids: Array of string ids, [batch].
        embeds: Array of embeddings, [batch, ...].
        preserve_order: If True, order the output by each id's first appearance
            in `ids` instead of sorted-unique order.

    Returns:
        ids: Unique ids, [unique_batch].
        embeds: Mean-pooled embeddings, [unique_batch, ...].
    """
    unique_ids, inverse = np.unique(ids, return_inverse=True)
    trailing = embeds.shape[1:]
    # Scatter-add each row into its id's slot, then divide by the group size.
    totals = np.zeros((unique_ids.size,) + trailing, dtype=embeds.dtype)
    np.add.at(totals, inverse, embeds)
    counts = np.bincount(inverse, minlength=unique_ids.size)
    counts = counts.reshape((-1,) + (1,) * len(trailing))
    mean_pooled = totals / counts
    if preserve_order:
        # Rank unique ids by where they first occur in the input.
        first_seen = [np.where(ids == uid)[0][0] for uid in unique_ids]
        order = np.argsort(first_seen)
        unique_ids = unique_ids[order]
        mean_pooled = mean_pooled[order]
    return unique_ids, mean_pooled
92
+
93
+
94
def paired_dataset(labels, embeds):
    """Pair each embedding with its successor for consecutive operonic gene pairs.

    The label of a pair is the label of its first element; the final element
    has no successor and is dropped from the label list.
    """
    return embeds[:-1], embeds[1:], labels[:-1]
100
+
101
+
102
def cos_sim(a, b):
    """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.

    Return:
        Matrix with res[i][j] = cos_sim(a[i], b[j])
    """  # noqa: D402
    normalized = []
    for x in (a, b):
        # Accept lists/arrays and promote 1-D inputs to a single-row matrix.
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        normalized.append(torch.nn.functional.normalize(x, p=2, dim=1))
    return torch.mm(normalized[0], normalized[1].transpose(0, 1))
123
+
124
+
125
def dot_score(a: torch.Tensor, b: torch.Tensor):
    """Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
    :return: Matrix with res[i][j] = dot_prod(a[i], b[j])
    """
    mats = []
    for x in (a, b):
        # Accept lists/arrays and promote 1-D inputs to a single-row matrix.
        if not isinstance(x, torch.Tensor):
            x = torch.tensor(x)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        mats.append(x)
    return mats[0] @ mats[1].transpose(0, 1)
142
+
143
+
144
+ # From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4
145
def mrr(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    """Mean reciprocal rank at each cutoff in `k_values`.

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute MRR.
        output_type: "mean" averages over all queries; "all" keeps per-query lists.
    """
    MRR = {f"MRR@{k}": [] for k in k_values}
    k_max = max(k_values)

    for query_id, doc_scores in results.items():
        # Rank this query's documents by score, keeping only the top k_max.
        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
            :k_max
        ]
        relevant = {
            doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
        }
        for k in k_values:
            rr = 0
            for rank, (doc_id, _score) in enumerate(ranked[:k]):
                if doc_id in relevant:
                    # Reciprocal rank of the first relevant hit within the top k.
                    rr = 1.0 / (rank + 1)
                    break
            MRR[f"MRR@{k}"].append(rr)

    if output_type == "mean":
        for k in k_values:
            MRR[f"MRR@{k}"] = round(sum(MRR[f"MRR@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return MRR
183
+
184
+
185
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
186
def recall_cap(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    """Capped recall at k: retrieved relevant docs over min(#relevant, k).

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute capped recall.
        output_type: "mean" averages over all queries; "all" keeps per-query lists.
    """
    capped_recall = {f"R_cap@{k}": [] for k in k_values}
    k_max = max(k_values)

    for query_id, doc_scores in results.items():
        ranked = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
            :k_max
        ]
        num_relevant = sum(
            1 for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
        )
        for k in k_values:
            hits = [
                doc_id
                for doc_id, _score in ranked[:k]
                if qrels[query_id].get(doc_id, 0) > 0
            ]
            # Cap the denominator at k so a query with many relevant docs can
            # still reach a recall of 1.0 within the top k.
            denominator = min(num_relevant, k)
            capped_recall[f"R_cap@{k}"].append(len(hits) / denominator)

    if output_type == "mean":
        for k in k_values:
            capped_recall[f"R_cap@{k}"] = round(
                sum(capped_recall[f"R_cap@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return capped_recall
223
+
224
+
225
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
226
def hole(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Dict[str, float]:
    """Hole@k: fraction of top-k retrieved docs absent from all qrels annotations.

    Args:
        qrels: query_id -> {doc_id: relevance judgement}.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute the hole rate.
        output_type: "mean" averages over queries; "all" keeps per-query lists.

    Returns:
        Mapping "Hole@k" -> mean score (or per-query list for "all").
    """
    Hole = {f"Hole@{k}": [] for k in k_values}

    # Every document judged for any query (positively or negatively).
    annotated_corpus = set()
    for _, docs in qrels.items():
        for doc_id, score in docs.items():
            annotated_corpus.add(doc_id)

    k_max = max(k_values)

    for _, scores in results.items():
        top_hits = sorted(scores.items(), key=lambda item: item[1], reverse=True)[
            0:k_max
        ]
        for k in k_values:
            hole_docs = [
                row[0] for row in top_hits[0:k] if row[0] not in annotated_corpus
            ]
            Hole[f"Hole@{k}"].append(len(hole_docs) / k)

    if output_type == "mean":
        for k in k_values:
            # Bug fix: the per-query values accumulate in a list, so they must be
            # summed before dividing. The original divided the list itself by
            # len(qrels), raising a TypeError whenever output_type == "mean"
            # (compare mrr(), which correctly uses sum()).
            Hole[f"Hole@{k}"] = round(sum(Hole[f"Hole@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return Hole
262
+
263
+
264
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
265
def top_k_accuracy(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Dict[str, float]:
    """Accuracy@k: fraction of queries with >= 1 relevant doc in the top k.

    Args:
        qrels: query_id -> {doc_id: relevance}; relevance > 0 marks relevant docs.
        results: query_id -> {doc_id: retrieval score}, higher is better.
        k_values: Cutoffs at which to compute accuracy.
        output_type: "mean" averages over queries; "all" keeps per-query lists.

    Returns:
        Mapping "Accuracy@k" -> mean score (or per-query hit list for "all").
    """
    top_k_acc = {f"Accuracy@{k}": [] for k in k_values}

    k_max, top_hits = max(k_values), {}

    # Keep only each query's k_max highest-scoring doc ids, best first.
    for query_id, doc_scores in results.items():
        top_hits[query_id] = [
            item[0]
            for item in sorted(
                doc_scores.items(), key=lambda item: item[1], reverse=True
            )[0:k_max]
        ]

    for query_id in top_hits:
        query_relevant_docs = set(
            [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0]
        )
        for k in k_values:
            # Record a hit (1.0) for this query if any relevant doc is in top k;
            # misses contribute nothing, so the mean below is the hit rate.
            for relevant_doc_id in query_relevant_docs:
                if relevant_doc_id in top_hits[query_id][0:k]:
                    top_k_acc[f"Accuracy@{k}"].append(1.0)
                    break

    if output_type == "mean":
        for k in k_values:
            # Bug fix: sum the per-query hit list before dividing. The original
            # divided the list itself by len(qrels), raising a TypeError
            # whenever output_type == "mean" (compare mrr(), which uses sum()).
            top_k_acc[f"Accuracy@{k}"] = round(
                sum(top_k_acc[f"Accuracy@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return top_k_acc
306
+
307
+
308
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
309
def confidence_scores(sim_scores: List[float]) -> Dict[str, float]:
    """Computes confidence scores for a single instance = (query, positives, negatives)

    Args:
        sim_scores: Query-documents similarity scores with length `num_pos+num_neg`
            (must be non-empty).

    Returns:
        conf_scores:
            - `max`: Maximum similarity score
            - `std`: Standard deviation of similarity scores
            - `diff1`: Difference between highest and second highest similarity
              scores (0.0 when only one score is given)
    """
    # Idiomatic descending sort (the original used sorted(...)[::-1]).
    ranked = sorted(sim_scores, reverse=True)

    cs_max = ranked[0]
    cs_std = np.std(sim_scores)
    # Bug fix: the original left cs_diff1 unbound for an empty input; a single
    # conditional expression covers both branches and keeps n == 1 -> 0.0.
    cs_diff1 = ranked[0] - ranked[1] if len(ranked) > 1 else 0.0

    return {"max": cs_max, "std": cs_std, "diff1": cs_diff1}
333
+
334
+
335
+ # From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
336
def nAUC(
    conf_scores: np.ndarray,
    metrics: np.ndarray,
    abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
) -> float:
    """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997
    1/ Computes the raw abstention curve, i.e., the average evaluation metric at different abstention rates determined by the confidence scores
    2/ Computes the oracle abstention curve, i.e., the best theoretical abstention curve (e.g.: at a 10% abstention rate, the oracle abstains on the bottom-10% instances with regard to the evaluation metric)
    3/ Computes the flat abstention curve, i.e., the one remains flat for all abstention rates (ineffective abstention)
    4/ Computes the area under the three curves
    5/ Finally scales the raw AUC between the oracle and the flat AUCs to get normalized AUC

    Args:
        conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
        metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
        abstention_rates: Target rates for the computation of the abstention curve

    Returns:
        abst_nauc: Normalized area under the abstention curve (upper-bounded by 1)
    """

    def abstention_curve(
        conf_scores: np.ndarray,
        metrics: np.ndarray,
        abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
    ) -> np.ndarray:
        """Computes the raw abstention curve for a given set of evaluated instances and corresponding confidence scores

        Args:
            conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
            metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
            abstention_rates: Target rates for the computation of the abstention curve

        Returns:
            abst_curve: Abstention curve of length `len(abstention_rates)`
        """
        # Sort instances from least to most confident; abstaining drops the
        # lowest-confidence prefix of this ordering.
        conf_scores_argsort = np.argsort(conf_scores)
        abst_curve = np.zeros(len(abstention_rates))

        for i, rate in enumerate(abstention_rates):
            # Cap the abstention count so at least one instance always remains.
            num_instances_abst = min(
                round(rate * len(conf_scores_argsort)), len(conf_scores) - 1
            )
            abst_curve[i] = metrics[conf_scores_argsort[num_instances_abst:]].mean()

        return abst_curve

    abst_curve = abstention_curve(conf_scores, metrics, abstention_rates)
    # Oracle curve: abstain using the metric itself as the confidence signal.
    or_curve = abstention_curve(metrics, metrics, abstention_rates)
    abst_auc = auc(abstention_rates, abst_curve)
    or_auc = auc(abstention_rates, or_curve)
    # Flat curve area: the no-abstention metric held constant over all rates.
    flat_auc = or_curve[0] * (abstention_rates[-1] - abstention_rates[0])

    if or_auc == flat_auc:
        # Degenerate case: the oracle cannot beat no-abstention, so the
        # normalization denominator is zero and nAUC is undefined.
        abst_nauc = np.nan
    else:
        abst_nauc = (abst_auc - flat_auc) / (or_auc - flat_auc)

    return abst_nauc
dgeb/evaluators.py ADDED
@@ -0,0 +1,839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evaluator objects for different evaluation types.
3
+ """
4
+
5
+ import logging
6
+ import random
7
+ from abc import ABC, abstractmethod
8
+ import heapq
9
+ from collections import defaultdict
10
+ import pytrec_eval
11
+ import numpy as np
12
+ import sklearn.cluster
13
+ import torch
14
+ from scipy.stats import pearsonr
15
+ from sklearn.linear_model import LogisticRegression
16
+ from sklearn.metrics import (
17
+ accuracy_score,
18
+ average_precision_score,
19
+ classification_report,
20
+ f1_score,
21
+ precision_score,
22
+ recall_score,
23
+ label_ranking_average_precision_score,
24
+ )
25
+ from sklearn.metrics.cluster import v_measure_score
26
+ from sklearn.metrics.pairwise import (
27
+ paired_cosine_distances,
28
+ paired_euclidean_distances,
29
+ paired_manhattan_distances,
30
+ )
31
+ from sklearn.multioutput import MultiOutputRegressor
32
+ from sklearn.preprocessing import MultiLabelBinarizer
33
+ from typing import Dict, List, Tuple
34
+
35
+ from .eval_utils import (
36
+ cos_sim,
37
+ dot_score,
38
+ mrr,
39
+ recall_cap,
40
+ hole,
41
+ confidence_scores,
42
+ nAUC,
43
+ top_k_accuracy,
44
+ )
45
+
46
+
47
class Evaluator(ABC):
    """Base class for all evaluators
    Extend this class and implement __call__ for custom evaluators.
    """

    def __init__(self, seed=42, **kwargs):
        # Seed every RNG up front so each evaluator run is reproducible.
        self.seed = seed
        for seed_fn in (
            random.seed,
            np.random.seed,
            torch.manual_seed,
            torch.cuda.manual_seed_all,
        ):
            seed_fn(self.seed)

    @abstractmethod
    def __call__(self, model):
        """This is called during training to evaluate the model.
        It returns scores.

        Parameters
        ----------
        model:
            the model to evaluate
        """
        pass
70
+
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+
75
class logRegClassificationEvaluator(Evaluator):
    """Fits a logistic-regression probe on train embeddings and scores it on test embeddings."""

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.max_iter = max_iter

    def __call__(self):
        """Train the probe and report accuracy / macro-F1 (plus AP for binary labels)."""
        classifier = LogisticRegression(
            random_state=self.seed,
            n_jobs=-1,
            max_iter=self.max_iter,
            verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0,
        )
        logger.info(f"Encoding {len(self.embeds_train)} training embeds...")
        X_train = np.asarray(self.embeds_train)

        logger.info(f"Encoding {len(self.embeds_test)} test embeds...")
        X_test = np.asarray(self.embeds_test)
        logger.info("Fitting logistic regression classifier...")
        classifier.fit(X_train, self.y_train)
        logger.info("Evaluating...")
        y_pred = classifier.predict(X_test)
        scores = {
            "accuracy": accuracy_score(self.y_test, y_pred),
            "f1": f1_score(self.y_test, y_pred, average="macro"),
        }

        # For binary problems also report average precision.
        # NOTE(review): AP is computed from hard predictions rather than
        # decision scores — preserved from the original implementation.
        if len(np.unique(self.y_train)) == 2:
            scores["ap"] = average_precision_score(self.y_test, y_pred)

        return scores
121
+
122
+
123
class ClusteringEvaluator(Evaluator):
    """Clusters embeddings with mini-batch k-means and scores V-measure against labels."""

    def __init__(
        self,
        embeds,
        labels,
        clustering_batch_size=500,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds = embeds
        self.labels = labels
        self.clustering_batch_size = clustering_batch_size

    def __call__(self):
        logger.info(f"Encoding {len(self.embeds)} embeds...")
        corpus_embeddings = np.asarray(self.embeds)

        # One cluster per distinct gold label.
        logger.info("Fitting Mini-Batch K-Means model...")
        kmeans = sklearn.cluster.MiniBatchKMeans(
            n_clusters=len(set(self.labels)),
            batch_size=self.clustering_batch_size,
            n_init="auto",
        )
        kmeans.fit(corpus_embeddings)

        logger.info("Evaluating...")
        # V-measure is invariant to the arbitrary permutation of cluster ids.
        return {"v_measure": v_measure_score(self.labels, kmeans.labels_)}
153
+
154
+
155
class PairClassificationEvaluator(Evaluator):
    """Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and
    dissimilar embeds.
    The metrics are the cosine similarity as well as euclidean and Manhattan distance
    The returned score is the accuracy with a specified metric.
    The results are written in a CSV. If a CSV already exists, then values are appended.
    The labels need to be 0 for dissimilar pairs and 1 for similar pairs.
    :param embeds1: The first column of embeds
    :param embeds2: The second column of embeds
    :param labels: labels[i] is the label for the pair (embeds1[i], embeds2[i]). Must be 0 or 1
    :param name: Name for the output
    :param write_csv: Write results to a CSV file
    """

    def __init__(self, embeds1, embeds2, labels, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.labels = labels

        # Fail fast on malformed input: the two embedding columns and the
        # label column must align, and labels must be strictly binary.
        assert len(self.embeds1) == len(self.embeds2)
        assert len(self.embeds1) == len(self.labels)
        for label in labels:
            assert label == 0 or label == 1

    def __call__(self):
        scores = self.compute_metrics()
        # Compute the max of Average Precision (AP) over all distance metrics.
        top_ap_score = max(score for k, score in scores.items() if k.endswith("_ap"))
        scores["top_ap"] = top_ap_score
        return scores

    def compute_metrics(self):
        """Compute accuracy/F1/AP for each of four pairwise similarity measures.

        Returns:
            `dict`: keys are "<measure>_<metric>" (e.g. "cos_sim_ap").
        """
        embeddings1 = np.asarray(self.embeds1)
        embeddings2 = np.asarray(self.embeds2)

        logger.info("Computing similarity distances...")
        cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2)
        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)

        # Row-wise dot products, vectorized. The previous Python-level loop
        # produced a plain list, which interacted badly with `ap_score`'s
        # sign flip (list * -1 == []).
        dot_scores = np.einsum("ij,ij->i", embeddings1, embeddings2)

        logger.info("Computing metrics...")
        labels = np.asarray(self.labels)
        output_scores = {}
        # `reverse` is True when a *higher* value means *more* similar.
        for short_name, name, scores, reverse in [
            ["cos_sim", "Cosine-Similarity", cosine_scores, True],
            ["manhattan", "Manhattan-Distance", manhattan_distances, False],
            ["euclidean", "Euclidean-Distance", euclidean_distances, False],
            ["dot", "Dot-Product", dot_scores, True],
        ]:
            metrics = self._compute_metrics(scores, labels, reverse)
            metrics = {short_name + "_" + k: v for k, v in metrics.items()}
            output_scores.update(metrics)

        return output_scores

    @staticmethod
    def _compute_metrics(scores, labels, high_score_more_similar):
        """Compute the metrics for the given scores and labels.

        Args:
            scores (`np.ndarray` of shape (n_pairs, )): The similarity/dissimilarity scores for the pairs.
            labels (`np.ndarray` of shape (n_pairs, )): The labels for the pairs.
            high_score_more_similar (`bool`): If true, then the higher the score, the more similar the pairs are.

        Returns:
            `dict`: The metrics for the given scores and labels.
        """
        acc, acc_threshold = PairClassificationEvaluator.find_best_acc_and_threshold(
            scores, labels, high_score_more_similar
        )
        f1, precision, recall, f1_threshold = (
            PairClassificationEvaluator.find_best_f1_and_threshold(
                scores, labels, high_score_more_similar
            )
        )
        ap = PairClassificationEvaluator.ap_score(
            scores, labels, high_score_more_similar
        )

        return {
            "accuracy": acc,
            "accuracy_threshold": acc_threshold,
            "f1": f1,
            "f1_threshold": f1_threshold,
            "precision": precision,
            "recall": recall,
            "ap": ap,
        }

    @staticmethod
    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
        """Sweep all candidate thresholds and return (best_accuracy, threshold).

        Pairs are sorted from most- to least-similar; each split point between
        consecutive scores is a candidate threshold, scored as
        (positives above + negatives below) / n.
        """
        assert len(scores) == len(labels)
        rows = list(zip(scores, labels))
        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        max_acc = 0
        best_threshold = -1

        positive_so_far = 0
        remaining_negatives = sum(np.array(labels) == 0)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            if label == 1:
                positive_so_far += 1
            else:
                remaining_negatives -= 1

            acc = (positive_so_far + remaining_negatives) / len(labels)
            if acc > max_acc:
                max_acc = acc
                # Threshold is the midpoint between adjacent sorted scores.
                best_threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return max_acc, best_threshold

    @staticmethod
    def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool):
        """Sweep thresholds and return (best_f1, precision, recall, threshold).

        Precision/recall returned are those at the F1-maximizing threshold.
        """
        assert len(scores) == len(labels)

        scores = np.asarray(scores)
        labels = np.asarray(labels)

        rows = list(zip(scores, labels))
        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        best_f1 = best_precision = best_recall = 0
        threshold = 0
        nextract = 0  # pairs predicted positive so far
        ncorrect = 0  # true positives so far
        total_num_duplicates = sum(labels)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            nextract += 1

            if label == 1:
                ncorrect += 1

            if ncorrect > 0:
                precision = ncorrect / nextract
                recall = ncorrect / total_num_duplicates
                f1 = 2 * precision * recall / (precision + recall)
                if f1 > best_f1:
                    best_f1 = f1
                    best_precision = precision
                    best_recall = recall
                    threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return best_f1, best_precision, best_recall, threshold

    @staticmethod
    def ap_score(scores, labels, high_score_more_similar: bool):
        """Average precision, negating scores when a smaller value means more similar.

        Bug fix: coerce `scores` to an ndarray first. If `scores` were a plain
        Python list (as the old dot-product path produced), `scores * -1`
        would silently evaluate to an empty list rather than a negation.
        """
        scores = np.asarray(scores)
        return average_precision_score(
            labels, scores * (1 if high_score_more_similar else -1)
        )
320
+
321
+
322
class MultiClassMultiOutputLogRegClassificationEvaluator(Evaluator):
    """Logistic-regression evaluator for multi-class, multi-output labels.

    Labels arrive as comma-separated strings (e.g. "a, b"); they are split and
    binarized with a MultiLabelBinarizer fitted on the union of the train and
    test label sets, then one LogisticRegression is fitted per label column.

    Returns macro-averaged precision/recall/F1 plus subset accuracy.
    """

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.max_iter = max_iter

    def __call__(self):
        # Local import: the semantically correct meta-estimator (see below).
        from sklearn.multioutput import MultiOutputClassifier

        scores = {}
        mlb = MultiLabelBinarizer()
        # Fit the binarizer on all classes in y_train and y_test so the
        # indicator matrix has a column for every class, even test-only ones.
        class_labels = list(self.y_train) + list(self.y_test)
        labels = [class_label.split(", ") for class_label in class_labels]
        mlb.fit(labels)
        train_labels = [class_label.split(", ") for class_label in self.y_train]
        test_labels = [class_label.split(", ") for class_label in self.y_test]

        y_train = mlb.transform(train_labels)
        y_test = mlb.transform(test_labels)
        # Bug fix: LogisticRegression is a classifier, so wrap it in
        # MultiOutputClassifier rather than MultiOutputRegressor. Both fit one
        # estimator per label column and delegate `.predict`, so predictions
        # are unchanged, but the classifier wrapper is the correct API.
        clf = MultiOutputClassifier(
            LogisticRegression(
                random_state=self.seed, solver="lbfgs", max_iter=self.max_iter
            )
        ).fit(self.embeds_train, y_train)
        y_pred = clf.predict(self.embeds_test)

        results_dict = classification_report(y_test, y_pred, output_dict=True)
        assert isinstance(
            results_dict, dict
        ), "Should always be true since `output_dict=True` is passed to sklearn.metric.classification_report"
        scores["precision"] = results_dict["macro avg"]["precision"]
        scores["recall"] = results_dict["macro avg"]["recall"]
        scores["f1"] = results_dict["macro avg"]["f1-score"]
        scores["accuracy"] = accuracy_score(y_test, y_pred)

        return scores
369
+
370
+
371
class MultiClassMultiOutputKNNClassificationEvaluator(Evaluator):
    """k-nearest-neighbour evaluator for multi-class, multi-output labels.

    Labels are comma-separated strings; they are binarized with a
    MultiLabelBinarizer fitted on the union of train and test labels, and a
    cosine-distance KNN classifier is fitted on the training embeddings.
    Returns macro precision/recall/F1, subset accuracy, and label-ranking
    average precision (LRAP).
    """

    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        n_neighbors=5,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.n_neighbors = n_neighbors

    def __call__(self):
        # Fit the binarizer on every label seen in either split so train and
        # test share one indicator-column layout.
        binarizer = MultiLabelBinarizer()
        binarizer.fit(
            [label.split(", ") for label in list(self.y_train) + list(self.y_test)]
        )
        y_train = binarizer.transform([label.split(", ") for label in self.y_train])
        y_test = binarizer.transform([label.split(", ") for label in self.y_test])

        clf = sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=self.n_neighbors, metric="cosine"
        )
        logger.info("Fitting KNN classifier...")
        clf.fit(self.embeds_train, y_train)
        logger.info("Evaluating...")
        y_pred = clf.predict(self.embeds_test)

        return {
            "f1": f1_score(y_test, y_pred, average="macro"),
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average="macro"),
            "recall": recall_score(y_test, y_pred, average="macro"),
            "lrap": label_ranking_average_precision_score(y_test, y_pred),
        }
419
+
420
+
421
class BiGeneMiningEvaluator(Evaluator):
    """
    BiGene Mining Evaluator, analogous to Bitext Mining Evaluator https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/BitextMiningEvaluator.py.

    The gold alignment is the identity pairing: embeds1[i] is the true match
    of embeds2[i]. Reports weighted precision/recall/F1 and accuracy of the
    top-1 nearest neighbor. If top_k > 1, then recall@k is also computed.
    """

    def __init__(self, embeds1, embeds2, top_k=1, **kwargs):
        super().__init__(**kwargs)
        self.n = len(embeds1)
        self.embeds1 = np.array(embeds1)
        self.embeds2 = np.array(embeds2)
        # Identity pairing (i, i): row i of embeds1 corresponds to row i of embeds2.
        self.gold = list(zip(range(self.n), range(self.n)))
        self.top_k = top_k

    def __call__(self):
        scores = self.compute_metrics()
        return scores

    def compute_metrics(self):
        """Run nearest-neighbor search embeds1 -> embeds2 and score the matches."""
        logger.info(f"Finding nearest neighbors... with top_k={self.top_k}")
        nearest_neighbors = self._similarity_search(
            self.embeds1, self.embeds2, top_k=self.top_k
        )

        # Compute errors
        logger.info("Computing metrics...")
        labels = []
        predictions = []

        # Get predictions and labels for top_k=1.
        for i, x in enumerate(nearest_neighbors):
            # x is sorted by decreasing score, so x[0] is the best match.
            j = x[0]["corpus_id"]
            predictions.append(j)
            labels.append(self.gold[i][1])

        scores = {
            "precision": precision_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "recall": recall_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "f1": f1_score(labels, predictions, zero_division=0, average="weighted"),
            "accuracy": accuracy_score(labels, predictions),
        }

        if self.top_k > 1:
            # Compute recall@k: fraction of queries whose gold index appears
            # anywhere in the top-k retrieved candidates.
            top_k_preds = []
            for i, x in enumerate(nearest_neighbors):
                top_k_preds.append([pred["corpus_id"] for pred in x])
            top_k_recall = [
                self.gold[i][1] in top_k_pred
                for i, top_k_pred in enumerate(top_k_preds)
            ]
            scores[f"recall_at_{self.top_k}"] = sum(top_k_recall) / len(top_k_recall)
        return scores

    def _similarity_search(
        self,
        query_embeddings,
        corpus_embeddings,
        query_chunk_size=100,
        corpus_chunk_size=500000,
        top_k=1,
        score_function=cos_sim,
    ):
        """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
        It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries.
        :param query_embeddings: A 2 dimensional tensor with the query embeddings.
        :param corpus_embeddings: A 2 dimensional tensor with the corpus embeddings.
        :param query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory.
        :param corpus_chunk_size: Scans the corpus 500k entries at a time. Increasing that value increases the speed, but requires more memory.
        :param top_k: Retrieve top k matching entries.
        :param score_function: Function for computing scores. By default, cosine similarity.
        :return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
        """
        query_embeddings = torch.from_numpy(query_embeddings)
        corpus_embeddings = torch.from_numpy(corpus_embeddings)
        # Promote 1-D inputs to a batch of one row.
        if len(query_embeddings.shape) == 1:
            query_embeddings = query_embeddings.unsqueeze(0)
        if len(corpus_embeddings.shape) == 1:
            corpus_embeddings = corpus_embeddings.unsqueeze(0)

        # Check that corpus and queries are on the same device
        if corpus_embeddings.device != query_embeddings.device:
            query_embeddings = query_embeddings.to(corpus_embeddings.device)

        queries_result_list = [[] for _ in range(len(query_embeddings))]

        for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
            # Iterate over chunks of the corpus
            for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
                # Compute cosine similarities
                cos_scores = score_function(
                    query_embeddings[
                        query_start_idx : query_start_idx + query_chunk_size
                    ],
                    corpus_embeddings[
                        corpus_start_idx : corpus_start_idx + corpus_chunk_size
                    ],
                )

                # Get top-k scores within this corpus chunk (unsorted; a
                # global sort across chunks happens below).
                cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                    cos_scores,
                    min(top_k, len(cos_scores[0])),
                    dim=1,
                    largest=True,
                    sorted=False,
                )
                cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
                cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

                for query_itr in range(len(cos_scores)):
                    for sub_corpus_id, score in zip(
                        cos_scores_top_k_idx[query_itr],
                        cos_scores_top_k_values[query_itr],
                    ):
                        # Translate chunk-local index back to a global corpus id.
                        corpus_id = corpus_start_idx + sub_corpus_id
                        query_id = query_start_idx + query_itr
                        queries_result_list[query_id].append(
                            {"corpus_id": corpus_id, "score": score}
                        )

        # Sort and strip to top_k results
        for idx in range(len(queries_result_list)):
            queries_result_list[idx] = sorted(
                queries_result_list[idx], key=lambda x: x["score"], reverse=True
            )
            queries_result_list[idx] = queries_result_list[idx][0:top_k]

        return queries_result_list
555
+
556
+
557
class EDSEvaluator(Evaluator):
    """
    Evolutionary Distance Similarity Evaluator, analogous to Semantic Textual Similarity Evaluator.
    Adapted from https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/STSEvaluator.py

    Correlates gold evolutionary distances with embedding-pair distances
    (cosine / Manhattan / Euclidean) via Pearson's r and also reports the
    best of the three correlations.
    """

    def __init__(self, embeds1, embeds2, gold_scores, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.gold_scores = gold_scores

    def __call__(self):
        arr1 = np.asarray(self.embeds1)
        arr2 = np.asarray(self.embeds2)
        logger.info("Evaluating...")

        # One pairwise-distance function per reported metric key.
        distance_fns = {
            "cos_sim": paired_cosine_distances,
            "manhattan": paired_manhattan_distances,
            "euclidean": paired_euclidean_distances,
        }
        results = {}
        for key, distance_fn in distance_fns.items():
            corr, _ = pearsonr(self.gold_scores, distance_fn(arr1, arr2))
            results[key] = corr

        # Best correlation across the three distance measures.
        results["top_corr"] = max(results.values())
        return results
592
+
593
+
594
class RetrievalEvaluator(Evaluator):
    """Adapted from
    https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/RetrievalEvaluator.py

    Runs chunked nearest-neighbor search of queries against a corpus, then
    scores the rankings with pytrec_eval (nDCG/MAP/recall/precision), MRR,
    and abstention nAUC metrics.
    """

    def __init__(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        qrels: Dict[str, Dict[str, int]],
        k_values: List[int] = [5, 10, 50],
        score_function: str = "cos_sim",
        corpus_chunk_size: int = 50000,
        **kwargs,
    ):
        # corpus_embeds / query_embeds: numpy embedding matrices (converted to
        # torch tensors inside `search`). corpus_ids / query_ids are parallel
        # id lists; qrels maps query_id -> {corpus_id: relevance}.
        super().__init__(**kwargs)
        self.corpus_embeds = corpus_embeds
        self.query_embeds = query_embeds
        self.corpus_ids = corpus_ids
        self.query_ids = query_ids
        self.qrels = qrels
        self.k_values = k_values
        # Retrieve max(k_values) hits per query unless the caller passed an
        # explicit top_k through kwargs.
        self.top_k = max(k_values) if "top_k" not in kwargs else kwargs["top_k"]
        self.score_function = score_function
        self.score_functions = {
            "cos_sim": cos_sim,
            "dot": dot_score,
        }
        self.corpus_chunk_size = corpus_chunk_size

    def __call__(self):
        """Search, evaluate, and flatten all metrics into one {name: value} dict."""
        results = self.search(
            self.corpus_embeds,
            self.query_embeds,
            self.corpus_ids,
            self.query_ids,
            self.top_k,
            self.score_function,
        )
        ndcg, _map, recall, precision, naucs = self.evaluate(
            self.qrels, results, self.k_values
        )
        mrr, naucs_mrr = self.evaluate_custom(self.qrels, results, self.k_values, "mrr")
        # Normalize metric names: "NDCG@10" style keys become "ndcg_at_10".
        scores = {
            **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()},
            **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()},
            **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()},
            **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()},
            **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()},
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs.items()
            },
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs_mrr.items()
            },
        }
        return scores

    def search(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        top_k: int,
        score_function: str,
        return_sorted: bool = False,
        **kwargs,
    ) -> dict[str, dict[str, float]]:
        # Create embeddings for all queries using model.encode()
        # Runs semantic search against the corpus embeddings
        # Returns a ranked list with the corpus ids
        if score_function not in self.score_functions:
            raise ValueError(
                f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product"
            )
        # make query embeds and corpus embeds torch tensors
        query_embeds = torch.from_numpy(query_embeds)
        corpus_embeds = torch.from_numpy(corpus_embeds)
        itr = range(0, len(corpus_embeds), self.corpus_chunk_size)
        results = defaultdict(dict)
        # Keep only the top-k docs for each query
        result_heaps = defaultdict(list)  # min-heaps of (score, corpus_id)
        for batch_num, corpus_start_idx in enumerate(itr):
            logger.info("Searching Batch {}/{}...".format(batch_num + 1, len(itr)))
            corpus_end_idx = min(
                corpus_start_idx + self.corpus_chunk_size, len(corpus_ids)
            )
            sub_corpus_embeds = corpus_embeds[corpus_start_idx:corpus_end_idx]
            # Compute similarites using either cosine-similarity or dot product
            cos_scores = self.score_functions[score_function](
                query_embeds, sub_corpus_embeds
            )
            # NaN similarities (e.g. from zero vectors) are sunk below any
            # real score so they never rank.
            cos_scores[torch.isnan(cos_scores)] = -1

            # Get top-k values. top_k + 1 is requested so a query matching
            # itself can be skipped and still leave k candidates.
            # NOTE(review): both branches of the inner min() yield a row
            # length of `cos_scores`; the [1]/[-1] indexing is odd but
            # equivalent for a rectangular score matrix — kept as-is.
            cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                cos_scores,
                min(
                    top_k + 1,
                    len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]),
                ),
                dim=1,
                largest=True,
                sorted=return_sorted,
            )
            cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
            cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

            for query_itr in range(len(query_embeds)):
                query_id = query_ids[query_itr]
                for sub_corpus_id, score in zip(
                    cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]
                ):
                    corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id]
                    # Skip self-matches (query retrieved as its own document).
                    if corpus_id != query_id:
                        if len(result_heaps[query_id]) < top_k:
                            # Push item on the heap
                            heapq.heappush(result_heaps[query_id], (score, corpus_id))
                        else:
                            # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
                            heapq.heappushpop(
                                result_heaps[query_id], (score, corpus_id)
                            )

        for qid in result_heaps:
            for score, corpus_id in result_heaps[qid]:
                results[qid][corpus_id] = score

        return results

    @staticmethod
    def evaluate(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        ignore_identical_ids: bool = True,
    ) -> Tuple[
        Dict[str, float],
        dict[str, float],
        dict[str, float],
        dict[str, float],
        Dict[str, float],
    ]:
        """Score `results` against `qrels` with pytrec_eval.

        Returns (ndcg, map, recall, precision, naucs); the first four are
        per-k averages over queries, the last is the abstention nAUC dict.
        """
        if ignore_identical_ids:
            logger.info(
                "For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this."
            )
            # Drop self-retrievals in place; `popped` only records what was removed.
            popped = []
            for qid, rels in results.items():
                for pid in list(rels):
                    if qid == pid:
                        results[qid].pop(pid)
                        popped.append(pid)

        all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {}

        for k in k_values:
            all_ndcgs[f"NDCG@{k}"] = []
            all_aps[f"MAP@{k}"] = []
            all_recalls[f"Recall@{k}"] = []
            all_precisions[f"P@{k}"] = []

        # pytrec_eval measure strings, e.g. "ndcg_cut.5,10,50".
        map_string = "map_cut." + ",".join([str(k) for k in k_values])
        ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values])
        recall_string = "recall." + ",".join([str(k) for k in k_values])
        precision_string = "P." + ",".join([str(k) for k in k_values])
        evaluator = pytrec_eval.RelevanceEvaluator(
            qrels, {map_string, ndcg_string, recall_string, precision_string}
        )
        scores = evaluator.evaluate(results)

        # Collect per-query scores, then average.
        for query_id in scores.keys():
            for k in k_values:
                all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)])
                all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)])
                all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)])
                all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)])
        ndcg, _map, recall, precision = (
            all_ndcgs.copy(),
            all_aps.copy(),
            all_recalls.copy(),
            all_precisions.copy(),
        )

        for k in k_values:
            ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5)
            _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5)
            recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5)
            precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5)
        naucs = RetrievalEvaluator.evaluate_abstention(
            results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions}
        )
        return ndcg, _map, recall, precision, naucs

    @staticmethod
    def evaluate_abstention(
        results: dict[str, dict[str, float]],
        metric_scores: dict[str, list[float]],
    ) -> Dict[str, float]:
        """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997"""
        # One confidence score per query, derived from its similarity scores.
        all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())]
        all_conf_scores = [
            confidence_scores(sim_scores) for sim_scores in all_sim_scores
        ]
        # `confidence_scores` returns a dict of confidence functions; regroup
        # per function into arrays aligned with the query order.
        conf_fcts = list(all_conf_scores[0].keys())
        all_conf_scores = {
            fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts
        }
        metric_scores = {k: np.array(v) for k, v in metric_scores.items()}
        naucs = {}

        # nAUC for every (metric, confidence-function) combination.
        for metric_name, scores in metric_scores.items():
            for fct, conf_scores in all_conf_scores.items():
                naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores)

        return naucs

    @staticmethod
    def evaluate_custom(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        metric: str,
        output_type: str = "all",
    ) -> Tuple[Dict[str, float], Dict[str, float]]:
        """Evaluate a non-pytrec metric (mrr / recall_cap / hole / accuracy).

        NOTE(review): an unrecognized `metric` leaves `metric_scores` unbound
        and raises UnboundLocalError below — confirm callers only pass the
        supported names.
        """
        if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]:
            metric_scores = mrr(qrels, results, k_values, output_type)

        elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]:
            metric_scores = recall_cap(qrels, results, k_values, output_type)

        elif metric.lower() in ["hole", "hole@k"]:
            metric_scores = hole(qrels, results, k_values, output_type)

        elif metric.lower() in [
            "acc",
            "top_k_acc",
            "accuracy",
            "accuracy@k",
            "top_k_accuracy",
        ]:
            metric_scores = top_k_accuracy(qrels, results, k_values, output_type)

        naucs = RetrievalEvaluator.evaluate_abstention(results, metric_scores)
        metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()}

        return metric_scores_avg, naucs
dgeb/modality.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+
3
+
4
class Modality(Enum):
    """Data modality, either DNA or protein sequence."""

    # Amino-acid (protein) sequences.
    PROTEIN = "protein"
    # Nucleotide (DNA) sequences.
    DNA = "dna"
dgeb/models.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import re
3
+ from abc import ABC, abstractmethod
4
+ from functools import partial
5
+ from types import SimpleNamespace
6
+ from typing import Dict, List, Literal, Optional
7
+
8
+ import numpy as np
9
+ import torch
10
+ import tqdm as tqdm
11
+ from datasets import Dataset
12
+ from torch import Tensor
13
+ from torch.nn import functional as F
14
+ from torch.utils.data import DataLoader
15
+ from transformers import (
16
+ AutoConfig,
17
+ AutoModel,
18
+ AutoModelForCausalLM,
19
+ AutoModelForMaskedLM,
20
+ AutoTokenizer,
21
+ BatchEncoding,
22
+ DefaultDataCollator,
23
+ T5EncoderModel,
24
+ T5Tokenizer,
25
+ )
26
+ from transformers.modeling_outputs import BaseModelOutput
27
+
28
+ from .modality import Modality
29
+ from .eval_utils import ForwardHook, pool
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class BioSeqTransformer(ABC):
    """
    Abstract class to wrap models which map biological sequences (DNA/Prot) to embeddings.
    Modelled after SentenceTransformer (https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py)

    Subclasses must define `num_layers`, `embed_dim`, and `modality`, and may
    override `_load_model` / `_get_tokenizer` / `_encode_single_batch`.

    Args:
        model_name: Name or path to the pretrained model.
        layers: List of model layers to probe. Can be integers or "mid" or "last".
        devices: List of device ids for inference. If cuda is not available, will use cpu.
        num_processes: Number of processes to use for data loading.
        max_seq_length: Maximum sequence length of the input sequences.
        l2_norm: If true, embeddings are L2-normalized before they are returned.
        batch_size: Batch size for encoding.
        pool_type: Pooling strategy to use. One of "mean", "max", "cls", "last".
    """

    def __init__(
        self,
        model_name: str,
        layers: Optional[List[int] | Literal["mid"] | Literal["last"]] = None,
        devices: List[int] = [0],
        num_processes: int = 16,
        max_seq_length: int = 1024,
        l2_norm: bool = False,
        batch_size: int = 128,
        pool_type: str = "mean",
    ):
        super().__init__()

        self.id = self.__class__.__name__
        self.hf_name = model_name
        self.encoder = self._load_model(model_name)
        # The config is the source of truth for num_layers/embed_dim in subclasses.
        if not hasattr(self.encoder, "config"):
            raise ValueError(
                'The model from `self._load_model()` must have a "config" attribute.'
            )
        self.config = self.encoder.config
        self.tokenizer = self._get_tokenizer(model_name)
        self.num_param = sum(p.numel() for p in self.encoder.parameters())
        self.data_collator = DefaultDataCollator()
        self.gpu_count = len(devices)
        self.l2_norm = l2_norm

        # Primary device; falls back to CPU when CUDA is unavailable.
        self.device = torch.device(
            f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu"
        )
        self.num_processes = num_processes
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.pool_type = pool_type

        # DataParallel splits each batch across the requested GPU ids.
        if self.gpu_count > 1:
            self.encoder = torch.nn.DataParallel(self.encoder, device_ids=devices)
        self.encoder.to(self.device)
        self.encoder.eval()

        # Resolve the "mid"/"last" shorthands into concrete layer indices.
        mid_layer = self.num_layers // 2
        last_layer = self.num_layers - 1
        mid_layer_label = f"mid ({mid_layer})"
        last_layer_label = f"last ({self.num_layers - 1})"

        if layers is None:
            logger.debug(f"Using default layers: {mid_layer_label}, {last_layer_label}")
            self.layers = [mid_layer, last_layer]
            self.layer_labels = [mid_layer_label, last_layer_label]
        elif layers == "mid":
            self.layers = [mid_layer]
            self.layer_labels = [mid_layer_label]
        elif layers == "last":
            self.layers = [last_layer]
            self.layer_labels = [last_layer_label]
        else:
            self.layers = layers
            self.layer_labels = [str(layer) for layer in layers]

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
        outputs = self.encoder(**batch_dict, output_hidden_states=True)
        # NOTE(review): HF `hidden_states[0]` is normally the embedding-layer
        # output, so `hidden_states[layer]` for layer == num_layers - 1 is the
        # penultimate block's output — confirm this indexing is intended.
        embeds = [outputs.hidden_states[layer] for layer in self.layers]
        # Pool over the sequence dimension, masking out padding tokens.
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        return embeds

    def _load_model(self, model_name):
        # Default loader; subclasses override for non-HF checkpoints.
        return AutoModel.from_pretrained(model_name, trust_remote_code=True)

    def _get_tokenizer(self, model_name):
        return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        """Tokenize a batch of raw sequences, padding/truncating to max_seq_length."""
        batch_dict = tokenizer(
            examples["input_seqs"],
            max_length=max_seq_length,
            padding=True,
            truncation=True,
        )
        return batch_dict

    @property
    def metadata(self) -> Dict:
        # Model facts recorded alongside benchmark results.
        return {
            "hf_name": self.hf_name,
            "num_layers": self.num_layers,
            "num_params": self.num_param,
            "embed_dim": self.embed_dim,
        }

    @property
    @abstractmethod
    def num_layers(self) -> int:
        pass

    @property
    @abstractmethod
    def embed_dim(self) -> int:
        pass

    @property
    @abstractmethod
    def modality(self) -> Modality:
        pass

    @torch.no_grad()
    def encode(self, sequences, **kwargs) -> np.ndarray:
        """Returns a list of embeddings for the given sequences.
        Args:
            sequences (`List[str]`): List of sequences to encode
        Returns:
            `np.ndarray`: Embeddings for the given sequences of shape [num_sequences, num_layers, embedding_dim].
        """
        dataset = Dataset.from_dict({"input_seqs": sequences})
        # Tokenization is applied lazily per batch via set_transform.
        dataset.set_transform(
            partial(
                self._tokenize_func, self.tokenizer, max_seq_length=self.max_seq_length
            )
        )
        # Per-device batch size times GPU count: DataParallel splits it back up.
        data_loader = DataLoader(
            dataset,
            batch_size=self.batch_size * self.gpu_count,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_processes,
            collate_fn=self.data_collator,
            pin_memory=True,
        )

        # Validate requested probe layers against the actual model depth.
        if max(self.layers) >= self.num_layers:
            raise ValueError(
                f"Layer {max(self.layers)} is not available in the model. Choose a layer between 0 and {self.num_layers - 1}"
            )

        encoded_embeds = []
        # Progress bar is suppressed for small inputs (< 128 sequences).
        for batch_dict in tqdm.tqdm(
            data_loader, desc="encoding", mininterval=10, disable=len(sequences) < 128
        ):
            batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()}

            embeds = self._encode_single_batch(batch_dict)

            if self.l2_norm:
                # Normalize along the embedding dimension only.
                embeds = F.normalize(embeds, p=2, dim=-1)
            encoded_embeds.append(embeds.cpu().numpy())

        return np.concatenate(encoded_embeds, axis=0)
204
+
205
+
206
class ESM(BioSeqTransformer):
    """Wrapper for the ESM-2 protein language models.

    Reference: https://huggingface.co/docs/transformers/en/model_doc/esm
    """

    # Supported HuggingFace checkpoints, smallest to largest.
    MODEL_NAMES = [
        "facebook/esm2_t6_8M_UR50D",
        "facebook/esm2_t12_35M_UR50D",
        "facebook/esm2_t30_150M_UR50D",
        "facebook/esm2_t33_650M_UR50D",
        "facebook/esm2_t36_3B_UR50D",
        "facebook/esm2_t48_15B_UR50D",
    ]

    @property
    def modality(self) -> Modality:
        # ESM-2 consumes amino-acid sequences.
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        cfg = self.config
        return cfg.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        cfg = self.config
        return cfg.hidden_size
229
+
230
+
231
class ESM3(BioSeqTransformer):
    """ESM3 model from https://github.com/evolutionaryscale/esm

    ESM3's forward pass does not expose per-layer hidden states, so forward
    hooks (from `eval_utils.ForwardHook`) are registered on the requested
    transformer blocks to capture their outputs during encoding.
    """

    MODEL_NAMES = ["esm3_sm_open_v1"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register forward hooks to store embeddings per layer.
        self.hooks = [
            ForwardHook(self.encoder.transformer.blocks[layer]) for layer in self.layers
        ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def _load_model(self, model_name):
        try:
            from esm.models.esm3 import ESM3 as ModelESM3
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        model = ModelESM3.from_pretrained("esm3_sm_open_v1")
        # ESM3 has no HF-style config; synthesize one so the base class can
        # read num_hidden_layers / hidden_size. Hidden size is inferred from
        # the output features of the first block's final FFN layer.
        model.config = SimpleNamespace(
            num_hidden_layers=len(model.transformer.blocks),
            hidden_size=model.transformer.blocks[0].ffn[-1].out_features,
        )
        return model

    def _get_tokenizer(self, model_name):
        try:
            from esm.tokenization.sequence_tokenizer import EsmSequenceTokenizer
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        return EsmSequenceTokenizer()

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        # The forward output is discarded; the hooks capture each selected
        # block's output as a side effect of the call.
        _ = self.encoder.forward(sequence_tokens=batch_dict["input_ids"])
        embeds = [hook.output for hook in self.hooks]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        # Cast up for numpy conversion — presumably the model may run in a
        # reduced precision (e.g. bf16); confirm against the checkpoint dtype.
        embeds = embeds.to(torch.float32)
        return embeds
289
+
290
+
291
class ProtT5(BioSeqTransformer):
    """ProtT5 model from https://github.com/agemagician/ProtTrans"""

    MODEL_NAMES = [
        "Rostlab/prot_t5_xl_uniref50",
        "Rostlab/prot_t5_xl_bfd",
        "Rostlab/prot_t5_xxl_uniref50",
        "Rostlab/prot_t5_xxl_bfd",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_layers

    @property
    def embed_dim(self) -> int:
        return self.config.d_model

    def _load_model(self, model_name):
        # Only the T5 encoder stack is needed to extract embeddings.
        return T5EncoderModel.from_pretrained(model_name)

    def _get_tokenizer(self, model_name):
        return T5Tokenizer.from_pretrained(model_name, do_lower_case=False)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        """Tokenize protein sequences in the ProtT5 input format."""
        # ProtT5 expects residues to be whitespace-separated tokens.
        spaced_seqs = [" ".join(seq) for seq in examples["input_seqs"]]
        # Map rare/ambiguous amino acids (U, Z, O, B) to the unknown residue X.
        cleaned_seqs = [re.sub(r"[UZOB]", "X", seq) for seq in spaced_seqs]
        return tokenizer(
            cleaned_seqs,
            max_length=max_seq_length,
            padding=True,
            truncation=True,
            add_special_tokens=True,
        )
337
+
338
+
339
class ProGen(BioSeqTransformer):
    """ProGen models from https://github.com/salesforce/progen."""

    MODEL_NAMES = [
        "hugohrban/progen2-small",
        "hugohrban/progen2-medium",
        "hugohrban/progen2-base",
        "hugohrban/progen2-large",
        "hugohrban/progen2-xlarge",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.n_layer

    @property
    def embed_dim(self) -> int:
        return self.config.embed_dim

    def _load_model(self, model_name):
        return AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

    def _get_tokenizer(self, model_name_or_path):
        tok = AutoTokenizer.from_pretrained(
            model_name_or_path, trust_remote_code=True
        )
        # ProGen checkpoints ship without a pad token; assign one explicitly.
        tok.pad_token = "<|pad|>"
        return tok

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
        outputs: BaseModelOutput = self.encoder(
            input_ids=batch_dict["input_ids"],
            output_hidden_states=True,
            use_cache=False,
        )
        pooled = []
        for layer in self.layers:
            hidden = outputs.hidden_states[layer]
            pooled.append(pool(hidden, batch_dict["attention_mask"], self.pool_type))
        # Stack with shape [B, num_layers, D].
        return torch.stack(pooled, dim=1)
387
+
388
+
389
class EvoModel(BioSeqTransformer):
    """https://github.com/evo-design/evo."""

    MODEL_NAMES = [
        "togethercomputer/evo-1-8k-base",
        "togethercomputer/evo-1-131k-base",
    ]

    @property
    def modality(self) -> Modality:
        # Evo is a DNA language model.
        return Modality.DNA

    @property
    def num_layers(self) -> int:
        return self.config.num_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register forward hooks to store embeddings per layer.
        # Hooks are appended in the same order as self.layers; _encode_single_batch
        # relies on that ordering when stacking per-layer outputs.
        self.hooks = []
        for layer in self.layers:
            # For the last layer, get the output of `backbone.norm`, which directly precedes `backbone.unembed`.
            # This is equivalent to the approach in https://github.com/evo-design/evo/issues/32.
            if layer == self.num_layers - 1 or layer == -1:
                self.hooks.append(ForwardHook(self.encoder.backbone.norm))
            else:
                self.hooks.append(ForwardHook(self.encoder.backbone.blocks[layer]))

    def _load_model(self, model_name):
        # Revision "1.1_fix" is pinned for both config and weights so the remote
        # code and checkpoint stay in sync.
        config = AutoConfig.from_pretrained(
            model_name, trust_remote_code=True, revision="1.1_fix"
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name, config=config, trust_remote_code=True, revision="1.1_fix"
        )
        return model

    def _get_tokenizer(self, model_name):
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, revision="1.1_fix", trust_remote_code=True
        )
        # Evo tokenizer is missing pad_token by default.
        tokenizer.add_special_tokens({"pad_token": "N"})
        return tokenizer

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Embed one tokenized batch; returns a tensor of shape [batch, num_layers, D]."""
        # Forward pass is run only for its side effect of populating the hooks.
        _ = self.encoder(batch_dict["input_ids"], use_cache=False)
        embeds = [hook.output for hook in self.hooks]
        # The hook output for Evo middle layers is a tuple (embedding, inference_params=None).
        embeds = [x[0] if isinstance(x, tuple) else x for x in embeds]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        embeds = embeds.to(torch.float32)
        return embeds
451
+
452
+
453
class NTModel(BioSeqTransformer):
    """Nucleotide Transformer https://github.com/instadeepai/nucleotide-transformer"""

    MODEL_NAMES = [
        "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
        "InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
        "InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
    ]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Override max_seq_length with the tokenizer's declared maximum so
        # inputs are truncated to what the checkpoint supports.
        self.max_seq_length = self.tokenizer.model_max_length

    @property
    def modality(self) -> Modality:
        # Nucleotide Transformer embeds DNA sequences.
        return Modality.DNA

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def _load_model(self, model_name):
        # NT is distributed as a masked LM with custom modeling code on the Hub.
        return AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True)
dgeb/tasks/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ruff: noqa: F403
2
+
3
+ from .tasks import Dataset, Task, TaskMetadata, TaskResult
4
+ from .eds_tasks import *
5
+ from .pair_classification_tasks import *
6
+ from .retrieval_tasks import *
7
+ from .classification_tasks import *
8
+ from .clustering_tasks import *
9
+ from .bigene_mining_tasks import *
10
+
11
+ __all__ = [
12
+ "Dataset",
13
+ "Task",
14
+ "TaskMetadata",
15
+ "TaskResult",
16
+ ]
dgeb/tasks/bigene_mining_tasks.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bigene mining tasks are analogous to bitext matching tasks, but for genes.
3
+ Cosine similarity is used to mine genes of related functions from different organisms.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import BiGeneMiningEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def run_bigene_mining_tasks(
    model: BioSeqTransformer, metadata: TaskMetadata, top_k: int = 1
) -> TaskResult:
    """Evaluate bigene mining task. Utilizes the BiGeneMiningEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("BiGeneMining tasks require 1 dataset.")
    train_split = metadata.datasets[0].load()["train"]
    # Embed both sides of each gene pair; shape [num_seqs, num_layers, dim].
    first_embeds = model.encode(train_split["Seq1"])
    second_embeds = model.encode(train_split["Seq2"])
    results = defaultdict(dict)
    for layer_idx, layer in enumerate(model.layers):
        scorer = BiGeneMiningEvaluator(
            first_embeds[:, layer_idx], second_embeds[:, layer_idx], top_k=top_k
        )
        results["layers"][layer] = scorer()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} matching results: {results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, results, model.metadata)
34
+
35
+
36
class BacArchBiGeneMining(Task):
    """BiGene mining between E. coli K-12 and S. acidocaldarius DSM 639 proteins (primary metric: f1)."""

    metadata = TaskMetadata(
        id="bacarch_bigene",
        display_name="BacArch BiGene",
        description="Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/bac_arch_bigene",
                revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the shared bigene mining pipeline with the default top_k=1."""
        return run_bigene_mining_tasks(model, self.metadata)
54
+
55
+
56
class ModACParalogyBiGeneMining(Task):
    """BiGene mining between paralogous ModA and ModC proteins (primary metric: recall@50)."""

    # ModAC Paralogy matching with top_k=1 is too strict (most models have accuracy < 0.1%)
    # Instead use recall@50 as the main metric.
    TOP_K = 50

    metadata = TaskMetadata(
        id="modac_paralogy_bigene",
        display_name="ModAC Paralogy BiGene",
        description="Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/modac_paralogy_bigene",
                revision="241ca6397856e3360da04422d54933035b1fab87",
            )
        ],
        primary_metric_id=f"recall_at_{TOP_K}",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Run the shared bigene mining pipeline with the relaxed TOP_K."""
        return run_bigene_mining_tasks(model, self.metadata, top_k=self.TOP_K)
dgeb/tasks/classification_tasks.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Classification tasks take in biological sequence and functional labels.
3
+ Multi-class and/or multi-label classification tasks are supported.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ import datasets
10
+ import numpy as np
11
+
12
+ from dgeb.eval_utils import merge_split_elem_embeds
13
+ from dgeb.evaluators import (
14
+ MultiClassMultiOutputKNNClassificationEvaluator,
15
+ logRegClassificationEvaluator,
16
+ )
17
+ from dgeb.modality import Modality
18
+ from dgeb.models import BioSeqTransformer
19
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
def split_sequences(
    ds: datasets.DatasetDict, max_seq_length: int
) -> datasets.DatasetDict:
    """Split sequences into chunks of max_seq_length using datasets.Dataset.map()."""

    def _chunk_one(examples, max_seq_length):
        # Batched map with batch_size=1: every field holds exactly one value.
        assert (
            len(examples["Sequence"]) == 1
        ), "split map function should use batch size of 1."
        record = {key: values[0] for key, values in examples.items()}
        full_seq = record["Sequence"]
        # Slice the sequence into consecutive windows of max_seq_length.
        chunks = [
            full_seq[start : start + max_seq_length]
            for start in range(0, len(full_seq), max_seq_length)
        ]
        # Duplicate every non-sequence field once per chunk so row counts match.
        out = {
            key: [value] * len(chunks)
            for key, value in record.items()
            if key != "Sequence"
        }
        out["Sequence"] = chunks
        return out

    return ds.map(
        _chunk_one,
        batched=True,
        batch_size=1,
        fn_kwargs={"max_seq_length": max_seq_length},
        keep_in_memory=True,
        load_from_cache_file=False,
    )
55
+
56
+
57
def run_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate on classification tasks using logistic regression classifier."""
    dataset = metadata.datasets[0].load()
    results = defaultdict(dict)
    # Embeddings have shape [num_seqs, num_layers, dim]; slice per layer below.
    embeds_train = model.encode(dataset["train"]["Sequence"])
    embeds_test = model.encode(dataset["test"]["Sequence"])
    for idx, layer in enumerate(model.layers):
        evaluator = logRegClassificationEvaluator(
            embeds_train[:, idx],
            dataset["train"]["Label"],
            embeds_test[:, idx],
            dataset["test"]["Label"],
        )
        results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, results, model.metadata)
76
+
77
+
78
class EnzymeCommissionClassification(Task):
    """EC number classification from protein sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="ec_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/ec_classification",
                revision="ead5570168e6969a5149f6861e8a33d6b5d22498",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
96
+
97
+
98
class EnzymeCommissionDNAClassification(Task):
    """EC number classification from DNA sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="ec_dna_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task using DNA sequences.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/ec_classification_dna",
                revision="cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
116
+
117
+
118
class ConvergentEnzymesClassification(Task):
    """EC classification restricted to convergent enzymes — same EC number, no mutual blastp hits (primary metric: f1)."""

    metadata = TaskMetadata(
        id="convergent_enzymes_classification",
        display_name="Convergent Enzymes Classification",
        description="Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/convergent_enzymes",
                revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared logistic-regression classification pipeline."""
        return run_classification_task(model, self.metadata)
136
+
137
+
138
def run_mibig_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """
    Evaluate on MIBIG classification tasks. Multiclass, multi-label KNN classification is used for evaluation.
    """
    ds = metadata.datasets[0].load()
    if metadata.modality == Modality.DNA:
        # MIBiG DNA sequences can be very long. Instead of truncating to max_seq_length,
        # split into multiple sequences and mean pool the resulting embeddings.
        ds = split_sequences(ds, model.max_seq_length)

    layer_results = defaultdict(dict)
    # Embeddings have shape [num_seqs, num_layers, dim] (see BioSeqTransformer.encode).
    train_embeds = model.encode(ds["train"]["Sequence"])
    test_embeds = model.encode(ds["test"]["Sequence"])

    # After split_sequences, "Entry" IDs repeat once per chunk; record each ID's
    # label before merging so labels can be re-gathered afterwards.
    train_ids = ds["train"]["Entry"]
    test_ids = ds["test"]["Entry"]
    train_labels = ds["train"]["class"]
    test_labels = ds["test"]["class"]
    train_id_to_label = {id: label for id, label in zip(train_ids, train_labels)}
    test_id_to_label = {id: label for id, label in zip(test_ids, test_labels)}
    # Mean pool embeds with the same ID.
    train_ids, train_embeds = merge_split_elem_embeds(train_ids, train_embeds)
    test_ids, test_embeds = merge_split_elem_embeds(test_ids, test_embeds)
    # Gather the labels after merging by unique ID.
    train_labels = np.array([train_id_to_label[id] for id in train_ids])
    test_labels = np.array([test_id_to_label[id] for id in test_ids])

    for i, layer in enumerate(model.layers):
        evaluator = MultiClassMultiOutputKNNClassificationEvaluator(
            train_embeds[:, i], train_labels, test_embeds[:, i], test_labels
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, MIBiG classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
174
+
175
+
176
class MIBiGProteinClassification(Task):
    """Biosynthetic gene cluster classification (MIBiG) from protein sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="MIBIG_protein_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_prot",
                revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the MIBiG KNN classification pipeline."""
        return run_mibig_task(model, self.metadata)
194
+
195
+
196
class MIBiGDNAClassification(Task):
    """Biosynthetic gene cluster classification (MIBiG) from DNA sequences (primary metric: f1)."""

    metadata = TaskMetadata(
        id="MIBIG_dna_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_dna",
                revision="b5ca7a76d469e4e66c46f1b655903972571e6b61",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the MIBiG KNN classification pipeline."""
        return run_mibig_task(model, self.metadata)
dgeb/tasks/clustering_tasks.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Biological sequences are clustered and performance is determined by how well clustering matches assigned labels.
3
+ """
4
+
5
+ import logging
6
+ from collections import defaultdict
7
+
8
+ from dgeb.evaluators import ClusteringEvaluator
9
+ from dgeb.modality import Modality
10
+ from dgeb.models import BioSeqTransformer
11
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def run_clustering_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate clustering task. Utilizes the ClusteringEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("Clustering tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    # Embeddings have shape [num_seqs, num_layers, dim]; slice per layer below.
    embeds = model.encode(ds["Sequence"])
    # Hoisted out of the loop: labels are identical for every layer, so avoid
    # re-fetching the column from the dataset on each iteration.
    labels = ds["Label"]
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = ClusteringEvaluator(embeds[:, i], labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
31
+
32
+
33
class RNAclustering(Task):
    """Clustering of E. coli K-12 sRNA/tRNA/rRNA segments (primary metric: v_measure)."""

    metadata = TaskMetadata(
        id="ecoli_rna_clustering",
        display_name="E.coli RNA Clustering",
        description="Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
        type="clustering",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/e_coli_rnas",
                revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared clustering pipeline."""
        return run_clustering_task(model, self.metadata)
51
+
52
+
53
class MopBClustering(Task):
    """Clustering of MopB protein sequences (primary metric: v_measure)."""

    metadata = TaskMetadata(
        id="mopb_clustering",
        display_name="MopB Clustering",
        description="Evaluate on MopB clustering task.",
        type="clustering",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mopb_clustering",
                revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared clustering pipeline."""
        return run_clustering_task(model, self.metadata)
dgeb/tasks/eds_tasks.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Evolutionary Distance Similarity (EDS) tasks compare embedding distances to continuous evolutionary distances.
3
+ The label distances are typically derived from phylogenetic trees.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from dgeb.evaluators import EDSEvaluator
13
+ from dgeb.modality import Modality
14
+ from dgeb.models import BioSeqTransformer
15
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def run_eds_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate phylogeny distance correlation task. Utilizes the Evolutionary Distance Similarity (EDS) evaluator.

    The first dataset provides sequences (with "Entry" IDs); the second provides
    pairwise phylogenetic distances between IDs ("ID1", "ID2", "distance").
    """
    if len(metadata.datasets) != 2:
        raise ValueError("Phylogeny tasks require 2 datasets: sequences and distances.")

    ds = metadata.datasets[0].load()["train"]
    distance_df = metadata.datasets[1].load()["train"].to_pandas()
    assert isinstance(
        distance_df, pd.DataFrame
    ), f"Expected DataFrame, got {type(distance_df)}"

    id_index_dict = {k: i for i, k in enumerate(ds["Entry"])}
    # Resolve each pair's IDs to embedding row indices once, up front. As
    # before, an ID missing from the sequence dataset raises KeyError.
    idx1 = np.array([id_index_dict[id1] for id1 in distance_df["ID1"]])
    idx2 = np.array([id_index_dict[id2] for id2 in distance_df["ID2"]])
    dists = distance_df["distance"].to_numpy()
    # Embeddings have shape [num_seqs, num_layers, dim].
    test_embeds = model.encode(ds["Sequence"])
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        # Vectorized gather replaces the previous per-row iterrows() loop that
        # stored embeddings in object-dtype DataFrame columns (O(layers * rows)).
        embeds1 = test_embeds[idx1, i]
        embeds2 = test_embeds[idx2, i]
        evaluator = EDSEvaluator(embeds1, embeds2, dists)
        layer_results["layers"][layer] = evaluator()
        # log results
        logger.info(
            f"Layer: {layer}, {metadata.display_name} distance correlation results: {layer_results['layers'][layer]}"
        )

    return TaskResult.from_dict(metadata, layer_results, model.metadata)
55
+
56
+
57
class RpobBacPhylogeny(Task):
    """EDS correlation against the RpoB bacterial protein phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_bac_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_phylogeny_sequences",
                revision="b833ef8d8d873ea5387540562873f41d073d3e03",
            ),
            Dataset(
                path="tattabio/rpob_bac_phylogeny_distances",
                revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
79
+
80
+
81
class RpobArchPhylogeny(Task):
    """EDS correlation against the RpoB archaeal protein phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_arch_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_phylogeny_sequences",
                revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
            ),
            Dataset(
                path="tattabio/rpob_arch_phylogeny_distances",
                revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
103
+
104
+
105
class RpobBacDNAPhylogeny(Task):
    """EDS correlation against the RpoB bacterial phylogeny, DNA sequences (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_bac_dna_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_sequences",
                revision="8e137d3fb8886d8739ce08d1918745444c7d30d6",
            ),
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_distances",
                revision="67339e271b2a1602208153d53d70d35ba6fa8876",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
127
+
128
+
129
class RpobArchDNAPhylogeny(Task):
    """EDS correlation against the RpoB archaeal phylogeny, DNA sequences (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="rpob_arch_dna_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_sequences",
                revision="4453552a0e1021fee8697c71a559f4d3f6da2408",
            ),
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_distances",
                revision="51df97684a927ec2203568e80175ef26a62db039",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
151
+
152
+
153
class FeFePhylogeny(Task):
    """EDS correlation against the FeFe hydrogenase phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="fefe_phylogeny",
        display_name="FeFeHydrogenase Phylogeny",
        description="Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/fefe_phylogeny_sequences",
                revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
            ),
            Dataset(
                path="tattabio/fefe_phylogeny_distances",
                revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
175
+
176
+
177
class Bac16SPhylogeny(Task):
    """EDS correlation against the bacterial 16S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="bac_16S_phylogeny",
        display_name="16S Bacterial Phylogeny",
        description="Evaluate on 16S Bacterial phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/bac_16S_sequences",
                revision="efde1456b86748909cbcfecb07d783756d570aa3",
            ),
            Dataset(
                path="tattabio/bac_16S_distances",
                revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
199
+
200
+
201
class Arch16SPhylogeny(Task):
    """EDS correlation against the archaeal 16S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="arch_16S_phylogeny",
        display_name="16S Archaeal Phylogeny",
        description="Evaluate on 16S Archaeal phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/arch_16S_sequences",
                revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0",
            ),
            Dataset(
                path="tattabio/arch_16S_distances",
                revision="b0356b632a954be70cefd57e3a02e7e1ccd34408",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
223
+
224
+
225
class Euk18SPhylogeny(Task):
    """EDS correlation against the eukaryotic 18S rRNA phylogeny (primary metric: top_corr)."""

    metadata = TaskMetadata(
        id="euk_18S_phylogeny",
        display_name="18S Eukaryotic Phylogeny",
        description="Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/euk_18S_sequences",
                revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196",
            ),
            Dataset(
                path="tattabio/euk_18S_distances",
                revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate to the shared EDS pipeline."""
        return run_eds_task(model, self.metadata)
dgeb/tasks/pair_classification_tasks.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pair classification tasks evaluating distances between functionally relevant gene pairs.
3
+ For instance, distance thresholds distinguish between co-transcribed and non-co-transcribed gene pairs.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import PairClassificationEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ from ..eval_utils import paired_dataset
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def run_pair_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate a pair classification task using PairClassificationEvaluator.

    Args:
        model: Embedding model; must expose `encode()`, `layers`, and `metadata`.
        metadata: Task metadata holding exactly one dataset whose "train" split
            provides `Sequence` and `Label` columns.

    Returns:
        TaskResult with one metrics dict per evaluated model layer.

    Raises:
        ValueError: If the task is configured with more than one dataset.
    """
    if len(metadata.datasets) != 1:
        raise ValueError("Pair classification tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    embeds = model.encode(ds["Sequence"])
    # Fix: `Label` is loop-invariant — the original re-read it from the dataset
    # on every layer iteration and then shadowed it with the paired_dataset
    # return value. Read it once and keep the paired labels under a new name.
    labels = ds["Label"]
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        embeds1, embeds2, pair_labels = paired_dataset(labels, embeds[:, i])
        evaluator = PairClassificationEvaluator(embeds1, embeds2, pair_labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
37
+
38
+
39
class EcoliOperon(Task):
    """E. coli K-12 operonic pair classification task."""

    metadata = TaskMetadata(
        id="ecoli_operonic_pair",
        display_name="E.coli Operonic Pair",
        description="Evaluate on E.coli K-12 operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/ecoli_operonic_pair", revision="a62c01143a842696fc8200b91c1acb825e8cb891"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
57
+
58
+
59
class CyanoOperonPair(Task):
    """Cyanobacteria operonic pair classification task."""

    metadata = TaskMetadata(
        id="cyano_operonic_pair",
        display_name="Cyano Operonic Pair",
        description="Evaluate on Cyano operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/cyano_operonic_pair", revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
77
+
78
+
79
class VibrioOperonPair(Task):
    """Vibrio operonic pair classification task."""

    metadata = TaskMetadata(
        id="vibrio_operonic_pair",
        display_name="Vibrio Operonic Pair",
        description="Evaluate on Vibrio operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(path="tattabio/vibrio_operonic_pair", revision="24781b12b45bf81a079a6164ef0d2124948c1878"),
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared pair-classification runner."""
        return run_pair_classification_task(model, self.metadata)
dgeb/tasks/retrieval_tasks.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Retrieval tasks find functionally relevant genes in a corpus of genes based on a query gene.
3
+ Typically corpus is derived from a different phylogenetic group than the query genes.
4
+ """
5
+
6
+ import logging
7
+ from collections import defaultdict
8
+
9
+ from dgeb.evaluators import RetrievalEvaluator
10
+ from dgeb.modality import Modality
11
+ from dgeb.models import BioSeqTransformer
12
+ from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def run_retrieval_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate a retrieval task using the RetrievalEvaluator.

    Args:
        model: Embedding model; must expose `encode()`, `layers`, and `metadata`.
        metadata: Task metadata with exactly two datasets: the first provides
            the corpus ("train") and query ("test") splits, the second the qrels.

    Returns:
        TaskResult with one metrics dict per evaluated model layer.

    Raises:
        ValueError: If the task is not configured with exactly two datasets.
    """
    if len(metadata.datasets) != 2:
        # Fix: the original message claimed 3 datasets were required, which
        # contradicted the `!= 2` check above.
        raise ValueError(
            "Retrieval tasks require 2 datasets: corpus/query splits and qrels."
        )
    # Fix: load the corpus/query dataset once instead of twice.
    seq_ds = metadata.datasets[0].load()
    corpus_ds = seq_ds["train"]
    query_ds = seq_ds["test"]
    qrels_ds = metadata.datasets[1].load()
    corpus_embeds = model.encode(corpus_ds["Sequence"])
    query_embeds = model.encode(query_ds["Sequence"])
    qrels_dict = defaultdict(dict)

    def qrels_dict_init(row):
        # One relevance judgement per (query, corpus) pair.
        qrels_dict[str(row["query_id"])][str(row["corpus_id"])] = int(row["fuzz_ratio"])

    # Populate `qrels_dict` from the dataset (map() is used for its side effect).
    # See https://github.com/cvangysel/pytrec_eval for qrels format.
    qrels_ds.map(qrels_dict_init)
    qrels = qrels_dict
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = RetrievalEvaluator(
            corpus_embeds[:, i],
            query_embeds[:, i],
            corpus_ds["Entry"],
            query_ds["Entry"],
            qrels,
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, Retrieval results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)
49
+
50
+
51
class ArchRetrieval(Task):
    """Archaea-to-bacteria protein retrieval task."""

    metadata = TaskMetadata(
        id="arch_retrieval",
        display_name="Arch Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        # First dataset holds corpus/query splits, second the qrels.
        datasets=[
            Dataset(path="tattabio/arch_retrieval", revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"),
            Dataset(path="tattabio/arch_retrieval_qrels", revision="3f142f2f9a0995d56c6e77188c7251761450afcf"),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared retrieval runner."""
        return run_retrieval_task(model, self.metadata)
73
+
74
+
75
class EukRetrieval(Task):
    """Eukaryote-to-bacteria protein retrieval task."""

    metadata = TaskMetadata(
        id="euk_retrieval",
        display_name="Euk Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        # First dataset holds corpus/query splits, second the qrels.
        datasets=[
            Dataset(path="tattabio/euk_retrieval", revision="c93dc56665cedd19fbeaea9ace146f2474c895f0"),
            Dataset(path="tattabio/euk_retrieval_qrels", revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b"),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        """Delegate evaluation to the shared retrieval runner."""
        return run_retrieval_task(model, self.metadata)
dgeb/tasks/tasks.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task abstract class for evaluation and results."""
2
+
3
+ import logging
4
+ from typing import List, Literal, Optional, Any
5
+ from importlib.metadata import version
6
+ from enum import Enum
7
+ import datasets
8
+ from pydantic import BaseModel, model_validator
9
+ from abc import ABC, abstractmethod
10
+
11
+
12
# HACK: if Modality is not defined, then import it from modality.py
# NOTE(review): the leaderboard (leaderboard/app.py) loads this module
# standalone via importlib, so the relative import below fails there; this
# fallback re-declares Modality inline. Keep it in sync with dgeb/modality.py.
try:
    from ..modality import Modality
except Exception:
    # if not, super hack to get the leaderboard working.
    # SHOULD MATCH the code exactly in modality.py
    # can we read the file and run that code?
    from enum import Enum

    class Modality(Enum):
        """Data modality, either DNA or protein sequence."""

        PROTEIN = "protein"
        DNA = "dna"
26
+
27
+
28
# Configure root logging at import time so task modules emit INFO-level logs.
logging.basicConfig(level=logging.INFO)

# Closed set of benchmark task categories; used by TaskMetadata.type.
TaskType = Literal[
    "classification",
    "pair_classification",
    "clustering",
    "eds",
    "bigene_mining",
    "retrieval",
]
38
+
39
+
40
class TaskMetric(BaseModel):
    """A single named metric value reported for one model layer."""

    id: str  # Machine-readable metric identifier, e.g. "f1" or "map_at_5".
    display_name: str  # Human-readable label shown in the leaderboard.
    description: Optional[str] = None
    value: float = 0.0  # Metric value; defaults to 0.0 when unset.
45
+
46
+
47
class LayerResult(BaseModel):
    """All metrics computed from the embeddings of one model layer."""

    layer_number: int  # Index of the model layer the metrics were computed from.
    layer_display_name: str  # Label for display (e.g. "mid", "last", or the index).
    metrics: List[TaskMetric]
51
+
52
+
53
class GEBModel(BaseModel):
    """Minimal description of an evaluated embedding model."""

    hf_name: str  # Hugging Face model identifier, e.g. "facebook/esm2_t12_35M_UR50D".
    num_layers: int
    num_params: int  # Total parameter count.
    embed_dim: int  # Dimensionality of the output embeddings.
58
+
59
+
60
class Dataset(BaseModel):
    """A Hugging Face dataset pinned to a specific revision."""

    path: str
    revision: str

    def load(self) -> datasets.DatasetDict:
        """Download the dataset and return it, ensuring it is a DatasetDict."""
        loaded = datasets.load_dataset(self.path, revision=self.revision)
        if isinstance(loaded, datasets.DatasetDict):
            return loaded
        raise ValueError(
            f"Dataset {self.path} is not a datasets.DatasetDict object."
        )
71
+
72
+
73
class TaskMetadata(BaseModel):
    """Declarative description of a benchmark task."""

    id: str  # Unique machine-readable task identifier.
    display_name: str  # Human-readable name shown in the leaderboard.
    description: str
    modality: Modality  # DNA or protein sequences.
    type: TaskType
    # List of datasets used by the task.
    # Each dataset is a dict of all arguments to pass to `datasets.load_dataset()`.
    datasets: List[Dataset]
    primary_metric_id: str  # Metric id used for ranking/aggregation.
83
+
84
+
85
+ # tasks.py
86
class TaskResult(BaseModel):
    """Results of running one benchmark task against one model."""

    dgeb_version: str  # Package version that produced this result.
    task: "TaskMetadata"
    # TODO: Convert model to ModelMetadata
    model: GEBModel
    results: List[LayerResult]

    @model_validator(mode="after")
    def check_valid_primary_metric(self):
        """Ensure every layer's metrics include the task's primary metric."""
        for result in self.results:
            if all(
                metric.id != self.task.primary_metric_id for metric in result.metrics
            ):
                raise ValueError(
                    f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
                )
        return self

    @staticmethod
    def from_dict(
        task_metadata: "TaskMetadata",
        # Fix: the original annotation (`LayerResult`) was wrong — the task
        # runners pass a plain `{"layers": {layer: {metric_id: value}}}` dict.
        layer_results: dict,
        model_metadata: GEBModel,
    ):
        """Build a TaskResult from the nested dict produced by task runners.

        Args:
            task_metadata: Metadata of the task that was run.
            layer_results: Mapping `{"layers": {layer: {metric_id: value}}}`.
            model_metadata: Description of the evaluated model.

        Returns:
            A validated TaskResult instance.
        """
        return TaskResult(
            dgeb_version=version("dgeb"),
            task=task_metadata,
            model=model_metadata,
            results=[
                LayerResult(
                    layer_number=int(layer),
                    layer_display_name=str(layer),
                    metrics=[
                        TaskMetric(id=metric, display_name=metric, value=value)
                        for metric, value in metrics.items()
                    ],
                )
                for layer, metrics in layer_results["layers"].items()
            ],
        )
126
+
127
+
128
+ # move to model.py?
129
class Task(ABC):
    """Abstract base class for a DGEB benchmark task.

    Subclasses declare `metadata` (id, datasets, primary metric) and implement
    `run()` to evaluate a model and return a TaskResult.
    """

    # Declarative description of the task (id, datasets, primary metric, ...).
    metadata: TaskMetadata

    # using Any instead of "BioSeqTransformer" to avoid installing all deps in leaderboard
    @abstractmethod
    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        """Run the task on `model`, optionally restricted to specific layers."""
        pass
docker-compose.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Compose setup for serving the DGEB leaderboard container.
version: "3"
services:
  dgeb-leaderboard:
    build:
      context: ./
      dockerfile: Dockerfile
    ports:
      # NOTE(review): host port 7680 maps to container port 7860 (Gradio's
      # default). 7680 looks like a transposition of 7860 — confirm the
      # intended host port.
      - "7680:7860"
docs/images/tatta_logo.png ADDED
leaderboard/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ /.projectile
2
+ **/__pycache__/
leaderboard/DGEB_Figure.png ADDED
leaderboard/README.md ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # to set up hf repo to receive origin pushes
2
+ git remote set-url --add origin [email protected]:spaces/tattabio/DGEB
leaderboard/__init__.py ADDED
File without changes
leaderboard/app.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import json
3
+ from pathlib import Path
4
+ import gradio as gr
5
+ from typing import List
6
+ import pandas as pd
7
+ import importlib.util
8
+ from pydantic import ValidationError, parse_obj_as
9
+
10
+ SIG_FIGS = 4
11
+
12
+ # HACK: very hacky way to import from parent directory, while avoiding needing all the deps of the parent package
13
+ modality_path = "../dgeb/modality.py"
14
+ spec = importlib.util.spec_from_file_location("modality", modality_path)
15
+ modality = importlib.util.module_from_spec(spec)
16
+ spec.loader.exec_module(modality)
17
+ Modality = modality.Modality
18
+
19
+
20
+ tasks_path = "../dgeb/tasks/tasks.py"
21
+
22
+ # Load the module
23
+ spec = importlib.util.spec_from_file_location("tasks", tasks_path)
24
+ tasks = importlib.util.module_from_spec(spec)
25
+ spec.loader.exec_module(tasks)
26
+ TaskResult = tasks.TaskResult
27
+ GEBModel = tasks.GEBModel
28
+
29
+
30
+ # Assuming the class definitions provided above are complete and imported here
31
+
32
+
33
def format_num_params(param: int) -> str:
    """Render a parameter count as a short human-readable string.

    Counts of one million or more are truncated to whole millions with an
    "M" suffix (e.g. 33992881 -> "33M"); smaller counts get a thousands
    separator (e.g. 1234 -> "1,234").
    """
    MILLION = 1_000_000
    if param < MILLION:
        return f"{param:,}"
    return f"{int(param / MILLION)}M"
45
+
46
+
47
def load_json_files_from_directory(directory_path: Path) -> List[dict]:
    """
    Recursively collect the parsed contents of every *.json file under a directory.

    :param directory_path: Root directory to search for JSON files.
    :return: Parsed JSON payloads, one entry per successfully-read file.
    """
    collected: List[dict] = []
    # rglob walks the whole tree, including nested submission folders.
    for json_file in directory_path.rglob("*.json"):
        try:
            with open(json_file, "r", encoding="utf-8") as file:
                collected.append(json.load(file))
        except Exception as e:
            # Best-effort: a malformed file is reported but does not abort the scan.
            print(f"Error loading {json_file}: {e}")
    return collected
63
+
64
+
65
def load_results() -> List[TaskResult]:
    """
    Recursively load JSON files in ./submissions/** and return a list of TaskResult objects.
    """
    raw_payloads = load_json_files_from_directory(Path("./submissions"))

    parsed: List[TaskResult] = []
    for payload in raw_payloads:
        try:
            # Pydantic validates each submission against the TaskResult schema.
            parsed.append(parse_obj_as(TaskResult, payload))
        except ValidationError as e:
            # Report which payload failed validation, then abort loading.
            print(f"Error parsing TaskResult object: {e}")
            raise e

    return parsed
84
+
85
+
86
def task_results_to_dgeb_score(
    model: GEBModel, model_results: List[TaskResult]
) -> dict:
    """Aggregate one model's task results into a single DGEB-score row.

    For each task, the best primary-metric value across all reported layers
    is taken; the DGEB score is the mean of those per-task bests.
    """
    best_scores_per_task = []
    modalities_seen = set()
    for task_result in model_results:
        modalities_seen.add(task_result.task.modality)
        assert (
            task_result.model.hf_name == model.hf_name
        ), f"Model names do not match, {task_result.model.hf_name} != {model.hf_name}"
        primary_metric_id = task_result.task.primary_metric_id
        # Collect the primary metric from every reported layer, keep the best.
        per_layer_scores = [
            metric.value
            for result in task_result.results
            for metric in result.metrics
            if metric.id == primary_metric_id
        ]
        best_scores_per_task.append(max(per_layer_scores))

    assert (
        len(modalities_seen) == 1
    ), f"Multiple modalities found for model {model.hf_name}"
    assert len(best_scores_per_task) > 0, f"No tasks found for model {model.hf_name}"
    # DGEB score = mean of the per-task best primary-metric scores.
    dgeb_score = sum(best_scores_per_task) / len(best_scores_per_task)
    return {
        "Task Name": "DGEB Score",
        "Task Category": "DGEB",
        "Model": model.hf_name,
        "Modality": list(modalities_seen)[0],
        "Num. Parameters (millions)": format_num_params(model.num_params),
        "Emb. Dimension": model.embed_dim,
        "Score": dgeb_score,
    }
121
+
122
+
123
def task_results_to_df(model_results: List[TaskResult]) -> pd.DataFrame:
    """Flatten TaskResult objects into one leaderboard row per (task, model, layer).

    Only the "mid" and "last" layers of each model are kept; other layers are
    skipped. One extra "DGEB Score" row per model is appended at the end.
    Note: this mutates `layer.layer_display_name` on the input objects in place.
    """
    # Initialize an empty list to hold all rows of data
    data_rows = []
    all_models = {}
    for res in model_results:
        task = res.task
        model = res.model
        all_models[model.hf_name] = model
        print(f"Processing {task.display_name} for {model.hf_name}")
        for layer in res.results:
            # Layers are zero-indexed, so the last layer index is num_layers - 1.
            total_layers = model.num_layers - 1
            mid_layer = math.ceil(total_layers / 2)
            if mid_layer == layer.layer_number:
                layer.layer_display_name = "mid"
            elif total_layers == layer.layer_number:
                layer.layer_display_name = "last"

            if layer.layer_display_name not in ["mid", "last"]:
                # calculate if the layer is mid or last
                print(
                    f"Layer {layer.layer_number} is not mid or last out of {total_layers}. Skipping"
                )
                continue
            else:
                # For each Metric in the Layer
                # pivoting the data so that each metric is a row
                metric_ids = []
                primary_metric_label = f"{task.primary_metric_id} (primary metric)"
                for metric in layer.metrics:
                    if task.primary_metric_id == metric.id:
                        metric_ids.append(primary_metric_label)
                    else:
                        metric_ids.append(metric.id)

                metric_values = [metric.value for metric in layer.metrics]
                zipped = zip(metric_ids, metric_values)
                # sort primary metric id first (stable sort: False < True)
                sorted_zip = sorted(
                    zipped,
                    key=lambda x: x[0] != primary_metric_label,
                )
                # One row per layer; metric columns are spread via **dict().
                data_rows.append(
                    {
                        "Task Name": task.display_name,
                        "Task Category": task.type,
                        "Model": model.hf_name,
                        "Num. Parameters (millions)": format_num_params(
                            model.num_params
                        ),
                        "Emb. Dimension": model.embed_dim,
                        "Modality": task.modality,
                        "Layer": layer.layer_display_name,
                        **dict(sorted_zip),
                    }
                )
    # Append one aggregate "DGEB Score" row per model.
    for model_name, model in all_models.items():
        results_for_model = [
            res for res in model_results if res.model.hf_name == model_name
        ]
        assert len(results_for_model) > 0, f"No results found for model {model_name}"
        dgeb_score_record = task_results_to_dgeb_score(model, results_for_model)
        print(f'model {model.hf_name} dgeb score: {dgeb_score_record["Score"]}')
        data_rows.append(dgeb_score_record)
    print("Finished processing all results")
    df = pd.DataFrame(data_rows)
    return df
189
+
190
+
191
# --- Module-level script: load submissions and build the Gradio leaderboard UI. ---
df = task_results_to_df(load_results())
image_path = "./DGEB_Figure.png"
with gr.Blocks() as demo:
    gr.Label("Diverse Genomic Embedding Benchmark", show_label=False, scale=2)
    # Figure is served from the local directory (see allowed_paths in launch()).
    gr.HTML(
        f"<img src='file/{image_path}' alt='DGEB Figure' style='border-radius: 0.8rem; width: 50%; margin-left: auto; margin-right: auto; margin-top:12px;'>"
    )
    gr.HTML(
        """
        <div style='width: 50%; margin-left: auto; margin-right: auto; padding-bottom: 8px;text-align: center;'>
        DGEB Leaderboard. To submit, refer to the <a href="https://github.com/TattaBio/DGEB/blob/leaderboard/README.md" target="_blank" style="text-decoration: underline">DGEB GitHub repository</a> Refer to the <a href="https://example.com" target="_blank" style="text-decoration: underline">DGEB paper</a> for details on metrics, tasks, and models.
        </div>
        """
    )

    # One top-level tab per task category, with the aggregate "DGEB" tab first.
    unique_categories = df["Task Category"].unique()
    # sort "DGEB" to the start
    unique_categories = sorted(unique_categories, key=lambda x: x != "DGEB")
    for category in unique_categories:
        with gr.Tab(label=category):
            unique_tasks_in_category = df[df["Task Category"] == category][
                "Task Name"
            ].unique()
            # sort "Overall" to the start
            unique_tasks_in_category = sorted(
                unique_tasks_in_category, key=lambda x: x != "Overall"
            )
            # One nested tab per task, each showing a ranked dataframe.
            for task in unique_tasks_in_category:
                with gr.Tab(label=task):
                    columns_to_hide = ["Task Name", "Task Category"]
                    # get rows where Task Name == task and Task Category == category
                    filtered_df = (
                        df[
                            (df["Task Name"] == task)
                            & (df["Task Category"] == category)
                        ].drop(columns=columns_to_hide)
                    ).dropna(axis=1, how="all")  # drop all NaN columns for Overall tab
                    # round all values to 4 decimal places
                    rounded_df = filtered_df.round(SIG_FIGS)

                    # calculate ranking column
                    # if in Overview tab, rank by average metric value
                    if task == "Overall":
                        # rank by average col
                        rounded_df["Rank"] = filtered_df["Average"].rank(
                            ascending=False
                        )
                    else:
                        # Rank by the sum of all metric columns (non-metric
                        # columns are excluded below).
                        avoid_cols = [
                            "Model",
                            "Emb. Dimension",
                            "Num. Parameters (millions)",
                            "Modality",
                            "Layer",
                        ]
                        rounded_df["Rank"] = (
                            rounded_df.drop(columns=avoid_cols, errors="ignore")
                            .sum(axis=1)
                            .rank(ascending=False)
                        )
                    # make Rank first column
                    cols = list(rounded_df.columns)
                    cols.insert(0, cols.pop(cols.index("Rank")))
                    rounded_df = rounded_df[cols]
                    # sort by rank
                    rounded_df = rounded_df.sort_values("Rank")
                    data_frame = gr.DataFrame(rounded_df)


demo.launch(allowed_paths=["."])
leaderboard/requirements.txt ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.5
3
+ aiosignal==1.3.1
4
+ altair==5.3.0
5
+ annotated-types==0.7.0
6
+ anyio==4.4.0
7
+ attrs==23.2.0
8
+ certifi==2024.6.2
9
+ charset-normalizer==3.3.2
10
+ click==8.1.7
11
+ contourpy==1.2.1
12
+ cycler==0.12.1
13
+ datasets==2.14.4
14
+ dill==0.3.7
15
+ dnspython==2.6.1
16
+ email-validator==2.1.2
17
+ fastapi==0.111.0
18
+ fastapi-cli==0.0.4
19
+ ffmpy==0.3.2
20
+ filelock==3.15.1
21
+ fonttools==4.53.0
22
+ frozenlist==1.4.1
23
+ fsspec==2024.6.0
24
+ gradio==4.37.2
25
+ gradio-client==1.0.2
26
+ h11==0.14.0
27
+ httpcore==1.0.5
28
+ httptools==0.6.1
29
+ httpx==0.27.0
30
+ huggingface-hub==0.23.4
31
+ idna==3.7
32
+ importlib-resources==6.4.0
33
+ jinja2==3.1.4
34
+ jsonschema==4.22.0
35
+ jsonschema-specifications==2023.12.1
36
+ kiwisolver==1.4.5
37
+ markdown-it-py==3.0.0
38
+ markupsafe==2.1.5
39
+ matplotlib==3.9.0
40
+ mdurl==0.1.2
41
+ multidict==6.0.5
42
+ multiprocess==0.70.15
43
+ numpy==2.0.0
44
+ orjson==3.10.5
45
+ packaging==24.1
46
+ pandas==2.2.2
47
+ pillow==10.3.0
48
+ pyarrow==16.1.0
49
+ pydantic==2.7.4
50
+ pydantic-core==2.18.4
51
+ pydub==0.25.1
52
+ pygments==2.18.0
53
+ pyparsing==3.1.2
54
+ python-dateutil==2.9.0.post0
55
+ python-dotenv==1.0.1
56
+ python-multipart==0.0.9
57
+ pytz==2024.1
58
+ pyyaml==6.0.1
59
+ referencing==0.35.1
60
+ requests==2.32.3
61
+ rich==13.7.1
62
+ rpds-py==0.18.1
63
+ ruff==0.4.9
64
+ semantic-version==2.10.0
65
+ shellingham==1.5.4
66
+ six==1.16.0
67
+ sniffio==1.3.1
68
+ starlette==0.37.2
69
+ tomlkit==0.12.0
70
+ toolz==0.12.1
71
+ tqdm==4.66.4
72
+ typer==0.12.3
73
+ typing-extensions==4.12.2
74
+ tzdata==2024.1
75
+ ujson==5.10.0
76
+ urllib3==2.2.2
77
+ uvicorn==0.30.1
78
+ uvloop==0.19.0
79
+ watchfiles==0.22.0
80
+ websockets==11.0.3
81
+ xxhash==3.4.1
82
+ yarl==1.9.4
leaderboard/submissions/.DS_Store ADDED
Binary file (12.3 kB). View file
 
leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "MIBIG_protein_classification",
4
+ "display_name": "MIBiG Classification",
5
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mibig_classification_prot",
11
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "f1",
31
+ "display_name": "f1",
32
+ "description": null,
33
+ "value": 0.6537260383267297
34
+ },
35
+ {
36
+ "id": "accuracy",
37
+ "display_name": "accuracy",
38
+ "description": null,
39
+ "value": 0.6689342403628118
40
+ },
41
+ {
42
+ "id": "precision",
43
+ "display_name": "precision",
44
+ "description": null,
45
+ "value": 0.7853286513915045
46
+ },
47
+ {
48
+ "id": "recall",
49
+ "display_name": "recall",
50
+ "description": null,
51
+ "value": 0.6020175670931918
52
+ },
53
+ {
54
+ "id": "lrap",
55
+ "display_name": "lrap",
56
+ "description": null,
57
+ "value": 0.798563869992442
58
+ }
59
+ ]
60
+ },
61
+ {
62
+ "layer_number": 11,
63
+ "layer_display_name": "11",
64
+ "metrics": [
65
+ {
66
+ "id": "f1",
67
+ "display_name": "f1",
68
+ "description": null,
69
+ "value": 0.645844633541225
70
+ },
71
+ {
72
+ "id": "accuracy",
73
+ "display_name": "accuracy",
74
+ "description": null,
75
+ "value": 0.655328798185941
76
+ },
77
+ {
78
+ "id": "precision",
79
+ "display_name": "precision",
80
+ "description": null,
81
+ "value": 0.7407876819384401
82
+ },
83
+ {
84
+ "id": "recall",
85
+ "display_name": "recall",
86
+ "description": null,
87
+ "value": 0.5970376985838431
88
+ },
89
+ {
90
+ "id": "lrap",
91
+ "display_name": "lrap",
92
+ "description": null,
93
+ "value": 0.7849584278155715
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "arch_retrieval",
4
+ "display_name": "Arch Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/arch_retrieval",
11
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
12
+ },
13
+ {
14
+ "path": "tattabio/arch_retrieval_qrels",
15
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.84127
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.82701
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.79635
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.27329
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.37939
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.64453
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.2839
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.40033
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.70443
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.7621
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.69407
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.42452
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.8853108550291645
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.8879126611520968
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.8892435700922602
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.6178391415234327
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.27510768020625387
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.2751226626247053
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.6158935362175889
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.29490376307826244
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.3173510395378902
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6282820888186709
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.217967587602592
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.3392167130961565
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": 0.02706102865662817
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.33465305568189146
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.29252115202920864
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.1461797349288265
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.3984979781227535
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.15678893453735943
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.5443958382387585
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.3379769732428374
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.23212587702223994
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": 0.008899383756080657
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.3376357180005265
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.2949278653804833
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.11957594632298725
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.4084900248156052
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.16409679466126934
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.5478175261971683
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.3566768602643857
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.24770750166012404
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5588205820812548
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.053528426968584814
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.5895997876864452
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.5109397710788774
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": -0.0014360394688449447
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.5972188824684267
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.30493219390483955
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.35096314542920914
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.4163370977258702
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.6041064087877195
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.2995447501683336
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.1176892239839227
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.6055526314461911
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.3015594122136539
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.11951448723943421
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.6050403183375579
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.3012299482545067
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.12091114334431136
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 11,
397
+ "layer_display_name": "11",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.82819
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.81615
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.78982
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.27067
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.37321
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.63596
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.27906
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.39106
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.69746
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.7487
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.68506
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.42266
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8752382984777344
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.878253189168681
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8795454419523189
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.6238124910465183
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.3878031710482511
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.22961445620397436
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.6136556294192528
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.4027695454909326
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.23933162739820324
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.6039490411056802
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.379240829313294
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.23134380586116654
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": -0.018274861348075953
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.3153330580523699
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.31839102956934573
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.10106646301687382
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.4143687386138405
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.18923312509326384
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.5144031685310609
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.45693618989546114
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.1513413062960939
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": -0.031265621786664255
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.32028522957198785
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.32056979656535384
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.07820354892522365
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.42551786412535775
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.2040509113490322
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.5060801621108716
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.5071691349011768
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": -0.11952783139053508
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5923656191314365
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.1954332256400316
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.5508269378169939
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.5458701611463479
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.12975949111453675
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.5537528325655148
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.3549845967268747
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.26254902560124815
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.3919186481758992
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.6284613562335846
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.3609822238622607
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": -0.13691647729285375
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.6282780633119702
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.36649482857679033
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": -0.1301211341279461
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.6290574535816186
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.367920824556504
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": -0.13036774230606793
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "bacarch_bigene",
4
+ "display_name": "BacArch BiGene",
5
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/bac_arch_bigene",
11
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "precision",
31
+ "display_name": "precision",
32
+ "description": null,
33
+ "value": 0.6215094339622641
34
+ },
35
+ {
36
+ "id": "recall",
37
+ "display_name": "recall",
38
+ "description": null,
39
+ "value": 0.7056603773584905
40
+ },
41
+ {
42
+ "id": "f1",
43
+ "display_name": "f1",
44
+ "description": null,
45
+ "value": 0.6469182389937107
46
+ },
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.7056603773584905
52
+ }
53
+ ]
54
+ },
55
+ {
56
+ "layer_number": 11,
57
+ "layer_display_name": "11",
58
+ "metrics": [
59
+ {
60
+ "id": "precision",
61
+ "display_name": "precision",
62
+ "description": null,
63
+ "value": 0.6138364779874214
64
+ },
65
+ {
66
+ "id": "recall",
67
+ "display_name": "recall",
68
+ "description": null,
69
+ "value": 0.7018867924528301
70
+ },
71
+ {
72
+ "id": "f1",
73
+ "display_name": "f1",
74
+ "description": null,
75
+ "value": 0.6413836477987421
76
+ },
77
+ {
78
+ "id": "accuracy",
79
+ "display_name": "accuracy",
80
+ "description": null,
81
+ "value": 0.7018867924528301
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "convergent_enzymes_classification",
4
+ "display_name": "Convergent Enzymes Classification",
5
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/convergent_enzymes",
11
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.2475
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.20116666666666666
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 11,
45
+ "layer_display_name": "11",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.2425
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.19904761904761906
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "cyano_operonic_pair",
4
+ "display_name": "Cyano Operonic Pair",
5
+ "description": "Evaluate on Cyano operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/cyano_operonic_pair",
11
+ "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.7203065134099617
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.990619957447052
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.44058665070338227
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.815308690071106
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.28253358925143957
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 1.0
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.32424099100055437
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.7187739463601532
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 40.061012268066406
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.43963963963963965
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 380.5898742675781
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.28218966846569005
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9945652173913043
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.3051200502841412
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.7187739463601532
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 2.2720906734466553
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.4404548174745661
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 25.41253662109375
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.28242517267843437
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 1.0
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.3117112729287826
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.7206896551724138
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1764.11328125
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.44177215189873426
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 1021.9218139648438
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.28795379537953797
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9483695652173914
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.35181607664099845
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.35181607664099845
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.7206896551724138
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9833309650421143
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.4454067429631921
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.8805520534515381
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.2883460152182619
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.9782608695652174
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.3325946475342702
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.721455938697318
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 230.74539184570312
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.4439615026389321
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 690.979248046875
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.28772635814889336
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.9714673913043478
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.33577510329678106
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.7210727969348659
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 13.784924507141113
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.44413697682462816
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 39.12321472167969
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.29791183294663576
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.8722826086956522
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.33823458280589236
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.7191570881226054
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 10542.0
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.4403230631169608
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 4913.24560546875
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.2823168392788646
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 1.0
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.28278909833025945
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.33823458280589236
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "ec_classification",
4
+ "display_name": "EC Classification",
5
+ "description": "Evaluate on Enzyme Commission number classification task.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/ec_classification",
11
+ "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.6015625
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.55390625
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 11,
45
+ "layer_display_name": "11",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.5546875
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.5096354166666667
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "ecoli_operonic_pair",
4
+ "display_name": "E.coli Operonic Pair",
5
+ "description": "Evaluate on E.coli K-12 operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/ecoli_operonic_pair",
11
+ "revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.6309689383402874
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.9664175510406494
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.5831148400629261
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.876137375831604
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.41972823351786614
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 0.954779622209502
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.5226436718954207
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.6237830319888734
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 151.0961456298828
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.5765230312035661
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 417.6656494140625
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.4051044083526682
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9994275901545506
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.5038561800803791
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.624246638850255
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 9.827131271362305
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.5778148457047539
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 23.485851287841797
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.4077212806026365
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 0.9914138523182598
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.5109707609256201
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.6200741770978211
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1509.6474609375
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.576427863981512
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 827.195556640625
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.40501043841336115
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9994275901545506
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.498147478687894
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.5226436718954207
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.6305053314789059
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9585829377174377
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.5934650455927052
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.9002124071121216
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.44412851862382713
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.8941041785918717
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.545021841060869
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.6342141863699583
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 444.21954345703125
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.6035735322992343
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 612.2872314453125
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.45935445307830247
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.8797939324556382
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.5574639922170803
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.6339823829392675
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 29.62457275390625
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.5996841689696012
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 38.6270751953125
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.45766797228080747
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.86949055523755
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.5553872058517757
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.5948076031525267
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 14395.623046875
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.577018736527939
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 5674.908203125
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.4061624649859944
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 0.9959931310818546
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.3862357442891778
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.5574639922170803
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "euk_retrieval",
4
+ "display_name": "Euk Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/euk_retrieval",
11
+ "revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
12
+ },
13
+ {
14
+ "path": "tattabio/euk_retrieval_qrels",
15
+ "revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.80067
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.79455
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.77429
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.30914
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.41095
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.60087
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.31905
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.43473
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.66233
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.7119
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.63408
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.3663
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.8471596998928188
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.8513856989741232
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.8527043294326252
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.7166495695870103
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.5383304196281262
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.38408074718110424
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.71056350273151
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.5386325626626473
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.3678412023083028
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6787542765531929
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.4678010355684318
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.3023078330221261
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": 0.17506411594869709
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.344228905317099
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.26025197550499063
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.28364735198157687
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.4946084063548821
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.13024980686869012
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.6456837506614725
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.5024354435806796
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.18849105999507082
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": 0.15537143366366737
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.3338972930408563
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.27534514133854515
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.24230061291494534
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.4763992415794819
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.17167004025145782
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.6062660448007379
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.45445564371902375
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.09621042247019258
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5420327575630611
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.37248428210075407
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.6517795575595553
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.46182346579179107
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": 0.2556997419766225
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.6371093546193429
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.22395520722060117
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.27077611986871364
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.4324048296185153
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.7966902615822546
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.5623896062382641
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.27875113624180275
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.7982850278647994
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.5623589312727257
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.27578274493030464
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.7977600079745486
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.5625363754999084
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.2708948491113527
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 11,
397
+ "layer_display_name": "11",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.79574
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.7872
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.76804
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.30344
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.40308
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.59158
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.31068
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.41808
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.64688
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.70611
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.63055
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.36862
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8521436227224009
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.8555504516919309
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8571980685347454
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.687147173549288
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.534917528750057
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.039388068191112346
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.6821413074357394
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.541004104911246
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.06613569078084217
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.6546658854714889
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.5141528362539365
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.045010206374762184
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": 0.1717014705213338
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.298486867259319
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.3158992753503486
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.29394629114728443
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.4807193931287969
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.200767704240122
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.6266013107050147
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.5400967080146492
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.06821295960747309
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": 0.15728927641821855
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.3020952193182204
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.3196038571595756
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.273851179897414
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.4822263524474807
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.1998852576547706
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.610064992339158
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.5237697244132881
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": -0.047861477876695854
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5642831983945668
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.41268016275342806
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.3902377594145758
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.4757631079174044
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.32238368240767273
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.4280345103983777
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.19318747544949869
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.2262940005534252
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.2898939009819229
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.7559907957579797
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.5232164154691852
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": 0.016325972601983724
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.7604182097391701
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.5188685708290457
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": 0.008720431706015956
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.7617325890747185
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.5213157058041827
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": 0.015621035073521741
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "fefe_phylogeny",
4
+ "display_name": "FeFeHydrogenase Phylogeny",
5
+ "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/fefe_phylogeny_sequences",
11
+ "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
12
+ },
13
+ {
14
+ "path": "tattabio/fefe_phylogeny_distances",
15
+ "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.46213607103563425
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.5621218764061721
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.5442663405841599
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.5621218764061721
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.1524486344353939
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.5194125891005561
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.48868066660269227
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.5194125891005561
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "modac_paralogy_bigene",
4
+ "display_name": "ModAC Paralogy BiGene",
5
+ "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/modac_paralogy_bigene",
11
+ "revision": "241ca6397856e3360da04422d54933035b1fab87"
12
+ }
13
+ ],
14
+ "primary_metric_id": "recall_at_50"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "num_layers": 12,
19
+ "num_params": 33992881,
20
+ "embed_dim": 480
21
+ },
22
+ "dgeb_version": "0.0.0",
23
+ "results": [
24
+ {
25
+ "layer_number": 6,
26
+ "layer_display_name": "6",
27
+ "metrics": [
28
+ {
29
+ "id": "precision",
30
+ "display_name": "precision",
31
+ "description": null,
32
+ "value": 4.4952467261118094e-7
33
+ },
34
+ {
35
+ "id": "recall",
36
+ "display_name": "recall",
37
+ "description": null,
38
+ "value": 0.0006702412868632708
39
+ },
40
+ {
41
+ "id": "f1",
42
+ "display_name": "f1",
43
+ "description": null,
44
+ "value": 8.984467652322665e-7
45
+ },
46
+ {
47
+ "id": "accuracy",
48
+ "display_name": "accuracy",
49
+ "description": null,
50
+ "value": 0.0006702412868632708
51
+ },
52
+ {
53
+ "id": "recall_at_50",
54
+ "display_name": "recall_at_50",
55
+ "description": null,
56
+ "value": 0.03485254691689008
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "layer_number": 11,
62
+ "layer_display_name": "11",
63
+ "metrics": [
64
+ {
65
+ "id": "precision",
66
+ "display_name": "precision",
67
+ "description": null,
68
+ "value": 4.4952467261118094e-7
69
+ },
70
+ {
71
+ "id": "recall",
72
+ "display_name": "recall",
73
+ "description": null,
74
+ "value": 0.0006702412868632708
75
+ },
76
+ {
77
+ "id": "f1",
78
+ "display_name": "f1",
79
+ "description": null,
80
+ "value": 8.984467652322665e-7
81
+ },
82
+ {
83
+ "id": "accuracy",
84
+ "display_name": "accuracy",
85
+ "description": null,
86
+ "value": 0.0006702412868632708
87
+ },
88
+ {
89
+ "id": "recall_at_50",
90
+ "display_name": "recall_at_50",
91
+ "description": null,
92
+ "value": 0.05361930294906166
93
+ }
94
+ ]
95
+ }
96
+ ]
97
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "mopb_clustering",
4
+ "display_name": "MopB Clustering",
5
+ "description": "Evaluate on MopB clustering task.",
6
+ "modality": "protein",
7
+ "type": "clustering",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mopb_clustering",
11
+ "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
12
+ }
13
+ ],
14
+ "primary_metric_id": "v_measure"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "v_measure",
31
+ "display_name": "v_measure",
32
+ "description": null,
33
+ "value": 0.7366377426487285
34
+ }
35
+ ]
36
+ },
37
+ {
38
+ "layer_number": 11,
39
+ "layer_display_name": "11",
40
+ "metrics": [
41
+ {
42
+ "id": "v_measure",
43
+ "display_name": "v_measure",
44
+ "description": null,
45
+ "value": 0.7842647128962572
46
+ }
47
+ ]
48
+ }
49
+ ]
50
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "rpob_arch_phylogeny",
4
+ "display_name": "RpoB Archaeal Phylogeny",
5
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/rpob_arch_phylogeny_sequences",
11
+ "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
12
+ },
13
+ {
14
+ "path": "tattabio/rpob_arch_phylogeny_distances",
15
+ "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.2624971928673971
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.31502824152693154
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.3088945849814121
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.31502824152693154
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.34668475738519444
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.372455403853565
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.369729316093801
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.372455403853565
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "rpob_bac_phylogeny",
4
+ "display_name": "RpoB Bacterial Phylogeny",
5
+ "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
6
+ "modality": "protein",
7
+ "type": "eds",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/rpob_bac_phylogeny_sequences",
11
+ "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
12
+ },
13
+ {
14
+ "path": "tattabio/rpob_bac_phylogeny_distances",
15
+ "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
16
+ }
17
+ ],
18
+ "primary_metric_id": "top_corr"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 12,
24
+ "num_params": 33992881,
25
+ "embed_dim": 480
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 6,
31
+ "layer_display_name": "6",
32
+ "metrics": [
33
+ {
34
+ "id": "cos_sim",
35
+ "display_name": "cos_sim",
36
+ "description": null,
37
+ "value": 0.12971577033648743
38
+ },
39
+ {
40
+ "id": "manhattan",
41
+ "display_name": "manhattan",
42
+ "description": null,
43
+ "value": 0.18177734472255433
44
+ },
45
+ {
46
+ "id": "euclidean",
47
+ "display_name": "euclidean",
48
+ "description": null,
49
+ "value": 0.16423413011355156
50
+ },
51
+ {
52
+ "id": "top_corr",
53
+ "display_name": "top_corr",
54
+ "description": null,
55
+ "value": 0.18177734472255433
56
+ }
57
+ ]
58
+ },
59
+ {
60
+ "layer_number": 11,
61
+ "layer_display_name": "11",
62
+ "metrics": [
63
+ {
64
+ "id": "cos_sim",
65
+ "display_name": "cos_sim",
66
+ "description": null,
67
+ "value": 0.10194557773024183
68
+ },
69
+ {
70
+ "id": "manhattan",
71
+ "display_name": "manhattan",
72
+ "description": null,
73
+ "value": 0.18622026845391912
74
+ },
75
+ {
76
+ "id": "euclidean",
77
+ "display_name": "euclidean",
78
+ "description": null,
79
+ "value": 0.15405389239655473
80
+ },
81
+ {
82
+ "id": "top_corr",
83
+ "display_name": "top_corr",
84
+ "description": null,
85
+ "value": 0.18622026845391912
86
+ }
87
+ ]
88
+ }
89
+ ]
90
+ }
leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "vibrio_operonic_pair",
4
+ "display_name": "Vibrio Operonic Pair",
5
+ "description": "Evaluate on Vibrio operonic pair classification task.",
6
+ "modality": "protein",
7
+ "type": "pair_classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/vibrio_operonic_pair",
11
+ "revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
12
+ }
13
+ ],
14
+ "primary_metric_id": "top_ap"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t12_35M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 12,
20
+ "num_params": 33992881,
21
+ "embed_dim": 480
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 6,
27
+ "layer_display_name": "6",
28
+ "metrics": [
29
+ {
30
+ "id": "cos_sim_accuracy",
31
+ "display_name": "cos_sim_accuracy",
32
+ "description": null,
33
+ "value": 0.6781966575981345
34
+ },
35
+ {
36
+ "id": "cos_sim_accuracy_threshold",
37
+ "display_name": "cos_sim_accuracy_threshold",
38
+ "description": null,
39
+ "value": 0.970278263092041
40
+ },
41
+ {
42
+ "id": "cos_sim_f1",
43
+ "display_name": "cos_sim_f1",
44
+ "description": null,
45
+ "value": 0.518608169440242
46
+ },
47
+ {
48
+ "id": "cos_sim_f1_threshold",
49
+ "display_name": "cos_sim_f1_threshold",
50
+ "description": null,
51
+ "value": 0.8757017254829407
52
+ },
53
+ {
54
+ "id": "cos_sim_precision",
55
+ "display_name": "cos_sim_precision",
56
+ "description": null,
57
+ "value": 0.35501242750621376
58
+ },
59
+ {
60
+ "id": "cos_sim_recall",
61
+ "display_name": "cos_sim_recall",
62
+ "description": null,
63
+ "value": 0.9618406285072951
64
+ },
65
+ {
66
+ "id": "cos_sim_ap",
67
+ "display_name": "cos_sim_ap",
68
+ "description": null,
69
+ "value": 0.4581544787406372
70
+ },
71
+ {
72
+ "id": "manhattan_accuracy",
73
+ "display_name": "manhattan_accuracy",
74
+ "description": null,
75
+ "value": 0.6731441896618733
76
+ },
77
+ {
78
+ "id": "manhattan_accuracy_threshold",
79
+ "display_name": "manhattan_accuracy_threshold",
80
+ "description": null,
81
+ "value": 137.3688507080078
82
+ },
83
+ {
84
+ "id": "manhattan_f1",
85
+ "display_name": "manhattan_f1",
86
+ "description": null,
87
+ "value": 0.5146164978292329
88
+ },
89
+ {
90
+ "id": "manhattan_f1_threshold",
91
+ "display_name": "manhattan_f1_threshold",
92
+ "description": null,
93
+ "value": 391.87298583984375
94
+ },
95
+ {
96
+ "id": "manhattan_precision",
97
+ "display_name": "manhattan_precision",
98
+ "description": null,
99
+ "value": 0.3467238689547582
100
+ },
101
+ {
102
+ "id": "manhattan_recall",
103
+ "display_name": "manhattan_recall",
104
+ "description": null,
105
+ "value": 0.9977553310886644
106
+ },
107
+ {
108
+ "id": "manhattan_ap",
109
+ "display_name": "manhattan_ap",
110
+ "description": null,
111
+ "value": 0.4383109013756369
112
+ },
113
+ {
114
+ "id": "euclidean_accuracy",
115
+ "display_name": "euclidean_accuracy",
116
+ "description": null,
117
+ "value": 0.672755538282161
118
+ },
119
+ {
120
+ "id": "euclidean_accuracy_threshold",
121
+ "display_name": "euclidean_accuracy_threshold",
122
+ "description": null,
123
+ "value": 8.506048202514648
124
+ },
125
+ {
126
+ "id": "euclidean_f1",
127
+ "display_name": "euclidean_f1",
128
+ "description": null,
129
+ "value": 0.5152786099460755
130
+ },
131
+ {
132
+ "id": "euclidean_f1_threshold",
133
+ "display_name": "euclidean_f1_threshold",
134
+ "description": null,
135
+ "value": 21.124141693115234
136
+ },
137
+ {
138
+ "id": "euclidean_precision",
139
+ "display_name": "euclidean_precision",
140
+ "description": null,
141
+ "value": 0.35145075602778914
142
+ },
143
+ {
144
+ "id": "euclidean_recall",
145
+ "display_name": "euclidean_recall",
146
+ "description": null,
147
+ "value": 0.9652076318742986
148
+ },
149
+ {
150
+ "id": "euclidean_ap",
151
+ "display_name": "euclidean_ap",
152
+ "description": null,
153
+ "value": 0.4438681594614018
154
+ },
155
+ {
156
+ "id": "dot_accuracy",
157
+ "display_name": "dot_accuracy",
158
+ "description": null,
159
+ "value": 0.6599300427516518
160
+ },
161
+ {
162
+ "id": "dot_accuracy_threshold",
163
+ "display_name": "dot_accuracy_threshold",
164
+ "description": null,
165
+ "value": 1570.195556640625
166
+ },
167
+ {
168
+ "id": "dot_f1",
169
+ "display_name": "dot_f1",
170
+ "description": null,
171
+ "value": 0.5147654892877822
172
+ },
173
+ {
174
+ "id": "dot_f1_threshold",
175
+ "display_name": "dot_f1_threshold",
176
+ "description": null,
177
+ "value": 898.4225463867188
178
+ },
179
+ {
180
+ "id": "dot_precision",
181
+ "display_name": "dot_precision",
182
+ "description": null,
183
+ "value": 0.3468591494342567
184
+ },
185
+ {
186
+ "id": "dot_recall",
187
+ "display_name": "dot_recall",
188
+ "description": null,
189
+ "value": 0.9977553310886644
190
+ },
191
+ {
192
+ "id": "dot_ap",
193
+ "display_name": "dot_ap",
194
+ "description": null,
195
+ "value": 0.4179931403914694
196
+ },
197
+ {
198
+ "id": "top_ap",
199
+ "display_name": "top_ap",
200
+ "description": null,
201
+ "value": 0.4581544787406372
202
+ }
203
+ ]
204
+ },
205
+ {
206
+ "layer_number": 11,
207
+ "layer_display_name": "11",
208
+ "metrics": [
209
+ {
210
+ "id": "cos_sim_accuracy",
211
+ "display_name": "cos_sim_accuracy",
212
+ "description": null,
213
+ "value": 0.6746987951807228
214
+ },
215
+ {
216
+ "id": "cos_sim_accuracy_threshold",
217
+ "display_name": "cos_sim_accuracy_threshold",
218
+ "description": null,
219
+ "value": 0.9681814312934875
220
+ },
221
+ {
222
+ "id": "cos_sim_f1",
223
+ "display_name": "cos_sim_f1",
224
+ "description": null,
225
+ "value": 0.5363604114934374
226
+ },
227
+ {
228
+ "id": "cos_sim_f1_threshold",
229
+ "display_name": "cos_sim_f1_threshold",
230
+ "description": null,
231
+ "value": 0.9120055437088013
232
+ },
233
+ {
234
+ "id": "cos_sim_precision",
235
+ "display_name": "cos_sim_precision",
236
+ "description": null,
237
+ "value": 0.3921161825726141
238
+ },
239
+ {
240
+ "id": "cos_sim_recall",
241
+ "display_name": "cos_sim_recall",
242
+ "description": null,
243
+ "value": 0.8484848484848485
244
+ },
245
+ {
246
+ "id": "cos_sim_ap",
247
+ "display_name": "cos_sim_ap",
248
+ "description": null,
249
+ "value": 0.46704651746605186
250
+ },
251
+ {
252
+ "id": "manhattan_accuracy",
253
+ "display_name": "manhattan_accuracy",
254
+ "description": null,
255
+ "value": 0.6746987951807228
256
+ },
257
+ {
258
+ "id": "manhattan_accuracy_threshold",
259
+ "display_name": "manhattan_accuracy_threshold",
260
+ "description": null,
261
+ "value": 360.30352783203125
262
+ },
263
+ {
264
+ "id": "manhattan_f1",
265
+ "display_name": "manhattan_f1",
266
+ "description": null,
267
+ "value": 0.5305821665438467
268
+ },
269
+ {
270
+ "id": "manhattan_f1_threshold",
271
+ "display_name": "manhattan_f1_threshold",
272
+ "description": null,
273
+ "value": 576.9113159179688
274
+ },
275
+ {
276
+ "id": "manhattan_precision",
277
+ "display_name": "manhattan_precision",
278
+ "description": null,
279
+ "value": 0.3949533735600658
280
+ },
281
+ {
282
+ "id": "manhattan_recall",
283
+ "display_name": "manhattan_recall",
284
+ "description": null,
285
+ "value": 0.8080808080808081
286
+ },
287
+ {
288
+ "id": "manhattan_ap",
289
+ "display_name": "manhattan_ap",
290
+ "description": null,
291
+ "value": 0.468990806236423
292
+ },
293
+ {
294
+ "id": "euclidean_accuracy",
295
+ "display_name": "euclidean_accuracy",
296
+ "description": null,
297
+ "value": 0.6758647493198601
298
+ },
299
+ {
300
+ "id": "euclidean_accuracy_threshold",
301
+ "display_name": "euclidean_accuracy_threshold",
302
+ "description": null,
303
+ "value": 22.342727661132812
304
+ },
305
+ {
306
+ "id": "euclidean_f1",
307
+ "display_name": "euclidean_f1",
308
+ "description": null,
309
+ "value": 0.5301837270341208
310
+ },
311
+ {
312
+ "id": "euclidean_f1_threshold",
313
+ "display_name": "euclidean_f1_threshold",
314
+ "description": null,
315
+ "value": 39.38741683959961
316
+ },
317
+ {
318
+ "id": "euclidean_precision",
319
+ "display_name": "euclidean_precision",
320
+ "description": null,
321
+ "value": 0.37459434399629116
322
+ },
323
+ {
324
+ "id": "euclidean_recall",
325
+ "display_name": "euclidean_recall",
326
+ "description": null,
327
+ "value": 0.9068462401795735
328
+ },
329
+ {
330
+ "id": "euclidean_ap",
331
+ "display_name": "euclidean_ap",
332
+ "description": null,
333
+ "value": 0.46775797789146023
334
+ },
335
+ {
336
+ "id": "dot_accuracy",
337
+ "display_name": "dot_accuracy",
338
+ "description": null,
339
+ "value": 0.6541002720559658
340
+ },
341
+ {
342
+ "id": "dot_accuracy_threshold",
343
+ "display_name": "dot_accuracy_threshold",
344
+ "description": null,
345
+ "value": 9448.685546875
346
+ },
347
+ {
348
+ "id": "dot_f1",
349
+ "display_name": "dot_f1",
350
+ "description": null,
351
+ "value": 0.5145827317354895
352
+ },
353
+ {
354
+ "id": "dot_f1_threshold",
355
+ "display_name": "dot_f1_threshold",
356
+ "description": null,
357
+ "value": 4854.8955078125
358
+ },
359
+ {
360
+ "id": "dot_precision",
361
+ "display_name": "dot_precision",
362
+ "description": null,
363
+ "value": 0.3464230171073095
364
+ },
365
+ {
366
+ "id": "dot_recall",
367
+ "display_name": "dot_recall",
368
+ "description": null,
369
+ "value": 1.0
370
+ },
371
+ {
372
+ "id": "dot_ap",
373
+ "display_name": "dot_ap",
374
+ "description": null,
375
+ "value": 0.3679854825040224
376
+ },
377
+ {
378
+ "id": "top_ap",
379
+ "display_name": "top_ap",
380
+ "description": null,
381
+ "value": 0.468990806236423
382
+ }
383
+ ]
384
+ }
385
+ ]
386
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "MIBIG_protein_classification",
4
+ "display_name": "MIBiG Classification",
5
+ "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/mibig_classification_prot",
11
+ "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "f1",
31
+ "display_name": "f1",
32
+ "description": null,
33
+ "value": 0.721568117708931
34
+ },
35
+ {
36
+ "id": "accuracy",
37
+ "display_name": "accuracy",
38
+ "description": null,
39
+ "value": 0.7165532879818595
40
+ },
41
+ {
42
+ "id": "precision",
43
+ "display_name": "precision",
44
+ "description": null,
45
+ "value": 0.820388189148414
46
+ },
47
+ {
48
+ "id": "recall",
49
+ "display_name": "recall",
50
+ "description": null,
51
+ "value": 0.6689951528396479
52
+ },
53
+ {
54
+ "id": "lrap",
55
+ "display_name": "lrap",
56
+ "description": null,
57
+ "value": 0.8363567649281944
58
+ }
59
+ ]
60
+ },
61
+ {
62
+ "layer_number": 29,
63
+ "layer_display_name": "29",
64
+ "metrics": [
65
+ {
66
+ "id": "f1",
67
+ "display_name": "f1",
68
+ "description": null,
69
+ "value": 0.6298307655443518
70
+ },
71
+ {
72
+ "id": "accuracy",
73
+ "display_name": "accuracy",
74
+ "description": null,
75
+ "value": 0.6099773242630385
76
+ },
77
+ {
78
+ "id": "precision",
79
+ "display_name": "precision",
80
+ "description": null,
81
+ "value": 0.7648458169950588
82
+ },
83
+ {
84
+ "id": "recall",
85
+ "display_name": "recall",
86
+ "description": null,
87
+ "value": 0.5789820341918578
88
+ },
89
+ {
90
+ "id": "lrap",
91
+ "display_name": "lrap",
92
+ "description": null,
93
+ "value": 0.752078609221467
94
+ }
95
+ ]
96
+ }
97
+ ]
98
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json ADDED
@@ -0,0 +1,762 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "arch_retrieval",
4
+ "display_name": "Arch Retrieval",
5
+ "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
6
+ "modality": "protein",
7
+ "type": "retrieval",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/arch_retrieval",
11
+ "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"
12
+ },
13
+ {
14
+ "path": "tattabio/arch_retrieval_qrels",
15
+ "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"
16
+ }
17
+ ],
18
+ "primary_metric_id": "map_at_5"
19
+ },
20
+ "model": {
21
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
22
+ "revision": "...",
23
+ "num_layers": 30,
24
+ "num_params": 148795481,
25
+ "embed_dim": 640
26
+ },
27
+ "dgeb_version": "0.0.0",
28
+ "results": [
29
+ {
30
+ "layer_number": 15,
31
+ "layer_display_name": "15",
32
+ "metrics": [
33
+ {
34
+ "id": "ndcg_at_5",
35
+ "display_name": "ndcg_at_5",
36
+ "description": null,
37
+ "value": 0.91537
38
+ },
39
+ {
40
+ "id": "ndcg_at_10",
41
+ "display_name": "ndcg_at_10",
42
+ "description": null,
43
+ "value": 0.90635
44
+ },
45
+ {
46
+ "id": "ndcg_at_50",
47
+ "display_name": "ndcg_at_50",
48
+ "description": null,
49
+ "value": 0.87424
50
+ },
51
+ {
52
+ "id": "map_at_5",
53
+ "display_name": "map_at_5",
54
+ "description": null,
55
+ "value": 0.30526
56
+ },
57
+ {
58
+ "id": "map_at_10",
59
+ "display_name": "map_at_10",
60
+ "description": null,
61
+ "value": 0.42635
62
+ },
63
+ {
64
+ "id": "map_at_50",
65
+ "display_name": "map_at_50",
66
+ "description": null,
67
+ "value": 0.72433
68
+ },
69
+ {
70
+ "id": "recall_at_5",
71
+ "display_name": "recall_at_5",
72
+ "description": null,
73
+ "value": 0.31067
74
+ },
75
+ {
76
+ "id": "recall_at_10",
77
+ "display_name": "recall_at_10",
78
+ "description": null,
79
+ "value": 0.4378
80
+ },
81
+ {
82
+ "id": "recall_at_50",
83
+ "display_name": "recall_at_50",
84
+ "description": null,
85
+ "value": 0.75859
86
+ },
87
+ {
88
+ "id": "precision_at_5",
89
+ "display_name": "precision_at_5",
90
+ "description": null,
91
+ "value": 0.82689
92
+ },
93
+ {
94
+ "id": "precision_at_10",
95
+ "display_name": "precision_at_10",
96
+ "description": null,
97
+ "value": 0.76159
98
+ },
99
+ {
100
+ "id": "precision_at_50",
101
+ "display_name": "precision_at_50",
102
+ "description": null,
103
+ "value": 0.46726
104
+ },
105
+ {
106
+ "id": "mrr_at_5",
107
+ "display_name": "mrr_at_5",
108
+ "description": null,
109
+ "value": 0.9422321809645754
110
+ },
111
+ {
112
+ "id": "mrr_at_10",
113
+ "display_name": "mrr_at_10",
114
+ "description": null,
115
+ "value": 0.9439900344829917
116
+ },
117
+ {
118
+ "id": "mrr_at_50",
119
+ "display_name": "mrr_at_50",
120
+ "description": null,
121
+ "value": 0.9446453591992101
122
+ },
123
+ {
124
+ "id": "nauc_ndcg_at_5_max",
125
+ "display_name": "nauc_ndcg_at_5_max",
126
+ "description": null,
127
+ "value": 0.6549640359156222
128
+ },
129
+ {
130
+ "id": "nauc_ndcg_at_5_std",
131
+ "display_name": "nauc_ndcg_at_5_std",
132
+ "description": null,
133
+ "value": 0.11037035667235007
134
+ },
135
+ {
136
+ "id": "nauc_ndcg_at_5_diff1",
137
+ "display_name": "nauc_ndcg_at_5_diff1",
138
+ "description": null,
139
+ "value": -0.41554431142868614
140
+ },
141
+ {
142
+ "id": "nauc_ndcg_at_10_max",
143
+ "display_name": "nauc_ndcg_at_10_max",
144
+ "description": null,
145
+ "value": 0.6536082943031309
146
+ },
147
+ {
148
+ "id": "nauc_ndcg_at_10_std",
149
+ "display_name": "nauc_ndcg_at_10_std",
150
+ "description": null,
151
+ "value": 0.140251553474609
152
+ },
153
+ {
154
+ "id": "nauc_ndcg_at_10_diff1",
155
+ "display_name": "nauc_ndcg_at_10_diff1",
156
+ "description": null,
157
+ "value": -0.4541965457157918
158
+ },
159
+ {
160
+ "id": "nauc_ndcg_at_50_max",
161
+ "display_name": "nauc_ndcg_at_50_max",
162
+ "description": null,
163
+ "value": 0.6159871931946869
164
+ },
165
+ {
166
+ "id": "nauc_ndcg_at_50_std",
167
+ "display_name": "nauc_ndcg_at_50_std",
168
+ "description": null,
169
+ "value": 0.006651176818080506
170
+ },
171
+ {
172
+ "id": "nauc_ndcg_at_50_diff1",
173
+ "display_name": "nauc_ndcg_at_50_diff1",
174
+ "description": null,
175
+ "value": -0.39627086499203873
176
+ },
177
+ {
178
+ "id": "nauc_map_at_5_max",
179
+ "display_name": "nauc_map_at_5_max",
180
+ "description": null,
181
+ "value": -0.047556791244411895
182
+ },
183
+ {
184
+ "id": "nauc_map_at_5_std",
185
+ "display_name": "nauc_map_at_5_std",
186
+ "description": null,
187
+ "value": 0.16420917659496206
188
+ },
189
+ {
190
+ "id": "nauc_map_at_5_diff1",
191
+ "display_name": "nauc_map_at_5_diff1",
192
+ "description": null,
193
+ "value": 0.28627326792803204
194
+ },
195
+ {
196
+ "id": "nauc_map_at_10_max",
197
+ "display_name": "nauc_map_at_10_max",
198
+ "description": null,
199
+ "value": 0.06426190649373154
200
+ },
201
+ {
202
+ "id": "nauc_map_at_10_std",
203
+ "display_name": "nauc_map_at_10_std",
204
+ "description": null,
205
+ "value": 0.23746446970773183
206
+ },
207
+ {
208
+ "id": "nauc_map_at_10_diff1",
209
+ "display_name": "nauc_map_at_10_diff1",
210
+ "description": null,
211
+ "value": 0.15565045001627686
212
+ },
213
+ {
214
+ "id": "nauc_map_at_50_max",
215
+ "display_name": "nauc_map_at_50_max",
216
+ "description": null,
217
+ "value": 0.5237897180891637
218
+ },
219
+ {
220
+ "id": "nauc_map_at_50_std",
221
+ "display_name": "nauc_map_at_50_std",
222
+ "description": null,
223
+ "value": 0.1865080232459892
224
+ },
225
+ {
226
+ "id": "nauc_map_at_50_diff1",
227
+ "display_name": "nauc_map_at_50_diff1",
228
+ "description": null,
229
+ "value": -0.2688572949738638
230
+ },
231
+ {
232
+ "id": "nauc_recall_at_5_max",
233
+ "display_name": "nauc_recall_at_5_max",
234
+ "description": null,
235
+ "value": -0.054074967730710764
236
+ },
237
+ {
238
+ "id": "nauc_recall_at_5_std",
239
+ "display_name": "nauc_recall_at_5_std",
240
+ "description": null,
241
+ "value": 0.1711511016438979
242
+ },
243
+ {
244
+ "id": "nauc_recall_at_5_diff1",
245
+ "display_name": "nauc_recall_at_5_diff1",
246
+ "description": null,
247
+ "value": 0.2896050332877169
248
+ },
249
+ {
250
+ "id": "nauc_recall_at_10_max",
251
+ "display_name": "nauc_recall_at_10_max",
252
+ "description": null,
253
+ "value": 0.05005034152582497
254
+ },
255
+ {
256
+ "id": "nauc_recall_at_10_std",
257
+ "display_name": "nauc_recall_at_10_std",
258
+ "description": null,
259
+ "value": 0.24918235642253458
260
+ },
261
+ {
262
+ "id": "nauc_recall_at_10_diff1",
263
+ "display_name": "nauc_recall_at_10_diff1",
264
+ "description": null,
265
+ "value": 0.16768640965952947
266
+ },
267
+ {
268
+ "id": "nauc_recall_at_50_max",
269
+ "display_name": "nauc_recall_at_50_max",
270
+ "description": null,
271
+ "value": 0.5114754425984644
272
+ },
273
+ {
274
+ "id": "nauc_recall_at_50_std",
275
+ "display_name": "nauc_recall_at_50_std",
276
+ "description": null,
277
+ "value": 0.2173420630028766
278
+ },
279
+ {
280
+ "id": "nauc_recall_at_50_diff1",
281
+ "display_name": "nauc_recall_at_50_diff1",
282
+ "description": null,
283
+ "value": -0.2526274232326276
284
+ },
285
+ {
286
+ "id": "nauc_precision_at_5_max",
287
+ "display_name": "nauc_precision_at_5_max",
288
+ "description": null,
289
+ "value": 0.5525639421444303
290
+ },
291
+ {
292
+ "id": "nauc_precision_at_5_std",
293
+ "display_name": "nauc_precision_at_5_std",
294
+ "description": null,
295
+ "value": 0.01857146637175079
296
+ },
297
+ {
298
+ "id": "nauc_precision_at_5_diff1",
299
+ "display_name": "nauc_precision_at_5_diff1",
300
+ "description": null,
301
+ "value": -0.7765476306675947
302
+ },
303
+ {
304
+ "id": "nauc_precision_at_10_max",
305
+ "display_name": "nauc_precision_at_10_max",
306
+ "description": null,
307
+ "value": 0.48362026531371466
308
+ },
309
+ {
310
+ "id": "nauc_precision_at_10_std",
311
+ "display_name": "nauc_precision_at_10_std",
312
+ "description": null,
313
+ "value": -0.0051297270434755475
314
+ },
315
+ {
316
+ "id": "nauc_precision_at_10_diff1",
317
+ "display_name": "nauc_precision_at_10_diff1",
318
+ "description": null,
319
+ "value": -0.7004665714420365
320
+ },
321
+ {
322
+ "id": "nauc_precision_at_50_max",
323
+ "display_name": "nauc_precision_at_50_max",
324
+ "description": null,
325
+ "value": 0.24671476154878727
326
+ },
327
+ {
328
+ "id": "nauc_precision_at_50_std",
329
+ "display_name": "nauc_precision_at_50_std",
330
+ "description": null,
331
+ "value": -0.37006645670815747
332
+ },
333
+ {
334
+ "id": "nauc_precision_at_50_diff1",
335
+ "display_name": "nauc_precision_at_50_diff1",
336
+ "description": null,
337
+ "value": -0.36951553698605216
338
+ },
339
+ {
340
+ "id": "nauc_mrr_at_5_max",
341
+ "display_name": "nauc_mrr_at_5_max",
342
+ "description": null,
343
+ "value": 0.64312359548717
344
+ },
345
+ {
346
+ "id": "nauc_mrr_at_5_std",
347
+ "display_name": "nauc_mrr_at_5_std",
348
+ "description": null,
349
+ "value": 0.04622765419712948
350
+ },
351
+ {
352
+ "id": "nauc_mrr_at_5_diff1",
353
+ "display_name": "nauc_mrr_at_5_diff1",
354
+ "description": null,
355
+ "value": -0.22259410250972433
356
+ },
357
+ {
358
+ "id": "nauc_mrr_at_10_max",
359
+ "display_name": "nauc_mrr_at_10_max",
360
+ "description": null,
361
+ "value": 0.6385468425832173
362
+ },
363
+ {
364
+ "id": "nauc_mrr_at_10_std",
365
+ "display_name": "nauc_mrr_at_10_std",
366
+ "description": null,
367
+ "value": 0.058640802937365115
368
+ },
369
+ {
370
+ "id": "nauc_mrr_at_10_diff1",
371
+ "display_name": "nauc_mrr_at_10_diff1",
372
+ "description": null,
373
+ "value": -0.21579087208897282
374
+ },
375
+ {
376
+ "id": "nauc_mrr_at_50_max",
377
+ "display_name": "nauc_mrr_at_50_max",
378
+ "description": null,
379
+ "value": 0.6402042049799889
380
+ },
381
+ {
382
+ "id": "nauc_mrr_at_50_std",
383
+ "display_name": "nauc_mrr_at_50_std",
384
+ "description": null,
385
+ "value": 0.052782783025246006
386
+ },
387
+ {
388
+ "id": "nauc_mrr_at_50_diff1",
389
+ "display_name": "nauc_mrr_at_50_diff1",
390
+ "description": null,
391
+ "value": -0.21896215733129423
392
+ }
393
+ ]
394
+ },
395
+ {
396
+ "layer_number": 29,
397
+ "layer_display_name": "29",
398
+ "metrics": [
399
+ {
400
+ "id": "ndcg_at_5",
401
+ "display_name": "ndcg_at_5",
402
+ "description": null,
403
+ "value": 0.83285
404
+ },
405
+ {
406
+ "id": "ndcg_at_10",
407
+ "display_name": "ndcg_at_10",
408
+ "description": null,
409
+ "value": 0.81413
410
+ },
411
+ {
412
+ "id": "ndcg_at_50",
413
+ "display_name": "ndcg_at_50",
414
+ "description": null,
415
+ "value": 0.76701
416
+ },
417
+ {
418
+ "id": "map_at_5",
419
+ "display_name": "map_at_5",
420
+ "description": null,
421
+ "value": 0.25404
422
+ },
423
+ {
424
+ "id": "map_at_10",
425
+ "display_name": "map_at_10",
426
+ "description": null,
427
+ "value": 0.35083
428
+ },
429
+ {
430
+ "id": "map_at_50",
431
+ "display_name": "map_at_50",
432
+ "description": null,
433
+ "value": 0.58387
434
+ },
435
+ {
436
+ "id": "recall_at_5",
437
+ "display_name": "recall_at_5",
438
+ "description": null,
439
+ "value": 0.266
440
+ },
441
+ {
442
+ "id": "recall_at_10",
443
+ "display_name": "recall_at_10",
444
+ "description": null,
445
+ "value": 0.37545
446
+ },
447
+ {
448
+ "id": "recall_at_50",
449
+ "display_name": "recall_at_50",
450
+ "description": null,
451
+ "value": 0.66303
452
+ },
453
+ {
454
+ "id": "precision_at_5",
455
+ "display_name": "precision_at_5",
456
+ "description": null,
457
+ "value": 0.75621
458
+ },
459
+ {
460
+ "id": "precision_at_10",
461
+ "display_name": "precision_at_10",
462
+ "description": null,
463
+ "value": 0.6866
464
+ },
465
+ {
466
+ "id": "precision_at_50",
467
+ "display_name": "precision_at_50",
468
+ "description": null,
469
+ "value": 0.41047
470
+ },
471
+ {
472
+ "id": "mrr_at_5",
473
+ "display_name": "mrr_at_5",
474
+ "description": null,
475
+ "value": 0.8947289799402471
476
+ },
477
+ {
478
+ "id": "mrr_at_10",
479
+ "display_name": "mrr_at_10",
480
+ "description": null,
481
+ "value": 0.895975855130784
482
+ },
483
+ {
484
+ "id": "mrr_at_50",
485
+ "display_name": "mrr_at_50",
486
+ "description": null,
487
+ "value": 0.8970771214115124
488
+ },
489
+ {
490
+ "id": "nauc_ndcg_at_5_max",
491
+ "display_name": "nauc_ndcg_at_5_max",
492
+ "description": null,
493
+ "value": 0.6033756709037629
494
+ },
495
+ {
496
+ "id": "nauc_ndcg_at_5_std",
497
+ "display_name": "nauc_ndcg_at_5_std",
498
+ "description": null,
499
+ "value": 0.48175424620769186
500
+ },
501
+ {
502
+ "id": "nauc_ndcg_at_5_diff1",
503
+ "display_name": "nauc_ndcg_at_5_diff1",
504
+ "description": null,
505
+ "value": -0.1614695329433979
506
+ },
507
+ {
508
+ "id": "nauc_ndcg_at_10_max",
509
+ "display_name": "nauc_ndcg_at_10_max",
510
+ "description": null,
511
+ "value": 0.5820557360820439
512
+ },
513
+ {
514
+ "id": "nauc_ndcg_at_10_std",
515
+ "display_name": "nauc_ndcg_at_10_std",
516
+ "description": null,
517
+ "value": 0.48937482522317327
518
+ },
519
+ {
520
+ "id": "nauc_ndcg_at_10_diff1",
521
+ "display_name": "nauc_ndcg_at_10_diff1",
522
+ "description": null,
523
+ "value": -0.18205509390904553
524
+ },
525
+ {
526
+ "id": "nauc_ndcg_at_50_max",
527
+ "display_name": "nauc_ndcg_at_50_max",
528
+ "description": null,
529
+ "value": 0.49384788238425553
530
+ },
531
+ {
532
+ "id": "nauc_ndcg_at_50_std",
533
+ "display_name": "nauc_ndcg_at_50_std",
534
+ "description": null,
535
+ "value": 0.354953353704701
536
+ },
537
+ {
538
+ "id": "nauc_ndcg_at_50_diff1",
539
+ "display_name": "nauc_ndcg_at_50_diff1",
540
+ "description": null,
541
+ "value": -0.10767304568721194
542
+ },
543
+ {
544
+ "id": "nauc_map_at_5_max",
545
+ "display_name": "nauc_map_at_5_max",
546
+ "description": null,
547
+ "value": 0.03598090314920231
548
+ },
549
+ {
550
+ "id": "nauc_map_at_5_std",
551
+ "display_name": "nauc_map_at_5_std",
552
+ "description": null,
553
+ "value": 0.11662947626949612
554
+ },
555
+ {
556
+ "id": "nauc_map_at_5_diff1",
557
+ "display_name": "nauc_map_at_5_diff1",
558
+ "description": null,
559
+ "value": 0.28974453988735166
560
+ },
561
+ {
562
+ "id": "nauc_map_at_10_max",
563
+ "display_name": "nauc_map_at_10_max",
564
+ "description": null,
565
+ "value": 0.13482748795676255
566
+ },
567
+ {
568
+ "id": "nauc_map_at_10_std",
569
+ "display_name": "nauc_map_at_10_std",
570
+ "description": null,
571
+ "value": 0.22360013731689057
572
+ },
573
+ {
574
+ "id": "nauc_map_at_10_diff1",
575
+ "display_name": "nauc_map_at_10_diff1",
576
+ "description": null,
577
+ "value": 0.19043309088480928
578
+ },
579
+ {
580
+ "id": "nauc_map_at_50_max",
581
+ "display_name": "nauc_map_at_50_max",
582
+ "description": null,
583
+ "value": 0.42287317105206507
584
+ },
585
+ {
586
+ "id": "nauc_map_at_50_std",
587
+ "display_name": "nauc_map_at_50_std",
588
+ "description": null,
589
+ "value": 0.32712992457779794
590
+ },
591
+ {
592
+ "id": "nauc_map_at_50_diff1",
593
+ "display_name": "nauc_map_at_50_diff1",
594
+ "description": null,
595
+ "value": -0.02056986996465222
596
+ },
597
+ {
598
+ "id": "nauc_recall_at_5_max",
599
+ "display_name": "nauc_recall_at_5_max",
600
+ "description": null,
601
+ "value": 0.021824220192766298
602
+ },
603
+ {
604
+ "id": "nauc_recall_at_5_std",
605
+ "display_name": "nauc_recall_at_5_std",
606
+ "description": null,
607
+ "value": 0.11009705855814085
608
+ },
609
+ {
610
+ "id": "nauc_recall_at_5_diff1",
611
+ "display_name": "nauc_recall_at_5_diff1",
612
+ "description": null,
613
+ "value": 0.28505819859304804
614
+ },
615
+ {
616
+ "id": "nauc_recall_at_10_max",
617
+ "display_name": "nauc_recall_at_10_max",
618
+ "description": null,
619
+ "value": 0.10661440304261144
620
+ },
621
+ {
622
+ "id": "nauc_recall_at_10_std",
623
+ "display_name": "nauc_recall_at_10_std",
624
+ "description": null,
625
+ "value": 0.2092712287791401
626
+ },
627
+ {
628
+ "id": "nauc_recall_at_10_diff1",
629
+ "display_name": "nauc_recall_at_10_diff1",
630
+ "description": null,
631
+ "value": 0.19742570630860265
632
+ },
633
+ {
634
+ "id": "nauc_recall_at_50_max",
635
+ "display_name": "nauc_recall_at_50_max",
636
+ "description": null,
637
+ "value": 0.38620604109572715
638
+ },
639
+ {
640
+ "id": "nauc_recall_at_50_std",
641
+ "display_name": "nauc_recall_at_50_std",
642
+ "description": null,
643
+ "value": 0.2924386961038862
644
+ },
645
+ {
646
+ "id": "nauc_recall_at_50_diff1",
647
+ "display_name": "nauc_recall_at_50_diff1",
648
+ "description": null,
649
+ "value": 0.025319280347884648
650
+ },
651
+ {
652
+ "id": "nauc_precision_at_5_max",
653
+ "display_name": "nauc_precision_at_5_max",
654
+ "description": null,
655
+ "value": 0.5425386973889819
656
+ },
657
+ {
658
+ "id": "nauc_precision_at_5_std",
659
+ "display_name": "nauc_precision_at_5_std",
660
+ "description": null,
661
+ "value": 0.4063280755847313
662
+ },
663
+ {
664
+ "id": "nauc_precision_at_5_diff1",
665
+ "display_name": "nauc_precision_at_5_diff1",
666
+ "description": null,
667
+ "value": -0.43965420847555414
668
+ },
669
+ {
670
+ "id": "nauc_precision_at_10_max",
671
+ "display_name": "nauc_precision_at_10_max",
672
+ "description": null,
673
+ "value": 0.4721960038905336
674
+ },
675
+ {
676
+ "id": "nauc_precision_at_10_std",
677
+ "display_name": "nauc_precision_at_10_std",
678
+ "description": null,
679
+ "value": 0.35700671463443756
680
+ },
681
+ {
682
+ "id": "nauc_precision_at_10_diff1",
683
+ "display_name": "nauc_precision_at_10_diff1",
684
+ "description": null,
685
+ "value": -0.44652985217538876
686
+ },
687
+ {
688
+ "id": "nauc_precision_at_50_max",
689
+ "display_name": "nauc_precision_at_50_max",
690
+ "description": null,
691
+ "value": 0.2526299155090765
692
+ },
693
+ {
694
+ "id": "nauc_precision_at_50_std",
695
+ "display_name": "nauc_precision_at_50_std",
696
+ "description": null,
697
+ "value": -0.021434326602753354
698
+ },
699
+ {
700
+ "id": "nauc_precision_at_50_diff1",
701
+ "display_name": "nauc_precision_at_50_diff1",
702
+ "description": null,
703
+ "value": -0.3009002533330021
704
+ },
705
+ {
706
+ "id": "nauc_mrr_at_5_max",
707
+ "display_name": "nauc_mrr_at_5_max",
708
+ "description": null,
709
+ "value": 0.6726463178530804
710
+ },
711
+ {
712
+ "id": "nauc_mrr_at_5_std",
713
+ "display_name": "nauc_mrr_at_5_std",
714
+ "description": null,
715
+ "value": 0.49687521406966506
716
+ },
717
+ {
718
+ "id": "nauc_mrr_at_5_diff1",
719
+ "display_name": "nauc_mrr_at_5_diff1",
720
+ "description": null,
721
+ "value": 0.05561071266486503
722
+ },
723
+ {
724
+ "id": "nauc_mrr_at_10_max",
725
+ "display_name": "nauc_mrr_at_10_max",
726
+ "description": null,
727
+ "value": 0.6731608376359998
728
+ },
729
+ {
730
+ "id": "nauc_mrr_at_10_std",
731
+ "display_name": "nauc_mrr_at_10_std",
732
+ "description": null,
733
+ "value": 0.49491217127896847
734
+ },
735
+ {
736
+ "id": "nauc_mrr_at_10_diff1",
737
+ "display_name": "nauc_mrr_at_10_diff1",
738
+ "description": null,
739
+ "value": 0.05832429376042118
740
+ },
741
+ {
742
+ "id": "nauc_mrr_at_50_max",
743
+ "display_name": "nauc_mrr_at_50_max",
744
+ "description": null,
745
+ "value": 0.6735463200113443
746
+ },
747
+ {
748
+ "id": "nauc_mrr_at_50_std",
749
+ "display_name": "nauc_mrr_at_50_std",
750
+ "description": null,
751
+ "value": 0.495779540068593
752
+ },
753
+ {
754
+ "id": "nauc_mrr_at_50_diff1",
755
+ "display_name": "nauc_mrr_at_50_diff1",
756
+ "description": null,
757
+ "value": 0.06154966156964915
758
+ }
759
+ ]
760
+ }
761
+ ]
762
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "bacarch_bigene",
4
+ "display_name": "BacArch BiGene",
5
+ "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
6
+ "modality": "protein",
7
+ "type": "bigene_mining",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/bac_arch_bigene",
11
+ "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "precision",
31
+ "display_name": "precision",
32
+ "description": null,
33
+ "value": 0.7591194968553459
34
+ },
35
+ {
36
+ "id": "recall",
37
+ "display_name": "recall",
38
+ "description": null,
39
+ "value": 0.8188679245283019
40
+ },
41
+ {
42
+ "id": "f1",
43
+ "display_name": "f1",
44
+ "description": null,
45
+ "value": 0.7779874213836478
46
+ },
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.8188679245283019
52
+ }
53
+ ]
54
+ },
55
+ {
56
+ "layer_number": 29,
57
+ "layer_display_name": "29",
58
+ "metrics": [
59
+ {
60
+ "id": "precision",
61
+ "display_name": "precision",
62
+ "description": null,
63
+ "value": 0.656010781671159
64
+ },
65
+ {
66
+ "id": "recall",
67
+ "display_name": "recall",
68
+ "description": null,
69
+ "value": 0.7320754716981132
70
+ },
71
+ {
72
+ "id": "f1",
73
+ "display_name": "f1",
74
+ "description": null,
75
+ "value": 0.6774213836477987
76
+ },
77
+ {
78
+ "id": "accuracy",
79
+ "display_name": "accuracy",
80
+ "description": null,
81
+ "value": 0.7320754716981132
82
+ }
83
+ ]
84
+ }
85
+ ]
86
+ }
leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": {
3
+ "id": "convergent_enzymes_classification",
4
+ "display_name": "Convergent Enzymes Classification",
5
+ "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
6
+ "modality": "protein",
7
+ "type": "classification",
8
+ "datasets": [
9
+ {
10
+ "path": "tattabio/convergent_enzymes",
11
+ "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"
12
+ }
13
+ ],
14
+ "primary_metric_id": "f1"
15
+ },
16
+ "model": {
17
+ "hf_name": "facebook/esm2_t30_150M_UR50D",
18
+ "revision": "...",
19
+ "num_layers": 30,
20
+ "num_params": 148795481,
21
+ "embed_dim": 640
22
+ },
23
+ "dgeb_version": "0.0.0",
24
+ "results": [
25
+ {
26
+ "layer_number": 15,
27
+ "layer_display_name": "15",
28
+ "metrics": [
29
+ {
30
+ "id": "accuracy",
31
+ "display_name": "accuracy",
32
+ "description": null,
33
+ "value": 0.2975
34
+ },
35
+ {
36
+ "id": "f1",
37
+ "display_name": "f1",
38
+ "description": null,
39
+ "value": 0.24646428571428572
40
+ }
41
+ ]
42
+ },
43
+ {
44
+ "layer_number": 29,
45
+ "layer_display_name": "29",
46
+ "metrics": [
47
+ {
48
+ "id": "accuracy",
49
+ "display_name": "accuracy",
50
+ "description": null,
51
+ "value": 0.2475
52
+ },
53
+ {
54
+ "id": "f1",
55
+ "display_name": "f1",
56
+ "description": null,
57
+ "value": 0.20091666666666666
58
+ }
59
+ ]
60
+ }
61
+ ]
62
+ }