Joshua Kravitz committed
Commit e284167 · 0 parent(s)

Initial commit
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .editorconfig +12 -0
- .github/workflows/ci.yml +34 -0
- .github/workflows/release.yml +50 -0
- .gitignore +6 -0
- CHANGELOG.md +197 -0
- Dockerfile +27 -0
- LICENSE +201 -0
- README.md +181 -0
- dgeb/__init__.py +28 -0
- dgeb/cli.py +136 -0
- dgeb/dgeb.py +129 -0
- dgeb/eval_utils.py +394 -0
- dgeb/evaluators.py +839 -0
- dgeb/modality.py +8 -0
- dgeb/models.py +481 -0
- dgeb/tasks/__init__.py +16 -0
- dgeb/tasks/bigene_mining_tasks.py +77 -0
- dgeb/tasks/classification_tasks.py +213 -0
- dgeb/tasks/clustering_tasks.py +70 -0
- dgeb/tasks/eds_tasks.py +246 -0
- dgeb/tasks/pair_classification_tasks.py +96 -0
- dgeb/tasks/retrieval_tasks.py +96 -0
- dgeb/tasks/tasks.py +135 -0
- docker-compose.yml +8 -0
- docs/images/tatta_logo.png +0 -0
- leaderboard/.gitignore +2 -0
- leaderboard/DGEB_Figure.png +0 -0
- leaderboard/README.md +2 -0
- leaderboard/__init__.py +0 -0
- leaderboard/app.py +260 -0
- leaderboard/requirements.txt +82 -0
- leaderboard/submissions/.DS_Store +0 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json +98 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json +762 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json +86 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json +62 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json +386 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json +62 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json +386 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json +762 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json +90 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json +97 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json +50 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json +90 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json +90 -0
- leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json +386 -0
- leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json +98 -0
- leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json +762 -0
- leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json +86 -0
- leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json +62 -0
.editorconfig
ADDED
@@ -0,0 +1,12 @@
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+[*.py]
+charset = utf-8
+indent_style = space
+indent_size = 4
.github/workflows/ci.yml
ADDED
@@ -0,0 +1,34 @@
+name: CI for DGEB
+
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+    branches: ["**"]
+
+permissions:
+  id-token: write
+  contents: read
+  actions: write
+  pull-requests: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - uses: yezz123/setup-uv@v4
+        with:
+          uv-venv: ".geb_venv"
+      - run: uv pip install ruff
+      - run: ruff format .
+      - run: ruff check .
+      # TODO: pytest
+      # TODO: pyright
.github/workflows/release.yml
ADDED
@@ -0,0 +1,50 @@
+# This workflow will
+# - Find the latest version tag based on the commit history
+# - Create a git tag for the new version
+# - Update the version number in pyproject.toml based on the commit history
+# - Upload the package to PyPI
+# - Create a release on GitHub
+
+# This workflow required the following secrets to be set:
+# - a GitHub personal access token with the `repo` scope called `RELEASE`
+# - and that you setup trusted publishing using PyPI as described here: https://blog.pypi.org/posts/2023-04-20-introducing-trusted-publishers/
+
+name: Release
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    concurrency: release
+    permissions:
+      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing using PyPI
+      contents: write
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          token: ${{ secrets.GH_TOKEN }}
+
+      - name: Python Semantic Release
+        id: release
+        uses: python-semantic-release/[email protected]
+        with:
+          github_token: ${{ secrets.GH_TOKEN }}
+
+      - name: Publish package distributions to PyPI
+        uses: pypa/[email protected]
+        if: steps.release.outputs.released == 'true'
+        # This action supports PyPI's trusted publishing implementation, which allows authentication to PyPI without a manually
+        # configured API token or username/password combination. To perform trusted publishing with this action, your project's
+        # publisher must already be configured on PyPI.
+
+      - name: Publish package distributions to GitHub Releases
+        uses: python-semantic-release/[email protected]
+        if: steps.release.outputs.released == 'true'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          tag: ${{ steps.release.outputs.tag }}
.gitignore
ADDED
@@ -0,0 +1,6 @@
+.venv/
+__pycache__/
+.vscode/
+build/
+dist/
+*egg-info/
CHANGELOG.md
ADDED
@@ -0,0 +1,197 @@
+# CHANGELOG
+
+## v0.0.10 (2024-07-09)
+
+### Fix
+
+* fix: remove noop task ([`7d5b393`](https://github.com/TattaBio/DGEB/commit/7d5b3933f48e51fb4c71945f01af2cc5a7dba3ed))
+
+## v0.0.9 (2024-07-09)
+
+### Fix
+
+* fix: update cli script name ([`633e14d`](https://github.com/TattaBio/DGEB/commit/633e14db7e1eed0d9606ef1097e369e4f5e245f5))
+
+### Unknown
+
+* 0.0.9
+
+Automatically generated by python-semantic-release [skip ci] ([`a8c1a96`](https://github.com/TattaBio/DGEB/commit/a8c1a96d18af589795bc9532fee8ad9764cd52ed))
+
+* Merge pull request #9 from TattaBio/andre
+
+Update ModAC main metric ([`3c67e65`](https://github.com/TattaBio/DGEB/commit/3c67e6559d0e49d90ffe2858eb9e287abd1b6e6c))
+
+* ruff format ([`78461ac`](https://github.com/TattaBio/DGEB/commit/78461ac901b8617821ca15e543c0dd8e2dbf6e95))
+
+* update top_k=50 for modac ([`2c3dcd5`](https://github.com/TattaBio/DGEB/commit/2c3dcd5856b6679a80999b3c4b3512876ac0b58d))
+
+* remove revision ([`2d587da`](https://github.com/TattaBio/DGEB/commit/2d587daa79f32c49201b419892b7f95f3dc5eedb))
+
+* Merge pull request #8 from TattaBio/cli
+
+Cli & cleanup ([`9698c8f`](https://github.com/TattaBio/DGEB/commit/9698c8f5ab0bab6c3c0a76d59dc29cfd964ebf15))
+
+* Exclude leaderboard files in anticipation of merging leaderboard PR ([`58bdcba`](https://github.com/TattaBio/DGEB/commit/58bdcba11af605bdef11cfecc087c9efb0e97b72))
+
+* Update README ([`d323905`](https://github.com/TattaBio/DGEB/commit/d3239059e29fb149f9c348b951bc4988d8b9f8dc))
+
+* cleanup ([`1f0fe16`](https://github.com/TattaBio/DGEB/commit/1f0fe16de6910200d88c918b08cbf26067313469))
+
+* Add cli to pyproject.toml ([`5404218`](https://github.com/TattaBio/DGEB/commit/54042181ef54c11db74ebb53c403b21a8114c02b))
+
+* Remove Dataset 'description' which does not exist on model. ([`46b0040`](https://github.com/TattaBio/DGEB/commit/46b0040a302384fa00791bbfdd6fae24645d6a6d))
+
+* Merge pull request #7 from TattaBio/add_dna_tasks
+
+Add dna tasks ([`cfc5799`](https://github.com/TattaBio/DGEB/commit/cfc57995f9b1e584bb60e998f9cf68bea5ec39fa))
+
+* ruff ([`f9fa125`](https://github.com/TattaBio/DGEB/commit/f9fa12502df9837b5381da17b17198f3667c4911))
+
+* adding rpob datasets and updating ec revision ([`8f9cc3f`](https://github.com/TattaBio/DGEB/commit/8f9cc3f819beb70f51a5cc59f16c65bffceedbad))
+
+* Update README.md ([`d5d7c24`](https://github.com/TattaBio/DGEB/commit/d5d7c24215d347fc17d6016ac2a3eddfb3cf2a12))
+
+* Merge pull request #4 from TattaBio/andre
+
+Add dataset revisions ([`95b6f11`](https://github.com/TattaBio/DGEB/commit/95b6f11ffee3dccc45ab119ac4f602066750f7ef))
+
+* add dataset revision numbers ([`7e069a2`](https://github.com/TattaBio/DGEB/commit/7e069a237de5391e7c6b7f09c108292ac10c25af))
+
+* Merge pull request #3 from TattaBio/andre
+
+Update readme and task imports ([`ade30a8`](https://github.com/TattaBio/DGEB/commit/ade30a856deffe35ddf57d16705d030b6d0192c8))
+
+* rename dgeb ([`6b1c2ee`](https://github.com/TattaBio/DGEB/commit/6b1c2ee76798d89e487386116efe23c90d2d039c))
+
+* add intro ([`a2280dd`](https://github.com/TattaBio/DGEB/commit/a2280dd732984d58caed45b9a429038c0d81851a))
+
+* update readme and tasks ([`00e0a79`](https://github.com/TattaBio/DGEB/commit/00e0a791f070ca37e5b92770b3363ef066e2789f))
+
+* Merge pull request #2 from TattaBio/andre
+
+rename dgeb imports ([`1894ba9`](https://github.com/TattaBio/DGEB/commit/1894ba9a92a8f369053ddb9d351ae48fd8e2d674))
+
+* rename dgeb imports ([`5f1f8b8`](https://github.com/TattaBio/DGEB/commit/5f1f8b850f271cd6785291e3feb2c2d4bf979f9c))
+
+## v0.0.8 (2024-07-01)
+
+### Fix
+
+* fix: don't run ci on release of new version ([`fa97104`](https://github.com/TattaBio/DGEB/commit/fa971049429975d06c8aca086e86b19d92383969))
+
+### Unknown
+
+* 0.0.8
+
+Automatically generated by python-semantic-release [skip ci] ([`8dc15d3`](https://github.com/TattaBio/DGEB/commit/8dc15d34c6317087253950893974d16b9f75a17c))
+
+## v0.0.7 (2024-07-01)
+
+### Fix
+
+* fix: try again ([`e7d0ecd`](https://github.com/TattaBio/DGEB/commit/e7d0ecdcb63e909f9ab727f11fb3fd57414d2fa5))
+
+* fix: edit readme to see if job still works with restricted permissions ([`93cd728`](https://github.com/TattaBio/DGEB/commit/93cd728c8a632b9bed611c55dace2e2ffb103410))
+
+### Unknown
+
+* 0.0.7
+
+Automatically generated by python-semantic-release ([`9808d4f`](https://github.com/TattaBio/DGEB/commit/9808d4f328a577c066affd34d408ad26eb6098d0))
+
+* Merge pull request #1 from TattaBio/edit-readme
+
+fix: edit readme to see if job still works with restricted permissions ([`c45599c`](https://github.com/TattaBio/DGEB/commit/c45599cf9628155603245f906c09cf6483cffce8))
+
+## v0.0.6 (2024-07-01)
+
+### Fix
+
+* fix: nevermind that broke it ([`ec33a1c`](https://github.com/TattaBio/DGEB/commit/ec33a1c6539ac1fb2710869a2d436483a02236e0))
+
+* fix: see if I can remove this line ([`246d4e9`](https://github.com/TattaBio/DGEB/commit/246d4e9841a83d18217506d46f211f1341c63526))
+
+### Unknown
+
+* 0.0.6
+
+Automatically generated by python-semantic-release ([`1b28df5`](https://github.com/TattaBio/DGEB/commit/1b28df559c95db0aea95111a5f27d01645d23786))
+
+## v0.0.5 (2024-07-01)
+
+### Fix
+
+* fix: try fixing release to handle protected branch ([`5cedad3`](https://github.com/TattaBio/DGEB/commit/5cedad3e9f34d249eda9257e3c21fc8443d000cf))
+
+* fix: another change... ([`c5ad3f0`](https://github.com/TattaBio/DGEB/commit/c5ad3f098d36e25afdf4fa9aae20967eb968568e))
+
+* fix: update pip install command in readme ([`ff90791`](https://github.com/TattaBio/DGEB/commit/ff90791398f9a9b907c308400f88811a8f8633dc))
+
+### Unknown
+
+* 0.0.5
+
+Automatically generated by python-semantic-release ([`ec24ca3`](https://github.com/TattaBio/DGEB/commit/ec24ca343b49bee85c72907554772976f02eab1a))
+
+## v0.0.4 (2024-07-01)
+
+### Fix
+
+* fix: move gh token to env ([`95e292c`](https://github.com/TattaBio/DGEB/commit/95e292c46f7908659d46bc093ef4903609f1edc5))
+
+### Unknown
+
+* 0.0.4
+
+Automatically generated by python-semantic-release ([`03f3004`](https://github.com/TattaBio/DGEB/commit/03f300476b0aeca2796b780139fce0be037ae636))
+
+## v0.0.3 (2024-07-01)
+
+### Fix
+
+* fix: remove persist credentials ([`2ae683e`](https://github.com/TattaBio/DGEB/commit/2ae683ed7a68b0559b81b1b7f5716636beef1415))
+
+* fix: try to fix release CI ([`1dfc938`](https://github.com/TattaBio/DGEB/commit/1dfc9383b2dab8bba444b09c6b85500dadee7203))
+
+### Unknown
+
+* 0.0.3
+
+Automatically generated by python-semantic-release ([`7cbfc8d`](https://github.com/TattaBio/DGEB/commit/7cbfc8d0acef975d046ff485001ed289800d143f))
+
+## v0.0.2 (2024-07-01)
+
+### Fix
+
+* fix: new repository name ([`8fc1145`](https://github.com/TattaBio/DGEB/commit/8fc1145985eab8aa97562f697edab45a30b189ba))
+
+* fix: addl geb references ([`86a5af8`](https://github.com/TattaBio/DGEB/commit/86a5af8c24244ac8f2670801468e1a25b8e3e9df))
+
+### Unknown
+
+* 0.0.2
+
+Automatically generated by python-semantic-release ([`1c7b19b`](https://github.com/TattaBio/DGEB/commit/1c7b19b50597e9dabe07fbf7cb7d3c589438917a))
+
+## v0.0.1 (2024-07-01)
+
+### Fix
+
+* fix: rename geb to dgeb ([`be712f8`](https://github.com/TattaBio/DGEB/commit/be712f8d19678801b9148ac8397f13afe826871b))
+
+### Unknown
+
+* 0.0.1
+
+Automatically generated by python-semantic-release ([`1503e03`](https://github.com/TattaBio/DGEB/commit/1503e030bb1277e1a2dcad7b99c9ed3472243f5d))
+
+## v0.0.0 (2024-07-01)
+
+### Unknown
+
+* 0.0.0
+
+Automatically generated by python-semantic-release ([`4b791ee`](https://github.com/TattaBio/DGEB/commit/4b791ee07085647427afec31a1adf61977e6bd4c))
+
+* Initial commit ([`36fe62c`](https://github.com/TattaBio/DGEB/commit/36fe62c234331de97f2827a49bf62d5c35b92a1f))
Dockerfile
ADDED
@@ -0,0 +1,27 @@
+# Docker file for leaderboard
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# install curl
+RUN apt-get update && apt-get install -y curl
+ADD https://astral.sh/uv/install.sh /install.sh
+RUN chmod +x /install.sh
+RUN /install.sh && rm /install.sh
+
+# install deps
+COPY leaderboard/requirements.txt ./
+RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
+
+# copy src
+COPY dgeb dgeb
+COPY leaderboard/ leaderboard/
+
+# Run gradio when the container launches
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_TEMP_DIR="/app"
+WORKDIR /app/leaderboard
+CMD ["python", "app.py"]
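The container entrypoint is `leaderboard/app.py`, served on port 7860 with `GRADIO_SERVER_NAME=0.0.0.0`. As a rough placeholder only (the actual `app.py` added in this commit renders the DGEB leaderboard tables; this sketch is not that file), a Gradio entrypoint wired to those Dockerfile settings might look like:

```python
# Hypothetical placeholder for leaderboard/app.py -- illustrates only how the Dockerfile's
# EXPOSE 7860 / GRADIO_SERVER_NAME settings map onto a Gradio launch.
import os

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# DGEB Leaderboard (placeholder)")

if __name__ == "__main__":
    demo.launch(
        # "0.0.0.0" inside the container so the app is reachable through the exposed port.
        server_name=os.getenv("GRADIO_SERVER_NAME", "127.0.0.1"),
        server_port=7860,
    )
```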
LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
README.md
ADDED
@@ -0,0 +1,181 @@
+---
+title: DGEB
+app_file: leaderboard/app.py
+sdk: docker
+sdk_version: 4.36.1
+---
+<h1 align="center">Diverse Genomic Embedding Benchmark</h1>
+
+<p align="center">
+    <a href="https://github.com/tattabio/dgeb/releases">
+        <img alt="GitHub release" src="https://img.shields.io/github/v/release/tattabio/dgeb.svg">
+    </a>
+    <a href="">
+        <img alt="arXiv URL" src="">
+    </a>
+    <a href="https://github.com/tattabio/dgeb/blob/main/LICENSE">
+        <img alt="License" src="https://img.shields.io/github/license/tattabio/dgeb.svg">
+    </a>
+    <a href="https://pepy.tech/project/dgeb">
+        <img alt="Downloads" src="https://static.pepy.tech/personalized-badge/dgeb?period=total&units=international_system&left_color=grey&right_color=orange&left_text=Downloads">
+    </a>
+</p>
+
+<h4 align="center">
+    <p>
+        <a href="#installation">Installation</a> |
+        <a href="#usage">Usage</a> |
+        <a href="https://huggingface.co/spaces/tattabio/DGEB">Leaderboard</a> |
+        <a href="#citing">Citing</a>
+    <p>
+</h4>
+
+<h3 align="center">
+    <a href="https://huggingface.co/spaces/dgeb"><img style="float: middle; padding: 10px 10px 10px 10px;" width="100" height="100" src="./docs/images/tatta_logo.png" /></a>
+</h3>
+
+DGEB is a benchmark for evaluating biological sequence models on functional and evolutionary information.
+
+DGEB is designed to evaluate model embeddings using:
+
+- Diverse sequences across the tree of life.
+- Diverse tasks that capture different aspects of biological function.
+- Both amino acid and nucleotide sequences.
+
+The current version of DGEB consists of 18 datasets covering all three domains of life (Bacteria, Archaea and Eukarya). DGEB evaluates embeddings using six different embedding tasks: Classification, BiGene mining, Evolutionary Distance Similarity (EDS), Pair Classification, Clustering, and Retrieval.
+
+We welcome contributions of new tasks and datasets.
+
+## Installation
+
+Install DGEB using pip.
+
+```bash
+pip install dgeb
+```
+
+## Usage
+
+- Launch evaluation using the Python script (see [cli.py](https://github.com/tattabio/dgeb/blob/main/dgeb/cli.py)):
+
+```bash
+dgeb --model facebook/esm2_t6_8M_UR50D
+```
+
+- To see all supported models and tasks:
+
+```bash
+dgeb --help
+```
+
+- Using the Python API:
+
+```py
+import dgeb
+
+model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
+tasks = dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN)
+evaluation = dgeb.DGEB(tasks=tasks)
+evaluation.run(model, output_folder="results")
+```
+
+### Using a custom model
+
+Custom models should subclass the `dgeb.models.BioSeqTransformer` abstract class and specify the modality, number of layers, and embedding dimension. See [models.py](https://github.com/tattabio/dgeb/blob/main/dgeb/models.py) for additional examples of custom model loading and inference.
+
+```python
+import dgeb
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks.tasks import Modality
+
+class MyModel(BioSeqTransformer):
+
+    @property
+    def modality(self) -> Modality:
+        return Modality.PROTEIN
+
+    @property
+    def num_layers(self) -> int:
+        return self.config.num_hidden_layers
+
+    @property
+    def embed_dim(self) -> int:
+        return self.config.hidden_size
+
+
+model = MyModel(model_name='path_to/huggingface_model')
+tasks = dgeb.get_tasks_by_modality(model.modality)
+evaluation = dgeb.DGEB(tasks=tasks)
+evaluation.run(model)
+```
+
+### Evaluating on a custom dataset
+
+**We strongly encourage users to contribute their custom datasets to DGEB. Please open a PR adding your dataset so that the community can benefit!**
+
+To evaluate on a custom dataset, first upload your dataset to the [Huggingface Hub](https://huggingface.co/docs/hub/en/datasets-adding). Then define a `Task` subclass with `TaskMetadata` that points to your Hugging Face dataset. For example, a classification task on a custom dataset can be defined as follows:
+
+```python
+import dgeb
+from dgeb.models import BioSeqTransformer
+from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult
+from dgeb.tasks.classification_tasks import run_classification_task
+
+class MyCustomTask(Task):
+    metadata = TaskMetadata(
+        id="my_custom_classification",
+        display_name="...",
+        description="...",
+        type="classification",
+        modality=Modality.PROTEIN,
+        datasets=[
+            Dataset(
+                path="path_to/huggingface_dataset",
+                revision="...",
+            )
+        ],
+        primary_metric_id="f1",
+    )
+
+    def run(self, model: BioSeqTransformer) -> TaskResult:
+        return run_classification_task(model, self.metadata)
+
+model = dgeb.get_model("facebook/esm2_t6_8M_UR50D")
+evaluation = dgeb.DGEB(tasks=[MyCustomTask])
+evaluation.run(model)
+```
+
+## Leaderboard
+
+To add your submission to the DGEB leaderboard, follow the steps below.
+
+1. Fork the DGEB repository by following GitHub's [forking workflow](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
+
+2. Add your submission `.json` file to the `leaderboard/submissions/<HF_MODEL_NAME>/` directory:
+
+```bash
+mv /path/to/<SUBMISSION_FILE>.json /path/to/DGEB/leaderboard/submissions/<HF_MODEL_NAME>/
+```
+
+3. Update your fork with the new submission:
+
+```bash
+git add leaderboard/submissions/<HF_MODEL_NAME>/<SUBMISSION_FILE>.json
+git commit -m "Add submission for <HF_MODEL_NAME>"
+git push
+```
+
+4. Open a pull request to the main branch of the repository via the GitHub interface.
+
+5. Once the PR is reviewed and merged, your submission will be added to the leaderboard!
+
+
+## Acknowledgements
+
+DGEB follows the design of the text embedding benchmark [MTEB](https://github.com/embeddings-benchmark/mteb) developed by Huggingface 🤗. The evaluation code is adapted from the MTEB codebase.
+
+## Citing
+
+DGEB was introduced in "[Diverse Genomic Embedding Benchmark for Functional Evaluation Across the Tree of Life]()"; feel free to cite:
+
+TODO
dgeb/__init__.py
ADDED
@@ -0,0 +1,28 @@
+from dgeb.dgeb import (
+    DGEB,
+    get_all_model_names,
+    get_all_task_names,
+    get_all_tasks,
+    get_model,
+    get_output_folder,
+    get_tasks_by_modality,
+    get_tasks_by_name,
+)
+from dgeb.modality import Modality
+from dgeb.tasks.tasks import TaskResult
+
+# importing without setting `__all__` produces a Ruff error:
+# "imported but unused; consider removing, adding to __all__, or using a redundant alias RuffF401"
+# See https://docs.astral.sh/ruff/rules/unused-import/#why-is-this-bad
+__all__ = [
+    "DGEB",
+    "get_all_tasks",
+    "get_all_task_names",
+    "get_tasks_by_name",
+    "get_tasks_by_modality",
+    "get_all_model_names",
+    "get_model",
+    "get_output_folder",
+    "TaskResult",
+    "Modality",
+]
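`dgeb/__init__.py` defines the public surface of the package. Assuming the package is installed, a quick way to see what that surface exposes is a sketch like the following (only names re-exported above are used):

```python
# Minimal sketch of the public API exported by dgeb/__init__.py (requires `pip install dgeb`).
import dgeb

print(dgeb.get_all_model_names())                         # supported model identifiers
print(dgeb.get_all_task_names())                          # task ids, usable with the --tasks CLI flag
print(dgeb.get_tasks_by_modality(dgeb.Modality.PROTEIN))  # Task classes for one modality
```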
dgeb/cli.py
ADDED
@@ -0,0 +1,136 @@
+"""
+Main command to run diverse genomic embedding benchmarks (DGEB) on a model.
+example command to run DGEB:
+python run_dgeb.py -m facebook/esm2_t6_8M_UR50D
+"""
+
+import argparse
+import logging
+import os
+
+import dgeb
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+ALL_TASK_NAMES = dgeb.get_all_task_names()
+ALL_MODEL_NAMES = dgeb.get_all_model_names()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m",
+        "--model",
+        type=str,
+        default=None,
+        help=f"Model to evaluate. Choose from {ALL_MODEL_NAMES}",
+    )
+    parser.add_argument(
+        "-t",
+        "--tasks",
+        type=lambda s: [item for item in s.split(",")],
+        default=None,
+        help=f"Comma separated tasks to evaluate on. Choose from {ALL_TASK_NAMES} or do not specify to evaluate on all tasks",
+    )
+    parser.add_argument(
+        "-l",
+        "--layers",
+        type=str,
+        default=None,
+        help="Layer to evaluate. Comma separated list of integers or 'mid' and 'last'. Default is 'mid,last'",
+    )
+    parser.add_argument(
+        "--devices",
+        type=str,
+        default="0",
+        help="Comma separated list of GPU device ids to use. Default is 0 (if GPUs are detected).",
+    )
+    parser.add_argument(
+        "--output_folder",
+        type=str,
+        default=None,
+        help="Output directory for results. Will default to results/model_name if not set.",
+    )
+    parser.add_argument(
+        "-v", "--verbosity", type=int, default=2, help="Verbosity level"
+    )
+    parser.add_argument(
+        "-b", "--batch_size", type=int, default=64, help="Batch size for evaluation"
+    )
+    parser.add_argument(
+        "--max_seq_len",
+        type=int,
+        default=1024,
+        help="Maximum sequence length for model, default is 1024.",
+    )
+    parser.add_argument(
+        "--pool_type",
+        type=str,
+        default="mean",
+        help="Pooling type for model, choose from mean, max, cls, last. Default is mean.",
+    )
+
+    args = parser.parse_args()
+
+    # set logging based on verbosity level
+    if args.verbosity == 0:
+        logging.getLogger("geb").setLevel(logging.CRITICAL)
+    elif args.verbosity == 1:
+        logging.getLogger("geb").setLevel(logging.WARNING)
+    elif args.verbosity == 2:
+        logging.getLogger("geb").setLevel(logging.INFO)
+    elif args.verbosity == 3:
+        logging.getLogger("geb").setLevel(logging.DEBUG)
+
+    if args.model is None:
+        raise ValueError("Please specify a model using the -m or --model argument")
+
+    # make sure that devices are comma separated list of integers
+    try:
+        devices = [int(device) for device in args.devices.split(",")]
+    except ValueError:
+        raise ValueError("Devices must be comma separated list of integers")
+
+    layers = args.layers
+    if layers:
+        if layers not in ["mid", "last"]:
+            # Layers should be list of integers.
+            try:
+                layers = [int(layer) for layer in layers.split(",")]
+            except ValueError:
+                raise ValueError("Layers must be a list of integers.")
+
+    model_name = args.model.split("/")[-1]
+    output_folder = args.output_folder
+    if output_folder is None:
+        output_folder = os.path.join("results", model_name)
+    # create output folder if it does not exist
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+    logger.info(f"Results will be saved to {output_folder}")
+
+    # Load the model by name.
+    model = dgeb.get_model(
+        model_name=args.model,
+        layers=layers,
+        devices=devices,
+        max_seq_length=args.max_seq_len,
+        batch_size=args.batch_size,
+        pool_type=args.pool_type,
+    )
+
+    all_tasks_for_modality = dgeb.get_tasks_by_modality(model.modality)
+
+    if args.tasks:
+        task_list = dgeb.get_tasks_by_name(args.tasks)
+        if not all([task.metadata.modality == model.modality for task in task_list]):
+            raise ValueError(f"Tasks must be one of {all_tasks_for_modality}")
+    else:
+        task_list = all_tasks_for_modality
+    evaluation = dgeb.DGEB(tasks=task_list)
+    _ = evaluation.run(model)
+
+
+if __name__ == "__main__":
+    main()
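The CLI flags above all funnel into a single `dgeb.get_model(...)` call. A sketch of the equivalent programmatic invocation, using the CLI's default values (assumes `dgeb` and its torch dependency are installed; the output folder name is illustrative):

```python
# Programmatic equivalent of `dgeb --model facebook/esm2_t6_8M_UR50D`, mirroring cli.py's defaults.
import dgeb

model = dgeb.get_model(
    model_name="facebook/esm2_t6_8M_UR50D",
    layers=None,          # None keeps the default layers ("mid" and "last" per the --layers help text)
    devices=[0],          # --devices default
    max_seq_length=1024,  # --max_seq_len default
    batch_size=64,        # --batch_size default
    pool_type="mean",     # --pool_type default
)
tasks = dgeb.get_tasks_by_modality(model.modality)
results = dgeb.DGEB(tasks=tasks).run(model, output_folder="results/esm2_t6_8M_UR50D")
```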
dgeb/dgeb.py
ADDED
@@ -0,0 +1,129 @@
+import logging
+import os
+import traceback
+from itertools import chain
+from typing import Any, List
+
+from rich.console import Console
+
+from .eval_utils import set_all_seeds
+from .modality import Modality
+from .models import BioSeqTransformer
+from .tasks.tasks import Task
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class DGEB:
+    """GEB class to run the evaluation pipeline."""
+
+    def __init__(self, tasks: List[type[Task]], seed: int = 42):
+        self.tasks = tasks
+        set_all_seeds(seed)
+
+    def print_selected_tasks(self):
+        """Print the selected tasks."""
+        console = Console()
+        console.rule("[bold]Selected Tasks\n", style="grey15")
+        for task in self.tasks:
+            prefix = " - "
+            name = f"{task.metadata.display_name}"
+            category = f", [italic grey39]{task.metadata.type}[/]"
+            console.print(f"{prefix}{name}{category}")
+        console.print("\n")
+
+    def run(
+        self,
+        model,  # type encoder
+        output_folder: str = "results",
+    ):
+        """Run the evaluation pipeline on the selected tasks.
+
+        Args:
+            model: Model to be used for evaluation
+            output_folder: Folder where the results will be saved. Default to 'results'. Where it will save the results in the format:
+                `{output_folder}/{model_name}/{model_revision}/{task_name}.json`.
+
+        Returns:
+            A list of MTEBResults objects, one for each task evaluated.
+        """
+        # Run selected tasks
+        self.print_selected_tasks()
+        results = []
+
+        for task in self.tasks:
+            logger.info(
+                f"\n\n********************** Evaluating {task.metadata.display_name} **********************"
+            )
+
+            try:
+                result = task().run(model)
+            except Exception as e:
+                logger.error(e)
+                logger.error(traceback.format_exc())
+                logger.error(f"Error running task {task}")
+                continue
+
+            results.append(result)
+
+            save_path = get_output_folder(model.hf_name, task, output_folder)
+            with open(save_path, "w") as f_out:
+                f_out.write(result.model_dump_json(indent=2))
+        return results
+
+
+def get_model(model_name: str, **kwargs: Any) -> type[BioSeqTransformer]:
+    all_names = get_all_model_names()
+    for cls in BioSeqTransformer.__subclasses__():
+        if model_name in cls.MODEL_NAMES:
+            return cls(model_name, **kwargs)
+    raise ValueError(f"Model {model_name} not found in {all_names}.")
+
+
+def get_all_model_names() -> List[str]:
+    return list(
+        chain.from_iterable(
+            cls.MODEL_NAMES for cls in BioSeqTransformer.__subclasses__()
+        )
+    )
+
+
+def get_all_task_names() -> List[str]:
+    return [task.metadata.id for task in get_all_tasks()]
+
+
+def get_tasks_by_name(tasks: List[str]) -> List[type[Task]]:
+    return [_get_task(task) for task in tasks]
+
+
+def get_tasks_by_modality(modality: Modality) -> List[type[Task]]:
+    return [task for task in get_all_tasks() if task.metadata.modality == modality]
+
+
+def get_all_tasks() -> List[type[Task]]:
+    return Task.__subclasses__()
+
+
+def _get_task(task_name: str) -> type[Task]:
+    logger.info(f"Getting task {task_name}")
+    for task in get_all_tasks():
+        if task.metadata.id == task_name:
+            return task
+
+    raise ValueError(
+        f"Task {task_name} not found, available tasks are: {[task.metadata.id for task in get_all_tasks()]}"
+    )
+
+
+def get_output_folder(
+    model_hf_name: str, task: type[Task], output_folder: str, create: bool = True
+):
+    output_folder = os.path.join(output_folder, os.path.basename(model_hf_name))
+    # create output folder if it does not exist
+    if create and not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+    return os.path.join(
+        output_folder,
+        f"{task.metadata.id}.json",
+    )
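`get_all_tasks()` and `get_model()` above rely on Python's `__subclasses__()` as an implicit registry: defining a `Task` (or `BioSeqTransformer`) subclass anywhere that gets imported is enough to make it discoverable. A self-contained toy sketch of that pattern (illustrative class names only, not dgeb code):

```python
# Toy illustration of the __subclasses__()-based discovery used by dgeb.get_all_tasks().
from typing import List


class Task:
    """Stand-in for dgeb.tasks.tasks.Task."""
    id: str = "base"


class ArchRetrievalDemo(Task):
    id = "arch_retrieval"


class EcClassificationDemo(Task):
    id = "ec_classification"


def get_all_tasks() -> List[type[Task]]:
    # Direct subclasses only, exactly like Task.__subclasses__() in dgeb.py.
    return Task.__subclasses__()


print([t.id for t in get_all_tasks()])  # ['arch_retrieval', 'ec_classification']
```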
dgeb/eval_utils.py
ADDED
@@ -0,0 +1,394 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Utility functions for evaluation."""
|
2 |
+
|
3 |
+
from typing import Any, Dict, List, Tuple
|
4 |
+
import json
|
5 |
+
import torch
|
6 |
+
import random
|
7 |
+
import numpy as np
|
8 |
+
from sklearn.metrics import auc
|
9 |
+
|
10 |
+
|
11 |
+
class ForwardHook:
|
12 |
+
"""Pytorch forward hook class to store outputs of intermediate layers."""
|
13 |
+
|
14 |
+
def __init__(self, module: torch.nn.Module):
|
15 |
+
self.hook = module.register_forward_hook(self.hook_fn)
|
16 |
+
self.output = None
|
17 |
+
|
18 |
+
def hook_fn(self, module, input, output):
|
19 |
+
self.output = output
|
20 |
+
|
21 |
+
def close(self):
|
22 |
+
self.hook.remove()
|
23 |
+
|
24 |
+
|
25 |
+
def pool(
|
26 |
+
last_hidden_states: torch.Tensor, attention_mask: torch.Tensor, pool_type: str
|
27 |
+
) -> torch.Tensor:
|
28 |
+
"""Pool embeddings across the sequence length dimension."""
|
29 |
+
assert (
|
30 |
+
last_hidden_states.ndim == 3
|
31 |
+
), f"Expected hidden_states to have shape [batch, seq_len, D], got shape: {last_hidden_states.shape}"
|
32 |
+
assert (
|
33 |
+
attention_mask.ndim == 2
|
34 |
+
), f"Expected attention_mask to have shape [batch, seq_len], got shape: {attention_mask.shape}"
|
35 |
+
last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
|
36 |
+
if pool_type == "mean":
|
37 |
+
emb = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
|
38 |
+
elif pool_type == "max":
|
39 |
+
emb = last_hidden.max(dim=1)[0]
|
40 |
+
elif pool_type == "cls":
|
41 |
+
emb = last_hidden[:, 0]
|
42 |
+
    elif pool_type == "last":
        emb = last_hidden[torch.arange(last_hidden.size(0)), attention_mask.sum(1) - 1]
    else:
        raise ValueError(f"pool_type {pool_type} not supported")
    return emb


def set_all_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True


def write_results_to_json(results: Dict[str, Any], results_path: str):
    """Write results dict to a json file."""
    with open(results_path, "w") as f:
        json.dump(results, f, indent=4)


def merge_split_elem_embeds(ids, embeds, preserve_order: bool = False):
    """Merge embeddings with the same id by mean-pooling and optionally preserve the order in which they appear.

    Args:
        ids: Array of string ids, [batch].
        embeds: Array of embeddings, [batch, ...].

    Returns:
        ids: Unique ids, [unique_batch].
        embeds: Array of embeddings, [unique_batch, ...].
    """
    unique_ids, indices = np.unique(ids, return_inverse=True)
    shape_no_batch = embeds.shape[1:]
    sums = np.zeros([unique_ids.size, *shape_no_batch], dtype=embeds.dtype)
    counts = np.bincount(indices, minlength=unique_ids.size)
    np.add.at(sums, indices, embeds)
    # Add trailing dimensions to counts.
    counts = counts[(...,) + (None,) * len(shape_no_batch)]
    mean_pooled = sums / counts
    # Preserve the order of the input ids.
    if preserve_order:
        order = []
        for id in unique_ids:
            idx = np.where(ids == id)[0][0]
            order.append(idx)
        re_order = np.argsort(order)
        unique_ids = unique_ids[re_order]
        mean_pooled = mean_pooled[re_order]
    return unique_ids, mean_pooled


def paired_dataset(labels, embeds):
    """Creates a paired dataset for consecutive operonic gene pairs."""
    embeds1 = embeds[:-1]
    embeds2 = embeds[1:]
    labels = labels[:-1]
    return embeds1, embeds2, labels


def cos_sim(a, b):
    """Computes the cosine similarity cos_sim(a[i], b[j]) for all i and j.

    Return:
        Matrix with res[i][j] = cos_sim(a[i], b[j])
    """  # noqa: D402
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(a)

    if not isinstance(b, torch.Tensor):
        b = torch.tensor(b)

    if len(a.shape) == 1:
        a = a.unsqueeze(0)

    if len(b.shape) == 1:
        b = b.unsqueeze(0)

    a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
    b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
    return torch.mm(a_norm, b_norm.transpose(0, 1))


def dot_score(a: torch.Tensor, b: torch.Tensor):
    """Computes the dot-product dot_prod(a[i], b[j]) for all i and j.
    :return: Matrix with res[i][j] = dot_prod(a[i], b[j])
    """
    if not isinstance(a, torch.Tensor):
        a = torch.tensor(a)

    if not isinstance(b, torch.Tensor):
        b = torch.tensor(b)

    if len(a.shape) == 1:
        a = a.unsqueeze(0)

    if len(b.shape) == 1:
        b = b.unsqueeze(0)

    return torch.mm(a, b.transpose(0, 1))


# From https://github.com/beir-cellar/beir/blob/f062f038c4bfd19a8ca942a9910b1e0d218759d4/beir/retrieval/custom_metrics.py#L4
def mrr(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    MRR = {}

    for k in k_values:
        MRR[f"MRR@{k}"] = []

    k_max, top_hits = max(k_values), {}

    for query_id, doc_scores in results.items():
        top_hits[query_id] = sorted(
            doc_scores.items(), key=lambda item: item[1], reverse=True
        )[0:k_max]

    for query_id in top_hits:
        query_relevant_docs = set(
            [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0]
        )
        for k in k_values:
            rr = 0
            for rank, hit in enumerate(top_hits[query_id][0:k]):
                if hit[0] in query_relevant_docs:
                    rr = 1.0 / (rank + 1)
                    break
            MRR[f"MRR@{k}"].append(rr)

    if output_type == "mean":
        for k in k_values:
            MRR[f"MRR@{k}"] = round(sum(MRR[f"MRR@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return MRR


# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
def recall_cap(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    capped_recall = {}

    for k in k_values:
        capped_recall[f"R_cap@{k}"] = []

    k_max = max(k_values)

    for query_id, doc_scores in results.items():
        top_hits = sorted(doc_scores.items(), key=lambda item: item[1], reverse=True)[
            0:k_max
        ]
        query_relevant_docs = [
            doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0
        ]
        for k in k_values:
            retrieved_docs = [
                row[0] for row in top_hits[0:k] if qrels[query_id].get(row[0], 0) > 0
            ]
            denominator = min(len(query_relevant_docs), k)
            capped_recall[f"R_cap@{k}"].append(len(retrieved_docs) / denominator)

    if output_type == "mean":
        for k in k_values:
            capped_recall[f"R_cap@{k}"] = round(
                sum(capped_recall[f"R_cap@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return capped_recall


# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
def hole(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    Hole = {}

    for k in k_values:
        Hole[f"Hole@{k}"] = []

    annotated_corpus = set()
    for _, docs in qrels.items():
        for doc_id, score in docs.items():
            annotated_corpus.add(doc_id)

    k_max = max(k_values)

    for _, scores in results.items():
        top_hits = sorted(scores.items(), key=lambda item: item[1], reverse=True)[
            0:k_max
        ]
        for k in k_values:
            hole_docs = [
                row[0] for row in top_hits[0:k] if row[0] not in annotated_corpus
            ]
            Hole[f"Hole@{k}"].append(len(hole_docs) / k)

    if output_type == "mean":
        for k in k_values:
            # Per-query values are collected in a list, so average with sum().
            Hole[f"Hole@{k}"] = round(sum(Hole[f"Hole@{k}"]) / len(qrels), 5)

    elif output_type == "all":
        pass

    return Hole


# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
def top_k_accuracy(
    qrels: dict[str, dict[str, int]],
    results: dict[str, dict[str, float]],
    k_values: List[int],
    output_type: str = "mean",
) -> Tuple[Dict[str, float]]:
    top_k_acc = {}

    for k in k_values:
        top_k_acc[f"Accuracy@{k}"] = []

    k_max, top_hits = max(k_values), {}

    for query_id, doc_scores in results.items():
        top_hits[query_id] = [
            item[0]
            for item in sorted(
                doc_scores.items(), key=lambda item: item[1], reverse=True
            )[0:k_max]
        ]

    for query_id in top_hits:
        query_relevant_docs = set(
            [doc_id for doc_id in qrels[query_id] if qrels[query_id][doc_id] > 0]
        )
        for k in k_values:
            for relevant_doc_id in query_relevant_docs:
                if relevant_doc_id in top_hits[query_id][0:k]:
                    top_k_acc[f"Accuracy@{k}"].append(1.0)
                    break

    if output_type == "mean":
        for k in k_values:
            # Per-query hits are collected in a list, so average with sum().
            top_k_acc[f"Accuracy@{k}"] = round(
                sum(top_k_acc[f"Accuracy@{k}"]) / len(qrels), 5
            )

    elif output_type == "all":
        pass

    return top_k_acc


# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
def confidence_scores(sim_scores: List[float]) -> Dict[str, float]:
    """Computes confidence scores for a single instance = (query, positives, negatives)

    Args:
        sim_scores: Query-documents similarity scores with length `num_pos+num_neg`

    Returns:
        conf_scores:
            - `max`: Maximum similarity score
            - `std`: Standard deviation of similarity scores
            - `diff1`: Difference between highest and second highest similarity scores
    """
    sim_scores_sorted = sorted(sim_scores)[::-1]

    cs_max = sim_scores_sorted[0]
    cs_std = np.std(sim_scores)
    if len(sim_scores) > 1:
        cs_diff1 = sim_scores_sorted[0] - sim_scores_sorted[1]
    elif len(sim_scores) == 1:
        cs_diff1 = 0.0

    conf_scores = {"max": cs_max, "std": cs_std, "diff1": cs_diff1}

    return conf_scores


# From https://github.com/embeddings-benchmark/mteb/blob/8178981fd8fcd546d7031afe61a083d13c41520f/mteb/evaluation/evaluators/utils.py
def nAUC(
    conf_scores: np.ndarray,
    metrics: np.ndarray,
    abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
) -> float:
    """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997
    1/ Computes the raw abstention curve, i.e., the average evaluation metric at different abstention rates determined by the confidence scores
    2/ Computes the oracle abstention curve, i.e., the best theoretical abstention curve (e.g.: at a 10% abstention rate, the oracle abstains on the bottom-10% instances with regard to the evaluation metric)
    3/ Computes the flat abstention curve, i.e., the one that remains flat for all abstention rates (ineffective abstention)
    4/ Computes the area under the three curves
    5/ Finally scales the raw AUC between the oracle and the flat AUCs to get the normalized AUC

    Args:
        conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
        metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
        abstention_rates: Target rates for the computation of the abstention curve

    Returns:
        abst_nauc: Normalized area under the abstention curve (upper-bounded by 1)
    """

    def abstention_curve(
        conf_scores: np.ndarray,
        metrics: np.ndarray,
        abstention_rates: np.ndarray = np.linspace(0, 1, 11)[:-1],
    ) -> np.ndarray:
        """Computes the raw abstention curve for a given set of evaluated instances and corresponding confidence scores

        Args:
            conf_scores: Instance confidence scores used for abstention thresholding, with shape `(num_test_instances,)`
            metrics: Metric evaluations at instance-level (e.g.: average precision, NDCG...), with shape `(num_test_instances,)`
            abstention_rates: Target rates for the computation of the abstention curve

        Returns:
            abst_curve: Abstention curve of length `len(abstention_rates)`
        """
        conf_scores_argsort = np.argsort(conf_scores)
        abst_curve = np.zeros(len(abstention_rates))

        for i, rate in enumerate(abstention_rates):
            num_instances_abst = min(
                round(rate * len(conf_scores_argsort)), len(conf_scores) - 1
            )
            abst_curve[i] = metrics[conf_scores_argsort[num_instances_abst:]].mean()

        return abst_curve

    abst_curve = abstention_curve(conf_scores, metrics, abstention_rates)
    or_curve = abstention_curve(metrics, metrics, abstention_rates)
    abst_auc = auc(abstention_rates, abst_curve)
    or_auc = auc(abstention_rates, or_curve)
    flat_auc = or_curve[0] * (abstention_rates[-1] - abstention_rates[0])

    if or_auc == flat_auc:
        abst_nauc = np.nan
    else:
        abst_nauc = (abst_auc - flat_auc) / (or_auc - flat_auc)

    return abst_nauc
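A minimal usage sketch of the retrieval helpers above (illustrative only; the query/document ids and similarity scores are made up):

from dgeb.eval_utils import mrr, recall_cap

# qrels maps query id -> {doc id: relevance}; results maps query id -> {doc id: similarity score}.
qrels = {"q1": {"d1": 1, "d3": 1}}
results = {"q1": {"d2": 0.9, "d1": 0.7, "d3": 0.2}}
print(mrr(qrels, results, k_values=[1, 3]))         # {'MRR@1': 0.0, 'MRR@3': 0.5}
print(recall_cap(qrels, results, k_values=[1, 3]))  # {'R_cap@1': 0.0, 'R_cap@3': 1.0}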
dgeb/evaluators.py
ADDED
@@ -0,0 +1,839 @@
"""
Evaluator objects for different evaluation types.
"""

import logging
import random
from abc import ABC, abstractmethod
import heapq
from collections import defaultdict
import pytrec_eval
import numpy as np
import sklearn.cluster
import sklearn.neighbors  # used by the KNN classification evaluator below
import torch
from scipy.stats import pearsonr
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    average_precision_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
    label_ranking_average_precision_score,
)
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.pairwise import (
    paired_cosine_distances,
    paired_euclidean_distances,
    paired_manhattan_distances,
)
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import MultiLabelBinarizer
from typing import Dict, List, Tuple

from .eval_utils import (
    cos_sim,
    dot_score,
    mrr,
    recall_cap,
    hole,
    confidence_scores,
    nAUC,
    top_k_accuracy,
)


class Evaluator(ABC):
    """Base class for all evaluators.

    Extend this class and implement __call__ for custom evaluators.
    """

    def __init__(self, seed=42, **kwargs):
        self.seed = seed
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed_all(self.seed)

    @abstractmethod
    def __call__(self, model):
        """This is called during training to evaluate the model.
        It returns scores.

        Parameters
        ----------
        model:
            the model to evaluate
        """
        pass


logger = logging.getLogger(__name__)


class logRegClassificationEvaluator(Evaluator):
    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test

        self.max_iter = max_iter

    def __call__(self):
        scores = {}
        clf = LogisticRegression(
            random_state=self.seed,
            n_jobs=-1,
            max_iter=self.max_iter,
            verbose=1 if logger.isEnabledFor(logging.DEBUG) else 0,
        )
        logger.info(f"Encoding {len(self.embeds_train)} training embeds...")
        X_train = np.asarray(self.embeds_train)

        logger.info(f"Encoding {len(self.embeds_test)} test embeds...")
        X_test = np.asarray(self.embeds_test)
        logger.info("Fitting logistic regression classifier...")
        clf.fit(X_train, self.y_train)
        logger.info("Evaluating...")
        y_pred = clf.predict(X_test)
        accuracy = accuracy_score(self.y_test, y_pred)
        f1 = f1_score(self.y_test, y_pred, average="macro")
        scores["accuracy"] = accuracy
        scores["f1"] = f1

        # if binary classification
        if len(np.unique(self.y_train)) == 2:
            ap = average_precision_score(self.y_test, y_pred)
            scores["ap"] = ap

        return scores


class ClusteringEvaluator(Evaluator):
    def __init__(
        self,
        embeds,
        labels,
        clustering_batch_size=500,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds = embeds
        self.labels = labels
        self.clustering_batch_size = clustering_batch_size

    def __call__(self):
        logger.info(f"Encoding {len(self.embeds)} embeds...")
        corpus_embeddings = np.asarray(self.embeds)

        logger.info("Fitting Mini-Batch K-Means model...")
        clustering_model = sklearn.cluster.MiniBatchKMeans(
            n_clusters=len(set(self.labels)),
            batch_size=self.clustering_batch_size,
            n_init="auto",
        )
        clustering_model.fit(corpus_embeddings)
        cluster_assignment = clustering_model.labels_

        logger.info("Evaluating...")
        v_measure = v_measure_score(self.labels, cluster_assignment)

        return {"v_measure": v_measure}


class PairClassificationEvaluator(Evaluator):
    """Evaluate a model based on the similarity of the embeddings by calculating the accuracy of identifying similar and
    dissimilar embeds.
    The metrics are the cosine similarity, dot product, as well as euclidean and Manhattan distance.
    The returned score is the accuracy with a specified metric.
    The labels need to be 0 for dissimilar pairs and 1 for similar pairs.

    :param embeds1: The first column of embeds
    :param embeds2: The second column of embeds
    :param labels: labels[i] is the label for the pair (embeds1[i], embeds2[i]). Must be 0 or 1
    """

    def __init__(self, embeds1, embeds2, labels, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.labels = labels

        assert len(self.embeds1) == len(self.embeds2)
        assert len(self.embeds1) == len(self.labels)
        for label in labels:
            assert label == 0 or label == 1

    def __call__(self):
        scores = self.compute_metrics()
        # Compute the max of Average Precision (AP) over all distance metrics.
        top_ap_score = max(score for k, score in scores.items() if k.endswith("_ap"))
        scores["top_ap"] = top_ap_score
        return scores

    def compute_metrics(self):
        embeddings1 = np.array(self.embeds1)
        embeddings2 = np.array(self.embeds2)

        logger.info("Computing similarity distances...")
        cosine_scores = 1 - paired_cosine_distances(embeddings1, embeddings2)
        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)

        embeddings1_np = np.asarray(embeddings1)
        embeddings2_np = np.asarray(embeddings2)
        dot_scores = [
            np.dot(embeddings1_np[i], embeddings2_np[i])
            for i in range(len(embeddings1_np))
        ]

        logger.info("Computing metrics...")
        labels = np.asarray(self.labels)
        output_scores = {}
        for short_name, name, scores, reverse in [
            ["cos_sim", "Cosine-Similarity", cosine_scores, True],
            ["manhattan", "Manhattan-Distance", manhattan_distances, False],
            ["euclidean", "Euclidean-Distance", euclidean_distances, False],
            ["dot", "Dot-Product", dot_scores, True],
        ]:
            metrics = self._compute_metrics(scores, labels, reverse)
            metrics = {short_name + "_" + k: v for k, v in metrics.items()}
            output_scores.update(metrics)

        return output_scores

    @staticmethod
    def _compute_metrics(scores, labels, high_score_more_similar):
        """Compute the metrics for the given scores and labels.

        Args:
            scores (`np.ndarray` of shape (n_pairs, )): The similarity/dissimilarity scores for the pairs.
            labels (`np.ndarray` of shape (n_pairs, )): The labels for the pairs.
            high_score_more_similar (`bool`): If true, then the higher the score, the more similar the pairs are.

        Returns:
            `dict`: The metrics for the given scores and labels.
        """
        acc, acc_threshold = PairClassificationEvaluator.find_best_acc_and_threshold(
            scores, labels, high_score_more_similar
        )
        f1, precision, recall, f1_threshold = (
            PairClassificationEvaluator.find_best_f1_and_threshold(
                scores, labels, high_score_more_similar
            )
        )
        ap = PairClassificationEvaluator.ap_score(
            scores, labels, high_score_more_similar
        )

        return {
            "accuracy": acc,
            "accuracy_threshold": acc_threshold,
            "f1": f1,
            "f1_threshold": f1_threshold,
            "precision": precision,
            "recall": recall,
            "ap": ap,
        }

    @staticmethod
    def find_best_acc_and_threshold(scores, labels, high_score_more_similar: bool):
        assert len(scores) == len(labels)
        rows = list(zip(scores, labels))

        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        max_acc = 0
        best_threshold = -1

        positive_so_far = 0
        remaining_negatives = sum(np.array(labels) == 0)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            if label == 1:
                positive_so_far += 1
            else:
                remaining_negatives -= 1

            acc = (positive_so_far + remaining_negatives) / len(labels)
            if acc > max_acc:
                max_acc = acc
                best_threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return max_acc, best_threshold

    @staticmethod
    def find_best_f1_and_threshold(scores, labels, high_score_more_similar: bool):
        assert len(scores) == len(labels)

        scores = np.asarray(scores)
        labels = np.asarray(labels)

        rows = list(zip(scores, labels))

        rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)

        best_f1 = best_precision = best_recall = 0
        threshold = 0
        nextract = 0
        ncorrect = 0
        total_num_duplicates = sum(labels)

        for i in range(len(rows) - 1):
            score, label = rows[i]
            nextract += 1

            if label == 1:
                ncorrect += 1

            if ncorrect > 0:
                precision = ncorrect / nextract
                recall = ncorrect / total_num_duplicates
                f1 = 2 * precision * recall / (precision + recall)
                if f1 > best_f1:
                    best_f1 = f1
                    best_precision = precision
                    best_recall = recall
                    threshold = (rows[i][0] + rows[i + 1][0]) / 2

        return best_f1, best_precision, best_recall, threshold

    @staticmethod
    def ap_score(scores, labels, high_score_more_similar: bool):
        return average_precision_score(
            labels, scores * (1 if high_score_more_similar else -1)
        )


class MultiClassMultiOutputLogRegClassificationEvaluator(Evaluator):
    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        max_iter=1000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.max_iter = max_iter

    def __call__(self):
        scores = {}
        mlb = MultiLabelBinarizer()
        # all classes in y_train and y_test

        class_labels = list(self.y_train) + list(self.y_test)
        labels = [class_label.split(", ") for class_label in class_labels]
        mlb.fit(labels)
        train_labels = [class_label.split(", ") for class_label in self.y_train]
        test_labels = [class_label.split(", ") for class_label in self.y_test]

        y_train = mlb.transform(train_labels)
        y_test = mlb.transform(test_labels)
        clf = MultiOutputRegressor(
            LogisticRegression(
                random_state=self.seed, solver="lbfgs", max_iter=self.max_iter
            )
        ).fit(self.embeds_train, y_train)
        y_pred = clf.predict(self.embeds_test)

        results_dict = classification_report(y_test, y_pred, output_dict=True)
        assert isinstance(
            results_dict, dict
        ), "Should always be true since `output_dict=True` is passed to sklearn.metric.classification_report"
        scores["precision"] = results_dict["macro avg"]["precision"]
        scores["recall"] = results_dict["macro avg"]["recall"]
        scores["f1"] = results_dict["macro avg"]["f1-score"]
        scores["accuracy"] = accuracy_score(y_test, y_pred)

        return scores


class MultiClassMultiOutputKNNClassificationEvaluator(Evaluator):
    def __init__(
        self,
        embeds_train,
        y_train,
        embeds_test,
        y_test,
        n_neighbors=5,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.embeds_train = embeds_train
        self.y_train = y_train
        self.embeds_test = embeds_test
        self.y_test = y_test
        self.n_neighbors = n_neighbors

    def __call__(self):
        scores = {}

        mlb = MultiLabelBinarizer()
        class_labels = list(self.y_train) + list(self.y_test)
        labels = [class_label.split(", ") for class_label in class_labels]
        mlb.fit(labels)
        train_labels = [class_label.split(", ") for class_label in self.y_train]
        test_labels = [class_label.split(", ") for class_label in self.y_test]

        y_train = mlb.transform(train_labels)
        y_test = mlb.transform(test_labels)
        clf = sklearn.neighbors.KNeighborsClassifier(
            n_neighbors=self.n_neighbors, metric="cosine"
        )
        logger.info("Fitting KNN classifier...")
        clf.fit(self.embeds_train, y_train)
        logger.info("Evaluating...")
        y_pred = clf.predict(self.embeds_test)
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average="macro")
        precision = precision_score(y_test, y_pred, average="macro")
        recall = recall_score(y_test, y_pred, average="macro")
        lrap = label_ranking_average_precision_score(y_test, y_pred)
        scores["f1"] = f1
        scores["accuracy"] = accuracy
        scores["precision"] = precision
        scores["recall"] = recall
        scores["lrap"] = lrap

        return scores


class BiGeneMiningEvaluator(Evaluator):
    """
    BiGene Mining Evaluator, analogous to the Bitext Mining Evaluator https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/BitextMiningEvaluator.py.

    If top_k > 1, then recall@k is also computed.
    """

    def __init__(self, embeds1, embeds2, top_k=1, **kwargs):
        super().__init__(**kwargs)
        self.n = len(embeds1)
        self.embeds1 = np.array(embeds1)
        self.embeds2 = np.array(embeds2)
        self.gold = list(zip(range(self.n), range(self.n)))
        self.top_k = top_k

    def __call__(self):
        scores = self.compute_metrics()
        return scores

    def compute_metrics(self):
        logger.info(f"Finding nearest neighbors... with top_k={self.top_k}")
        nearest_neighbors = self._similarity_search(
            self.embeds1, self.embeds2, top_k=self.top_k
        )

        # Compute errors
        logger.info("Computing metrics...")
        labels = []
        predictions = []

        # Get predictions and labels for top_k=1.
        for i, x in enumerate(nearest_neighbors):
            j = x[0]["corpus_id"]
            predictions.append(j)
            labels.append(self.gold[i][1])

        scores = {
            "precision": precision_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "recall": recall_score(
                labels, predictions, zero_division=0, average="weighted"
            ),
            "f1": f1_score(labels, predictions, zero_division=0, average="weighted"),
            "accuracy": accuracy_score(labels, predictions),
        }

        if self.top_k > 1:
            # Compute recall@k.
            top_k_preds = []
            for i, x in enumerate(nearest_neighbors):
                top_k_preds.append([pred["corpus_id"] for pred in x])
            top_k_recall = [
                self.gold[i][1] in top_k_pred
                for i, top_k_pred in enumerate(top_k_preds)
            ]
            scores[f"recall_at_{self.top_k}"] = sum(top_k_recall) / len(top_k_recall)
        return scores

    def _similarity_search(
        self,
        query_embeddings,
        corpus_embeddings,
        query_chunk_size=100,
        corpus_chunk_size=500000,
        top_k=1,
        score_function=cos_sim,
    ):
        """This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
        It can be used for Information Retrieval / Semantic Search for corpora up to about 1 million entries.
        :param query_embeddings: A 2 dimensional tensor with the query embeddings.
        :param corpus_embeddings: A 2 dimensional tensor with the corpus embeddings.
        :param query_chunk_size: Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory.
        :param corpus_chunk_size: Scans the corpus 500k entries at a time by default. Increasing that value increases the speed, but requires more memory.
        :param top_k: Retrieve top k matching entries.
        :param score_function: Function for computing scores. By default, cosine similarity.
        :return: Returns a list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
        """
        query_embeddings = torch.from_numpy(query_embeddings)
        corpus_embeddings = torch.from_numpy(corpus_embeddings)
        if len(query_embeddings.shape) == 1:
            query_embeddings = query_embeddings.unsqueeze(0)
        if len(corpus_embeddings.shape) == 1:
            corpus_embeddings = corpus_embeddings.unsqueeze(0)

        # Check that corpus and queries are on the same device
        if corpus_embeddings.device != query_embeddings.device:
            query_embeddings = query_embeddings.to(corpus_embeddings.device)

        queries_result_list = [[] for _ in range(len(query_embeddings))]

        for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
            # Iterate over chunks of the corpus
            for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
                # Compute cosine similarities
                cos_scores = score_function(
                    query_embeddings[
                        query_start_idx : query_start_idx + query_chunk_size
                    ],
                    corpus_embeddings[
                        corpus_start_idx : corpus_start_idx + corpus_chunk_size
                    ],
                )

                # Get top-k scores
                cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                    cos_scores,
                    min(top_k, len(cos_scores[0])),
                    dim=1,
                    largest=True,
                    sorted=False,
                )
                cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
                cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

                for query_itr in range(len(cos_scores)):
                    for sub_corpus_id, score in zip(
                        cos_scores_top_k_idx[query_itr],
                        cos_scores_top_k_values[query_itr],
                    ):
                        corpus_id = corpus_start_idx + sub_corpus_id
                        query_id = query_start_idx + query_itr
                        queries_result_list[query_id].append(
                            {"corpus_id": corpus_id, "score": score}
                        )

        # Sort and strip to top_k results
        for idx in range(len(queries_result_list)):
            queries_result_list[idx] = sorted(
                queries_result_list[idx], key=lambda x: x["score"], reverse=True
            )
            queries_result_list[idx] = queries_result_list[idx][0:top_k]

        return queries_result_list


class EDSEvaluator(Evaluator):
    """
    Evolutionary Distance Similarity Evaluator, analogous to Semantic Textual Similarity Evaluator.
    Adapted from https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/STSEvaluator.py
    """

    def __init__(self, embeds1, embeds2, gold_scores, **kwargs):
        super().__init__(**kwargs)
        self.embeds1 = embeds1
        self.embeds2 = embeds2
        self.gold_scores = gold_scores

    def __call__(self):
        embeddings1 = np.array(self.embeds1)
        embeddings2 = np.array(self.embeds2)
        logger.info("Evaluating...")
        cosine_scores = paired_cosine_distances(embeddings1, embeddings2)
        manhattan_distances = paired_manhattan_distances(embeddings1, embeddings2)
        euclidean_distances = paired_euclidean_distances(embeddings1, embeddings2)

        cosine_pearson, _ = pearsonr(self.gold_scores, cosine_scores)
        manhattan_pearson, _ = pearsonr(self.gold_scores, manhattan_distances)
        euclidean_pearson, _ = pearsonr(self.gold_scores, euclidean_distances)

        top_corr = max(
            cosine_pearson,
            manhattan_pearson,
            euclidean_pearson,
        )
        return {
            "cos_sim": cosine_pearson,
            "manhattan": manhattan_pearson,
            "euclidean": euclidean_pearson,
            "top_corr": top_corr,
        }


class RetrievalEvaluator(Evaluator):
    """Adapted from
    https://github.com/embeddings-benchmark/mteb/blob/main/mteb/evaluation/evaluators/RetrievalEvaluator.py
    """

    def __init__(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        qrels: Dict[str, Dict[str, int]],
        k_values: List[int] = [5, 10, 50],
        score_function: str = "cos_sim",
        corpus_chunk_size: int = 50000,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.corpus_embeds = corpus_embeds
        self.query_embeds = query_embeds
        self.corpus_ids = corpus_ids
        self.query_ids = query_ids
        self.qrels = qrels
        self.k_values = k_values
        self.top_k = max(k_values) if "top_k" not in kwargs else kwargs["top_k"]
        self.score_function = score_function
        self.score_functions = {
            "cos_sim": cos_sim,
            "dot": dot_score,
        }
        self.corpus_chunk_size = corpus_chunk_size

    def __call__(self):
        results = self.search(
            self.corpus_embeds,
            self.query_embeds,
            self.corpus_ids,
            self.query_ids,
            self.top_k,
            self.score_function,
        )
        ndcg, _map, recall, precision, naucs = self.evaluate(
            self.qrels, results, self.k_values
        )
        mrr, naucs_mrr = self.evaluate_custom(self.qrels, results, self.k_values, "mrr")
        scores = {
            **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()},
            **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()},
            **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()},
            **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()},
            **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr.items()},
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs.items()
            },
            **{
                k.replace("@", "_at_").replace("_P", "_precision").lower(): v
                for k, v in naucs_mrr.items()
            },
        }
        return scores

    def search(
        self,
        corpus_embeds,
        query_embeds,
        corpus_ids,
        query_ids,
        top_k: int,
        score_function: str,
        return_sorted: bool = False,
        **kwargs,
    ) -> dict[str, dict[str, float]]:
        # Create embeddings for all queries using model.encode()
        # Runs semantic search against the corpus embeddings
        # Returns a ranked list with the corpus ids
        if score_function not in self.score_functions:
            raise ValueError(
                f"score function: {score_function} must be either (cos_sim) for cosine similarity or (dot) for dot product"
            )
        # make query embeds and corpus embeds torch tensors
        query_embeds = torch.from_numpy(query_embeds)
        corpus_embeds = torch.from_numpy(corpus_embeds)
        itr = range(0, len(corpus_embeds), self.corpus_chunk_size)
        results = defaultdict(dict)
        # Keep only the top-k docs for each query
        result_heaps = defaultdict(list)
        for batch_num, corpus_start_idx in enumerate(itr):
            logger.info("Searching Batch {}/{}...".format(batch_num + 1, len(itr)))
            corpus_end_idx = min(
                corpus_start_idx + self.corpus_chunk_size, len(corpus_ids)
            )
            sub_corpus_embeds = corpus_embeds[corpus_start_idx:corpus_end_idx]
            # Compute similarites using either cosine-similarity or dot product
            cos_scores = self.score_functions[score_function](
                query_embeds, sub_corpus_embeds
            )
            cos_scores[torch.isnan(cos_scores)] = -1

            # Get top-k values
            cos_scores_top_k_values, cos_scores_top_k_idx = torch.topk(
                cos_scores,
                min(
                    top_k + 1,
                    len(cos_scores[1]) if len(cos_scores) > 1 else len(cos_scores[-1]),
                ),
                dim=1,
                largest=True,
                sorted=return_sorted,
            )
            cos_scores_top_k_values = cos_scores_top_k_values.cpu().tolist()
            cos_scores_top_k_idx = cos_scores_top_k_idx.cpu().tolist()

            for query_itr in range(len(query_embeds)):
                query_id = query_ids[query_itr]
                for sub_corpus_id, score in zip(
                    cos_scores_top_k_idx[query_itr], cos_scores_top_k_values[query_itr]
                ):
                    corpus_id = corpus_ids[corpus_start_idx + sub_corpus_id]
                    if corpus_id != query_id:
                        if len(result_heaps[query_id]) < top_k:
                            # Push item on the heap
                            heapq.heappush(result_heaps[query_id], (score, corpus_id))
                        else:
                            # If item is larger than the smallest in the heap, push it on the heap then pop the smallest element
                            heapq.heappushpop(
                                result_heaps[query_id], (score, corpus_id)
                            )

        for qid in result_heaps:
            for score, corpus_id in result_heaps[qid]:
                results[qid][corpus_id] = score

        return results

    @staticmethod
    def evaluate(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        ignore_identical_ids: bool = True,
    ) -> Tuple[Dict[str, float], dict[str, float], dict[str, float], dict[str, float]]:
        if ignore_identical_ids:
            logger.info(
                "For evaluation, we ignore identical query and document ids (default), please explicitly set ``ignore_identical_ids=False`` to ignore this."
            )
            popped = []
            for qid, rels in results.items():
                for pid in list(rels):
                    if qid == pid:
                        results[qid].pop(pid)
                        popped.append(pid)

        all_ndcgs, all_aps, all_recalls, all_precisions = {}, {}, {}, {}

        for k in k_values:
            all_ndcgs[f"NDCG@{k}"] = []
            all_aps[f"MAP@{k}"] = []
            all_recalls[f"Recall@{k}"] = []
            all_precisions[f"P@{k}"] = []

        map_string = "map_cut." + ",".join([str(k) for k in k_values])
        ndcg_string = "ndcg_cut." + ",".join([str(k) for k in k_values])
        recall_string = "recall." + ",".join([str(k) for k in k_values])
        precision_string = "P." + ",".join([str(k) for k in k_values])
        evaluator = pytrec_eval.RelevanceEvaluator(
            qrels, {map_string, ndcg_string, recall_string, precision_string}
        )
        scores = evaluator.evaluate(results)

        for query_id in scores.keys():
            for k in k_values:
                all_ndcgs[f"NDCG@{k}"].append(scores[query_id]["ndcg_cut_" + str(k)])
                all_aps[f"MAP@{k}"].append(scores[query_id]["map_cut_" + str(k)])
                all_recalls[f"Recall@{k}"].append(scores[query_id]["recall_" + str(k)])
                all_precisions[f"P@{k}"].append(scores[query_id]["P_" + str(k)])
        ndcg, _map, recall, precision = (
            all_ndcgs.copy(),
            all_aps.copy(),
            all_recalls.copy(),
            all_precisions.copy(),
        )

        for k in k_values:
            ndcg[f"NDCG@{k}"] = round(sum(ndcg[f"NDCG@{k}"]) / len(scores), 5)
            _map[f"MAP@{k}"] = round(sum(_map[f"MAP@{k}"]) / len(scores), 5)
            recall[f"Recall@{k}"] = round(sum(recall[f"Recall@{k}"]) / len(scores), 5)
            precision[f"P@{k}"] = round(sum(precision[f"P@{k}"]) / len(scores), 5)
        naucs = RetrievalEvaluator.evaluate_abstention(
            results, {**all_ndcgs, **all_aps, **all_recalls, **all_precisions}
        )
        return ndcg, _map, recall, precision, naucs

    @staticmethod
    def evaluate_abstention(
        results: dict[str, dict[str, float]],
        metric_scores: dict[str, list[float]],
    ) -> Dict[str, float]:
        """Computes normalized Area Under the Curve on a set of evaluated instances as presented in the paper https://arxiv.org/abs/2402.12997"""
        all_sim_scores = [list(results[qid].values()) for qid in list(results.keys())]
        all_conf_scores = [
            confidence_scores(sim_scores) for sim_scores in all_sim_scores
        ]
        conf_fcts = list(all_conf_scores[0].keys())
        all_conf_scores = {
            fct: np.array([x[fct] for x in all_conf_scores]) for fct in conf_fcts
        }
        metric_scores = {k: np.array(v) for k, v in metric_scores.items()}
        naucs = {}

        for metric_name, scores in metric_scores.items():
            for fct, conf_scores in all_conf_scores.items():
                naucs[f"nAUC_{metric_name}_{fct}"] = nAUC(conf_scores, scores)

        return naucs

    @staticmethod
    def evaluate_custom(
        qrels: dict[str, dict[str, int]],
        results: dict[str, dict[str, float]],
        k_values: List[int],
        metric: str,
        output_type: str = "all",
    ) -> Tuple[Dict[str, float]]:
        if metric.lower() in ["mrr", "mrr@k", "mrr_cut"]:
            metric_scores = mrr(qrels, results, k_values, output_type)

        elif metric.lower() in ["recall_cap", "r_cap", "r_cap@k"]:
            metric_scores = recall_cap(qrels, results, k_values, output_type)

        elif metric.lower() in ["hole", "hole@k"]:
            metric_scores = hole(qrels, results, k_values, output_type)

        elif metric.lower() in [
            "acc",
            "top_k_acc",
            "accuracy",
            "accuracy@k",
            "top_k_accuracy",
        ]:
            metric_scores = top_k_accuracy(qrels, results, k_values, output_type)

        naucs = RetrievalEvaluator.evaluate_abstention(results, metric_scores)
        metric_scores_avg = {k: sum(v) / len(v) for k, v in metric_scores.items()}

        return metric_scores_avg, naucs
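As a rough usage sketch (the random embeddings and binary labels below are made up for illustration), an evaluator is constructed with precomputed embeddings and then called with no arguments:

import numpy as np
from dgeb.evaluators import logRegClassificationEvaluator

# Illustrative shapes: 100 train / 20 test embeddings of dimension 64.
rng = np.random.default_rng(0)
evaluator = logRegClassificationEvaluator(
    embeds_train=rng.normal(size=(100, 64)),
    y_train=rng.integers(0, 2, size=100),
    embeds_test=rng.normal(size=(20, 64)),
    y_test=rng.integers(0, 2, size=20),
)
scores = evaluator()  # dict with "accuracy", "f1", and (for binary labels) "ap"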
dgeb/modality.py
ADDED
@@ -0,0 +1,8 @@
from enum import Enum


class Modality(Enum):
    """Data modality, either DNA or protein sequence."""

    PROTEIN = "protein"
    DNA = "dna"
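A trivial sketch of the enum's string round-trip (illustrative only):

from dgeb.modality import Modality

assert Modality.PROTEIN.value == "protein"
assert Modality("dna") is Modality.DNA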
dgeb/models.py
ADDED
@@ -0,0 +1,481 @@
import logging
import re
from abc import ABC, abstractmethod
from functools import partial
from types import SimpleNamespace
from typing import Dict, List, Literal, Optional

import numpy as np
import torch
import tqdm as tqdm
from datasets import Dataset
from torch import Tensor
from torch.nn import functional as F
from torch.utils.data import DataLoader
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForMaskedLM,
    AutoTokenizer,
    BatchEncoding,
    DefaultDataCollator,
    T5EncoderModel,
    T5Tokenizer,
)
from transformers.modeling_outputs import BaseModelOutput

from .modality import Modality
from .eval_utils import ForwardHook, pool

logger = logging.getLogger(__name__)


class BioSeqTransformer(ABC):
    """
    Abstract class to wrap models which map biological sequences (DNA/Prot) to embeddings.
    Modelled after SentenceTransformer (https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py)

    Args:
        model_name: Name or path to the pretrained model.
        layers: List of model layers to probe. Can be integers or "mid" or "last".
        devices: List of device ids for inference. If cuda is not available, will use cpu.
        num_processes: Number of processes to use for data loading.
        max_seq_length: Maximum sequence length of the input sequences.
        l2_norm: If true, embeddings are L2-normalized before they are returned.
        batch_size: Batch size for encoding.
        pool_type: Pooling strategy to use. One of "mean", "max", "cls", "last".
    """

    def __init__(
        self,
        model_name: str,
        layers: Optional[List[int] | Literal["mid"] | Literal["last"]] = None,
        devices: List[int] = [0],
        num_processes: int = 16,
        max_seq_length: int = 1024,
        l2_norm: bool = False,
        batch_size: int = 128,
        pool_type: str = "mean",
    ):
        super().__init__()

        self.id = self.__class__.__name__
        self.hf_name = model_name
        self.encoder = self._load_model(model_name)
        if not hasattr(self.encoder, "config"):
            raise ValueError(
                'The model from `self._load_model()` must have a "config" attribute.'
            )
        self.config = self.encoder.config
        self.tokenizer = self._get_tokenizer(model_name)
        self.num_param = sum(p.numel() for p in self.encoder.parameters())
        self.data_collator = DefaultDataCollator()
        self.gpu_count = len(devices)
        self.l2_norm = l2_norm

        self.device = torch.device(
            f"cuda:{devices[0]}" if torch.cuda.is_available() else "cpu"
        )
        self.num_processes = num_processes
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.pool_type = pool_type

        if self.gpu_count > 1:
            self.encoder = torch.nn.DataParallel(self.encoder, device_ids=devices)
        self.encoder.to(self.device)
        self.encoder.eval()

        mid_layer = self.num_layers // 2
        last_layer = self.num_layers - 1
        mid_layer_label = f"mid ({mid_layer})"
        last_layer_label = f"last ({self.num_layers - 1})"

        if layers is None:
            logger.debug(f"Using default layers: {mid_layer_label}, {last_layer_label}")
            self.layers = [mid_layer, last_layer]
            self.layer_labels = [mid_layer_label, last_layer_label]
        elif layers == "mid":
            self.layers = [mid_layer]
            self.layer_labels = [mid_layer_label]
        elif layers == "last":
            self.layers = [last_layer]
            self.layer_labels = [last_layer_label]
        else:
            self.layers = layers
            self.layer_labels = [str(layer) for layer in layers]

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        """Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
        outputs = self.encoder(**batch_dict, output_hidden_states=True)
        embeds = [outputs.hidden_states[layer] for layer in self.layers]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        return embeds

    def _load_model(self, model_name):
        return AutoModel.from_pretrained(model_name, trust_remote_code=True)

    def _get_tokenizer(self, model_name):
        return AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        batch_dict = tokenizer(
            examples["input_seqs"],
            max_length=max_seq_length,
            padding=True,
            truncation=True,
        )
        return batch_dict

    @property
    def metadata(self) -> Dict:
        return {
            "hf_name": self.hf_name,
            "num_layers": self.num_layers,
            "num_params": self.num_param,
            "embed_dim": self.embed_dim,
        }

    @property
    @abstractmethod
    def num_layers(self) -> int:
        pass

    @property
    @abstractmethod
    def embed_dim(self) -> int:
        pass

    @property
    @abstractmethod
    def modality(self) -> Modality:
        pass

    @torch.no_grad()
    def encode(self, sequences, **kwargs) -> np.ndarray:
        """Returns a list of embeddings for the given sequences.

        Args:
            sequences (`List[str]`): List of sequences to encode.

        Returns:
            `np.ndarray`: Embeddings for the given sequences of shape [num_sequences, num_layers, embedding_dim].
        """
        dataset = Dataset.from_dict({"input_seqs": sequences})
        dataset.set_transform(
            partial(
                self._tokenize_func, self.tokenizer, max_seq_length=self.max_seq_length
            )
        )
        data_loader = DataLoader(
            dataset,
            batch_size=self.batch_size * self.gpu_count,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_processes,
            collate_fn=self.data_collator,
            pin_memory=True,
        )

        if max(self.layers) >= self.num_layers:
            raise ValueError(
                f"Layer {max(self.layers)} is not available in the model. Choose a layer between 0 and {self.num_layers - 1}"
            )

        encoded_embeds = []
        for batch_dict in tqdm.tqdm(
            data_loader, desc="encoding", mininterval=10, disable=len(sequences) < 128
        ):
            batch_dict = {k: v.to(self.device) for k, v in batch_dict.items()}

            embeds = self._encode_single_batch(batch_dict)

            if self.l2_norm:
                embeds = F.normalize(embeds, p=2, dim=-1)
            encoded_embeds.append(embeds.cpu().numpy())

        return np.concatenate(encoded_embeds, axis=0)


class ESM(BioSeqTransformer):
    """ESM model from https://huggingface.co/docs/transformers/en/model_doc/esm"""

    MODEL_NAMES = [
        "facebook/esm2_t6_8M_UR50D",
        "facebook/esm2_t12_35M_UR50D",
        "facebook/esm2_t30_150M_UR50D",
        "facebook/esm2_t33_650M_UR50D",
        "facebook/esm2_t36_3B_UR50D",
        "facebook/esm2_t48_15B_UR50D",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size


class ESM3(BioSeqTransformer):
    """ESM3 model from https://github.com/evolutionaryscale/esm"""

    MODEL_NAMES = ["esm3_sm_open_v1"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Register forward hooks to store embeddings per layer.
        self.hooks = [
            ForwardHook(self.encoder.transformer.blocks[layer]) for layer in self.layers
        ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_hidden_layers

    @property
    def embed_dim(self) -> int:
        return self.config.hidden_size

    def _load_model(self, model_name):
        try:
            from esm.models.esm3 import ESM3 as ModelESM3
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        model = ModelESM3.from_pretrained("esm3_sm_open_v1")
        model.config = SimpleNamespace(
            num_hidden_layers=len(model.transformer.blocks),
            hidden_size=model.transformer.blocks[0].ffn[-1].out_features,
        )
        return model

    def _get_tokenizer(self, model_name):
        try:
            from esm.tokenization.sequence_tokenizer import EsmSequenceTokenizer
        except ImportError:
            raise ImportError(
                "ESM3 is not installed. Please install it with `pip install esm`."
            )
        return EsmSequenceTokenizer()

    def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
        _ = self.encoder.forward(sequence_tokens=batch_dict["input_ids"])
        embeds = [hook.output for hook in self.hooks]
        embeds = [
            pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
            for layer_embeds in embeds
        ]
        # Stack with shape [B, num_layers, D].
        embeds = torch.stack(embeds, dim=1)
        embeds = embeds.to(torch.float32)
        return embeds


class ProtT5(BioSeqTransformer):
    """ProtT5 model from https://github.com/agemagician/ProtTrans"""

    MODEL_NAMES = [
        "Rostlab/prot_t5_xl_uniref50",
        "Rostlab/prot_t5_xl_bfd",
        "Rostlab/prot_t5_xxl_uniref50",
        "Rostlab/prot_t5_xxl_bfd",
    ]

    @property
    def modality(self) -> Modality:
        return Modality.PROTEIN

    @property
    def num_layers(self) -> int:
        return self.config.num_layers

    @property
    def embed_dim(self) -> int:
        return self.config.d_model

    def _load_model(self, model_name):
        return T5EncoderModel.from_pretrained(model_name)

    def _get_tokenizer(self, model_name):
        return T5Tokenizer.from_pretrained(model_name, do_lower_case=False)

    def _tokenize_func(
        self, tokenizer, examples: Dict[str, List], max_seq_length: int
    ) -> BatchEncoding:
        example_sequences = examples["input_seqs"]
        # Add space between amino acids to make sure they are tokenized correctly.
        example_sequences = [" ".join(sequence) for sequence in example_sequences]
        example_sequences = [
            re.sub(r"[UZOB]", "X", sequence) for sequence in example_sequences
]
|
328 |
+
batch_dict = tokenizer(
|
329 |
+
example_sequences,
|
330 |
+
max_length=max_seq_length,
|
331 |
+
padding=True,
|
332 |
+
truncation=True,
|
333 |
+
add_special_tokens=True,
|
334 |
+
)
|
335 |
+
|
336 |
+
return batch_dict
|
337 |
+
|
338 |
+
|
339 |
+
class ProGen(BioSeqTransformer):
|
340 |
+
"""ProGen models from https://github.com/salesforce/progen."""
|
341 |
+
|
342 |
+
MODEL_NAMES = [
|
343 |
+
"hugohrban/progen2-small",
|
344 |
+
"hugohrban/progen2-medium",
|
345 |
+
"hugohrban/progen2-base",
|
346 |
+
"hugohrban/progen2-large",
|
347 |
+
"hugohrban/progen2-xlarge",
|
348 |
+
]
|
349 |
+
|
350 |
+
@property
|
351 |
+
def modality(self) -> Modality:
|
352 |
+
return Modality.PROTEIN
|
353 |
+
|
354 |
+
@property
|
355 |
+
def num_layers(self) -> int:
|
356 |
+
return self.config.n_layer
|
357 |
+
|
358 |
+
@property
|
359 |
+
def embed_dim(self) -> int:
|
360 |
+
return self.config.embed_dim
|
361 |
+
|
362 |
+
def _load_model(self, model_name):
|
363 |
+
return AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
364 |
+
|
365 |
+
def _get_tokenizer(self, model_name_or_path):
|
366 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
367 |
+
model_name_or_path, trust_remote_code=True
|
368 |
+
)
|
369 |
+
tokenizer.pad_token = "<|pad|>"
|
370 |
+
return tokenizer
|
371 |
+
|
372 |
+
def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
|
373 |
+
"""Returns the output embedding for the given batch with shape [batch, num_layers, D]."""
|
374 |
+
outputs: BaseModelOutput = self.encoder(
|
375 |
+
input_ids=batch_dict["input_ids"],
|
376 |
+
output_hidden_states=True,
|
377 |
+
use_cache=False,
|
378 |
+
)
|
379 |
+
embeds = [outputs.hidden_states[layer] for layer in self.layers]
|
380 |
+
embeds = [
|
381 |
+
pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
|
382 |
+
for layer_embeds in embeds
|
383 |
+
]
|
384 |
+
# Stack with shape [B, num_layers, D].
|
385 |
+
embeds = torch.stack(embeds, dim=1)
|
386 |
+
return embeds
|
387 |
+
|
388 |
+
|
389 |
+
class EvoModel(BioSeqTransformer):
|
390 |
+
"""https://github.com/evo-design/evo."""
|
391 |
+
|
392 |
+
MODEL_NAMES = [
|
393 |
+
"togethercomputer/evo-1-8k-base",
|
394 |
+
"togethercomputer/evo-1-131k-base",
|
395 |
+
]
|
396 |
+
|
397 |
+
@property
|
398 |
+
def modality(self) -> Modality:
|
399 |
+
return Modality.DNA
|
400 |
+
|
401 |
+
@property
|
402 |
+
def num_layers(self) -> int:
|
403 |
+
return self.config.num_layers
|
404 |
+
|
405 |
+
@property
|
406 |
+
def embed_dim(self) -> int:
|
407 |
+
return self.config.hidden_size
|
408 |
+
|
409 |
+
def __init__(self, *args, **kwargs):
|
410 |
+
super().__init__(*args, **kwargs)
|
411 |
+
# Register forward hooks to store embeddings per layer.
|
412 |
+
self.hooks = []
|
413 |
+
for layer in self.layers:
|
414 |
+
# For the last layer, get the output of `backbone.norm`, which directly precedes `backbone.unembed`.
|
415 |
+
# This is equivalent to the approach in https://github.com/evo-design/evo/issues/32.
|
416 |
+
if layer == self.num_layers - 1 or layer == -1:
|
417 |
+
self.hooks.append(ForwardHook(self.encoder.backbone.norm))
|
418 |
+
else:
|
419 |
+
self.hooks.append(ForwardHook(self.encoder.backbone.blocks[layer]))
|
420 |
+
|
421 |
+
def _load_model(self, model_name):
|
422 |
+
config = AutoConfig.from_pretrained(
|
423 |
+
model_name, trust_remote_code=True, revision="1.1_fix"
|
424 |
+
)
|
425 |
+
model = AutoModelForCausalLM.from_pretrained(
|
426 |
+
model_name, config=config, trust_remote_code=True, revision="1.1_fix"
|
427 |
+
)
|
428 |
+
return model
|
429 |
+
|
430 |
+
def _get_tokenizer(self, model_name):
|
431 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
432 |
+
model_name, revision="1.1_fix", trust_remote_code=True
|
433 |
+
)
|
434 |
+
# Evo tokenizer is missing pad_token by default.
|
435 |
+
tokenizer.add_special_tokens({"pad_token": "N"})
|
436 |
+
return tokenizer
|
437 |
+
|
438 |
+
def _encode_single_batch(self, batch_dict: Dict[str, Tensor]):
|
439 |
+
_ = self.encoder(batch_dict["input_ids"], use_cache=False)
|
440 |
+
embeds = [hook.output for hook in self.hooks]
|
441 |
+
# The hook output for Evo middle layers is a tuple (embedding, inference_params=None).
|
442 |
+
embeds = [x[0] if isinstance(x, tuple) else x for x in embeds]
|
443 |
+
embeds = [
|
444 |
+
pool(layer_embeds, batch_dict["attention_mask"], self.pool_type)
|
445 |
+
for layer_embeds in embeds
|
446 |
+
]
|
447 |
+
# Stack with shape [B, num_layers, D].
|
448 |
+
embeds = torch.stack(embeds, dim=1)
|
449 |
+
embeds = embeds.to(torch.float32)
|
450 |
+
return embeds
|
451 |
+
|
452 |
+
|
453 |
+
class NTModel(BioSeqTransformer):
|
454 |
+
"""Nucleotide Transformer https://github.com/instadeepai/nucleotide-transformer"""
|
455 |
+
|
456 |
+
MODEL_NAMES = [
|
457 |
+
"InstaDeepAI/nucleotide-transformer-v2-50m-multi-species",
|
458 |
+
"InstaDeepAI/nucleotide-transformer-v2-100m-multi-species",
|
459 |
+
"InstaDeepAI/nucleotide-transformer-v2-250m-multi-species",
|
460 |
+
"InstaDeepAI/nucleotide-transformer-v2-500m-multi-species",
|
461 |
+
"InstaDeepAI/nucleotide-transformer-2.5b-multi-species",
|
462 |
+
]
|
463 |
+
|
464 |
+
def __init__(self, *args, **kwargs):
|
465 |
+
super().__init__(*args, **kwargs)
|
466 |
+
self.max_seq_length = self.tokenizer.model_max_length
|
467 |
+
|
468 |
+
@property
|
469 |
+
def modality(self) -> Modality:
|
470 |
+
return Modality.DNA
|
471 |
+
|
472 |
+
@property
|
473 |
+
def num_layers(self) -> int:
|
474 |
+
return self.config.num_hidden_layers
|
475 |
+
|
476 |
+
@property
|
477 |
+
def embed_dim(self) -> int:
|
478 |
+
return self.config.hidden_size
|
479 |
+
|
480 |
+
def _load_model(self, model_name):
|
481 |
+
return AutoModelForMaskedLM.from_pretrained(model_name, trust_remote_code=True)
|
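All of the wrappers above expose the same `BioSeqTransformer.encode()` interface, which returns an array of shape [num_sequences, num_layers, embed_dim]. As a rough illustration only (a minimal sketch; the constructor arguments beyond the Hugging Face model name are assumptions, since the base-class `__init__` is defined earlier in models.py), embedding a few protein sequences with an ESM2 checkpoint could look like:

# Illustrative sketch, not part of the repository files.
# Constructor arguments other than the model name are assumed to take their defaults.
from dgeb.models import ESM

model = ESM("facebook/esm2_t6_8M_UR50D")
embeds = model.encode(["MKTAYIAKQR", "MALWMRLLPL"])
# One embedding per requested layer in `model.layers`:
# embeds.shape == (num_sequences, num_layers, embed_dim)
print(embeds.shape)
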
dgeb/tasks/__init__.py
ADDED
@@ -0,0 +1,16 @@
# ruff: noqa: F403

from .tasks import Dataset, Task, TaskMetadata, TaskResult
from .eds_tasks import *
from .pair_classification_tasks import *
from .retrieval_tasks import *
from .classification_tasks import *
from .clustering_tasks import *
from .bigene_mining_tasks import *

__all__ = [
    "Dataset",
    "Task",
    "TaskMetadata",
    "TaskResult",
]

dgeb/tasks/bigene_mining_tasks.py
ADDED
@@ -0,0 +1,77 @@
"""
Bigene mining tasks are analogous to bitext matching tasks, but for genes.
Cosine similarity is used to mine genes of related functions from different organisms.
"""

import logging
from collections import defaultdict

from dgeb.evaluators import BiGeneMiningEvaluator
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

logger = logging.getLogger(__name__)


def run_bigene_mining_tasks(
    model: BioSeqTransformer, metadata: TaskMetadata, top_k: int = 1
) -> TaskResult:
    """Evaluate bigene mining task. Utilizes the BiGeneMiningEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("BiGeneMining tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    layer_results = defaultdict(dict)
    embeds1 = model.encode(ds["Seq1"])
    embeds2 = model.encode(ds["Seq2"])
    for i, layer in enumerate(model.layers):
        evaluator = BiGeneMiningEvaluator(embeds1[:, i], embeds2[:, i], top_k=top_k)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} matching results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class BacArchBiGeneMining(Task):
    metadata = TaskMetadata(
        id="bacarch_bigene",
        display_name="BacArch BiGene",
        description="Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/bac_arch_bigene",
                revision="d5a65e44bae43a9ba9f2fdc03056dff9c12f6631",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_bigene_mining_tasks(model, self.metadata)


class ModACParalogyBiGeneMining(Task):
    # ModAC Paralogy matching with top_k=1 is too strict (most models have accuracy < 0.1%)
    # Instead use recall@50 as the main metric.
    TOP_K = 50

    metadata = TaskMetadata(
        id="modac_paralogy_bigene",
        display_name="ModAC Paralogy BiGene",
        description="Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
        type="bigene_mining",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/modac_paralogy_bigene",
                revision="241ca6397856e3360da04422d54933035b1fab87",
            )
        ],
        primary_metric_id=f"recall_at_{TOP_K}",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_bigene_mining_tasks(model, self.metadata, top_k=self.TOP_K)

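Each task class binds a `TaskMetadata` to a runner function, so evaluating a model is just `Task.run(model)`. A minimal sketch (assuming `model` is an already-instantiated `BioSeqTransformer` subclass such as the ESM wrapper above):

# Illustrative sketch: `model` is any instantiated BioSeqTransformer subclass.
from dgeb.tasks.bigene_mining_tasks import BacArchBiGeneMining

result = BacArchBiGeneMining().run(model)
# `result` is a TaskResult; each entry in `result.results` holds the metrics for one layer.
for layer_result in result.results:
    print(layer_result.layer_number, {m.id: m.value for m in layer_result.metrics})
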
dgeb/tasks/classification_tasks.py
ADDED
@@ -0,0 +1,213 @@
"""
Classification tasks take in biological sequence and functional labels.
Multi-class and/or multi-label classification tasks are supported.
"""

import logging
from collections import defaultdict

import datasets
import numpy as np

from dgeb.eval_utils import merge_split_elem_embeds
from dgeb.evaluators import (
    MultiClassMultiOutputKNNClassificationEvaluator,
    logRegClassificationEvaluator,
)
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

logger = logging.getLogger(__name__)


def split_sequences(
    ds: datasets.DatasetDict, max_seq_length: int
) -> datasets.DatasetDict:
    """Split sequences into chunks of max_seq_length using datasets.Dataset.map()."""

    def _split_sequence(examples, max_seq_length):
        assert (
            len(examples["Sequence"]) == 1
        ), "split map function should use batch size of 1."
        example = {k: v[0] for k, v in examples.items()}
        seq = example["Sequence"]
        # Split by chunks of max_seq_length.
        seq_split = [
            seq[i : i + max_seq_length] for i in range(0, len(seq), max_seq_length)
        ]
        # Repeat other fields by the number of splits.
        example = {
            k: [v] * len(seq_split) for k, v in example.items() if k != "Sequence"
        }
        example["Sequence"] = seq_split
        return example

    ds = ds.map(
        _split_sequence,
        batched=True,
        batch_size=1,
        fn_kwargs={"max_seq_length": max_seq_length},
        keep_in_memory=True,
        load_from_cache_file=False,
    )
    return ds


def run_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate on classification tasks using logistic regression classifier."""
    ds = metadata.datasets[0].load()
    layer_results = defaultdict(dict)
    train_embeds = model.encode(ds["train"]["Sequence"])
    test_embeds = model.encode(ds["test"]["Sequence"])
    for i, layer in enumerate(model.layers):
        layer_results["layers"][layer] = logRegClassificationEvaluator(
            train_embeds[:, i],
            ds["train"]["Label"],
            test_embeds[:, i],
            ds["test"]["Label"],
        )()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class EnzymeCommissionClassification(Task):
    metadata = TaskMetadata(
        id="ec_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/ec_classification",
                revision="ead5570168e6969a5149f6861e8a33d6b5d22498",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_classification_task(model, self.metadata)


class EnzymeCommissionDNAClassification(Task):
    metadata = TaskMetadata(
        id="ec_dna_classification",
        display_name="EC Classification",
        description="Evaluate on Enzyme Commission number classification task using DNA sequences.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/ec_classification_dna",
                revision="cd61c74b4930cf9f1963e6d73ff7f14e2c8e74dd",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_classification_task(model, self.metadata)


class ConvergentEnzymesClassification(Task):
    metadata = TaskMetadata(
        id="convergent_enzymes_classification",
        display_name="Convergent Enzymes Classification",
        description="Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/convergent_enzymes",
                revision="37f75609f54de2bc0911ccb72faf1c2f5a4285aa",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_classification_task(model, self.metadata)


def run_mibig_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """
    Evaluate on MIBIG classification tasks. Multiclass, multi-label KNN classification is used for evaluation.
    """
    ds = metadata.datasets[0].load()
    if metadata.modality == Modality.DNA:
        # MIBiG DNA sequences can be very long. Instead of truncating to max_seq_length,
        # split into multiple sequences and mean pool the resulting embeddings.
        ds = split_sequences(ds, model.max_seq_length)

    layer_results = defaultdict(dict)
    train_embeds = model.encode(ds["train"]["Sequence"])
    test_embeds = model.encode(ds["test"]["Sequence"])

    train_ids = ds["train"]["Entry"]
    test_ids = ds["test"]["Entry"]
    train_labels = ds["train"]["class"]
    test_labels = ds["test"]["class"]
    train_id_to_label = {id: label for id, label in zip(train_ids, train_labels)}
    test_id_to_label = {id: label for id, label in zip(test_ids, test_labels)}
    # Mean pool embeds with the same ID.
    train_ids, train_embeds = merge_split_elem_embeds(train_ids, train_embeds)
    test_ids, test_embeds = merge_split_elem_embeds(test_ids, test_embeds)
    # Gather the labels after merging by unique ID.
    train_labels = np.array([train_id_to_label[id] for id in train_ids])
    test_labels = np.array([test_id_to_label[id] for id in test_ids])

    for i, layer in enumerate(model.layers):
        evaluator = MultiClassMultiOutputKNNClassificationEvaluator(
            train_embeds[:, i], train_labels, test_embeds[:, i], test_labels
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, MIBiG classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class MIBiGProteinClassification(Task):
    metadata = TaskMetadata(
        id="MIBIG_protein_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_prot",
                revision="915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_mibig_task(model, self.metadata)


class MIBiGDNAClassification(Task):
    metadata = TaskMetadata(
        id="MIBIG_dna_classification",
        display_name="MIBiG Classification",
        description="Biosynthetic Gene cluster classification using DNA sequences on MIBIG dataset.",
        type="classification",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/mibig_classification_dna",
                revision="b5ca7a76d469e4e66c46f1b655903972571e6b61",
            )
        ],
        primary_metric_id="f1",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_mibig_task(model, self.metadata)

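For the DNA variant of the MIBiG task, `split_sequences` chunks each record into `max_seq_length` pieces and repeats the remaining fields, so one long sequence becomes several rows that are later mean-pooled back together by `merge_split_elem_embeds`. A small sketch of the chunking step on a toy record (hypothetical values, shown only to illustrate the shape of the output):

# Hypothetical toy example of the per-record chunking performed in _split_sequence.
max_seq_length = 4
example = {"Entry": ["BGC1"], "Sequence": ["ATGCATGCAT"]}
seq = example["Sequence"][0]
seq_split = [seq[i : i + max_seq_length] for i in range(0, len(seq), max_seq_length)]
out = {k: [v[0]] * len(seq_split) for k, v in example.items() if k != "Sequence"}
out["Sequence"] = seq_split
print(out)  # {'Entry': ['BGC1', 'BGC1', 'BGC1'], 'Sequence': ['ATGC', 'ATGC', 'AT']}
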
dgeb/tasks/clustering_tasks.py
ADDED
@@ -0,0 +1,70 @@
"""
Biological sequences are clustered and performance is determined by how well clustering matches assigned labels.
"""

import logging
from collections import defaultdict

from dgeb.evaluators import ClusteringEvaluator
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

logger = logging.getLogger(__name__)


def run_clustering_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate clustering task. Utilizes the ClusteringEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("Clustering tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    embeds = model.encode(ds["Sequence"])
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        labels = ds["Label"]
        evaluator = ClusteringEvaluator(embeds[:, i], labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class RNAclustering(Task):
    metadata = TaskMetadata(
        id="ecoli_rna_clustering",
        display_name="E.coli RNA Clustering",
        description="Evaluate on RNA clustering task for sRNA/tRNA/rRNA segments in E.coli K-12.",
        type="clustering",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/e_coli_rnas",
                revision="4c134bb4bdb2b0ef1d59fe10797efdfeaf318de6",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_clustering_task(model, self.metadata)


class MopBClustering(Task):
    metadata = TaskMetadata(
        id="mopb_clustering",
        display_name="MopB Clustering",
        description="Evaluate on MopB clustering task.",
        type="clustering",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/mopb_clustering",
                revision="eed4bfff9c5bd2dc2500c50757bfcb90425d999a",
            )
        ],
        primary_metric_id="v_measure",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_clustering_task(model, self.metadata)

dgeb/tasks/eds_tasks.py
ADDED
@@ -0,0 +1,246 @@
"""
Evolutionary Distance Similarity (EDS) tasks compare embedding distances to continuous evolutionary distances.
The label distances are typically derived from phylogenetic trees.
"""

import logging
from collections import defaultdict

import numpy as np
import pandas as pd

from dgeb.evaluators import EDSEvaluator
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

logger = logging.getLogger(__name__)


def run_eds_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate phylogeny distance correlation task. Utilizes the Evolutionary Distance Similarity (EDS) evaluator."""
    if len(metadata.datasets) != 2:
        raise ValueError("Phylogeny tasks require 2 datasets: sequences and distances.")

    ds = metadata.datasets[0].load()["train"]
    distance_df = metadata.datasets[1].load()["train"].to_pandas()
    assert isinstance(
        distance_df, pd.DataFrame
    ), f"Expected DataFrame, got {type(distance_df)}"

    id_index_dict = {k: i for i, k in enumerate(ds["Entry"])}
    distance_df["embeds1"] = None
    distance_df["embeds2"] = None
    test_embeds = model.encode(ds["Sequence"])
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        for row_idx, row in distance_df.iterrows():
            id1 = row["ID1"]
            id2 = row["ID2"]
            embedding1 = test_embeds[id_index_dict[id1], i]
            embedding2 = test_embeds[id_index_dict[id2], i]
            distance_df.at[row_idx, "embeds1"] = embedding1
            distance_df.at[row_idx, "embeds2"] = embedding2
        embeds1 = np.array(distance_df["embeds1"].tolist())
        embeds2 = np.array(distance_df["embeds2"].tolist())
        dists = np.array(distance_df["distance"].tolist())
        evaluator = EDSEvaluator(embeds1, embeds2, dists)
        layer_results["layers"][layer] = evaluator()
        # log results
        logger.info(
            f"Layer: {layer}, {metadata.display_name} distance correlation results: {layer_results['layers'][layer]}"
        )

    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class RpobBacPhylogeny(Task):
    metadata = TaskMetadata(
        id="rpob_bac_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_phylogeny_sequences",
                revision="b833ef8d8d873ea5387540562873f41d073d3e03",
            ),
            Dataset(
                path="tattabio/rpob_bac_phylogeny_distances",
                revision="0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class RpobArchPhylogeny(Task):
    metadata = TaskMetadata(
        id="rpob_arch_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_phylogeny_sequences",
                revision="10de75b9f5ad12340d629fd1ad015ef4319d6ee4",
            ),
            Dataset(
                path="tattabio/rpob_arch_phylogeny_distances",
                revision="2a585f0e135fe74b8ae6d31e7801c6031b0dcc18",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class RpobBacDNAPhylogeny(Task):
    metadata = TaskMetadata(
        id="rpob_bac_dna_phylogeny",
        display_name="RpoB Bacterial Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Bacterial DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_sequences",
                revision="8e137d3fb8886d8739ce08d1918745444c7d30d6",
            ),
            Dataset(
                path="tattabio/rpob_bac_dna_phylogeny_distances",
                revision="67339e271b2a1602208153d53d70d35ba6fa8876",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class RpobArchDNAPhylogeny(Task):
    metadata = TaskMetadata(
        id="rpob_arch_dna_phylogeny",
        display_name="RpoB Archaeal Phylogeny",
        description="Evaluate on RpoB phylogeny distance correlation task for Archaeal DNA sequences.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_sequences",
                revision="4453552a0e1021fee8697c71a559f4d3f6da2408",
            ),
            Dataset(
                path="tattabio/rpob_arch_dna_phylogeny_distances",
                revision="51df97684a927ec2203568e80175ef26a62db039",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class FeFePhylogeny(Task):
    metadata = TaskMetadata(
        id="fefe_phylogeny",
        display_name="FeFeHydrogenase Phylogeny",
        description="Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
        type="eds",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/fefe_phylogeny_sequences",
                revision="bce06d79d9ce58413e7bcbed6943905d1afb8b26",
            ),
            Dataset(
                path="tattabio/fefe_phylogeny_distances",
                revision="d6357cee9b4071a8dcdeef54083006f0d5e94fd2",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class Bac16SPhylogeny(Task):
    metadata = TaskMetadata(
        id="bac_16S_phylogeny",
        display_name="16S Bacterial Phylogeny",
        description="Evaluate on 16S Bacterial phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/bac_16S_sequences",
                revision="efde1456b86748909cbcfecb07d783756d570aa3",
            ),
            Dataset(
                path="tattabio/bac_16S_distances",
                revision="5c8ba5dfa600bb930d34af2fbc2b17f0acab62d3",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class Arch16SPhylogeny(Task):
    metadata = TaskMetadata(
        id="arch_16S_phylogeny",
        display_name="16S Archaeal Phylogeny",
        description="Evaluate on 16S Archaeal phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/arch_16S_sequences",
                revision="e0f0b5d5bd4b08a329b08c2bf4cc800781dff7f0",
            ),
            Dataset(
                path="tattabio/arch_16S_distances",
                revision="b0356b632a954be70cefd57e3a02e7e1ccd34408",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)


class Euk18SPhylogeny(Task):
    metadata = TaskMetadata(
        id="euk_18S_phylogeny",
        display_name="18S Eukaryotic Phylogeny",
        description="Evaluate on 18S Eukaryotic phylogeny distance correlation task.",
        type="eds",
        modality=Modality.DNA,
        datasets=[
            Dataset(
                path="tattabio/euk_18S_sequences",
                revision="5174cb3b2c5c46b61307fd1c2c08f5c432655196",
            ),
            Dataset(
                path="tattabio/euk_18S_distances",
                revision="c4cea4fbb1185d08e0e01fd28ffb8b06a25025da",
            ),
        ],
        primary_metric_id="top_corr",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_eds_task(model, self.metadata)

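The EDS runner expects the second dataset to be a table of pairwise tree distances with `ID1`, `ID2`, and `distance` columns; embeddings are looked up by `Entry` ID and compared against those distances. A hypothetical illustration of the expected layout:

# Hypothetical example of the distance table consumed by run_eds_task.
import pandas as pd

distance_df = pd.DataFrame(
    {
        "ID1": ["seqA", "seqA", "seqB"],
        "ID2": ["seqB", "seqC", "seqC"],
        "distance": [0.12, 0.87, 0.95],  # e.g. patristic distances from a phylogenetic tree
    }
)
print(distance_df.head())
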
dgeb/tasks/pair_classification_tasks.py
ADDED
@@ -0,0 +1,96 @@
"""
Pair classification tasks evaluating distances between functionally relevant gene pairs.
For instance, distance thresholds distinguish between co-transcribed and non-co-transcribed gene pairs.
"""

import logging
from collections import defaultdict

from dgeb.evaluators import PairClassificationEvaluator
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

from ..eval_utils import paired_dataset

logger = logging.getLogger(__name__)


def run_pair_classification_task(
    model: BioSeqTransformer, metadata: TaskMetadata
) -> TaskResult:
    """Evaluate pair classification task. Utilizes the PairClassificationEvaluator."""
    if len(metadata.datasets) != 1:
        raise ValueError("Pair classification tasks require 1 dataset.")
    ds = metadata.datasets[0].load()["train"]
    embeds = model.encode(ds["Sequence"])
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        labels = ds["Label"]
        embeds1, embeds2, labels = paired_dataset(labels, embeds[:, i])
        evaluator = PairClassificationEvaluator(embeds1, embeds2, labels)
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, {metadata.display_name} classification results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class EcoliOperon(Task):
    metadata = TaskMetadata(
        id="ecoli_operonic_pair",
        display_name="E.coli Operonic Pair",
        description="Evaluate on E.coli K-12 operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/ecoli_operonic_pair",
                revision="a62c01143a842696fc8200b91c1acb825e8cb891",
            )
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_pair_classification_task(model, self.metadata)


class CyanoOperonPair(Task):
    metadata = TaskMetadata(
        id="cyano_operonic_pair",
        display_name="Cyano Operonic Pair",
        description="Evaluate on Cyano operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/cyano_operonic_pair",
                revision="eeb4cb71ec2a4ff688af9de7c0662123577d32ec",
            )
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_pair_classification_task(model, self.metadata)


class VibrioOperonPair(Task):
    metadata = TaskMetadata(
        id="vibrio_operonic_pair",
        display_name="Vibrio Operonic Pair",
        description="Evaluate on Vibrio operonic pair classification task.",
        type="pair_classification",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/vibrio_operonic_pair",
                revision="24781b12b45bf81a079a6164ef0d2124948c1878",
            )
        ],
        primary_metric_id="top_ap",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_pair_classification_task(model, self.metadata)

dgeb/tasks/retrieval_tasks.py
ADDED
@@ -0,0 +1,96 @@
"""
Retrieval tasks find functionally relevant genes in a corpus of genes based on a query gene.
Typically corpus is derived from a different phylogenetic group than the query genes.
"""

import logging
from collections import defaultdict

from dgeb.evaluators import RetrievalEvaluator
from dgeb.modality import Modality
from dgeb.models import BioSeqTransformer
from dgeb.tasks import Dataset, Task, TaskMetadata, TaskResult

logger = logging.getLogger(__name__)


def run_retrieval_task(model: BioSeqTransformer, metadata: TaskMetadata) -> TaskResult:
    """Evaluate retrieval task. Utilizes the Retrieval evaluator."""
    if len(metadata.datasets) != 2:
        raise ValueError(
            "Retrieval tasks require 2 datasets: corpus/query sequences and qrels."
        )
    corpus_ds = metadata.datasets[0].load()["train"]
    query_ds = metadata.datasets[0].load()["test"]
    qrels = metadata.datasets[1].load()
    corpus_embeds = model.encode(corpus_ds["Sequence"])
    query_embeds = model.encode(query_ds["Sequence"])
    qrels_dict = defaultdict(dict)

    def qrels_dict_init(row):
        qrels_dict[str(row["query_id"])][str(row["corpus_id"])] = int(row["fuzz_ratio"])

    # Populate `qrels_dict` from the dataset.
    # See https://github.com/cvangysel/pytrec_eval for qrels format.
    qrels.map(qrels_dict_init)
    qrels = qrels_dict
    layer_results = defaultdict(dict)
    for i, layer in enumerate(model.layers):
        evaluator = RetrievalEvaluator(
            corpus_embeds[:, i],
            query_embeds[:, i],
            corpus_ds["Entry"],
            query_ds["Entry"],
            qrels,
        )
        layer_results["layers"][layer] = evaluator()
        logger.info(
            f"Layer: {layer}, Retrieval results: {layer_results['layers'][layer]}"
        )
    return TaskResult.from_dict(metadata, layer_results, model.metadata)


class ArchRetrieval(Task):
    metadata = TaskMetadata(
        id="arch_retrieval",
        display_name="Arch Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/arch_retrieval",
                revision="a19124322604a21b26b1b3c13a1bd0b8a63c9f7b",
            ),
            Dataset(
                path="tattabio/arch_retrieval_qrels",
                revision="3f142f2f9a0995d56c6e77188c7251761450afcf",
            ),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_retrieval_task(model, self.metadata)


class EukRetrieval(Task):
    metadata = TaskMetadata(
        id="euk_retrieval",
        display_name="Euk Retrieval",
        description="Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
        type="retrieval",
        modality=Modality.PROTEIN,
        datasets=[
            Dataset(
                path="tattabio/euk_retrieval",
                revision="c93dc56665cedd19fbeaea9ace146f2474c895f0",
            ),
            Dataset(
                path="tattabio/euk_retrieval_qrels",
                revision="a5aa01e9b9738074aba57fc07434e352c4c71e4b",
            ),
        ],
        primary_metric_id="map_at_5",
    )

    def run(self, model: BioSeqTransformer) -> TaskResult:
        return run_retrieval_task(model, self.metadata)

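The qrels dataset is converted into the nested dictionary format used by pytrec_eval: a mapping from query ID to a mapping from corpus ID to an integer relevance score (here the `fuzz_ratio` column). A hypothetical example of the resulting structure (IDs and scores made up for illustration):

# Hypothetical qrels structure built by qrels_dict_init (pytrec_eval format).
qrels = {
    "query_protein_1": {"corpus_protein_7": 92, "corpus_protein_13": 71},
    "query_protein_2": {"corpus_protein_2": 88},
}
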
dgeb/tasks/tasks.py
ADDED
@@ -0,0 +1,135 @@
"""Task abstract class for evaluation and results."""

import logging
from typing import List, Literal, Optional, Any
from importlib.metadata import version
from enum import Enum
import datasets
from pydantic import BaseModel, model_validator
from abc import ABC, abstractmethod


# HACK: if Modality is not defined, then import it from modality.py
try:
    from ..modality import Modality
except Exception:
    # if not, super hack to get the leaderboard working.
    # SHOULD MATCH the code exactly in modality.py
    # can we read the file and run that code?
    from enum import Enum

    class Modality(Enum):
        """Data modality, either DNA or protein sequence."""

        PROTEIN = "protein"
        DNA = "dna"


logging.basicConfig(level=logging.INFO)

TaskType = Literal[
    "classification",
    "pair_classification",
    "clustering",
    "eds",
    "bigene_mining",
    "retrieval",
]


class TaskMetric(BaseModel):
    id: str
    display_name: str
    description: Optional[str] = None
    value: float = 0.0


class LayerResult(BaseModel):
    layer_number: int
    layer_display_name: str
    metrics: List[TaskMetric]


class GEBModel(BaseModel):
    hf_name: str
    num_layers: int
    num_params: int
    embed_dim: int


class Dataset(BaseModel):
    path: str
    revision: str

    def load(self) -> datasets.DatasetDict:
        ds = datasets.load_dataset(self.path, revision=self.revision)
        if not isinstance(ds, datasets.DatasetDict):
            raise ValueError(
                f"Dataset {self.path} is not a datasets.DatasetDict object."
            )
        return ds


class TaskMetadata(BaseModel):
    id: str
    display_name: str
    description: str
    modality: Modality
    type: TaskType
    # List of datasets used by the task.
    # Each dataset is a dict of all arguments to pass to `datasets.load_dataset()`.
    datasets: List[Dataset]
    primary_metric_id: str


# tasks.py
class TaskResult(BaseModel):
    dgeb_version: str
    task: "TaskMetadata"
    # TODO: Convert model to ModelMetadata
    model: GEBModel
    results: List[LayerResult]

    @model_validator(mode="after")
    def check_valid_primary_metric(self):
        for result in self.results:
            if all(
                metric.id != self.task.primary_metric_id for metric in result.metrics
            ):
                raise ValueError(
                    f"Primary metric {self.task.primary_metric_id} not found in results.metrics"
                )
        return self

    @staticmethod
    def from_dict(
        task_metadata: "TaskMetadata",
        layer_results: LayerResult,
        model_metadata: GEBModel,
    ):
        return TaskResult(
            dgeb_version=version("dgeb"),
            task=task_metadata,
            model=model_metadata,
            results=list(
                LayerResult(
                    layer_number=int(layer),
                    layer_display_name=str(layer),
                    metrics=[
                        TaskMetric(id=metric, display_name=metric, value=value)
                        for metric, value in metrics.items()
                    ],
                )
                for layer, metrics in layer_results["layers"].items()
            ),
        )


# move to model.py?
class Task(ABC):
    metadata: TaskMetadata

    # using Any instead of "BioSeqTransformer" to avoid installing all deps in leaderboard
    @abstractmethod
    def run(self, model: Any, layers: Optional[List[int]] = None) -> TaskResult:
        pass

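`TaskResult.from_dict` expects the per-layer results as a dictionary keyed by "layers", with one metrics dict per layer index, which is exactly what the task runners build with `defaultdict(dict)`. A minimal sketch (the metric names and values below are made up for illustration, and `task_metadata` / `model_metadata` are assumed to be an existing `TaskMetadata` and the dict returned by `model.metadata`):

# Illustrative sketch: hypothetical metric values, real structure.
layer_results = {
    "layers": {
        6: {"f1": 0.81, "accuracy": 0.85},
        12: {"f1": 0.84, "accuracy": 0.88},
    }
}
result = TaskResult.from_dict(task_metadata, layer_results, model_metadata)
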
docker-compose.yml
ADDED
@@ -0,0 +1,8 @@
version: "3"
services:
  dgeb-leaderboard:
    build:
      context: ./
      dockerfile: Dockerfile
    ports:
      - "7680:7860"

docs/images/tatta_logo.png
ADDED
leaderboard/.gitignore
ADDED
@@ -0,0 +1,2 @@
/.projectile
**/__pycache__/

leaderboard/DGEB_Figure.png
ADDED
leaderboard/README.md
ADDED
@@ -0,0 +1,2 @@
# to set up hf repo to receive origin pushes
git remote set-url --add origin [email protected]:spaces/tattabio/DGEB

leaderboard/__init__.py
ADDED
File without changes
leaderboard/app.py
ADDED
@@ -0,0 +1,260 @@
import math
import json
from pathlib import Path
import gradio as gr
from typing import List
import pandas as pd
import importlib.util
from pydantic import ValidationError, parse_obj_as

SIG_FIGS = 4

# HACK: very hacky way to import from parent directory, while avoiding needing all the deps of the parent package
modality_path = "../dgeb/modality.py"
spec = importlib.util.spec_from_file_location("modality", modality_path)
modality = importlib.util.module_from_spec(spec)
spec.loader.exec_module(modality)
Modality = modality.Modality


tasks_path = "../dgeb/tasks/tasks.py"

# Load the module
spec = importlib.util.spec_from_file_location("tasks", tasks_path)
tasks = importlib.util.module_from_spec(spec)
spec.loader.exec_module(tasks)
TaskResult = tasks.TaskResult
GEBModel = tasks.GEBModel


# Assuming the class definitions provided above are complete and imported here


def format_num_params(param: int) -> str:
    # if the number of parameters is greater than 1 billion, display billion
    million = 1_000_000
    # billion = 1_000_000_000
    # if param >= billion:
    #     num_billions = int(param / 1_000_000_000)
    #     return f"{num_billions:}B"
    if param >= million:
        num_millions = int(param / 1_000_000)
        return f"{num_millions:}M"
    else:
        return f"{param:,}"


def load_json_files_from_directory(directory_path: Path) -> List[dict]:
    """
    Recursively load all JSON files within the specified directory path.

    :param directory_path: Path to the directory to search for JSON files.
    :return: List of dictionaries loaded from JSON files.
    """
    json_files_content = []
    for json_file in directory_path.rglob("*.json"):  # Recursively find all JSON files
        try:
            with open(json_file, "r", encoding="utf-8") as file:
                json_content = json.load(file)
                json_files_content.append(json_content)
        except Exception as e:
            print(f"Error loading {json_file}: {e}")
    return json_files_content


def load_results() -> List[TaskResult]:
    """
    Recursively load JSON files in ./submissions/** and return a list of TaskResult objects.
    """
    submissions_path = Path("./submissions")
    json_contents = load_json_files_from_directory(submissions_path)

    task_results_objects = []
    for content in json_contents:
        try:
            task_result = parse_obj_as(
                TaskResult, content
            )  # Using Pydantic's parse_obj_as for creating TaskResult objects
            task_results_objects.append(task_result)
        except ValidationError as e:
            print(f"Error parsing TaskResult object: {e}")
            raise e

    return task_results_objects


def task_results_to_dgeb_score(
    model: GEBModel, model_results: List[TaskResult]
) -> dict:
    best_scores_per_task = []
    modalities_seen = set()
    for task_result in model_results:
        modalities_seen.add(task_result.task.modality)
        assert (
            task_result.model.hf_name == model.hf_name
        ), f"Model names do not match, {task_result.model.hf_name} != {model.hf_name}"
        primary_metric_id = task_result.task.primary_metric_id
        scores = []
        # Get the primary score for each layer.
        for result in task_result.results:
            for metric in result.metrics:
                if metric.id == primary_metric_id:
                    scores.append(metric.value)
        best_score = max(scores)
        best_scores_per_task.append(best_score)

    assert (
        len(modalities_seen) == 1
    ), f"Multiple modalities found for model {model.hf_name}"
    # Calculate the average of the best scores for each task.
    assert len(best_scores_per_task) > 0, f"No tasks found for model {model.hf_name}"
    dgeb_score = sum(best_scores_per_task) / len(best_scores_per_task)
    return {
        "Task Name": "DGEB Score",
        "Task Category": "DGEB",
        "Model": model.hf_name,
        "Modality": list(modalities_seen)[0],
        "Num. Parameters (millions)": format_num_params(model.num_params),
        "Emb. Dimension": model.embed_dim,
        "Score": dgeb_score,
    }


def task_results_to_df(model_results: List[TaskResult]) -> pd.DataFrame:
    # Initialize an empty list to hold all rows of data
    data_rows = []
    all_models = {}
    for res in model_results:
        task = res.task
        model = res.model
        all_models[model.hf_name] = model
        print(f"Processing {task.display_name} for {model.hf_name}")
        for layer in res.results:
            total_layers = model.num_layers - 1
            mid_layer = math.ceil(total_layers / 2)
            if mid_layer == layer.layer_number:
                layer.layer_display_name = "mid"
            elif total_layers == layer.layer_number:
                layer.layer_display_name = "last"

            if layer.layer_display_name not in ["mid", "last"]:
                # calculate if the layer is mid or last
                print(
                    f"Layer {layer.layer_number} is not mid or last out of {total_layers}. Skipping"
                )
                continue
            else:
                # For each Metric in the Layer
                # pivoting the data so that each metric is a row
                metric_ids = []
                primary_metric_label = f"{task.primary_metric_id} (primary metric)"
                for metric in layer.metrics:
                    if task.primary_metric_id == metric.id:
                        metric_ids.append(primary_metric_label)
                    else:
                        metric_ids.append(metric.id)

                metric_values = [metric.value for metric in layer.metrics]
                zipped = zip(metric_ids, metric_values)
                # sort primary metric id first
                sorted_zip = sorted(
                    zipped,
                    key=lambda x: x[0] != primary_metric_label,
                )
                data_rows.append(
                    {
                        "Task Name": task.display_name,
                        "Task Category": task.type,
                        "Model": model.hf_name,
                        "Num. Parameters (millions)": format_num_params(
                            model.num_params
                        ),
                        "Emb. Dimension": model.embed_dim,
                        "Modality": task.modality,
                        "Layer": layer.layer_display_name,
                        **dict(sorted_zip),
                    }
                )
    for model_name, model in all_models.items():
        results_for_model = [
            res for res in model_results if res.model.hf_name == model_name
        ]
        assert len(results_for_model) > 0, f"No results found for model {model_name}"
        dgeb_score_record = task_results_to_dgeb_score(model, results_for_model)
        print(f'model {model.hf_name} dgeb score: {dgeb_score_record["Score"]}')
        data_rows.append(dgeb_score_record)
    print("Finished processing all results")
    df = pd.DataFrame(data_rows)
    return df


df = task_results_to_df(load_results())
image_path = "./DGEB_Figure.png"
with gr.Blocks() as demo:
    gr.Label("Diverse Genomic Embedding Benchmark", show_label=False, scale=2)
    gr.HTML(
        f"<img src='file/{image_path}' alt='DGEB Figure' style='border-radius: 0.8rem; width: 50%; margin-left: auto; margin-right: auto; margin-top:12px;'>"
    )
    gr.HTML(
        """
        <div style='width: 50%; margin-left: auto; margin-right: auto; padding-bottom: 8px;text-align: center;'>
        DGEB Leaderboard. To submit, refer to the <a href="https://github.com/TattaBio/DGEB/blob/leaderboard/README.md" target="_blank" style="text-decoration: underline">DGEB GitHub repository</a> Refer to the <a href="https://example.com" target="_blank" style="text-decoration: underline">DGEB paper</a> for details on metrics, tasks, and models.
        </div>
        """
    )

    unique_categories = df["Task Category"].unique()
    # sort "DGEB" to the start
    unique_categories = sorted(unique_categories, key=lambda x: x != "DGEB")
    for category in unique_categories:
        with gr.Tab(label=category):
            unique_tasks_in_category = df[df["Task Category"] == category][
                "Task Name"
            ].unique()
            # sort "Overall" to the start
            unique_tasks_in_category = sorted(
                unique_tasks_in_category, key=lambda x: x != "Overall"
            )
            for task in unique_tasks_in_category:
                with gr.Tab(label=task):
                    columns_to_hide = ["Task Name", "Task Category"]
                    # get rows where Task Name == task and Task Category == category
                    filtered_df = (
                        df[
                            (df["Task Name"] == task)
                            & (df["Task Category"] == category)
                        ].drop(columns=columns_to_hide)
                    ).dropna(axis=1, how="all")  # drop all NaN columns for Overall tab
                    # round all values to 4 decimal places
                    rounded_df = filtered_df.round(SIG_FIGS)

                    # calculate ranking column
                    # if in Overview tab, rank by average metric value
                    if task == "Overall":
                        # rank by average col
|
235 |
+
rounded_df["Rank"] = filtered_df["Average"].rank(
|
236 |
+
ascending=False
|
237 |
+
)
|
238 |
+
else:
|
239 |
+
avoid_cols = [
|
240 |
+
"Model",
|
241 |
+
"Emb. Dimension",
|
242 |
+
"Num. Parameters (millions)",
|
243 |
+
"Modality",
|
244 |
+
"Layer",
|
245 |
+
]
|
246 |
+
rounded_df["Rank"] = (
|
247 |
+
rounded_df.drop(columns=avoid_cols, errors="ignore")
|
248 |
+
.sum(axis=1)
|
249 |
+
.rank(ascending=False)
|
250 |
+
)
|
251 |
+
# make Rank first column
|
252 |
+
cols = list(rounded_df.columns)
|
253 |
+
cols.insert(0, cols.pop(cols.index("Rank")))
|
254 |
+
rounded_df = rounded_df[cols]
|
255 |
+
# sort by rank
|
256 |
+
rounded_df = rounded_df.sort_values("Rank")
|
257 |
+
data_frame = gr.DataFrame(rounded_df)
|
258 |
+
|
259 |
+
|
260 |
+
demo.launch(allowed_paths=["."])
|
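For quick reference, the aggregation above works as follows: for every task, take the task's primary metric at each reported layer, keep the best layer, and average those per-task bests into one DGEB score per model. A minimal sketch of the same logic operating directly on submission JSON dicts (a hypothetical standalone helper for illustration; app.py itself works on parsed TaskResult objects):

def dgeb_score_from_json(task_jsons):
    # Best primary-metric value per task, then the mean across tasks.
    best_per_task = []
    for task_json in task_jsons:
        primary = task_json["task"]["primary_metric_id"]
        layer_scores = [
            metric["value"]
            for layer in task_json["results"]
            for metric in layer["metrics"]
            if metric["id"] == primary
        ]
        best_per_task.append(max(layer_scores))  # best layer wins
    return sum(best_per_task) / len(best_per_task)  # mean over tasks

For example, the MIBiG submission file further down reports an f1 of 0.6537 at layer 6 and 0.6458 at layer 11, so that task contributes 0.6537 to the model's average.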
leaderboard/requirements.txt
ADDED
@@ -0,0 +1,82 @@
aiofiles==23.2.1
aiohttp==3.9.5
aiosignal==1.3.1
altair==5.3.0
annotated-types==0.7.0
anyio==4.4.0
attrs==23.2.0
certifi==2024.6.2
charset-normalizer==3.3.2
click==8.1.7
contourpy==1.2.1
cycler==0.12.1
datasets==2.14.4
dill==0.3.7
dnspython==2.6.1
email-validator==2.1.2
fastapi==0.111.0
fastapi-cli==0.0.4
ffmpy==0.3.2
filelock==3.15.1
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.6.0
gradio==4.37.2
gradio-client==1.0.2
h11==0.14.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.23.4
idna==3.7
importlib-resources==6.4.0
jinja2==3.1.4
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
markdown-it-py==3.0.0
markupsafe==2.1.5
matplotlib==3.9.0
mdurl==0.1.2
multidict==6.0.5
multiprocess==0.70.15
numpy==2.0.0
orjson==3.10.5
packaging==24.1
pandas==2.2.2
pillow==10.3.0
pyarrow==16.1.0
pydantic==2.7.4
pydantic-core==2.18.4
pydub==0.25.1
pygments==2.18.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
pytz==2024.1
pyyaml==6.0.1
referencing==0.35.1
requests==2.32.3
rich==13.7.1
rpds-py==0.18.1
ruff==0.4.9
semantic-version==2.10.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
starlette==0.37.2
tomlkit==0.12.0
toolz==0.12.1
tqdm==4.66.4
typer==0.12.3
typing-extensions==4.12.2
tzdata==2024.1
ujson==5.10.0
urllib3==2.2.2
uvicorn==0.30.1
uvloop==0.19.0
watchfiles==0.22.0
websockets==11.0.3
xxhash==3.4.1
yarl==1.9.4
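These pins match the Gradio 4.37 / pydantic 2.7 stack that app.py above is written against; installing them with pip install -r leaderboard/requirements.txt before launching the app should reproduce the Space environment.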
leaderboard/submissions/.DS_Store
ADDED
Binary file (12.3 kB).
leaderboard/submissions/esm2_t12_35M_UR50D/MIBIG_protein_classification.json
ADDED
@@ -0,0 +1,98 @@
{
  "task": {
    "id": "MIBIG_protein_classification",
    "display_name": "MIBiG Classification",
    "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
    "modality": "protein",
    "type": "classification",
    "datasets": [
      {"path": "tattabio/mibig_classification_prot", "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f"}
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.6537260383267297},
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.6689342403628118},
        {"id": "precision", "display_name": "precision", "description": null, "value": 0.7853286513915045},
        {"id": "recall", "display_name": "recall", "description": null, "value": 0.6020175670931918},
        {"id": "lrap", "display_name": "lrap", "description": null, "value": 0.798563869992442}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.645844633541225},
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.655328798185941},
        {"id": "precision", "display_name": "precision", "description": null, "value": 0.7407876819384401},
        {"id": "recall", "display_name": "recall", "description": null, "value": 0.5970376985838431},
        {"id": "lrap", "display_name": "lrap", "description": null, "value": 0.7849584278155715}
      ]
    }
  ]
}
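Submission files like the one above follow the TaskResult schema that app.py validates when loading ./submissions/**. A quick local sanity check before submitting could look like this (a sketch; the import path for TaskResult is an assumption here, adjust it to wherever the dgeb package exposes the model):

import json
from pydantic import ValidationError
from dgeb.tasks.tasks import TaskResult  # assumed import path for the schema

with open("MIBIG_protein_classification.json") as f:
    content = json.load(f)
try:
    TaskResult.model_validate(content)  # pydantic v2 validation
    print("submission file parses cleanly")
except ValidationError as err:
    print(f"invalid submission: {err}")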
leaderboard/submissions/esm2_t12_35M_UR50D/arch_retrieval.json
ADDED
@@ -0,0 +1,762 @@
{
  "task": {
    "id": "arch_retrieval",
    "display_name": "Arch Retrieval",
    "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
    "modality": "protein",
    "type": "retrieval",
    "datasets": [
      {"path": "tattabio/arch_retrieval", "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b"},
      {"path": "tattabio/arch_retrieval_qrels", "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf"}
    ],
    "primary_metric_id": "map_at_5"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "ndcg_at_5", "display_name": "ndcg_at_5", "description": null, "value": 0.84127},
        {"id": "ndcg_at_10", "display_name": "ndcg_at_10", "description": null, "value": 0.82701},
        {"id": "ndcg_at_50", "display_name": "ndcg_at_50", "description": null, "value": 0.79635},
        {"id": "map_at_5", "display_name": "map_at_5", "description": null, "value": 0.27329},
        {"id": "map_at_10", "display_name": "map_at_10", "description": null, "value": 0.37939},
        {"id": "map_at_50", "display_name": "map_at_50", "description": null, "value": 0.64453},
        {"id": "recall_at_5", "display_name": "recall_at_5", "description": null, "value": 0.2839},
        {"id": "recall_at_10", "display_name": "recall_at_10", "description": null, "value": 0.40033},
        {"id": "recall_at_50", "display_name": "recall_at_50", "description": null, "value": 0.70443},
        {"id": "precision_at_5", "display_name": "precision_at_5", "description": null, "value": 0.7621},
        {"id": "precision_at_10", "display_name": "precision_at_10", "description": null, "value": 0.69407},
        {"id": "precision_at_50", "display_name": "precision_at_50", "description": null, "value": 0.42452},
        {"id": "mrr_at_5", "display_name": "mrr_at_5", "description": null, "value": 0.8853108550291645},
        {"id": "mrr_at_10", "display_name": "mrr_at_10", "description": null, "value": 0.8879126611520968},
        {"id": "mrr_at_50", "display_name": "mrr_at_50", "description": null, "value": 0.8892435700922602},
        {"id": "nauc_ndcg_at_5_max", "display_name": "nauc_ndcg_at_5_max", "description": null, "value": 0.6178391415234327},
        {"id": "nauc_ndcg_at_5_std", "display_name": "nauc_ndcg_at_5_std", "description": null, "value": 0.27510768020625387},
        {"id": "nauc_ndcg_at_5_diff1", "display_name": "nauc_ndcg_at_5_diff1", "description": null, "value": -0.2751226626247053},
        {"id": "nauc_ndcg_at_10_max", "display_name": "nauc_ndcg_at_10_max", "description": null, "value": 0.6158935362175889},
        {"id": "nauc_ndcg_at_10_std", "display_name": "nauc_ndcg_at_10_std", "description": null, "value": 0.29490376307826244},
        {"id": "nauc_ndcg_at_10_diff1", "display_name": "nauc_ndcg_at_10_diff1", "description": null, "value": -0.3173510395378902},
        {"id": "nauc_ndcg_at_50_max", "display_name": "nauc_ndcg_at_50_max", "description": null, "value": 0.6282820888186709},
        {"id": "nauc_ndcg_at_50_std", "display_name": "nauc_ndcg_at_50_std", "description": null, "value": 0.217967587602592},
        {"id": "nauc_ndcg_at_50_diff1", "display_name": "nauc_ndcg_at_50_diff1", "description": null, "value": -0.3392167130961565},
        {"id": "nauc_map_at_5_max", "display_name": "nauc_map_at_5_max", "description": null, "value": 0.02706102865662817},
        {"id": "nauc_map_at_5_std", "display_name": "nauc_map_at_5_std", "description": null, "value": 0.33465305568189146},
        {"id": "nauc_map_at_5_diff1", "display_name": "nauc_map_at_5_diff1", "description": null, "value": 0.29252115202920864},
        {"id": "nauc_map_at_10_max", "display_name": "nauc_map_at_10_max", "description": null, "value": 0.1461797349288265},
        {"id": "nauc_map_at_10_std", "display_name": "nauc_map_at_10_std", "description": null, "value": 0.3984979781227535},
        {"id": "nauc_map_at_10_diff1", "display_name": "nauc_map_at_10_diff1", "description": null, "value": 0.15678893453735943},
        {"id": "nauc_map_at_50_max", "display_name": "nauc_map_at_50_max", "description": null, "value": 0.5443958382387585},
        {"id": "nauc_map_at_50_std", "display_name": "nauc_map_at_50_std", "description": null, "value": 0.3379769732428374},
        {"id": "nauc_map_at_50_diff1", "display_name": "nauc_map_at_50_diff1", "description": null, "value": -0.23212587702223994},
        {"id": "nauc_recall_at_5_max", "display_name": "nauc_recall_at_5_max", "description": null, "value": 0.008899383756080657},
        {"id": "nauc_recall_at_5_std", "display_name": "nauc_recall_at_5_std", "description": null, "value": 0.3376357180005265},
        {"id": "nauc_recall_at_5_diff1", "display_name": "nauc_recall_at_5_diff1", "description": null, "value": 0.2949278653804833},
        {"id": "nauc_recall_at_10_max", "display_name": "nauc_recall_at_10_max", "description": null, "value": 0.11957594632298725},
        {"id": "nauc_recall_at_10_std", "display_name": "nauc_recall_at_10_std", "description": null, "value": 0.4084900248156052},
        {"id": "nauc_recall_at_10_diff1", "display_name": "nauc_recall_at_10_diff1", "description": null, "value": 0.16409679466126934},
        {"id": "nauc_recall_at_50_max", "display_name": "nauc_recall_at_50_max", "description": null, "value": 0.5478175261971683},
        {"id": "nauc_recall_at_50_std", "display_name": "nauc_recall_at_50_std", "description": null, "value": 0.3566768602643857},
        {"id": "nauc_recall_at_50_diff1", "display_name": "nauc_recall_at_50_diff1", "description": null, "value": -0.24770750166012404},
        {"id": "nauc_precision_at_5_max", "display_name": "nauc_precision_at_5_max", "description": null, "value": 0.5588205820812548},
        {"id": "nauc_precision_at_5_std", "display_name": "nauc_precision_at_5_std", "description": null, "value": 0.053528426968584814},
        {"id": "nauc_precision_at_5_diff1", "display_name": "nauc_precision_at_5_diff1", "description": null, "value": -0.5895997876864452},
        {"id": "nauc_precision_at_10_max", "display_name": "nauc_precision_at_10_max", "description": null, "value": 0.5109397710788774},
        {"id": "nauc_precision_at_10_std", "display_name": "nauc_precision_at_10_std", "description": null, "value": -0.0014360394688449447},
        {"id": "nauc_precision_at_10_diff1", "display_name": "nauc_precision_at_10_diff1", "description": null, "value": -0.5972188824684267},
        {"id": "nauc_precision_at_50_max", "display_name": "nauc_precision_at_50_max", "description": null, "value": 0.30493219390483955},
        {"id": "nauc_precision_at_50_std", "display_name": "nauc_precision_at_50_std", "description": null, "value": -0.35096314542920914},
        {"id": "nauc_precision_at_50_diff1", "display_name": "nauc_precision_at_50_diff1", "description": null, "value": -0.4163370977258702},
        {"id": "nauc_mrr_at_5_max", "display_name": "nauc_mrr_at_5_max", "description": null, "value": 0.6041064087877195},
        {"id": "nauc_mrr_at_5_std", "display_name": "nauc_mrr_at_5_std", "description": null, "value": 0.2995447501683336},
        {"id": "nauc_mrr_at_5_diff1", "display_name": "nauc_mrr_at_5_diff1", "description": null, "value": -0.1176892239839227},
        {"id": "nauc_mrr_at_10_max", "display_name": "nauc_mrr_at_10_max", "description": null, "value": 0.6055526314461911},
        {"id": "nauc_mrr_at_10_std", "display_name": "nauc_mrr_at_10_std", "description": null, "value": 0.3015594122136539},
        {"id": "nauc_mrr_at_10_diff1", "display_name": "nauc_mrr_at_10_diff1", "description": null, "value": -0.11951448723943421},
        {"id": "nauc_mrr_at_50_max", "display_name": "nauc_mrr_at_50_max", "description": null, "value": 0.6050403183375579},
        {"id": "nauc_mrr_at_50_std", "display_name": "nauc_mrr_at_50_std", "description": null, "value": 0.3012299482545067},
        {"id": "nauc_mrr_at_50_diff1", "display_name": "nauc_mrr_at_50_diff1", "description": null, "value": -0.12091114334431136}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "ndcg_at_5", "display_name": "ndcg_at_5", "description": null, "value": 0.82819},
        {"id": "ndcg_at_10", "display_name": "ndcg_at_10", "description": null, "value": 0.81615},
        {"id": "ndcg_at_50", "display_name": "ndcg_at_50", "description": null, "value": 0.78982},
        {"id": "map_at_5", "display_name": "map_at_5", "description": null, "value": 0.27067},
        {"id": "map_at_10", "display_name": "map_at_10", "description": null, "value": 0.37321},
        {"id": "map_at_50", "display_name": "map_at_50", "description": null, "value": 0.63596},
        {"id": "recall_at_5", "display_name": "recall_at_5", "description": null, "value": 0.27906},
        {"id": "recall_at_10", "display_name": "recall_at_10", "description": null, "value": 0.39106},
        {"id": "recall_at_50", "display_name": "recall_at_50", "description": null, "value": 0.69746},
        {"id": "precision_at_5", "display_name": "precision_at_5", "description": null, "value": 0.7487},
        {"id": "precision_at_10", "display_name": "precision_at_10", "description": null, "value": 0.68506},
        {"id": "precision_at_50", "display_name": "precision_at_50", "description": null, "value": 0.42266},
        {"id": "mrr_at_5", "display_name": "mrr_at_5", "description": null, "value": 0.8752382984777344},
        {"id": "mrr_at_10", "display_name": "mrr_at_10", "description": null, "value": 0.878253189168681},
        {"id": "mrr_at_50", "display_name": "mrr_at_50", "description": null, "value": 0.8795454419523189},
        {"id": "nauc_ndcg_at_5_max", "display_name": "nauc_ndcg_at_5_max", "description": null, "value": 0.6238124910465183},
        {"id": "nauc_ndcg_at_5_std", "display_name": "nauc_ndcg_at_5_std", "description": null, "value": 0.3878031710482511},
        {"id": "nauc_ndcg_at_5_diff1", "display_name": "nauc_ndcg_at_5_diff1", "description": null, "value": -0.22961445620397436},
        {"id": "nauc_ndcg_at_10_max", "display_name": "nauc_ndcg_at_10_max", "description": null, "value": 0.6136556294192528},
        {"id": "nauc_ndcg_at_10_std", "display_name": "nauc_ndcg_at_10_std", "description": null, "value": 0.4027695454909326},
        {"id": "nauc_ndcg_at_10_diff1", "display_name": "nauc_ndcg_at_10_diff1", "description": null, "value": -0.23933162739820324},
        {"id": "nauc_ndcg_at_50_max", "display_name": "nauc_ndcg_at_50_max", "description": null, "value": 0.6039490411056802},
        {"id": "nauc_ndcg_at_50_std", "display_name": "nauc_ndcg_at_50_std", "description": null, "value": 0.379240829313294},
        {"id": "nauc_ndcg_at_50_diff1", "display_name": "nauc_ndcg_at_50_diff1", "description": null, "value": -0.23134380586116654},
        {"id": "nauc_map_at_5_max", "display_name": "nauc_map_at_5_max", "description": null, "value": -0.018274861348075953},
        {"id": "nauc_map_at_5_std", "display_name": "nauc_map_at_5_std", "description": null, "value": 0.3153330580523699},
        {"id": "nauc_map_at_5_diff1", "display_name": "nauc_map_at_5_diff1", "description": null, "value": 0.31839102956934573},
        {"id": "nauc_map_at_10_max", "display_name": "nauc_map_at_10_max", "description": null, "value": 0.10106646301687382},
        {"id": "nauc_map_at_10_std", "display_name": "nauc_map_at_10_std", "description": null, "value": 0.4143687386138405},
        {"id": "nauc_map_at_10_diff1", "display_name": "nauc_map_at_10_diff1", "description": null, "value": 0.18923312509326384},
        {"id": "nauc_map_at_50_max", "display_name": "nauc_map_at_50_max", "description": null, "value": 0.5144031685310609},
        {"id": "nauc_map_at_50_std", "display_name": "nauc_map_at_50_std", "description": null, "value": 0.45693618989546114},
        {"id": "nauc_map_at_50_diff1", "display_name": "nauc_map_at_50_diff1", "description": null, "value": -0.1513413062960939},
        {"id": "nauc_recall_at_5_max", "display_name": "nauc_recall_at_5_max", "description": null, "value": -0.031265621786664255},
        {"id": "nauc_recall_at_5_std", "display_name": "nauc_recall_at_5_std", "description": null, "value": 0.32028522957198785},
        {"id": "nauc_recall_at_5_diff1", "display_name": "nauc_recall_at_5_diff1", "description": null, "value": 0.32056979656535384},
        {"id": "nauc_recall_at_10_max", "display_name": "nauc_recall_at_10_max", "description": null, "value": 0.07820354892522365},
        {"id": "nauc_recall_at_10_std", "display_name": "nauc_recall_at_10_std", "description": null, "value": 0.42551786412535775},
        {"id": "nauc_recall_at_10_diff1", "display_name": "nauc_recall_at_10_diff1", "description": null, "value": 0.2040509113490322},
        {"id": "nauc_recall_at_50_max", "display_name": "nauc_recall_at_50_max", "description": null, "value": 0.5060801621108716},
        {"id": "nauc_recall_at_50_std", "display_name": "nauc_recall_at_50_std", "description": null, "value": 0.5071691349011768},
        {"id": "nauc_recall_at_50_diff1", "display_name": "nauc_recall_at_50_diff1", "description": null, "value": -0.11952783139053508},
        {"id": "nauc_precision_at_5_max", "display_name": "nauc_precision_at_5_max", "description": null, "value": 0.5923656191314365},
        {"id": "nauc_precision_at_5_std", "display_name": "nauc_precision_at_5_std", "description": null, "value": 0.1954332256400316},
        {"id": "nauc_precision_at_5_diff1", "display_name": "nauc_precision_at_5_diff1", "description": null, "value": -0.5508269378169939},
        {"id": "nauc_precision_at_10_max", "display_name": "nauc_precision_at_10_max", "description": null, "value": 0.5458701611463479},
        {"id": "nauc_precision_at_10_std", "display_name": "nauc_precision_at_10_std", "description": null, "value": 0.12975949111453675},
        {"id": "nauc_precision_at_10_diff1", "display_name": "nauc_precision_at_10_diff1", "description": null, "value": -0.5537528325655148},
        {"id": "nauc_precision_at_50_max", "display_name": "nauc_precision_at_50_max", "description": null, "value": 0.3549845967268747},
        {"id": "nauc_precision_at_50_std", "display_name": "nauc_precision_at_50_std", "description": null, "value": -0.26254902560124815},
        {"id": "nauc_precision_at_50_diff1", "display_name": "nauc_precision_at_50_diff1", "description": null, "value": -0.3919186481758992},
        {"id": "nauc_mrr_at_5_max", "display_name": "nauc_mrr_at_5_max", "description": null, "value": 0.6284613562335846},
        {"id": "nauc_mrr_at_5_std", "display_name": "nauc_mrr_at_5_std", "description": null, "value": 0.3609822238622607},
        {"id": "nauc_mrr_at_5_diff1", "display_name": "nauc_mrr_at_5_diff1", "description": null, "value": -0.13691647729285375},
        {"id": "nauc_mrr_at_10_max", "display_name": "nauc_mrr_at_10_max", "description": null, "value": 0.6282780633119702},
        {"id": "nauc_mrr_at_10_std", "display_name": "nauc_mrr_at_10_std", "description": null, "value": 0.36649482857679033},
        {"id": "nauc_mrr_at_10_diff1", "display_name": "nauc_mrr_at_10_diff1", "description": null, "value": -0.1301211341279461},
        {"id": "nauc_mrr_at_50_max", "display_name": "nauc_mrr_at_50_max", "description": null, "value": 0.6290574535816186},
        {"id": "nauc_mrr_at_50_std", "display_name": "nauc_mrr_at_50_std", "description": null, "value": 0.367920824556504},
        {"id": "nauc_mrr_at_50_diff1", "display_name": "nauc_mrr_at_50_diff1", "description": null, "value": -0.13036774230606793}
      ]
    }
  ]
}
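The layer numbers reported in these files line up with the mid/last selection in task_results_to_df above: for this 12-layer ESM2 checkpoint the last hidden layer index is 11 and the middle one is ceil(11 / 2) = 6, so only those two layers make it onto the leaderboard. A two-line check (num_layers taken from the "model" block of the JSON):

import math

num_layers = 12                        # from the "model" block above
last_layer = num_layers - 1            # 11 -> displayed as "last"
mid_layer = math.ceil(last_layer / 2)  # 6  -> displayed as "mid"
print(mid_layer, last_layer)           # prints: 6 11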
leaderboard/submissions/esm2_t12_35M_UR50D/bacarch_bigene.json
ADDED
@@ -0,0 +1,86 @@
{
  "task": {
    "id": "bacarch_bigene",
    "display_name": "BacArch BiGene",
    "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
    "modality": "protein",
    "type": "bigene_mining",
    "datasets": [
      {"path": "tattabio/bac_arch_bigene", "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631"}
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "precision", "display_name": "precision", "description": null, "value": 0.6215094339622641},
        {"id": "recall", "display_name": "recall", "description": null, "value": 0.7056603773584905},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.6469182389937107},
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.7056603773584905}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "precision", "display_name": "precision", "description": null, "value": 0.6138364779874214},
        {"id": "recall", "display_name": "recall", "description": null, "value": 0.7018867924528301},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.6413836477987421},
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.7018867924528301}
      ]
    }
  ]
}
leaderboard/submissions/esm2_t12_35M_UR50D/convergent_enzymes_classification.json
ADDED
@@ -0,0 +1,62 @@
{
  "task": {
    "id": "convergent_enzymes_classification",
    "display_name": "Convergent Enzymes Classification",
    "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
    "modality": "protein",
    "type": "classification",
    "datasets": [
      {"path": "tattabio/convergent_enzymes", "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa"}
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.2475},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.20116666666666666}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.2425},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.19904761904761906}
      ]
    }
  ]
}
leaderboard/submissions/esm2_t12_35M_UR50D/cyano_operonic_pair.json
ADDED
@@ -0,0 +1,386 @@
{
  "task": {
    "id": "cyano_operonic_pair",
    "display_name": "Cyano Operonic Pair",
    "description": "Evaluate on Cyano operonic pair classification task.",
    "modality": "protein",
    "type": "pair_classification",
    "datasets": [
      {"path": "tattabio/cyano_operonic_pair", "revision": "eeb4cb71ec2a4ff688af9de7c0662123577d32ec"}
    ],
    "primary_metric_id": "top_ap"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "cos_sim_accuracy", "display_name": "cos_sim_accuracy", "description": null, "value": 0.7203065134099617},
        {"id": "cos_sim_accuracy_threshold", "display_name": "cos_sim_accuracy_threshold", "description": null, "value": 0.990619957447052},
        {"id": "cos_sim_f1", "display_name": "cos_sim_f1", "description": null, "value": 0.44058665070338227},
        {"id": "cos_sim_f1_threshold", "display_name": "cos_sim_f1_threshold", "description": null, "value": 0.815308690071106},
        {"id": "cos_sim_precision", "display_name": "cos_sim_precision", "description": null, "value": 0.28253358925143957},
        {"id": "cos_sim_recall", "display_name": "cos_sim_recall", "description": null, "value": 1.0},
        {"id": "cos_sim_ap", "display_name": "cos_sim_ap", "description": null, "value": 0.32424099100055437},
        {"id": "manhattan_accuracy", "display_name": "manhattan_accuracy", "description": null, "value": 0.7187739463601532},
        {"id": "manhattan_accuracy_threshold", "display_name": "manhattan_accuracy_threshold", "description": null, "value": 40.061012268066406},
        {"id": "manhattan_f1", "display_name": "manhattan_f1", "description": null, "value": 0.43963963963963965},
        {"id": "manhattan_f1_threshold", "display_name": "manhattan_f1_threshold", "description": null, "value": 380.5898742675781},
        {"id": "manhattan_precision", "display_name": "manhattan_precision", "description": null, "value": 0.28218966846569005},
        {"id": "manhattan_recall", "display_name": "manhattan_recall", "description": null, "value": 0.9945652173913043},
        {"id": "manhattan_ap", "display_name": "manhattan_ap", "description": null, "value": 0.3051200502841412},
        {"id": "euclidean_accuracy", "display_name": "euclidean_accuracy", "description": null, "value": 0.7187739463601532},
        {"id": "euclidean_accuracy_threshold", "display_name": "euclidean_accuracy_threshold", "description": null, "value": 2.2720906734466553},
        {"id": "euclidean_f1", "display_name": "euclidean_f1", "description": null, "value": 0.4404548174745661},
        {"id": "euclidean_f1_threshold", "display_name": "euclidean_f1_threshold", "description": null, "value": 25.41253662109375},
        {"id": "euclidean_precision", "display_name": "euclidean_precision", "description": null, "value": 0.28242517267843437},
        {"id": "euclidean_recall", "display_name": "euclidean_recall", "description": null, "value": 1.0},
        {"id": "euclidean_ap", "display_name": "euclidean_ap", "description": null, "value": 0.3117112729287826},
        {"id": "dot_accuracy", "display_name": "dot_accuracy", "description": null, "value": 0.7206896551724138},
        {"id": "dot_accuracy_threshold", "display_name": "dot_accuracy_threshold", "description": null, "value": 1764.11328125},
        {"id": "dot_f1", "display_name": "dot_f1", "description": null, "value": 0.44177215189873426},
        {"id": "dot_f1_threshold", "display_name": "dot_f1_threshold", "description": null, "value": 1021.9218139648438},
        {"id": "dot_precision", "display_name": "dot_precision", "description": null, "value": 0.28795379537953797},
        {"id": "dot_recall", "display_name": "dot_recall", "description": null, "value": 0.9483695652173914},
        {"id": "dot_ap", "display_name": "dot_ap", "description": null, "value": 0.35181607664099845},
        {"id": "top_ap", "display_name": "top_ap", "description": null, "value": 0.35181607664099845}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "cos_sim_accuracy", "display_name": "cos_sim_accuracy", "description": null, "value": 0.7206896551724138},
        {"id": "cos_sim_accuracy_threshold", "display_name": "cos_sim_accuracy_threshold", "description": null, "value": 0.9833309650421143},
        {"id": "cos_sim_f1", "display_name": "cos_sim_f1", "description": null, "value": 0.4454067429631921},
        {"id": "cos_sim_f1_threshold", "display_name": "cos_sim_f1_threshold", "description": null, "value": 0.8805520534515381},
        {"id": "cos_sim_precision", "display_name": "cos_sim_precision", "description": null, "value": 0.2883460152182619},
        {"id": "cos_sim_recall", "display_name": "cos_sim_recall", "description": null, "value": 0.9782608695652174},
        {"id": "cos_sim_ap", "display_name": "cos_sim_ap", "description": null, "value": 0.3325946475342702},
        {"id": "manhattan_accuracy", "display_name": "manhattan_accuracy", "description": null, "value": 0.721455938697318},
        {"id": "manhattan_accuracy_threshold", "display_name": "manhattan_accuracy_threshold", "description": null, "value": 230.74539184570312},
        {"id": "manhattan_f1", "display_name": "manhattan_f1", "description": null, "value": 0.4439615026389321},
        {"id": "manhattan_f1_threshold", "display_name": "manhattan_f1_threshold", "description": null, "value": 690.979248046875},
        {"id": "manhattan_precision", "display_name": "manhattan_precision", "description": null, "value": 0.28772635814889336},
        {"id": "manhattan_recall", "display_name": "manhattan_recall", "description": null, "value": 0.9714673913043478},
        {"id": "manhattan_ap", "display_name": "manhattan_ap", "description": null, "value": 0.33577510329678106},
        {"id": "euclidean_accuracy", "display_name": "euclidean_accuracy", "description": null, "value": 0.7210727969348659},
        {"id": "euclidean_accuracy_threshold", "display_name": "euclidean_accuracy_threshold", "description": null, "value": 13.784924507141113},
        {"id": "euclidean_f1", "display_name": "euclidean_f1", "description": null, "value": 0.44413697682462816},
        {"id": "euclidean_f1_threshold", "display_name": "euclidean_f1_threshold", "description": null, "value": 39.12321472167969},
        {"id": "euclidean_precision", "display_name": "euclidean_precision", "description": null, "value": 0.29791183294663576},
        {"id": "euclidean_recall", "display_name": "euclidean_recall", "description": null, "value": 0.8722826086956522},
        {"id": "euclidean_ap", "display_name": "euclidean_ap", "description": null, "value": 0.33823458280589236},
        {"id": "dot_accuracy", "display_name": "dot_accuracy", "description": null, "value": 0.7191570881226054},
        {"id": "dot_accuracy_threshold", "display_name": "dot_accuracy_threshold", "description": null, "value": 10542.0},
        {"id": "dot_f1", "display_name": "dot_f1", "description": null, "value": 0.4403230631169608},
        {"id": "dot_f1_threshold", "display_name": "dot_f1_threshold", "description": null, "value": 4913.24560546875},
        {"id": "dot_precision", "display_name": "dot_precision", "description": null, "value": 0.2823168392788646},
        {"id": "dot_recall", "display_name": "dot_recall", "description": null, "value": 1.0},
        {"id": "dot_ap", "display_name": "dot_ap", "description": null, "value": 0.28278909833025945},
        {"id": "top_ap", "display_name": "top_ap", "description": null, "value": 0.33823458280589236}
      ]
    }
  ]
}
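Note that top_ap, the primary metric for the operonic-pair tasks, appears to be the best average precision across the four similarity functions reported above: at layer 6 it matches dot_ap (0.3518...) and at layer 11 it matches euclidean_ap (0.3382...), i.e. roughly top_ap = max(cos_sim_ap, manhattan_ap, euclidean_ap, dot_ap).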
leaderboard/submissions/esm2_t12_35M_UR50D/ec_classification.json
ADDED
@@ -0,0 +1,62 @@
{
  "task": {
    "id": "ec_classification",
    "display_name": "EC Classification",
    "description": "Evaluate on Enzyme Commission number classification task.",
    "modality": "protein",
    "type": "classification",
    "datasets": [
      {"path": "tattabio/ec_classification", "revision": "ead5570168e6969a5149f6861e8a33d6b5d22498"}
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t12_35M_UR50D",
    "revision": "...",
    "num_layers": 12,
    "num_params": 33992881,
    "embed_dim": 480
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 6,
      "layer_display_name": "6",
      "metrics": [
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.6015625},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.55390625}
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        {"id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.5546875},
        {"id": "f1", "display_name": "f1", "description": null, "value": 0.5096354166666667}
      ]
    }
  ]
}
leaderboard/submissions/esm2_t12_35M_UR50D/ecoli_operonic_pair.json
ADDED
@@ -0,0 +1,386 @@
1 |
+
{
|
2 |
+
"task": {
|
3 |
+
"id": "ecoli_operonic_pair",
|
4 |
+
"display_name": "E.coli Operonic Pair",
|
5 |
+
"description": "Evaluate on E.coli K-12 operonic pair classification task.",
|
6 |
+
"modality": "protein",
|
7 |
+
"type": "pair_classification",
|
8 |
+
"datasets": [
|
9 |
+
{
|
10 |
+
"path": "tattabio/ecoli_operonic_pair",
|
11 |
+
"revision": "a62c01143a842696fc8200b91c1acb825e8cb891"
|
12 |
+
}
|
13 |
+
],
|
14 |
+
"primary_metric_id": "top_ap"
|
15 |
+
},
|
16 |
+
"model": {
|
17 |
+
"hf_name": "facebook/esm2_t12_35M_UR50D",
|
18 |
+
"revision": "...",
|
19 |
+
"num_layers": 12,
|
20 |
+
"num_params": 33992881,
|
21 |
+
"embed_dim": 480
|
22 |
+
},
|
23 |
+
"dgeb_version": "0.0.0",
|
24 |
+
"results": [
|
25 |
+
{
|
26 |
+
"layer_number": 6,
|
27 |
+
"layer_display_name": "6",
|
28 |
+
"metrics": [
|
29 |
+
{
|
30 |
+
"id": "cos_sim_accuracy",
|
31 |
+
"display_name": "cos_sim_accuracy",
|
32 |
+
"description": null,
|
33 |
+
"value": 0.6309689383402874
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"id": "cos_sim_accuracy_threshold",
|
37 |
+
"display_name": "cos_sim_accuracy_threshold",
|
38 |
+
"description": null,
|
39 |
+
"value": 0.9664175510406494
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"id": "cos_sim_f1",
|
43 |
+
"display_name": "cos_sim_f1",
|
44 |
+
"description": null,
|
45 |
+
"value": 0.5831148400629261
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"id": "cos_sim_f1_threshold",
|
49 |
+
"display_name": "cos_sim_f1_threshold",
|
50 |
+
"description": null,
|
51 |
+
"value": 0.876137375831604
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"id": "cos_sim_precision",
|
55 |
+
"display_name": "cos_sim_precision",
|
56 |
+
"description": null,
|
57 |
+
"value": 0.41972823351786614
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"id": "cos_sim_recall",
|
61 |
+
"display_name": "cos_sim_recall",
|
62 |
+
"description": null,
|
63 |
+
"value": 0.954779622209502
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"id": "cos_sim_ap",
|
67 |
+
"display_name": "cos_sim_ap",
|
68 |
+
"description": null,
|
69 |
+
"value": 0.5226436718954207
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"id": "manhattan_accuracy",
|
73 |
+
"display_name": "manhattan_accuracy",
|
74 |
+
"description": null,
|
75 |
+
"value": 0.6237830319888734
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"id": "manhattan_accuracy_threshold",
|
79 |
+
"display_name": "manhattan_accuracy_threshold",
|
80 |
+
"description": null,
|
81 |
+
"value": 151.0961456298828
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"id": "manhattan_f1",
|
85 |
+
"display_name": "manhattan_f1",
|
86 |
+
"description": null,
|
87 |
+
"value": 0.5765230312035661
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"id": "manhattan_f1_threshold",
|
91 |
+
"display_name": "manhattan_f1_threshold",
|
92 |
+
"description": null,
|
93 |
+
"value": 417.6656494140625
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"id": "manhattan_precision",
|
97 |
+
"display_name": "manhattan_precision",
|
98 |
+
"description": null,
|
99 |
+
"value": 0.4051044083526682
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"id": "manhattan_recall",
|
103 |
+
"display_name": "manhattan_recall",
|
104 |
+
"description": null,
|
105 |
+
"value": 0.9994275901545506
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"id": "manhattan_ap",
|
109 |
+
"display_name": "manhattan_ap",
|
110 |
+
"description": null,
|
111 |
+
"value": 0.5038561800803791
|
112 |
+
},
|
113 |
+
{
|
114 |
+
"id": "euclidean_accuracy",
|
115 |
+
"display_name": "euclidean_accuracy",
|
116 |
+
"description": null,
|
117 |
+
"value": 0.624246638850255
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"id": "euclidean_accuracy_threshold",
|
121 |
+
"display_name": "euclidean_accuracy_threshold",
|
122 |
+
"description": null,
|
123 |
+
"value": 9.827131271362305
|
124 |
+
},
|
125 |
+
{
|
126 |
+
"id": "euclidean_f1",
|
127 |
+
"display_name": "euclidean_f1",
|
128 |
+
"description": null,
|
129 |
+
"value": 0.5778148457047539
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"id": "euclidean_f1_threshold",
|
133 |
+
"display_name": "euclidean_f1_threshold",
|
134 |
+
"description": null,
|
135 |
+
"value": 23.485851287841797
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"id": "euclidean_precision",
|
139 |
+
"display_name": "euclidean_precision",
|
140 |
+
"description": null,
|
141 |
+
"value": 0.4077212806026365
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"id": "euclidean_recall",
|
145 |
+
"display_name": "euclidean_recall",
|
146 |
+
"description": null,
|
147 |
+
"value": 0.9914138523182598
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"id": "euclidean_ap",
|
151 |
+
"display_name": "euclidean_ap",
|
152 |
+
"description": null,
|
153 |
+
"value": 0.5109707609256201
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"id": "dot_accuracy",
|
157 |
+
"display_name": "dot_accuracy",
|
158 |
+
"description": null,
|
159 |
+
"value": 0.6200741770978211
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"id": "dot_accuracy_threshold",
|
163 |
+
"display_name": "dot_accuracy_threshold",
|
164 |
+
"description": null,
|
165 |
+
"value": 1509.6474609375
|
166 |
+
},
|
167 |
+
{
|
168 |
+
"id": "dot_f1",
|
169 |
+
"display_name": "dot_f1",
|
170 |
+
"description": null,
|
171 |
+
"value": 0.576427863981512
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"id": "dot_f1_threshold",
|
175 |
+
"display_name": "dot_f1_threshold",
|
176 |
+
"description": null,
|
177 |
+
"value": 827.195556640625
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"id": "dot_precision",
|
181 |
+
"display_name": "dot_precision",
|
182 |
+
"description": null,
|
183 |
+
"value": 0.40501043841336115
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"id": "dot_recall",
|
187 |
+
"display_name": "dot_recall",
|
188 |
+
"description": null,
|
189 |
+
"value": 0.9994275901545506
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"id": "dot_ap",
|
193 |
+
"display_name": "dot_ap",
|
194 |
+
"description": null,
|
195 |
+
"value": 0.498147478687894
|
196 |
+
},
|
197 |
+
{
|
198 |
+
"id": "top_ap",
|
199 |
+
"display_name": "top_ap",
|
200 |
+
"description": null,
|
201 |
+
"value": 0.5226436718954207
|
202 |
+
}
|
203 |
+
]
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"layer_number": 11,
|
207 |
+
"layer_display_name": "11",
|
208 |
+
"metrics": [
|
209 |
+
{
|
210 |
+
"id": "cos_sim_accuracy",
|
211 |
+
"display_name": "cos_sim_accuracy",
|
212 |
+
"description": null,
|
213 |
+
"value": 0.6305053314789059
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"id": "cos_sim_accuracy_threshold",
|
217 |
+
"display_name": "cos_sim_accuracy_threshold",
|
218 |
+
"description": null,
|
219 |
+
"value": 0.9585829377174377
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"id": "cos_sim_f1",
|
223 |
+
"display_name": "cos_sim_f1",
|
224 |
+
"description": null,
|
225 |
+
"value": 0.5934650455927052
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"id": "cos_sim_f1_threshold",
|
229 |
+
"display_name": "cos_sim_f1_threshold",
|
230 |
+
"description": null,
|
231 |
+
"value": 0.9002124071121216
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"id": "cos_sim_precision",
|
235 |
+
"display_name": "cos_sim_precision",
|
236 |
+
"description": null,
|
237 |
+
"value": 0.44412851862382713
|
238 |
+
},
|
239 |
+
{
|
240 |
+
"id": "cos_sim_recall",
|
241 |
+
"display_name": "cos_sim_recall",
|
242 |
+
"description": null,
|
243 |
+
"value": 0.8941041785918717
|
244 |
+
},
|
245 |
+
{
|
246 |
+
"id": "cos_sim_ap",
|
247 |
+
"display_name": "cos_sim_ap",
|
248 |
+
"description": null,
|
249 |
+
"value": 0.545021841060869
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"id": "manhattan_accuracy",
|
253 |
+
"display_name": "manhattan_accuracy",
|
254 |
+
"description": null,
|
255 |
+
"value": 0.6342141863699583
|
256 |
+
},
|
257 |
+
{
|
258 |
+
"id": "manhattan_accuracy_threshold",
|
259 |
+
"display_name": "manhattan_accuracy_threshold",
|
260 |
+
"description": null,
|
261 |
+
"value": 444.21954345703125
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"id": "manhattan_f1",
|
265 |
+
"display_name": "manhattan_f1",
|
266 |
+
"description": null,
|
267 |
+
"value": 0.6035735322992343
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"id": "manhattan_f1_threshold",
|
271 |
+
"display_name": "manhattan_f1_threshold",
|
272 |
+
"description": null,
|
273 |
+
"value": 612.2872314453125
|
274 |
+
},
|
275 |
+
{
|
276 |
+
"id": "manhattan_precision",
|
277 |
+
"display_name": "manhattan_precision",
|
278 |
+
"description": null,
|
279 |
+
"value": 0.45935445307830247
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"id": "manhattan_recall",
|
283 |
+
"display_name": "manhattan_recall",
|
284 |
+
"description": null,
|
285 |
+
"value": 0.8797939324556382
|
286 |
+
},
|
287 |
+
{
|
288 |
+
"id": "manhattan_ap",
|
289 |
+
"display_name": "manhattan_ap",
|
290 |
+
"description": null,
|
291 |
+
"value": 0.5574639922170803
|
292 |
+
},
|
293 |
+
{
|
294 |
+
"id": "euclidean_accuracy",
|
295 |
+
"display_name": "euclidean_accuracy",
|
296 |
+
"description": null,
|
297 |
+
"value": 0.6339823829392675
|
298 |
+
},
|
299 |
+
{
|
300 |
+
"id": "euclidean_accuracy_threshold",
|
301 |
+
"display_name": "euclidean_accuracy_threshold",
|
302 |
+
"description": null,
|
303 |
+
"value": 29.62457275390625
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"id": "euclidean_f1",
|
307 |
+
"display_name": "euclidean_f1",
|
308 |
+
"description": null,
|
309 |
+
"value": 0.5996841689696012
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"id": "euclidean_f1_threshold",
|
313 |
+
"display_name": "euclidean_f1_threshold",
|
314 |
+
"description": null,
|
315 |
+
"value": 38.6270751953125
|
316 |
+
},
|
317 |
+
{
|
318 |
+
"id": "euclidean_precision",
|
319 |
+
"display_name": "euclidean_precision",
|
320 |
+
"description": null,
|
321 |
+
"value": 0.45766797228080747
|
322 |
+
},
|
323 |
+
{
|
324 |
+
"id": "euclidean_recall",
|
325 |
+
"display_name": "euclidean_recall",
|
326 |
+
"description": null,
|
327 |
+
"value": 0.86949055523755
|
328 |
+
},
|
329 |
+
{
|
330 |
+
"id": "euclidean_ap",
|
331 |
+
"display_name": "euclidean_ap",
|
332 |
+
"description": null,
|
333 |
+
"value": 0.5553872058517757
|
334 |
+
},
|
335 |
+
{
|
336 |
+
"id": "dot_accuracy",
|
337 |
+
"display_name": "dot_accuracy",
|
338 |
+
"description": null,
|
339 |
+
"value": 0.5948076031525267
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"id": "dot_accuracy_threshold",
|
343 |
+
"display_name": "dot_accuracy_threshold",
|
344 |
+
"description": null,
|
345 |
+
"value": 14395.623046875
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"id": "dot_f1",
|
349 |
+
"display_name": "dot_f1",
|
350 |
+
"description": null,
|
351 |
+
"value": 0.577018736527939
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"id": "dot_f1_threshold",
|
355 |
+
"display_name": "dot_f1_threshold",
|
356 |
+
"description": null,
|
357 |
+
"value": 5674.908203125
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"id": "dot_precision",
|
361 |
+
"display_name": "dot_precision",
|
362 |
+
"description": null,
|
363 |
+
"value": 0.4061624649859944
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"id": "dot_recall",
|
367 |
+
"display_name": "dot_recall",
|
368 |
+
"description": null,
|
369 |
+
"value": 0.9959931310818546
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"id": "dot_ap",
|
373 |
+
"display_name": "dot_ap",
|
374 |
+
"description": null,
|
375 |
+
"value": 0.3862357442891778
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"id": "top_ap",
|
379 |
+
"display_name": "top_ap",
|
380 |
+
"description": null,
|
381 |
+
"value": 0.5574639922170803
|
382 |
+
}
|
383 |
+
]
|
384 |
+
}
|
385 |
+
]
|
386 |
+
}
|
leaderboard/submissions/esm2_t12_35M_UR50D/euk_retrieval.json
ADDED
@@ -0,0 +1,762 @@
1 |
+
{
|
2 |
+
"task": {
|
3 |
+
"id": "euk_retrieval",
|
4 |
+
"display_name": "Euk Retrieval",
|
5 |
+
"description": "Retrieves bacterial proteins with similar swissprot annotations to a query eukaryotic protein",
|
6 |
+
"modality": "protein",
|
7 |
+
"type": "retrieval",
|
8 |
+
"datasets": [
|
9 |
+
{
|
10 |
+
"path": "tattabio/euk_retrieval",
|
11 |
+
"revision": "c93dc56665cedd19fbeaea9ace146f2474c895f0"
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"path": "tattabio/euk_retrieval_qrels",
|
15 |
+
"revision": "a5aa01e9b9738074aba57fc07434e352c4c71e4b"
|
16 |
+
}
|
17 |
+
],
|
18 |
+
"primary_metric_id": "map_at_5"
|
19 |
+
},
|
20 |
+
"model": {
|
21 |
+
"hf_name": "facebook/esm2_t12_35M_UR50D",
|
22 |
+
"revision": "...",
|
23 |
+
"num_layers": 12,
|
24 |
+
"num_params": 33992881,
|
25 |
+
"embed_dim": 480
|
26 |
+
},
|
27 |
+
"dgeb_version": "0.0.0",
|
28 |
+
"results": [
|
29 |
+
{
|
30 |
+
"layer_number": 6,
|
31 |
+
"layer_display_name": "6",
|
32 |
+
"metrics": [
|
33 |
+
{
|
34 |
+
"id": "ndcg_at_5",
|
35 |
+
"display_name": "ndcg_at_5",
|
36 |
+
"description": null,
|
37 |
+
"value": 0.80067
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"id": "ndcg_at_10",
|
41 |
+
"display_name": "ndcg_at_10",
|
42 |
+
"description": null,
|
43 |
+
"value": 0.79455
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"id": "ndcg_at_50",
|
47 |
+
"display_name": "ndcg_at_50",
|
48 |
+
"description": null,
|
49 |
+
"value": 0.77429
|
50 |
+
},
|
51 |
+
{
|
52 |
+
"id": "map_at_5",
|
53 |
+
"display_name": "map_at_5",
|
54 |
+
"description": null,
|
55 |
+
"value": 0.30914
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"id": "map_at_10",
|
59 |
+
"display_name": "map_at_10",
|
60 |
+
"description": null,
|
61 |
+
"value": 0.41095
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"id": "map_at_50",
|
65 |
+
"display_name": "map_at_50",
|
66 |
+
"description": null,
|
67 |
+
"value": 0.60087
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"id": "recall_at_5",
|
71 |
+
"display_name": "recall_at_5",
|
72 |
+
"description": null,
|
73 |
+
"value": 0.31905
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"id": "recall_at_10",
|
77 |
+
"display_name": "recall_at_10",
|
78 |
+
"description": null,
|
79 |
+
"value": 0.43473
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"id": "recall_at_50",
|
83 |
+
"display_name": "recall_at_50",
|
84 |
+
"description": null,
|
85 |
+
"value": 0.66233
|
86 |
+
},
|
87 |
+
{
|
88 |
+
"id": "precision_at_5",
|
89 |
+
"display_name": "precision_at_5",
|
90 |
+
"description": null,
|
91 |
+
"value": 0.7119
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"id": "precision_at_10",
|
95 |
+
"display_name": "precision_at_10",
|
96 |
+
"description": null,
|
97 |
+
"value": 0.63408
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"id": "precision_at_50",
|
101 |
+
"display_name": "precision_at_50",
|
102 |
+
"description": null,
|
103 |
+
"value": 0.3663
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"id": "mrr_at_5",
|
107 |
+
"display_name": "mrr_at_5",
|
108 |
+
"description": null,
|
109 |
+
"value": 0.8471596998928188
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"id": "mrr_at_10",
|
113 |
+
"display_name": "mrr_at_10",
|
114 |
+
"description": null,
|
115 |
+
"value": 0.8513856989741232
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"id": "mrr_at_50",
|
119 |
+
"display_name": "mrr_at_50",
|
120 |
+
"description": null,
|
121 |
+
"value": 0.8527043294326252
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"id": "nauc_ndcg_at_5_max",
|
125 |
+
"display_name": "nauc_ndcg_at_5_max",
|
126 |
+
"description": null,
|
127 |
+
"value": 0.7166495695870103
|
128 |
+
},
|
129 |
+
{
|
130 |
+
"id": "nauc_ndcg_at_5_std",
|
131 |
+
"display_name": "nauc_ndcg_at_5_std",
|
132 |
+
"description": null,
|
133 |
+
"value": 0.5383304196281262
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"id": "nauc_ndcg_at_5_diff1",
|
137 |
+
"display_name": "nauc_ndcg_at_5_diff1",
|
138 |
+
"description": null,
|
139 |
+
"value": -0.38408074718110424
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"id": "nauc_ndcg_at_10_max",
|
143 |
+
"display_name": "nauc_ndcg_at_10_max",
|
144 |
+
"description": null,
|
145 |
+
"value": 0.71056350273151
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"id": "nauc_ndcg_at_10_std",
|
149 |
+
"display_name": "nauc_ndcg_at_10_std",
|
150 |
+
"description": null,
|
151 |
+
"value": 0.5386325626626473
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"id": "nauc_ndcg_at_10_diff1",
|
155 |
+
"display_name": "nauc_ndcg_at_10_diff1",
|
156 |
+
"description": null,
|
157 |
+
"value": -0.3678412023083028
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"id": "nauc_ndcg_at_50_max",
|
161 |
+
"display_name": "nauc_ndcg_at_50_max",
|
162 |
+
"description": null,
|
163 |
+
"value": 0.6787542765531929
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"id": "nauc_ndcg_at_50_std",
|
167 |
+
"display_name": "nauc_ndcg_at_50_std",
|
168 |
+
"description": null,
|
169 |
+
"value": 0.4678010355684318
|
170 |
+
},
|
171 |
+
{
|
172 |
+
"id": "nauc_ndcg_at_50_diff1",
|
173 |
+
"display_name": "nauc_ndcg_at_50_diff1",
|
174 |
+
"description": null,
|
175 |
+
"value": -0.3023078330221261
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"id": "nauc_map_at_5_max",
|
179 |
+
"display_name": "nauc_map_at_5_max",
|
180 |
+
"description": null,
|
181 |
+
"value": 0.17506411594869709
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"id": "nauc_map_at_5_std",
|
185 |
+
"display_name": "nauc_map_at_5_std",
|
186 |
+
"description": null,
|
187 |
+
"value": 0.344228905317099
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"id": "nauc_map_at_5_diff1",
|
191 |
+
"display_name": "nauc_map_at_5_diff1",
|
192 |
+
"description": null,
|
193 |
+
"value": 0.26025197550499063
|
194 |
+
},
|
195 |
+
{
|
196 |
+
"id": "nauc_map_at_10_max",
|
197 |
+
"display_name": "nauc_map_at_10_max",
|
198 |
+
"description": null,
|
199 |
+
"value": 0.28364735198157687
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"id": "nauc_map_at_10_std",
|
203 |
+
"display_name": "nauc_map_at_10_std",
|
204 |
+
"description": null,
|
205 |
+
"value": 0.4946084063548821
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"id": "nauc_map_at_10_diff1",
|
209 |
+
"display_name": "nauc_map_at_10_diff1",
|
210 |
+
"description": null,
|
211 |
+
"value": 0.13024980686869012
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"id": "nauc_map_at_50_max",
|
215 |
+
"display_name": "nauc_map_at_50_max",
|
216 |
+
"description": null,
|
217 |
+
"value": 0.6456837506614725
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"id": "nauc_map_at_50_std",
|
221 |
+
"display_name": "nauc_map_at_50_std",
|
222 |
+
"description": null,
|
223 |
+
"value": 0.5024354435806796
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"id": "nauc_map_at_50_diff1",
|
227 |
+
"display_name": "nauc_map_at_50_diff1",
|
228 |
+
"description": null,
|
229 |
+
"value": -0.18849105999507082
|
230 |
+
},
|
231 |
+
{
|
232 |
+
"id": "nauc_recall_at_5_max",
|
233 |
+
"display_name": "nauc_recall_at_5_max",
|
234 |
+
"description": null,
|
235 |
+
"value": 0.15537143366366737
|
236 |
+
},
|
237 |
+
{
|
238 |
+
"id": "nauc_recall_at_5_std",
|
239 |
+
"display_name": "nauc_recall_at_5_std",
|
240 |
+
"description": null,
|
241 |
+
"value": 0.3338972930408563
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"id": "nauc_recall_at_5_diff1",
|
245 |
+
"display_name": "nauc_recall_at_5_diff1",
|
246 |
+
"description": null,
|
247 |
+
"value": 0.27534514133854515
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"id": "nauc_recall_at_10_max",
|
251 |
+
"display_name": "nauc_recall_at_10_max",
|
252 |
+
"description": null,
|
253 |
+
"value": 0.24230061291494534
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"id": "nauc_recall_at_10_std",
|
257 |
+
"display_name": "nauc_recall_at_10_std",
|
258 |
+
"description": null,
|
259 |
+
"value": 0.4763992415794819
|
260 |
+
},
|
261 |
+
{
|
262 |
+
"id": "nauc_recall_at_10_diff1",
|
263 |
+
"display_name": "nauc_recall_at_10_diff1",
|
264 |
+
"description": null,
|
265 |
+
"value": 0.17167004025145782
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"id": "nauc_recall_at_50_max",
|
269 |
+
"display_name": "nauc_recall_at_50_max",
|
270 |
+
"description": null,
|
271 |
+
"value": 0.6062660448007379
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"id": "nauc_recall_at_50_std",
|
275 |
+
"display_name": "nauc_recall_at_50_std",
|
276 |
+
"description": null,
|
277 |
+
"value": 0.45445564371902375
|
278 |
+
},
|
279 |
+
{
|
280 |
+
"id": "nauc_recall_at_50_diff1",
|
281 |
+
"display_name": "nauc_recall_at_50_diff1",
|
282 |
+
"description": null,
|
283 |
+
"value": -0.09621042247019258
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"id": "nauc_precision_at_5_max",
|
287 |
+
"display_name": "nauc_precision_at_5_max",
|
288 |
+
"description": null,
|
289 |
+
"value": 0.5420327575630611
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"id": "nauc_precision_at_5_std",
|
293 |
+
"display_name": "nauc_precision_at_5_std",
|
294 |
+
"description": null,
|
295 |
+
"value": 0.37248428210075407
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"id": "nauc_precision_at_5_diff1",
|
299 |
+
"display_name": "nauc_precision_at_5_diff1",
|
300 |
+
"description": null,
|
301 |
+
"value": -0.6517795575595553
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"id": "nauc_precision_at_10_max",
|
305 |
+
"display_name": "nauc_precision_at_10_max",
|
306 |
+
"description": null,
|
307 |
+
"value": 0.46182346579179107
|
308 |
+
},
|
309 |
+
{
|
310 |
+
"id": "nauc_precision_at_10_std",
|
311 |
+
"display_name": "nauc_precision_at_10_std",
|
312 |
+
"description": null,
|
313 |
+
"value": 0.2556997419766225
|
314 |
+
},
|
315 |
+
{
|
316 |
+
"id": "nauc_precision_at_10_diff1",
|
317 |
+
"display_name": "nauc_precision_at_10_diff1",
|
318 |
+
"description": null,
|
319 |
+
"value": -0.6371093546193429
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"id": "nauc_precision_at_50_max",
|
323 |
+
"display_name": "nauc_precision_at_50_max",
|
324 |
+
"description": null,
|
325 |
+
"value": 0.22395520722060117
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"id": "nauc_precision_at_50_std",
|
329 |
+
"display_name": "nauc_precision_at_50_std",
|
330 |
+
"description": null,
|
331 |
+
"value": -0.27077611986871364
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"id": "nauc_precision_at_50_diff1",
|
335 |
+
"display_name": "nauc_precision_at_50_diff1",
|
336 |
+
"description": null,
|
337 |
+
"value": -0.4324048296185153
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"id": "nauc_mrr_at_5_max",
|
341 |
+
"display_name": "nauc_mrr_at_5_max",
|
342 |
+
"description": null,
|
343 |
+
"value": 0.7966902615822546
|
344 |
+
},
|
345 |
+
{
|
346 |
+
"id": "nauc_mrr_at_5_std",
|
347 |
+
"display_name": "nauc_mrr_at_5_std",
|
348 |
+
"description": null,
|
349 |
+
"value": 0.5623896062382641
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"id": "nauc_mrr_at_5_diff1",
|
353 |
+
"display_name": "nauc_mrr_at_5_diff1",
|
354 |
+
"description": null,
|
355 |
+
"value": -0.27875113624180275
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"id": "nauc_mrr_at_10_max",
|
359 |
+
"display_name": "nauc_mrr_at_10_max",
|
360 |
+
"description": null,
|
361 |
+
"value": 0.7982850278647994
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"id": "nauc_mrr_at_10_std",
|
365 |
+
"display_name": "nauc_mrr_at_10_std",
|
366 |
+
"description": null,
|
367 |
+
"value": 0.5623589312727257
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"id": "nauc_mrr_at_10_diff1",
|
371 |
+
"display_name": "nauc_mrr_at_10_diff1",
|
372 |
+
"description": null,
|
373 |
+
"value": -0.27578274493030464
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"id": "nauc_mrr_at_50_max",
|
377 |
+
"display_name": "nauc_mrr_at_50_max",
|
378 |
+
"description": null,
|
379 |
+
"value": 0.7977600079745486
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"id": "nauc_mrr_at_50_std",
|
383 |
+
"display_name": "nauc_mrr_at_50_std",
|
384 |
+
"description": null,
|
385 |
+
"value": 0.5625363754999084
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"id": "nauc_mrr_at_50_diff1",
|
389 |
+
"display_name": "nauc_mrr_at_50_diff1",
|
390 |
+
"description": null,
|
391 |
+
"value": -0.2708948491113527
|
392 |
+
}
|
393 |
+
]
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"layer_number": 11,
|
397 |
+
"layer_display_name": "11",
|
398 |
+
"metrics": [
|
399 |
+
{
|
400 |
+
"id": "ndcg_at_5",
|
401 |
+
"display_name": "ndcg_at_5",
|
402 |
+
"description": null,
|
403 |
+
"value": 0.79574
|
404 |
+
},
|
405 |
+
{
|
406 |
+
"id": "ndcg_at_10",
|
407 |
+
"display_name": "ndcg_at_10",
|
408 |
+
"description": null,
|
409 |
+
"value": 0.7872
|
410 |
+
},
|
411 |
+
{
|
412 |
+
"id": "ndcg_at_50",
|
413 |
+
"display_name": "ndcg_at_50",
|
414 |
+
"description": null,
|
415 |
+
"value": 0.76804
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"id": "map_at_5",
|
419 |
+
"display_name": "map_at_5",
|
420 |
+
"description": null,
|
421 |
+
"value": 0.30344
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"id": "map_at_10",
|
425 |
+
"display_name": "map_at_10",
|
426 |
+
"description": null,
|
427 |
+
"value": 0.40308
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"id": "map_at_50",
|
431 |
+
"display_name": "map_at_50",
|
432 |
+
"description": null,
|
433 |
+
"value": 0.59158
|
434 |
+
},
|
435 |
+
{
|
436 |
+
"id": "recall_at_5",
|
437 |
+
"display_name": "recall_at_5",
|
438 |
+
"description": null,
|
439 |
+
"value": 0.31068
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"id": "recall_at_10",
|
443 |
+
"display_name": "recall_at_10",
|
444 |
+
"description": null,
|
445 |
+
"value": 0.41808
|
446 |
+
},
|
447 |
+
{
|
448 |
+
"id": "recall_at_50",
|
449 |
+
"display_name": "recall_at_50",
|
450 |
+
"description": null,
|
451 |
+
"value": 0.64688
|
452 |
+
},
|
453 |
+
{
|
454 |
+
"id": "precision_at_5",
|
455 |
+
"display_name": "precision_at_5",
|
456 |
+
"description": null,
|
457 |
+
"value": 0.70611
|
458 |
+
},
|
459 |
+
{
|
460 |
+
"id": "precision_at_10",
|
461 |
+
"display_name": "precision_at_10",
|
462 |
+
"description": null,
|
463 |
+
"value": 0.63055
|
464 |
+
},
|
465 |
+
{
|
466 |
+
"id": "precision_at_50",
|
467 |
+
"display_name": "precision_at_50",
|
468 |
+
"description": null,
|
469 |
+
"value": 0.36862
|
470 |
+
},
|
471 |
+
{
|
472 |
+
"id": "mrr_at_5",
|
473 |
+
"display_name": "mrr_at_5",
|
474 |
+
"description": null,
|
475 |
+
"value": 0.8521436227224009
|
476 |
+
},
|
477 |
+
{
|
478 |
+
"id": "mrr_at_10",
|
479 |
+
"display_name": "mrr_at_10",
|
480 |
+
"description": null,
|
481 |
+
"value": 0.8555504516919309
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"id": "mrr_at_50",
|
485 |
+
"display_name": "mrr_at_50",
|
486 |
+
"description": null,
|
487 |
+
"value": 0.8571980685347454
|
488 |
+
},
|
489 |
+
{
|
490 |
+
"id": "nauc_ndcg_at_5_max",
|
491 |
+
"display_name": "nauc_ndcg_at_5_max",
|
492 |
+
"description": null,
|
493 |
+
"value": 0.687147173549288
|
494 |
+
},
|
495 |
+
{
|
496 |
+
"id": "nauc_ndcg_at_5_std",
|
497 |
+
"display_name": "nauc_ndcg_at_5_std",
|
498 |
+
"description": null,
|
499 |
+
"value": 0.534917528750057
|
500 |
+
},
|
501 |
+
{
|
502 |
+
"id": "nauc_ndcg_at_5_diff1",
|
503 |
+
"display_name": "nauc_ndcg_at_5_diff1",
|
504 |
+
"description": null,
|
505 |
+
"value": -0.039388068191112346
|
506 |
+
},
|
507 |
+
{
|
508 |
+
"id": "nauc_ndcg_at_10_max",
|
509 |
+
"display_name": "nauc_ndcg_at_10_max",
|
510 |
+
"description": null,
|
511 |
+
"value": 0.6821413074357394
|
512 |
+
},
|
513 |
+
{
|
514 |
+
"id": "nauc_ndcg_at_10_std",
|
515 |
+
"display_name": "nauc_ndcg_at_10_std",
|
516 |
+
"description": null,
|
517 |
+
"value": 0.541004104911246
|
518 |
+
},
|
519 |
+
{
|
520 |
+
"id": "nauc_ndcg_at_10_diff1",
|
521 |
+
"display_name": "nauc_ndcg_at_10_diff1",
|
522 |
+
"description": null,
|
523 |
+
"value": -0.06613569078084217
|
524 |
+
},
|
525 |
+
{
|
526 |
+
"id": "nauc_ndcg_at_50_max",
|
527 |
+
"display_name": "nauc_ndcg_at_50_max",
|
528 |
+
"description": null,
|
529 |
+
"value": 0.6546658854714889
|
530 |
+
},
|
531 |
+
{
|
532 |
+
"id": "nauc_ndcg_at_50_std",
|
533 |
+
"display_name": "nauc_ndcg_at_50_std",
|
534 |
+
"description": null,
|
535 |
+
"value": 0.5141528362539365
|
536 |
+
},
|
537 |
+
{
|
538 |
+
"id": "nauc_ndcg_at_50_diff1",
|
539 |
+
"display_name": "nauc_ndcg_at_50_diff1",
|
540 |
+
"description": null,
|
541 |
+
"value": -0.045010206374762184
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"id": "nauc_map_at_5_max",
|
545 |
+
"display_name": "nauc_map_at_5_max",
|
546 |
+
"description": null,
|
547 |
+
"value": 0.1717014705213338
|
548 |
+
},
|
549 |
+
{
|
550 |
+
"id": "nauc_map_at_5_std",
|
551 |
+
"display_name": "nauc_map_at_5_std",
|
552 |
+
"description": null,
|
553 |
+
"value": 0.298486867259319
|
554 |
+
},
|
555 |
+
{
|
556 |
+
"id": "nauc_map_at_5_diff1",
|
557 |
+
"display_name": "nauc_map_at_5_diff1",
|
558 |
+
"description": null,
|
559 |
+
"value": 0.3158992753503486
|
560 |
+
},
|
561 |
+
{
|
562 |
+
"id": "nauc_map_at_10_max",
|
563 |
+
"display_name": "nauc_map_at_10_max",
|
564 |
+
"description": null,
|
565 |
+
"value": 0.29394629114728443
|
566 |
+
},
|
567 |
+
{
|
568 |
+
"id": "nauc_map_at_10_std",
|
569 |
+
"display_name": "nauc_map_at_10_std",
|
570 |
+
"description": null,
|
571 |
+
"value": 0.4807193931287969
|
572 |
+
},
|
573 |
+
{
|
574 |
+
"id": "nauc_map_at_10_diff1",
|
575 |
+
"display_name": "nauc_map_at_10_diff1",
|
576 |
+
"description": null,
|
577 |
+
"value": 0.200767704240122
|
578 |
+
},
|
579 |
+
{
|
580 |
+
"id": "nauc_map_at_50_max",
|
581 |
+
"display_name": "nauc_map_at_50_max",
|
582 |
+
"description": null,
|
583 |
+
"value": 0.6266013107050147
|
584 |
+
},
|
585 |
+
{
|
586 |
+
"id": "nauc_map_at_50_std",
|
587 |
+
"display_name": "nauc_map_at_50_std",
|
588 |
+
"description": null,
|
589 |
+
"value": 0.5400967080146492
|
590 |
+
},
|
591 |
+
{
|
592 |
+
"id": "nauc_map_at_50_diff1",
|
593 |
+
"display_name": "nauc_map_at_50_diff1",
|
594 |
+
"description": null,
|
595 |
+
"value": -0.06821295960747309
|
596 |
+
},
|
597 |
+
{
|
598 |
+
"id": "nauc_recall_at_5_max",
|
599 |
+
"display_name": "nauc_recall_at_5_max",
|
600 |
+
"description": null,
|
601 |
+
"value": 0.15728927641821855
|
602 |
+
},
|
603 |
+
{
|
604 |
+
"id": "nauc_recall_at_5_std",
|
605 |
+
"display_name": "nauc_recall_at_5_std",
|
606 |
+
"description": null,
|
607 |
+
"value": 0.3020952193182204
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"id": "nauc_recall_at_5_diff1",
|
611 |
+
"display_name": "nauc_recall_at_5_diff1",
|
612 |
+
"description": null,
|
613 |
+
"value": 0.3196038571595756
|
614 |
+
},
|
615 |
+
{
|
616 |
+
"id": "nauc_recall_at_10_max",
|
617 |
+
"display_name": "nauc_recall_at_10_max",
|
618 |
+
"description": null,
|
619 |
+
"value": 0.273851179897414
|
620 |
+
},
|
621 |
+
{
|
622 |
+
"id": "nauc_recall_at_10_std",
|
623 |
+
"display_name": "nauc_recall_at_10_std",
|
624 |
+
"description": null,
|
625 |
+
"value": 0.4822263524474807
|
626 |
+
},
|
627 |
+
{
|
628 |
+
"id": "nauc_recall_at_10_diff1",
|
629 |
+
"display_name": "nauc_recall_at_10_diff1",
|
630 |
+
"description": null,
|
631 |
+
"value": 0.1998852576547706
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"id": "nauc_recall_at_50_max",
|
635 |
+
"display_name": "nauc_recall_at_50_max",
|
636 |
+
"description": null,
|
637 |
+
"value": 0.610064992339158
|
638 |
+
},
|
639 |
+
{
|
640 |
+
"id": "nauc_recall_at_50_std",
|
641 |
+
"display_name": "nauc_recall_at_50_std",
|
642 |
+
"description": null,
|
643 |
+
"value": 0.5237697244132881
|
644 |
+
},
|
645 |
+
{
|
646 |
+
"id": "nauc_recall_at_50_diff1",
|
647 |
+
"display_name": "nauc_recall_at_50_diff1",
|
648 |
+
"description": null,
|
649 |
+
"value": -0.047861477876695854
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"id": "nauc_precision_at_5_max",
|
653 |
+
"display_name": "nauc_precision_at_5_max",
|
654 |
+
"description": null,
|
655 |
+
"value": 0.5642831983945668
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"id": "nauc_precision_at_5_std",
|
659 |
+
"display_name": "nauc_precision_at_5_std",
|
660 |
+
"description": null,
|
661 |
+
"value": 0.41268016275342806
|
662 |
+
},
|
663 |
+
{
|
664 |
+
"id": "nauc_precision_at_5_diff1",
|
665 |
+
"display_name": "nauc_precision_at_5_diff1",
|
666 |
+
"description": null,
|
667 |
+
"value": -0.3902377594145758
|
668 |
+
},
|
669 |
+
{
|
670 |
+
"id": "nauc_precision_at_10_max",
|
671 |
+
"display_name": "nauc_precision_at_10_max",
|
672 |
+
"description": null,
|
673 |
+
"value": 0.4757631079174044
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"id": "nauc_precision_at_10_std",
|
677 |
+
"display_name": "nauc_precision_at_10_std",
|
678 |
+
"description": null,
|
679 |
+
"value": 0.32238368240767273
|
680 |
+
},
|
681 |
+
{
|
682 |
+
"id": "nauc_precision_at_10_diff1",
|
683 |
+
"display_name": "nauc_precision_at_10_diff1",
|
684 |
+
"description": null,
|
685 |
+
"value": -0.4280345103983777
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"id": "nauc_precision_at_50_max",
|
689 |
+
"display_name": "nauc_precision_at_50_max",
|
690 |
+
"description": null,
|
691 |
+
"value": 0.19318747544949869
|
692 |
+
},
|
693 |
+
{
|
694 |
+
"id": "nauc_precision_at_50_std",
|
695 |
+
"display_name": "nauc_precision_at_50_std",
|
696 |
+
"description": null,
|
697 |
+
"value": -0.2262940005534252
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"id": "nauc_precision_at_50_diff1",
|
701 |
+
"display_name": "nauc_precision_at_50_diff1",
|
702 |
+
"description": null,
|
703 |
+
"value": -0.2898939009819229
|
704 |
+
},
|
705 |
+
{
|
706 |
+
"id": "nauc_mrr_at_5_max",
|
707 |
+
"display_name": "nauc_mrr_at_5_max",
|
708 |
+
"description": null,
|
709 |
+
"value": 0.7559907957579797
|
710 |
+
},
|
711 |
+
{
|
712 |
+
"id": "nauc_mrr_at_5_std",
|
713 |
+
"display_name": "nauc_mrr_at_5_std",
|
714 |
+
"description": null,
|
715 |
+
"value": 0.5232164154691852
|
716 |
+
},
|
717 |
+
{
|
718 |
+
"id": "nauc_mrr_at_5_diff1",
|
719 |
+
"display_name": "nauc_mrr_at_5_diff1",
|
720 |
+
"description": null,
|
721 |
+
"value": 0.016325972601983724
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"id": "nauc_mrr_at_10_max",
|
725 |
+
"display_name": "nauc_mrr_at_10_max",
|
726 |
+
"description": null,
|
727 |
+
"value": 0.7604182097391701
|
728 |
+
},
|
729 |
+
{
|
730 |
+
"id": "nauc_mrr_at_10_std",
|
731 |
+
"display_name": "nauc_mrr_at_10_std",
|
732 |
+
"description": null,
|
733 |
+
"value": 0.5188685708290457
|
734 |
+
},
|
735 |
+
{
|
736 |
+
"id": "nauc_mrr_at_10_diff1",
|
737 |
+
"display_name": "nauc_mrr_at_10_diff1",
|
738 |
+
"description": null,
|
739 |
+
"value": 0.008720431706015956
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"id": "nauc_mrr_at_50_max",
|
743 |
+
"display_name": "nauc_mrr_at_50_max",
|
744 |
+
"description": null,
|
745 |
+
"value": 0.7617325890747185
|
746 |
+
},
|
747 |
+
{
|
748 |
+
"id": "nauc_mrr_at_50_std",
|
749 |
+
"display_name": "nauc_mrr_at_50_std",
|
750 |
+
"description": null,
|
751 |
+
"value": 0.5213157058041827
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"id": "nauc_mrr_at_50_diff1",
|
755 |
+
"display_name": "nauc_mrr_at_50_diff1",
|
756 |
+
"description": null,
|
757 |
+
"value": 0.015621035073521741
|
758 |
+
}
|
759 |
+
]
|
760 |
+
}
|
761 |
+
]
|
762 |
+
}
|
leaderboard/submissions/esm2_t12_35M_UR50D/fefe_phylogeny.json
ADDED
@@ -0,0 +1,90 @@
+{
+  "task": {
+    "id": "fefe_phylogeny",
+    "display_name": "FeFeHydrogenase Phylogeny",
+    "description": "Evaluate on FeFeHydrogenase phylogeny distance correlation task.",
+    "modality": "protein",
+    "type": "eds",
+    "datasets": [
+      {
+        "path": "tattabio/fefe_phylogeny_sequences",
+        "revision": "bce06d79d9ce58413e7bcbed6943905d1afb8b26"
+      },
+      {
+        "path": "tattabio/fefe_phylogeny_distances",
+        "revision": "d6357cee9b4071a8dcdeef54083006f0d5e94fd2"
+      }
+    ],
+    "primary_metric_id": "top_corr"
+  },
+  "model": {
+    "hf_name": "facebook/esm2_t12_35M_UR50D",
+    "revision": "...",
+    "num_layers": 12,
+    "num_params": 33992881,
+    "embed_dim": 480
+  },
+  "dgeb_version": "0.0.0",
+  "results": [
+    {
+      "layer_number": 6,
+      "layer_display_name": "6",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.46213607103563425
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.5621218764061721
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.5442663405841599
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.5621218764061721
+        }
+      ]
+    },
+    {
+      "layer_number": 11,
+      "layer_display_name": "11",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.1524486344353939
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.5194125891005561
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.48868066660269227
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.5194125891005561
+        }
+      ]
+    }
+  ]
+}
leaderboard/submissions/esm2_t12_35M_UR50D/modac_paralogy_bigene.json
ADDED
@@ -0,0 +1,97 @@
+{
+  "task": {
+    "id": "modac_paralogy_bigene",
+    "display_name": "ModAC Paralogy BiGene",
+    "description": "Evaluate on paralogy bitext matching task between paralogous protein (ModA and ModC).",
+    "modality": "protein",
+    "type": "bigene_mining",
+    "datasets": [
+      {
+        "path": "tattabio/modac_paralogy_bigene",
+        "revision": "241ca6397856e3360da04422d54933035b1fab87"
+      }
+    ],
+    "primary_metric_id": "recall_at_50"
+  },
+  "model": {
+    "hf_name": "facebook/esm2_t12_35M_UR50D",
+    "num_layers": 12,
+    "num_params": 33992881,
+    "embed_dim": 480
+  },
+  "dgeb_version": "0.0.0",
+  "results": [
+    {
+      "layer_number": 6,
+      "layer_display_name": "6",
+      "metrics": [
+        {
+          "id": "precision",
+          "display_name": "precision",
+          "description": null,
+          "value": 4.4952467261118094e-7
+        },
+        {
+          "id": "recall",
+          "display_name": "recall",
+          "description": null,
+          "value": 0.0006702412868632708
+        },
+        {
+          "id": "f1",
+          "display_name": "f1",
+          "description": null,
+          "value": 8.984467652322665e-7
+        },
+        {
+          "id": "accuracy",
+          "display_name": "accuracy",
+          "description": null,
+          "value": 0.0006702412868632708
+        },
+        {
+          "id": "recall_at_50",
+          "display_name": "recall_at_50",
+          "description": null,
+          "value": 0.03485254691689008
+        }
+      ]
+    },
+    {
+      "layer_number": 11,
+      "layer_display_name": "11",
+      "metrics": [
+        {
+          "id": "precision",
+          "display_name": "precision",
+          "description": null,
+          "value": 4.4952467261118094e-7
+        },
+        {
+          "id": "recall",
+          "display_name": "recall",
+          "description": null,
+          "value": 0.0006702412868632708
+        },
+        {
+          "id": "f1",
+          "display_name": "f1",
+          "description": null,
+          "value": 8.984467652322665e-7
+        },
+        {
+          "id": "accuracy",
+          "display_name": "accuracy",
+          "description": null,
+          "value": 0.0006702412868632708
+        },
+        {
+          "id": "recall_at_50",
+          "display_name": "recall_at_50",
+          "description": null,
+          "value": 0.05361930294906166
+        }
+      ]
+    }
+  ]
+}
leaderboard/submissions/esm2_t12_35M_UR50D/mopb_clustering.json
ADDED
@@ -0,0 +1,50 @@
+{
+  "task": {
+    "id": "mopb_clustering",
+    "display_name": "MopB Clustering",
+    "description": "Evaluate on MopB clustering task.",
+    "modality": "protein",
+    "type": "clustering",
+    "datasets": [
+      {
+        "path": "tattabio/mopb_clustering",
+        "revision": "eed4bfff9c5bd2dc2500c50757bfcb90425d999a"
+      }
+    ],
+    "primary_metric_id": "v_measure"
+  },
+  "model": {
+    "hf_name": "facebook/esm2_t12_35M_UR50D",
+    "revision": "...",
+    "num_layers": 12,
+    "num_params": 33992881,
+    "embed_dim": 480
+  },
+  "dgeb_version": "0.0.0",
+  "results": [
+    {
+      "layer_number": 6,
+      "layer_display_name": "6",
+      "metrics": [
+        {
+          "id": "v_measure",
+          "display_name": "v_measure",
+          "description": null,
+          "value": 0.7366377426487285
+        }
+      ]
+    },
+    {
+      "layer_number": 11,
+      "layer_display_name": "11",
+      "metrics": [
+        {
+          "id": "v_measure",
+          "display_name": "v_measure",
+          "description": null,
+          "value": 0.7842647128962572
+        }
+      ]
+    }
+  ]
+}
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_arch_phylogeny.json
ADDED
@@ -0,0 +1,90 @@
+{
+  "task": {
+    "id": "rpob_arch_phylogeny",
+    "display_name": "RpoB Archaeal Phylogeny",
+    "description": "Evaluate on RpoB phylogeny distance correlation task for Archaeal sequences.",
+    "modality": "protein",
+    "type": "eds",
+    "datasets": [
+      {
+        "path": "tattabio/rpob_arch_phylogeny_sequences",
+        "revision": "10de75b9f5ad12340d629fd1ad015ef4319d6ee4"
+      },
+      {
+        "path": "tattabio/rpob_arch_phylogeny_distances",
+        "revision": "2a585f0e135fe74b8ae6d31e7801c6031b0dcc18"
+      }
+    ],
+    "primary_metric_id": "top_corr"
+  },
+  "model": {
+    "hf_name": "facebook/esm2_t12_35M_UR50D",
+    "revision": "...",
+    "num_layers": 12,
+    "num_params": 33992881,
+    "embed_dim": 480
+  },
+  "dgeb_version": "0.0.0",
+  "results": [
+    {
+      "layer_number": 6,
+      "layer_display_name": "6",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.2624971928673971
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.31502824152693154
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.3088945849814121
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.31502824152693154
+        }
+      ]
+    },
+    {
+      "layer_number": 11,
+      "layer_display_name": "11",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.34668475738519444
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.372455403853565
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.369729316093801
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.372455403853565
+        }
+      ]
+    }
+  ]
+}
leaderboard/submissions/esm2_t12_35M_UR50D/rpob_bac_phylogeny.json
ADDED
@@ -0,0 +1,90 @@
+{
+  "task": {
+    "id": "rpob_bac_phylogeny",
+    "display_name": "RpoB Bacterial Phylogeny",
+    "description": "Evaluate on RpoB phylogeny distance correlation task for Bacterial sequences.",
+    "modality": "protein",
+    "type": "eds",
+    "datasets": [
+      {
+        "path": "tattabio/rpob_bac_phylogeny_sequences",
+        "revision": "b833ef8d8d873ea5387540562873f41d073d3e03"
+      },
+      {
+        "path": "tattabio/rpob_bac_phylogeny_distances",
+        "revision": "0594e1501ac9fd0e3de49257b8ec318c2a0ea6f7"
+      }
+    ],
+    "primary_metric_id": "top_corr"
+  },
+  "model": {
+    "hf_name": "facebook/esm2_t12_35M_UR50D",
+    "revision": "...",
+    "num_layers": 12,
+    "num_params": 33992881,
+    "embed_dim": 480
+  },
+  "dgeb_version": "0.0.0",
+  "results": [
+    {
+      "layer_number": 6,
+      "layer_display_name": "6",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.12971577033648743
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.18177734472255433
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.16423413011355156
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.18177734472255433
+        }
+      ]
+    },
+    {
+      "layer_number": 11,
+      "layer_display_name": "11",
+      "metrics": [
+        {
+          "id": "cos_sim",
+          "display_name": "cos_sim",
+          "description": null,
+          "value": 0.10194557773024183
+        },
+        {
+          "id": "manhattan",
+          "display_name": "manhattan",
+          "description": null,
+          "value": 0.18622026845391912
+        },
+        {
+          "id": "euclidean",
+          "display_name": "euclidean",
+          "description": null,
+          "value": 0.15405389239655473
+        },
+        {
+          "id": "top_corr",
+          "display_name": "top_corr",
+          "description": null,
+          "value": 0.18622026845391912
+        }
+      ]
+    }
+  ]
+}
leaderboard/submissions/esm2_t12_35M_UR50D/vibrio_operonic_pair.json
ADDED
@@ -0,0 +1,386 @@
1 |
+
{
|
2 |
+
"task": {
|
3 |
+
"id": "vibrio_operonic_pair",
|
4 |
+
"display_name": "Vibrio Operonic Pair",
|
5 |
+
"description": "Evaluate on Vibrio operonic pair classification task.",
|
6 |
+
"modality": "protein",
|
7 |
+
"type": "pair_classification",
|
8 |
+
"datasets": [
|
9 |
+
{
|
10 |
+
"path": "tattabio/vibrio_operonic_pair",
|
11 |
+
"revision": "24781b12b45bf81a079a6164ef0d2124948c1878"
|
12 |
+
}
|
13 |
+
],
|
14 |
+
"primary_metric_id": "top_ap"
|
15 |
+
},
|
16 |
+
"model": {
|
17 |
+
"hf_name": "facebook/esm2_t12_35M_UR50D",
|
18 |
+
"revision": "...",
|
19 |
+
"num_layers": 12,
|
20 |
+
"num_params": 33992881,
|
21 |
+
"embed_dim": 480
|
22 |
+
},
|
23 |
+
"dgeb_version": "0.0.0",
|
24 |
+
"results": [
|
25 |
+
{
|
26 |
+
"layer_number": 6,
|
27 |
+
"layer_display_name": "6",
|
28 |
+
"metrics": [
|
29 |
+
{
|
30 |
+
"id": "cos_sim_accuracy",
|
31 |
+
"display_name": "cos_sim_accuracy",
|
32 |
+
"description": null,
|
33 |
+
"value": 0.6781966575981345
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"id": "cos_sim_accuracy_threshold",
|
37 |
+
"display_name": "cos_sim_accuracy_threshold",
|
38 |
+
"description": null,
|
39 |
+
"value": 0.970278263092041
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"id": "cos_sim_f1",
|
43 |
+
"display_name": "cos_sim_f1",
|
44 |
+
"description": null,
|
45 |
+
"value": 0.518608169440242
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"id": "cos_sim_f1_threshold",
|
49 |
+
"display_name": "cos_sim_f1_threshold",
|
50 |
+
"description": null,
|
51 |
+
"value": 0.8757017254829407
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"id": "cos_sim_precision",
|
55 |
+
"display_name": "cos_sim_precision",
|
56 |
+
"description": null,
|
57 |
+
"value": 0.35501242750621376
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"id": "cos_sim_recall",
|
61 |
+
"display_name": "cos_sim_recall",
|
62 |
+
"description": null,
|
63 |
+
"value": 0.9618406285072951
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"id": "cos_sim_ap",
|
67 |
+
"display_name": "cos_sim_ap",
|
68 |
+
"description": null,
|
69 |
+
"value": 0.4581544787406372
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"id": "manhattan_accuracy",
|
73 |
+
"display_name": "manhattan_accuracy",
|
74 |
+
"description": null,
|
75 |
+
"value": 0.6731441896618733
|
76 |
+
},
|
77 |
+
{
|
          "id": "manhattan_accuracy_threshold", "display_name": "manhattan_accuracy_threshold", "description": null, "value": 137.3688507080078 },
        { "id": "manhattan_f1", "display_name": "manhattan_f1", "description": null, "value": 0.5146164978292329 },
        { "id": "manhattan_f1_threshold", "display_name": "manhattan_f1_threshold", "description": null, "value": 391.87298583984375 },
        { "id": "manhattan_precision", "display_name": "manhattan_precision", "description": null, "value": 0.3467238689547582 },
        { "id": "manhattan_recall", "display_name": "manhattan_recall", "description": null, "value": 0.9977553310886644 },
        { "id": "manhattan_ap", "display_name": "manhattan_ap", "description": null, "value": 0.4383109013756369 },
        { "id": "euclidean_accuracy", "display_name": "euclidean_accuracy", "description": null, "value": 0.672755538282161 },
        { "id": "euclidean_accuracy_threshold", "display_name": "euclidean_accuracy_threshold", "description": null, "value": 8.506048202514648 },
        { "id": "euclidean_f1", "display_name": "euclidean_f1", "description": null, "value": 0.5152786099460755 },
        { "id": "euclidean_f1_threshold", "display_name": "euclidean_f1_threshold", "description": null, "value": 21.124141693115234 },
        { "id": "euclidean_precision", "display_name": "euclidean_precision", "description": null, "value": 0.35145075602778914 },
        { "id": "euclidean_recall", "display_name": "euclidean_recall", "description": null, "value": 0.9652076318742986 },
        { "id": "euclidean_ap", "display_name": "euclidean_ap", "description": null, "value": 0.4438681594614018 },
        { "id": "dot_accuracy", "display_name": "dot_accuracy", "description": null, "value": 0.6599300427516518 },
        { "id": "dot_accuracy_threshold", "display_name": "dot_accuracy_threshold", "description": null, "value": 1570.195556640625 },
        { "id": "dot_f1", "display_name": "dot_f1", "description": null, "value": 0.5147654892877822 },
        { "id": "dot_f1_threshold", "display_name": "dot_f1_threshold", "description": null, "value": 898.4225463867188 },
        { "id": "dot_precision", "display_name": "dot_precision", "description": null, "value": 0.3468591494342567 },
        { "id": "dot_recall", "display_name": "dot_recall", "description": null, "value": 0.9977553310886644 },
        { "id": "dot_ap", "display_name": "dot_ap", "description": null, "value": 0.4179931403914694 },
        { "id": "top_ap", "display_name": "top_ap", "description": null, "value": 0.4581544787406372 }
      ]
    },
    {
      "layer_number": 11,
      "layer_display_name": "11",
      "metrics": [
        { "id": "cos_sim_accuracy", "display_name": "cos_sim_accuracy", "description": null, "value": 0.6746987951807228 },
        { "id": "cos_sim_accuracy_threshold", "display_name": "cos_sim_accuracy_threshold", "description": null, "value": 0.9681814312934875 },
        { "id": "cos_sim_f1", "display_name": "cos_sim_f1", "description": null, "value": 0.5363604114934374 },
        { "id": "cos_sim_f1_threshold", "display_name": "cos_sim_f1_threshold", "description": null, "value": 0.9120055437088013 },
        { "id": "cos_sim_precision", "display_name": "cos_sim_precision", "description": null, "value": 0.3921161825726141 },
        { "id": "cos_sim_recall", "display_name": "cos_sim_recall", "description": null, "value": 0.8484848484848485 },
        { "id": "cos_sim_ap", "display_name": "cos_sim_ap", "description": null, "value": 0.46704651746605186 },
        { "id": "manhattan_accuracy", "display_name": "manhattan_accuracy", "description": null, "value": 0.6746987951807228 },
        { "id": "manhattan_accuracy_threshold", "display_name": "manhattan_accuracy_threshold", "description": null, "value": 360.30352783203125 },
        { "id": "manhattan_f1", "display_name": "manhattan_f1", "description": null, "value": 0.5305821665438467 },
        { "id": "manhattan_f1_threshold", "display_name": "manhattan_f1_threshold", "description": null, "value": 576.9113159179688 },
        { "id": "manhattan_precision", "display_name": "manhattan_precision", "description": null, "value": 0.3949533735600658 },
        { "id": "manhattan_recall", "display_name": "manhattan_recall", "description": null, "value": 0.8080808080808081 },
        { "id": "manhattan_ap", "display_name": "manhattan_ap", "description": null, "value": 0.468990806236423 },
        { "id": "euclidean_accuracy", "display_name": "euclidean_accuracy", "description": null, "value": 0.6758647493198601 },
        { "id": "euclidean_accuracy_threshold", "display_name": "euclidean_accuracy_threshold", "description": null, "value": 22.342727661132812 },
        { "id": "euclidean_f1", "display_name": "euclidean_f1", "description": null, "value": 0.5301837270341208 },
        { "id": "euclidean_f1_threshold", "display_name": "euclidean_f1_threshold", "description": null, "value": 39.38741683959961 },
        { "id": "euclidean_precision", "display_name": "euclidean_precision", "description": null, "value": 0.37459434399629116 },
        { "id": "euclidean_recall", "display_name": "euclidean_recall", "description": null, "value": 0.9068462401795735 },
        { "id": "euclidean_ap", "display_name": "euclidean_ap", "description": null, "value": 0.46775797789146023 },
        { "id": "dot_accuracy", "display_name": "dot_accuracy", "description": null, "value": 0.6541002720559658 },
        { "id": "dot_accuracy_threshold", "display_name": "dot_accuracy_threshold", "description": null, "value": 9448.685546875 },
        { "id": "dot_f1", "display_name": "dot_f1", "description": null, "value": 0.5145827317354895 },
        { "id": "dot_f1_threshold", "display_name": "dot_f1_threshold", "description": null, "value": 4854.8955078125 },
        { "id": "dot_precision", "display_name": "dot_precision", "description": null, "value": 0.3464230171073095 },
        { "id": "dot_recall", "display_name": "dot_recall", "description": null, "value": 1.0 },
        { "id": "dot_ap", "display_name": "dot_ap", "description": null, "value": 0.3679854825040224 },
        { "id": "top_ap", "display_name": "top_ap", "description": null, "value": 0.468990806236423 }
      ]
    }
  ]
}
leaderboard/submissions/esm2_t30_150M_UR50D/MIBIG_protein_classification.json
ADDED
@@ -0,0 +1,98 @@
{
  "task": {
    "id": "MIBIG_protein_classification",
    "display_name": "MIBiG Classification",
    "description": "Biosynthetic Gene cluster classification using protein sequences on MIBIG dataset.",
    "modality": "protein",
    "type": "classification",
    "datasets": [
      { "path": "tattabio/mibig_classification_prot", "revision": "915a7ff28dc9820e35c4d7fd03d4c8c44a88ff1f" }
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t30_150M_UR50D",
    "revision": "...",
    "num_layers": 30,
    "num_params": 148795481,
    "embed_dim": 640
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 15,
      "layer_display_name": "15",
      "metrics": [
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.721568117708931 },
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.7165532879818595 },
        { "id": "precision", "display_name": "precision", "description": null, "value": 0.820388189148414 },
        { "id": "recall", "display_name": "recall", "description": null, "value": 0.6689951528396479 },
        { "id": "lrap", "display_name": "lrap", "description": null, "value": 0.8363567649281944 }
      ]
    },
    {
      "layer_number": 29,
      "layer_display_name": "29",
      "metrics": [
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.6298307655443518 },
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.6099773242630385 },
        { "id": "precision", "display_name": "precision", "description": null, "value": 0.7648458169950588 },
        { "id": "recall", "display_name": "recall", "description": null, "value": 0.5789820341918578 },
        { "id": "lrap", "display_name": "lrap", "description": null, "value": 0.752078609221467 }
      ]
    }
  ]
}
leaderboard/submissions/esm2_t30_150M_UR50D/arch_retrieval.json
ADDED
@@ -0,0 +1,762 @@
{
  "task": {
    "id": "arch_retrieval",
    "display_name": "Arch Retrieval",
    "description": "Retrieves bacterial proteins with similar swissprot annotations to a query archaeal protein",
    "modality": "protein",
    "type": "retrieval",
    "datasets": [
      { "path": "tattabio/arch_retrieval", "revision": "a19124322604a21b26b1b3c13a1bd0b8a63c9f7b" },
      { "path": "tattabio/arch_retrieval_qrels", "revision": "3f142f2f9a0995d56c6e77188c7251761450afcf" }
    ],
    "primary_metric_id": "map_at_5"
  },
  "model": {
    "hf_name": "facebook/esm2_t30_150M_UR50D",
    "revision": "...",
    "num_layers": 30,
    "num_params": 148795481,
    "embed_dim": 640
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 15,
      "layer_display_name": "15",
      "metrics": [
        { "id": "ndcg_at_5", "display_name": "ndcg_at_5", "description": null, "value": 0.91537 },
        { "id": "ndcg_at_10", "display_name": "ndcg_at_10", "description": null, "value": 0.90635 },
        { "id": "ndcg_at_50", "display_name": "ndcg_at_50", "description": null, "value": 0.87424 },
        { "id": "map_at_5", "display_name": "map_at_5", "description": null, "value": 0.30526 },
        { "id": "map_at_10", "display_name": "map_at_10", "description": null, "value": 0.42635 },
        { "id": "map_at_50", "display_name": "map_at_50", "description": null, "value": 0.72433 },
        { "id": "recall_at_5", "display_name": "recall_at_5", "description": null, "value": 0.31067 },
        { "id": "recall_at_10", "display_name": "recall_at_10", "description": null, "value": 0.4378 },
        { "id": "recall_at_50", "display_name": "recall_at_50", "description": null, "value": 0.75859 },
        { "id": "precision_at_5", "display_name": "precision_at_5", "description": null, "value": 0.82689 },
        { "id": "precision_at_10", "display_name": "precision_at_10", "description": null, "value": 0.76159 },
        { "id": "precision_at_50", "display_name": "precision_at_50", "description": null, "value": 0.46726 },
        { "id": "mrr_at_5", "display_name": "mrr_at_5", "description": null, "value": 0.9422321809645754 },
        { "id": "mrr_at_10", "display_name": "mrr_at_10", "description": null, "value": 0.9439900344829917 },
        { "id": "mrr_at_50", "display_name": "mrr_at_50", "description": null, "value": 0.9446453591992101 },
        { "id": "nauc_ndcg_at_5_max", "display_name": "nauc_ndcg_at_5_max", "description": null, "value": 0.6549640359156222 },
        { "id": "nauc_ndcg_at_5_std", "display_name": "nauc_ndcg_at_5_std", "description": null, "value": 0.11037035667235007 },
        { "id": "nauc_ndcg_at_5_diff1", "display_name": "nauc_ndcg_at_5_diff1", "description": null, "value": -0.41554431142868614 },
        { "id": "nauc_ndcg_at_10_max", "display_name": "nauc_ndcg_at_10_max", "description": null, "value": 0.6536082943031309 },
        { "id": "nauc_ndcg_at_10_std", "display_name": "nauc_ndcg_at_10_std", "description": null, "value": 0.140251553474609 },
        { "id": "nauc_ndcg_at_10_diff1", "display_name": "nauc_ndcg_at_10_diff1", "description": null, "value": -0.4541965457157918 },
        { "id": "nauc_ndcg_at_50_max", "display_name": "nauc_ndcg_at_50_max", "description": null, "value": 0.6159871931946869 },
        { "id": "nauc_ndcg_at_50_std", "display_name": "nauc_ndcg_at_50_std", "description": null, "value": 0.006651176818080506 },
        { "id": "nauc_ndcg_at_50_diff1", "display_name": "nauc_ndcg_at_50_diff1", "description": null, "value": -0.39627086499203873 },
        { "id": "nauc_map_at_5_max", "display_name": "nauc_map_at_5_max", "description": null, "value": -0.047556791244411895 },
        { "id": "nauc_map_at_5_std", "display_name": "nauc_map_at_5_std", "description": null, "value": 0.16420917659496206 },
        { "id": "nauc_map_at_5_diff1", "display_name": "nauc_map_at_5_diff1", "description": null, "value": 0.28627326792803204 },
        { "id": "nauc_map_at_10_max", "display_name": "nauc_map_at_10_max", "description": null, "value": 0.06426190649373154 },
        { "id": "nauc_map_at_10_std", "display_name": "nauc_map_at_10_std", "description": null, "value": 0.23746446970773183 },
        { "id": "nauc_map_at_10_diff1", "display_name": "nauc_map_at_10_diff1", "description": null, "value": 0.15565045001627686 },
        { "id": "nauc_map_at_50_max", "display_name": "nauc_map_at_50_max", "description": null, "value": 0.5237897180891637 },
        { "id": "nauc_map_at_50_std", "display_name": "nauc_map_at_50_std", "description": null, "value": 0.1865080232459892 },
        { "id": "nauc_map_at_50_diff1", "display_name": "nauc_map_at_50_diff1", "description": null, "value": -0.2688572949738638 },
        { "id": "nauc_recall_at_5_max", "display_name": "nauc_recall_at_5_max", "description": null, "value": -0.054074967730710764 },
        { "id": "nauc_recall_at_5_std", "display_name": "nauc_recall_at_5_std", "description": null, "value": 0.1711511016438979 },
        { "id": "nauc_recall_at_5_diff1", "display_name": "nauc_recall_at_5_diff1", "description": null, "value": 0.2896050332877169 },
        { "id": "nauc_recall_at_10_max", "display_name": "nauc_recall_at_10_max", "description": null, "value": 0.05005034152582497 },
        { "id": "nauc_recall_at_10_std", "display_name": "nauc_recall_at_10_std", "description": null, "value": 0.24918235642253458 },
        { "id": "nauc_recall_at_10_diff1", "display_name": "nauc_recall_at_10_diff1", "description": null, "value": 0.16768640965952947 },
        { "id": "nauc_recall_at_50_max", "display_name": "nauc_recall_at_50_max", "description": null, "value": 0.5114754425984644 },
        { "id": "nauc_recall_at_50_std", "display_name": "nauc_recall_at_50_std", "description": null, "value": 0.2173420630028766 },
        { "id": "nauc_recall_at_50_diff1", "display_name": "nauc_recall_at_50_diff1", "description": null, "value": -0.2526274232326276 },
        { "id": "nauc_precision_at_5_max", "display_name": "nauc_precision_at_5_max", "description": null, "value": 0.5525639421444303 },
        { "id": "nauc_precision_at_5_std", "display_name": "nauc_precision_at_5_std", "description": null, "value": 0.01857146637175079 },
        { "id": "nauc_precision_at_5_diff1", "display_name": "nauc_precision_at_5_diff1", "description": null, "value": -0.7765476306675947 },
        { "id": "nauc_precision_at_10_max", "display_name": "nauc_precision_at_10_max", "description": null, "value": 0.48362026531371466 },
        { "id": "nauc_precision_at_10_std", "display_name": "nauc_precision_at_10_std", "description": null, "value": -0.0051297270434755475 },
        { "id": "nauc_precision_at_10_diff1", "display_name": "nauc_precision_at_10_diff1", "description": null, "value": -0.7004665714420365 },
        { "id": "nauc_precision_at_50_max", "display_name": "nauc_precision_at_50_max", "description": null, "value": 0.24671476154878727 },
        { "id": "nauc_precision_at_50_std", "display_name": "nauc_precision_at_50_std", "description": null, "value": -0.37006645670815747 },
        { "id": "nauc_precision_at_50_diff1", "display_name": "nauc_precision_at_50_diff1", "description": null, "value": -0.36951553698605216 },
        { "id": "nauc_mrr_at_5_max", "display_name": "nauc_mrr_at_5_max", "description": null, "value": 0.64312359548717 },
        { "id": "nauc_mrr_at_5_std", "display_name": "nauc_mrr_at_5_std", "description": null, "value": 0.04622765419712948 },
        { "id": "nauc_mrr_at_5_diff1", "display_name": "nauc_mrr_at_5_diff1", "description": null, "value": -0.22259410250972433 },
        { "id": "nauc_mrr_at_10_max", "display_name": "nauc_mrr_at_10_max", "description": null, "value": 0.6385468425832173 },
        { "id": "nauc_mrr_at_10_std", "display_name": "nauc_mrr_at_10_std", "description": null, "value": 0.058640802937365115 },
        { "id": "nauc_mrr_at_10_diff1", "display_name": "nauc_mrr_at_10_diff1", "description": null, "value": -0.21579087208897282 },
        { "id": "nauc_mrr_at_50_max", "display_name": "nauc_mrr_at_50_max", "description": null, "value": 0.6402042049799889 },
        { "id": "nauc_mrr_at_50_std", "display_name": "nauc_mrr_at_50_std", "description": null, "value": 0.052782783025246006 },
        { "id": "nauc_mrr_at_50_diff1", "display_name": "nauc_mrr_at_50_diff1", "description": null, "value": -0.21896215733129423 }
      ]
    },
    {
      "layer_number": 29,
      "layer_display_name": "29",
      "metrics": [
        { "id": "ndcg_at_5", "display_name": "ndcg_at_5", "description": null, "value": 0.83285 },
        { "id": "ndcg_at_10", "display_name": "ndcg_at_10", "description": null, "value": 0.81413 },
        { "id": "ndcg_at_50", "display_name": "ndcg_at_50", "description": null, "value": 0.76701 },
        { "id": "map_at_5", "display_name": "map_at_5", "description": null, "value": 0.25404 },
        { "id": "map_at_10", "display_name": "map_at_10", "description": null, "value": 0.35083 },
        { "id": "map_at_50", "display_name": "map_at_50", "description": null, "value": 0.58387 },
        { "id": "recall_at_5", "display_name": "recall_at_5", "description": null, "value": 0.266 },
        { "id": "recall_at_10", "display_name": "recall_at_10", "description": null, "value": 0.37545 },
        { "id": "recall_at_50", "display_name": "recall_at_50", "description": null, "value": 0.66303 },
        { "id": "precision_at_5", "display_name": "precision_at_5", "description": null, "value": 0.75621 },
        { "id": "precision_at_10", "display_name": "precision_at_10", "description": null, "value": 0.6866 },
        { "id": "precision_at_50", "display_name": "precision_at_50", "description": null, "value": 0.41047 },
        { "id": "mrr_at_5", "display_name": "mrr_at_5", "description": null, "value": 0.8947289799402471 },
        { "id": "mrr_at_10", "display_name": "mrr_at_10", "description": null, "value": 0.895975855130784 },
        { "id": "mrr_at_50", "display_name": "mrr_at_50", "description": null, "value": 0.8970771214115124 },
        { "id": "nauc_ndcg_at_5_max", "display_name": "nauc_ndcg_at_5_max", "description": null, "value": 0.6033756709037629 },
        { "id": "nauc_ndcg_at_5_std", "display_name": "nauc_ndcg_at_5_std", "description": null, "value": 0.48175424620769186 },
        { "id": "nauc_ndcg_at_5_diff1", "display_name": "nauc_ndcg_at_5_diff1", "description": null, "value": -0.1614695329433979 },
        { "id": "nauc_ndcg_at_10_max", "display_name": "nauc_ndcg_at_10_max", "description": null, "value": 0.5820557360820439 },
        { "id": "nauc_ndcg_at_10_std", "display_name": "nauc_ndcg_at_10_std", "description": null, "value": 0.48937482522317327 },
        { "id": "nauc_ndcg_at_10_diff1", "display_name": "nauc_ndcg_at_10_diff1", "description": null, "value": -0.18205509390904553 },
        { "id": "nauc_ndcg_at_50_max", "display_name": "nauc_ndcg_at_50_max", "description": null, "value": 0.49384788238425553 },
        { "id": "nauc_ndcg_at_50_std", "display_name": "nauc_ndcg_at_50_std", "description": null, "value": 0.354953353704701 },
        { "id": "nauc_ndcg_at_50_diff1", "display_name": "nauc_ndcg_at_50_diff1", "description": null, "value": -0.10767304568721194 },
        { "id": "nauc_map_at_5_max", "display_name": "nauc_map_at_5_max", "description": null, "value": 0.03598090314920231 },
        { "id": "nauc_map_at_5_std", "display_name": "nauc_map_at_5_std", "description": null, "value": 0.11662947626949612 },
        { "id": "nauc_map_at_5_diff1", "display_name": "nauc_map_at_5_diff1", "description": null, "value": 0.28974453988735166 },
        { "id": "nauc_map_at_10_max", "display_name": "nauc_map_at_10_max", "description": null, "value": 0.13482748795676255 },
        { "id": "nauc_map_at_10_std", "display_name": "nauc_map_at_10_std", "description": null, "value": 0.22360013731689057 },
        { "id": "nauc_map_at_10_diff1", "display_name": "nauc_map_at_10_diff1", "description": null, "value": 0.19043309088480928 },
        { "id": "nauc_map_at_50_max", "display_name": "nauc_map_at_50_max", "description": null, "value": 0.42287317105206507 },
        { "id": "nauc_map_at_50_std", "display_name": "nauc_map_at_50_std", "description": null, "value": 0.32712992457779794 },
        { "id": "nauc_map_at_50_diff1", "display_name": "nauc_map_at_50_diff1", "description": null, "value": -0.02056986996465222 },
        { "id": "nauc_recall_at_5_max", "display_name": "nauc_recall_at_5_max", "description": null, "value": 0.021824220192766298 },
        { "id": "nauc_recall_at_5_std", "display_name": "nauc_recall_at_5_std", "description": null, "value": 0.11009705855814085 },
        { "id": "nauc_recall_at_5_diff1", "display_name": "nauc_recall_at_5_diff1", "description": null, "value": 0.28505819859304804 },
        { "id": "nauc_recall_at_10_max", "display_name": "nauc_recall_at_10_max", "description": null, "value": 0.10661440304261144 },
        { "id": "nauc_recall_at_10_std", "display_name": "nauc_recall_at_10_std", "description": null, "value": 0.2092712287791401 },
        { "id": "nauc_recall_at_10_diff1", "display_name": "nauc_recall_at_10_diff1", "description": null, "value": 0.19742570630860265 },
        { "id": "nauc_recall_at_50_max", "display_name": "nauc_recall_at_50_max", "description": null, "value": 0.38620604109572715 },
        { "id": "nauc_recall_at_50_std", "display_name": "nauc_recall_at_50_std", "description": null, "value": 0.2924386961038862 },
        { "id": "nauc_recall_at_50_diff1", "display_name": "nauc_recall_at_50_diff1", "description": null, "value": 0.025319280347884648 },
        { "id": "nauc_precision_at_5_max", "display_name": "nauc_precision_at_5_max", "description": null, "value": 0.5425386973889819 },
        { "id": "nauc_precision_at_5_std", "display_name": "nauc_precision_at_5_std", "description": null, "value": 0.4063280755847313 },
        { "id": "nauc_precision_at_5_diff1", "display_name": "nauc_precision_at_5_diff1", "description": null, "value": -0.43965420847555414 },
        { "id": "nauc_precision_at_10_max", "display_name": "nauc_precision_at_10_max", "description": null, "value": 0.4721960038905336 },
        { "id": "nauc_precision_at_10_std", "display_name": "nauc_precision_at_10_std", "description": null, "value": 0.35700671463443756 },
        { "id": "nauc_precision_at_10_diff1", "display_name": "nauc_precision_at_10_diff1", "description": null, "value": -0.44652985217538876 },
        { "id": "nauc_precision_at_50_max", "display_name": "nauc_precision_at_50_max", "description": null, "value": 0.2526299155090765 },
        { "id": "nauc_precision_at_50_std", "display_name": "nauc_precision_at_50_std", "description": null, "value": -0.021434326602753354 },
        { "id": "nauc_precision_at_50_diff1", "display_name": "nauc_precision_at_50_diff1", "description": null, "value": -0.3009002533330021 },
        { "id": "nauc_mrr_at_5_max", "display_name": "nauc_mrr_at_5_max", "description": null, "value": 0.6726463178530804 },
        { "id": "nauc_mrr_at_5_std", "display_name": "nauc_mrr_at_5_std", "description": null, "value": 0.49687521406966506 },
        { "id": "nauc_mrr_at_5_diff1", "display_name": "nauc_mrr_at_5_diff1", "description": null, "value": 0.05561071266486503 },
        { "id": "nauc_mrr_at_10_max", "display_name": "nauc_mrr_at_10_max", "description": null, "value": 0.6731608376359998 },
        { "id": "nauc_mrr_at_10_std", "display_name": "nauc_mrr_at_10_std", "description": null, "value": 0.49491217127896847 },
        { "id": "nauc_mrr_at_10_diff1", "display_name": "nauc_mrr_at_10_diff1", "description": null, "value": 0.05832429376042118 },
        { "id": "nauc_mrr_at_50_max", "display_name": "nauc_mrr_at_50_max", "description": null, "value": 0.6735463200113443 },
        { "id": "nauc_mrr_at_50_std", "display_name": "nauc_mrr_at_50_std", "description": null, "value": 0.495779540068593 },
        { "id": "nauc_mrr_at_50_diff1", "display_name": "nauc_mrr_at_50_diff1", "description": null, "value": 0.06154966156964915 }
      ]
    }
  ]
}
leaderboard/submissions/esm2_t30_150M_UR50D/bacarch_bigene.json
ADDED
@@ -0,0 +1,86 @@
{
  "task": {
    "id": "bacarch_bigene",
    "display_name": "BacArch BiGene",
    "description": "Evaluate on BacArch bigene matching task between bacterial (E.coli K-12) proteins and archaeal (Sulfolobus acidocaldarius DSM 639) proteins.",
    "modality": "protein",
    "type": "bigene_mining",
    "datasets": [
      { "path": "tattabio/bac_arch_bigene", "revision": "d5a65e44bae43a9ba9f2fdc03056dff9c12f6631" }
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t30_150M_UR50D",
    "revision": "...",
    "num_layers": 30,
    "num_params": 148795481,
    "embed_dim": 640
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 15,
      "layer_display_name": "15",
      "metrics": [
        { "id": "precision", "display_name": "precision", "description": null, "value": 0.7591194968553459 },
        { "id": "recall", "display_name": "recall", "description": null, "value": 0.8188679245283019 },
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.7779874213836478 },
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.8188679245283019 }
      ]
    },
    {
      "layer_number": 29,
      "layer_display_name": "29",
      "metrics": [
        { "id": "precision", "display_name": "precision", "description": null, "value": 0.656010781671159 },
        { "id": "recall", "display_name": "recall", "description": null, "value": 0.7320754716981132 },
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.6774213836477987 },
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.7320754716981132 }
      ]
    }
  ]
}
leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json
ADDED
@@ -0,0 +1,62 @@
{
  "task": {
    "id": "convergent_enzymes_classification",
    "display_name": "Convergent Enzymes Classification",
    "description": "Evaluate on convergent enzymes classification task, where convergent enzymes are proteins with the same EC number but without blastp hits against each other",
    "modality": "protein",
    "type": "classification",
    "datasets": [
      { "path": "tattabio/convergent_enzymes", "revision": "37f75609f54de2bc0911ccb72faf1c2f5a4285aa" }
    ],
    "primary_metric_id": "f1"
  },
  "model": {
    "hf_name": "facebook/esm2_t30_150M_UR50D",
    "revision": "...",
    "num_layers": 30,
    "num_params": 148795481,
    "embed_dim": 640
  },
  "dgeb_version": "0.0.0",
  "results": [
    {
      "layer_number": 15,
      "layer_display_name": "15",
      "metrics": [
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.2975 },
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.24646428571428572 }
      ]
    },
    {
      "layer_number": 29,
      "layer_display_name": "29",
      "metrics": [
        { "id": "accuracy", "display_name": "accuracy", "description": null, "value": 0.2475 },
        { "id": "f1", "display_name": "f1", "description": null, "value": 0.20091666666666666 }
      ]
    }
  ]
}
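The submission files above all share the same schema: a `task` block (with a `primary_metric_id`), a `model` block, and per-layer `results` whose `metrics` list contains that primary metric. As a rough illustration only (this sketch is not part of the commit; the helper name and example path are assumptions), such a file could be summarized with the standard library:

```python
import json

def primary_metric_per_layer(path: str) -> dict[int, float]:
    """Map each reported layer to the task's primary metric value."""
    with open(path) as f:
        submission = json.load(f)
    primary_id = submission["task"]["primary_metric_id"]
    per_layer = {}
    for layer in submission["results"]:
        for metric in layer["metrics"]:
            if metric["id"] == primary_id:
                per_layer[layer["layer_number"]] = metric["value"]
    return per_layer

# Example (hypothetical local path mirroring the repo layout above):
# primary_metric_per_layer("leaderboard/submissions/esm2_t30_150M_UR50D/convergent_enzymes_classification.json")
# -> {15: 0.24646428571428572, 29: 0.20091666666666666}
```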