Spaces:
Sleeping
Sleeping
Commit
·
24364c6
1
Parent(s):
ef88f19
Automated update from GitHub
Browse files- .github/workflows/deploy.yml +44 -0
- .github/workflows/test.yml +26 -0
- space/space/.github/workflows/update-hf-space.yml +24 -1
- space/space/.gitignore +2 -1
- space/space/README.md +3 -1
- space/space/space/space/README.md +1 -1
- space/space/space/space/space/README.md +2 -0
- space/space/space/space/space/space/.github/workflows/update-hf-space.yml +16 -0
- space/space/space/space/space/space/README.md +0 -15
- space/space/space/space/space/space/space/README.md +16 -14
- space/space/space/space/space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml +27 -0
- space/space/space/space/space/space/space/space/space/space/space/.gitignore +51 -0
- space/space/space/space/space/space/space/space/space/space/space/CHANGELOG.md +15 -0
- space/space/space/space/space/space/space/space/space/space/space/LICENSE +21 -0
- space/space/space/space/space/space/space/space/space/space/space/data/scores.csv +0 -0
- space/space/space/space/space/space/space/space/space/space/space/example.py +36 -0
- space/space/space/space/space/space/space/space/space/space/space/pyproject.toml +31 -0
- space/space/space/space/space/space/space/space/space/space/space/results/classification_report.pdf +0 -0
- space/space/space/space/space/space/space/space/space/space/space/space/.gitattributes +35 -0
- space/space/space/space/space/space/space/space/space/space/space/space/README.md +112 -0
- space/space/space/space/space/space/space/space/space/space/space/space/app.py +90 -0
- space/space/space/space/space/space/space/space/space/space/space/space/omnibin-0.1.4-py3-none-any.whl +0 -0
- space/space/space/space/space/space/space/space/space/space/space/space/omnibin/__init__.py +4 -0
- space/space/space/space/space/space/space/space/space/space/space/space/omnibin/metrics.py +62 -0
- space/space/space/space/space/space/space/space/space/space/space/space/omnibin/utils.py +263 -0
- space/space/space/space/space/space/space/space/space/space/space/space/requirements.txt +6 -0
- space/space/space/space/space/space/space/space/space/space/space/space/scores.csv +0 -0
- space/space/space/space/space/space/space/space/space/space/space/tests/__init__.py +3 -0
- space/space/space/space/space/space/space/space/space/space/space/tests/test_metrics.py +141 -0
- space/space/tests/test_metrics.py +18 -7
.github/workflows/deploy.yml
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Deploy to Hugging Face Space
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
deploy:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
|
12 |
+
steps:
|
13 |
+
- name: Checkout source repo
|
14 |
+
uses: actions/checkout@v3
|
15 |
+
|
16 |
+
- name: Clone HF Space repo
|
17 |
+
run: |
|
18 |
+
git config --global user.email "[email protected]"
|
19 |
+
git config --global user.name "kitamura-felipe"
|
20 |
+
git clone https://huggingface.co/spaces/felipekitamura/omnibin space
|
21 |
+
rsync -av --exclude='.git' ./ space/
|
22 |
+
|
23 |
+
echo '---' > temp_readme.md
|
24 |
+
echo 'title: Omnibin' >> temp_readme.md
|
25 |
+
echo 'emoji: ⚡' >> temp_readme.md
|
26 |
+
echo 'colorFrom: pink' >> temp_readme.md
|
27 |
+
echo 'colorTo: yellow' >> temp_readme.md
|
28 |
+
echo 'sdk: gradio' >> temp_readme.md
|
29 |
+
echo 'sdk_version: 5.29.0' >> temp_readme.md
|
30 |
+
echo 'app_file: app.py' >> temp_readme.md
|
31 |
+
echo 'pinned: false' >> temp_readme.md
|
32 |
+
echo 'license: mit' >> temp_readme.md
|
33 |
+
echo 'short_description: A Python package for generating comprehensive binary classi' >> temp_readme.md
|
34 |
+
echo '---' >> temp_readme.md
|
35 |
+
echo '' >> temp_readme.md
|
36 |
+
cat README.md >> temp_readme.md
|
37 |
+
mv temp_readme.md space/README.md
|
38 |
+
|
39 |
+
cd space
|
40 |
+
git add .
|
41 |
+
git commit -m "Automated update from GitHub" || echo "No changes"
|
42 |
+
git push https://USER:[email protected]/spaces/felipekitamura/omnibin HEAD:main
|
43 |
+
env:
|
44 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
.github/workflows/test.yml
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Run Tests
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches: [main]
|
6 |
+
|
7 |
+
jobs:
|
8 |
+
test:
|
9 |
+
runs-on: ubuntu-latest
|
10 |
+
|
11 |
+
steps:
|
12 |
+
- name: Checkout code
|
13 |
+
uses: actions/checkout@v3
|
14 |
+
|
15 |
+
- name: Set up Python
|
16 |
+
uses: actions/setup-python@v4
|
17 |
+
with:
|
18 |
+
python-version: '3.10'
|
19 |
+
|
20 |
+
- name: Install dependencies
|
21 |
+
run: |
|
22 |
+
pip install -r requirements.txt
|
23 |
+
pip install pytest
|
24 |
+
|
25 |
+
- name: Run tests
|
26 |
+
run: pytest tests/
|
space/space/.github/workflows/update-hf-space.yml
CHANGED
@@ -3,11 +3,33 @@ name: Deploy to Hugging Face Space
|
|
3 |
on:
|
4 |
push:
|
5 |
branches:
|
6 |
-
- main
|
7 |
|
8 |
jobs:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
deploy:
|
10 |
runs-on: ubuntu-latest
|
|
|
11 |
|
12 |
steps:
|
13 |
- name: Checkout source repo
|
@@ -35,6 +57,7 @@ jobs:
|
|
35 |
echo '' >> temp_readme.md
|
36 |
cat README.md >> temp_readme.md
|
37 |
mv temp_readme.md space/README.md
|
|
|
38 |
cd space
|
39 |
git add .
|
40 |
git commit -m "Automated update from GitHub" || echo "No changes"
|
|
|
3 |
on:
|
4 |
push:
|
5 |
branches:
|
6 |
+
- main
|
7 |
|
8 |
jobs:
|
9 |
+
test:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
|
12 |
+
steps:
|
13 |
+
- name: Checkout source repo
|
14 |
+
uses: actions/checkout@v3
|
15 |
+
|
16 |
+
- name: Set up Python
|
17 |
+
uses: actions/setup-python@v4
|
18 |
+
with:
|
19 |
+
python-version: '3.10'
|
20 |
+
|
21 |
+
- name: Install dependencies
|
22 |
+
run: |
|
23 |
+
python -m pip install --upgrade pip
|
24 |
+
pip install -r requirements.txt
|
25 |
+
pip install pytest
|
26 |
+
|
27 |
+
- name: Run tests
|
28 |
+
run: pytest tests/
|
29 |
+
|
30 |
deploy:
|
31 |
runs-on: ubuntu-latest
|
32 |
+
needs: test # 👈 only runs if 'test' job passes
|
33 |
|
34 |
steps:
|
35 |
- name: Checkout source repo
|
|
|
57 |
echo '' >> temp_readme.md
|
58 |
cat README.md >> temp_readme.md
|
59 |
mv temp_readme.md space/README.md
|
60 |
+
|
61 |
cd space
|
62 |
git add .
|
63 |
git commit -m "Automated update from GitHub" || echo "No changes"
|
space/space/.gitignore
CHANGED
@@ -48,4 +48,5 @@ htmlcov/
|
|
48 |
*.xlsx
|
49 |
*.xls
|
50 |
*.db
|
51 |
-
*.sqlite3
|
|
|
|
48 |
*.xlsx
|
49 |
*.xls
|
50 |
*.db
|
51 |
+
*.sqlite3
|
52 |
+
plots/
|
space/space/README.md
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
-
[](https://github.com/kitamura-felipe/omnibin/actions/workflows/test.yml)
|
2 |
+
[](https://github.com/kitamura-felipe/omnibin/actions/workflows/deploy.yml)
|
3 |
+
|
4 |
|
5 |
# Omnibin
|
6 |
|
space/space/space/space/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
[](https://github.com/kitamura-felipe/omnibin/actions/workflows/update-hf-space.yml)
|
2 |
|
3 |
# Omnibin
|
4 |
|
space/space/space/space/space/README.md
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
# Omnibin
|
2 |
|
3 |
A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
|
|
|
1 |
+
[](https://github.com/felipekitamura/omnibin/actions/workflows/update-hf-space.yml)
|
2 |
+
|
3 |
# Omnibin
|
4 |
|
5 |
A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
|
space/space/space/space/space/space/.github/workflows/update-hf-space.yml
CHANGED
@@ -19,6 +19,22 @@ jobs:
|
|
19 |
git config --global user.name "kitamura-felipe"
|
20 |
git clone https://huggingface.co/spaces/felipekitamura/omnibin space
|
21 |
rsync -av --exclude='.git' ./ space/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
cd space
|
23 |
git add .
|
24 |
git commit -m "Automated update from GitHub" || echo "No changes"
|
|
|
19 |
git config --global user.name "kitamura-felipe"
|
20 |
git clone https://huggingface.co/spaces/felipekitamura/omnibin space
|
21 |
rsync -av --exclude='.git' ./ space/
|
22 |
+
|
23 |
+
echo '---' > temp_readme.md
|
24 |
+
echo 'title: Omnibin' >> temp_readme.md
|
25 |
+
echo 'emoji: ⚡' >> temp_readme.md
|
26 |
+
echo 'colorFrom: pink' >> temp_readme.md
|
27 |
+
echo 'colorTo: yellow' >> temp_readme.md
|
28 |
+
echo 'sdk: gradio' >> temp_readme.md
|
29 |
+
echo 'sdk_version: 5.29.0' >> temp_readme.md
|
30 |
+
echo 'app_file: app.py' >> temp_readme.md
|
31 |
+
echo 'pinned: false' >> temp_readme.md
|
32 |
+
echo 'license: mit' >> temp_readme.md
|
33 |
+
echo 'short_description: A Python package for generating comprehensive binary classi' >> temp_readme.md
|
34 |
+
echo '---' >> temp_readme.md
|
35 |
+
echo '' >> temp_readme.md
|
36 |
+
cat README.md >> temp_readme.md
|
37 |
+
mv temp_readme.md space/README.md
|
38 |
cd space
|
39 |
git add .
|
40 |
git commit -m "Automated update from GitHub" || echo "No changes"
|
space/space/space/space/space/space/README.md
CHANGED
@@ -97,18 +97,3 @@ Here are examples of the visualizations generated by Omnibin:
|
|
97 |
|
98 |
### Metrics Summary
|
99 |

|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
---
|
104 |
-
title: Omnibin
|
105 |
-
emoji: ⚡
|
106 |
-
colorFrom: pink
|
107 |
-
colorTo: yellow
|
108 |
-
sdk: gradio
|
109 |
-
sdk_version: 5.29.0
|
110 |
-
app_file: app.py
|
111 |
-
pinned: false
|
112 |
-
license: mit
|
113 |
-
short_description: A Python package for generating comprehensive binary classi
|
114 |
-
---
|
|
|
97 |
|
98 |
### Metrics Summary
|
99 |

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
space/space/space/space/space/space/space/README.md
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
---
|
2 |
-
title: Omnibin
|
3 |
-
emoji: ⚡
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: yellow
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.29.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
short_description: A Python package for generating comprehensive binary classi
|
12 |
-
---
|
13 |
-
|
14 |
# Omnibin
|
15 |
|
16 |
A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
|
@@ -109,4 +96,19 @@ Here are examples of the visualizations generated by Omnibin:
|
|
109 |
<img src="results/plots/prediction_distribution.png">
|
110 |
|
111 |
### Metrics Summary
|
112 |
-

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# Omnibin
|
2 |
|
3 |
A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
|
|
|
96 |
<img src="results/plots/prediction_distribution.png">
|
97 |
|
98 |
### Metrics Summary
|
99 |
+

|
100 |
+
|
101 |
+
|
102 |
+
|
103 |
+
---
|
104 |
+
title: Omnibin
|
105 |
+
emoji: ⚡
|
106 |
+
colorFrom: pink
|
107 |
+
colorTo: yellow
|
108 |
+
sdk: gradio
|
109 |
+
sdk_version: 5.29.0
|
110 |
+
app_file: app.py
|
111 |
+
pinned: false
|
112 |
+
license: mit
|
113 |
+
short_description: A Python package for generating comprehensive binary classi
|
114 |
+
---
|
space/space/space/space/space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Deploy to Hugging Face Space
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main # or the branch you want to trigger deployment
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
deploy:
|
10 |
+
runs-on: ubuntu-latest
|
11 |
+
|
12 |
+
steps:
|
13 |
+
- name: Checkout source repo
|
14 |
+
uses: actions/checkout@v3
|
15 |
+
|
16 |
+
- name: Clone HF Space repo
|
17 |
+
run: |
|
18 |
+
git config --global user.email "[email protected]"
|
19 |
+
git config --global user.name "kitamura-felipe"
|
20 |
+
git clone https://huggingface.co/spaces/felipekitamura/omnibin space
|
21 |
+
rsync -av --exclude='.git' ./ space/
|
22 |
+
cd space
|
23 |
+
git add .
|
24 |
+
git commit -m "Automated update from GitHub" || echo "No changes"
|
25 |
+
git push https://USER:[email protected]/spaces/felipekitamura/omnibin HEAD:main
|
26 |
+
env:
|
27 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
space/space/space/space/space/space/space/space/space/space/space/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
*.so
|
6 |
+
.Python
|
7 |
+
build/
|
8 |
+
develop-eggs/
|
9 |
+
dist/
|
10 |
+
downloads/
|
11 |
+
eggs/
|
12 |
+
.eggs/
|
13 |
+
lib/
|
14 |
+
lib64/
|
15 |
+
parts/
|
16 |
+
sdist/
|
17 |
+
var/
|
18 |
+
wheels/
|
19 |
+
*.egg-info/
|
20 |
+
.installed.cfg
|
21 |
+
*.egg
|
22 |
+
|
23 |
+
# Virtual Environment
|
24 |
+
venv/
|
25 |
+
env/
|
26 |
+
ENV/
|
27 |
+
|
28 |
+
# IDE
|
29 |
+
.idea/
|
30 |
+
.vscode/
|
31 |
+
*.swp
|
32 |
+
*.swo
|
33 |
+
|
34 |
+
# Jupyter Notebook
|
35 |
+
.ipynb_checkpoints
|
36 |
+
|
37 |
+
# Distribution
|
38 |
+
dist/
|
39 |
+
build/
|
40 |
+
update.bat
|
41 |
+
|
42 |
+
|
43 |
+
# Coverage
|
44 |
+
.coverage
|
45 |
+
htmlcov/
|
46 |
+
|
47 |
+
# Results and data
|
48 |
+
*.xlsx
|
49 |
+
*.xls
|
50 |
+
*.db
|
51 |
+
*.sqlite3
|
space/space/space/space/space/space/space/space/space/space/space/CHANGELOG.md
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Changelog
|
2 |
+
|
3 |
+
All notable changes to this project will be documented in this file.
|
4 |
+
|
5 |
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6 |
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7 |
+
|
8 |
+
## [0.1.0] - 2024-03-19
|
9 |
+
|
10 |
+
### Added
|
11 |
+
- Initial release
|
12 |
+
- Basic binary classification metrics and visualizations
|
13 |
+
- Comprehensive reporting functionality
|
14 |
+
- Confidence interval calculations
|
15 |
+
- Example usage and documentation
|
space/space/space/space/space/space/space/space/space/space/space/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 Felipe Campos Kitamura
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
space/space/space/space/space/space/space/space/space/space/space/data/scores.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
space/space/space/space/space/space/space/space/space/space/space/example.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
import os
|
4 |
+
from omnibin import generate_binary_classification_report, ColorScheme
|
5 |
+
|
6 |
+
# Define paths
|
7 |
+
RESULTS_DIR = os.path.join(os.path.dirname(__file__), "results")
|
8 |
+
|
9 |
+
# Ensure results directory exists
|
10 |
+
os.makedirs(RESULTS_DIR, exist_ok=True)
|
11 |
+
|
12 |
+
# Generate random data
|
13 |
+
data = pd.DataFrame({
|
14 |
+
'y_true': (y:=np.random.choice([0,1],1000,p:=[.9,.1])),
|
15 |
+
'y_pred': np.where(
|
16 |
+
y,
|
17 |
+
np.random.beta(3,1.5,1000)*.9+.1, # Positive cases: less skewed towards 1.0
|
18 |
+
np.random.beta(1.5,3,1000)*.9+.1 # Negative cases: less skewed towards 0.1
|
19 |
+
)
|
20 |
+
})
|
21 |
+
|
22 |
+
y_true = data['y_true'].values
|
23 |
+
y_scores = data['y_pred'].values
|
24 |
+
|
25 |
+
# Generate comprehensive classification report
|
26 |
+
report_path = generate_binary_classification_report(
|
27 |
+
y_true=y_true,
|
28 |
+
y_scores=y_scores,
|
29 |
+
output_path=os.path.join(RESULTS_DIR, "classification_report.pdf"),
|
30 |
+
n_bootstrap=1000,
|
31 |
+
random_seed=42, # Set a fixed random seed for reproducibility
|
32 |
+
dpi=72,
|
33 |
+
color_scheme=ColorScheme.DEFAULT
|
34 |
+
)
|
35 |
+
|
36 |
+
print(f"Report generated and saved to: {report_path}")
|
space/space/space/space/space/space/space/space/space/space/space/pyproject.toml
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["hatchling"]
|
3 |
+
build-backend = "hatchling.build"
|
4 |
+
|
5 |
+
[project]
|
6 |
+
name = "omnibin"
|
7 |
+
version = "0.1.5"
|
8 |
+
description = "A package for generating comprehensive binary classification reports with visualizations and confidence intervals"
|
9 |
+
readme = "README.md"
|
10 |
+
requires-python = ">=3.11"
|
11 |
+
license = "MIT"
|
12 |
+
authors = [
|
13 |
+
{ name = "Felipe Campos Kitamura", email = "[email protected]" }
|
14 |
+
]
|
15 |
+
classifiers = [
|
16 |
+
"Programming Language :: Python :: 3",
|
17 |
+
"License :: OSI Approved :: MIT License",
|
18 |
+
"Operating System :: OS Independent",
|
19 |
+
]
|
20 |
+
dependencies = [
|
21 |
+
"numpy>=1.21.0",
|
22 |
+
"pandas>=1.3.0",
|
23 |
+
"scikit-learn>=1.0.0",
|
24 |
+
"matplotlib>=3.4.0",
|
25 |
+
"scipy>=1.7.0",
|
26 |
+
"seaborn>=0.11.0",
|
27 |
+
]
|
28 |
+
|
29 |
+
[project.urls]
|
30 |
+
Homepage = "https://github.com/kitamura-felipe/omnibin"
|
31 |
+
Repository = "https://github.com/kitamura-felipe/omnibin.git"
|
space/space/space/space/space/space/space/space/space/space/space/results/classification_report.pdf
ADDED
Binary file (49.5 kB). View file
|
|
space/space/space/space/space/space/space/space/space/space/space/space/.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
space/space/space/space/space/space/space/space/space/space/space/space/README.md
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Omnibin
|
3 |
+
emoji: ⚡
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: 5.29.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
short_description: A Python package for generating comprehensive binary classi
|
12 |
+
---
|
13 |
+
|
14 |
+
# Omnibin
|
15 |
+
|
16 |
+
A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
|
17 |
+
|
18 |
+
## Try it Online
|
19 |
+
|
20 |
+
You can try Omnibin directly in your browser through our [Hugging Face Space](https://felipekitamura-omnibin.hf.space).
|
21 |
+
|
22 |
+
## Installation
|
23 |
+
|
24 |
+
```bash
|
25 |
+
pip install omnibin
|
26 |
+
```
|
27 |
+
|
28 |
+
## Usage
|
29 |
+
|
30 |
+
```python
|
31 |
+
import pandas as pd
|
32 |
+
from omnibin import generate_binary_classification_report, ColorScheme
|
33 |
+
|
34 |
+
# Load your data
|
35 |
+
data = pd.read_csv("data/scores.csv")
|
36 |
+
y_true = data['y_true'].values
|
37 |
+
y_scores = data['y_pred'].values
|
38 |
+
|
39 |
+
# Generate comprehensive classification report
|
40 |
+
report_path = generate_binary_classification_report(
|
41 |
+
y_true=y_true, # Array of true binary labels (0 or 1)
|
42 |
+
y_scores=y_scores, # Array of predicted probabilities or scores
|
43 |
+
output_path="classification_report.pdf", # Path to save the PDF report
|
44 |
+
n_bootstrap=1000, # Number of bootstrap iterations for confidence intervals
|
45 |
+
random_seed=42, # Random seed for reproducibility
|
46 |
+
dpi=300, # DPI for plot resolution
|
47 |
+
color_scheme=ColorScheme.DEFAULT # Color scheme for plots (DEFAULT, MONOCHROME, or VIBRANT)
|
48 |
+
)
|
49 |
+
```
|
50 |
+
|
51 |
+
## Input Format
|
52 |
+
|
53 |
+
The input data should be provided as:
|
54 |
+
- `y_true`: Array of true binary labels (0 or 1)
|
55 |
+
- `y_pred`: Array of predicted probabilities or scores
|
56 |
+
|
57 |
+
## Features
|
58 |
+
|
59 |
+
- Generates a comprehensive PDF report with:
|
60 |
+
- ROC curve with confidence bands
|
61 |
+
- Precision-Recall curve with confidence bands
|
62 |
+
- Metrics vs. threshold plots
|
63 |
+
- Confusion matrix at optimal threshold
|
64 |
+
- Calibration plot
|
65 |
+
- Summary table with confidence intervals
|
66 |
+
- Calculates optimal threshold using Youden's J statistic
|
67 |
+
- Provides confidence intervals using bootstrapping
|
68 |
+
- Supports both probability and score-based predictions
|
69 |
+
|
70 |
+
## Metrics Included
|
71 |
+
|
72 |
+
- Accuracy
|
73 |
+
- Sensitivity (Recall)
|
74 |
+
- Specificity
|
75 |
+
- Positive Predictive Value (Precision)
|
76 |
+
- Matthews Correlation Coefficient
|
77 |
+
- F1 Score
|
78 |
+
- AUC-ROC
|
79 |
+
- AUC-PR
|
80 |
+
|
81 |
+
All metrics include 95% confidence intervals calculated through bootstrapping.
|
82 |
+
|
83 |
+
## Output
|
84 |
+
|
85 |
+
The package generates a PDF report containing:
|
86 |
+
1. ROC and Precision-Recall curves with confidence bands
|
87 |
+
2. Metrics plotted across different thresholds
|
88 |
+
3. Confusion matrix at the optimal threshold
|
89 |
+
4. Calibration plot
|
90 |
+
5. Summary table with all metrics and their confidence intervals
|
91 |
+
|
92 |
+
## Example
|
93 |
+
|
94 |
+
Here are examples of the visualizations generated by Omnibin:
|
95 |
+
|
96 |
+
### ROC and Precision-Recall Curves
|
97 |
+

|
98 |
+
|
99 |
+
### Metrics vs Threshold
|
100 |
+
<img src="results/plots/metrics_threshold.png">
|
101 |
+
|
102 |
+
### Confusion Matrix
|
103 |
+
<img src="results/plots/confusion_matrix.png">
|
104 |
+
|
105 |
+
### Calibration Plot
|
106 |
+
<img src="results/plots/calibration.png">
|
107 |
+
|
108 |
+
### Prediction Distribution
|
109 |
+
<img src="results/plots/prediction_distribution.png">
|
110 |
+
|
111 |
+
### Metrics Summary
|
112 |
+

|
space/space/space/space/space/space/space/space/space/space/space/space/app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
import shutil
|
5 |
+
from omnibin import generate_binary_classification_report, ColorScheme
|
6 |
+
|
7 |
+
# Define results directory
|
8 |
+
RESULTS_DIR = "/tmp/results"
|
9 |
+
|
10 |
+
# Map string color schemes to enum values
|
11 |
+
COLOR_SCHEME_MAP = {
|
12 |
+
"DEFAULT": ColorScheme.DEFAULT,
|
13 |
+
"MONOCHROME": ColorScheme.MONOCHROME,
|
14 |
+
"VIBRANT": ColorScheme.VIBRANT
|
15 |
+
}
|
16 |
+
|
17 |
+
def process_csv(csv_file, n_bootstrap=1000, dpi=72, color_scheme="DEFAULT"):
|
18 |
+
# Convert string color scheme to enum
|
19 |
+
color_scheme_enum = COLOR_SCHEME_MAP[color_scheme]
|
20 |
+
|
21 |
+
# Read the CSV file
|
22 |
+
df = pd.read_csv(csv_file.name)
|
23 |
+
|
24 |
+
# Check if required columns exist
|
25 |
+
required_columns = ['y_true', 'y_pred']
|
26 |
+
if not all(col in df.columns for col in required_columns):
|
27 |
+
raise ValueError("CSV file must contain 'y_true' and 'y_pred' columns")
|
28 |
+
|
29 |
+
# Clean up results directory if it exists
|
30 |
+
if os.path.exists(RESULTS_DIR):
|
31 |
+
shutil.rmtree(RESULTS_DIR)
|
32 |
+
|
33 |
+
# Create fresh results directory
|
34 |
+
os.makedirs(RESULTS_DIR, exist_ok=True)
|
35 |
+
|
36 |
+
# Generate the report
|
37 |
+
report_path = generate_binary_classification_report(
|
38 |
+
y_true=df['y_true'].values,
|
39 |
+
y_scores=df['y_pred'].values,
|
40 |
+
output_path=os.path.join(RESULTS_DIR, "classification_report.pdf"),
|
41 |
+
n_bootstrap=n_bootstrap,
|
42 |
+
random_seed=42,
|
43 |
+
dpi=dpi,
|
44 |
+
color_scheme=color_scheme_enum
|
45 |
+
)
|
46 |
+
|
47 |
+
# Get paths to individual plots
|
48 |
+
plots_dir = os.path.join(RESULTS_DIR, "plots")
|
49 |
+
plot_paths = {
|
50 |
+
"ROC and PR Curves": os.path.join(plots_dir, "roc_pr.png"),
|
51 |
+
"Metrics vs Threshold": os.path.join(plots_dir, "metrics_threshold.png"),
|
52 |
+
"Confusion Matrix": os.path.join(plots_dir, "confusion_matrix.png"),
|
53 |
+
"Calibration Plot": os.path.join(plots_dir, "calibration.png"),
|
54 |
+
"Prediction Distribution": os.path.join(plots_dir, "prediction_distribution.png"),
|
55 |
+
"Metrics Summary": os.path.join(plots_dir, "metrics_summary.png")
|
56 |
+
}
|
57 |
+
|
58 |
+
# Return both the PDF and the plot images
|
59 |
+
return report_path, *plot_paths.values()
|
60 |
+
|
61 |
+
# Create the Gradio interface
|
62 |
+
iface = gr.Interface(
|
63 |
+
fn=process_csv,
|
64 |
+
inputs=[
|
65 |
+
gr.File(label="Upload CSV file with 'y_true' and 'y_pred' columns"),
|
66 |
+
gr.Number(label="Number of Bootstrap Iterations", value=1000, minimum=100, maximum=10000),
|
67 |
+
gr.Number(label="DPI", value=72, minimum=50, maximum=300),
|
68 |
+
gr.Dropdown(label="Color Scheme", choices=["DEFAULT", "MONOCHROME", "VIBRANT"], value="DEFAULT")
|
69 |
+
],
|
70 |
+
outputs=[
|
71 |
+
gr.File(label="Classification Report PDF"),
|
72 |
+
gr.Image(label="ROC and PR Curves"),
|
73 |
+
gr.Image(label="Metrics vs Threshold"),
|
74 |
+
gr.Image(label="Confusion Matrix"),
|
75 |
+
gr.Image(label="Calibration Plot"),
|
76 |
+
gr.Image(label="Prediction Distribution"),
|
77 |
+
gr.Image(label="Metrics Summary")
|
78 |
+
],
|
79 |
+
title="Binary Classification Report Generator",
|
80 |
+
description="Upload a CSV file containing 'y_true' and 'y_pred' columns to generate a binary classification report.\n\n"
|
81 |
+
"'y_true': reference standard (0s or 1s).\n\n"
|
82 |
+
"'y_pred': model prediction (continuous value between 0 and 1).\n\n"
|
83 |
+
"This application takes approximately 35 seconds to generate the report.\n",
|
84 |
+
|
85 |
+
examples=[["scores.csv", 1000, 72, "DEFAULT"]],
|
86 |
+
cache_examples=False
|
87 |
+
)
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
iface.launch()
|
space/space/space/space/space/space/space/space/space/space/space/space/omnibin-0.1.4-py3-none-any.whl
ADDED
Binary file (5.86 kB). View file
|
|
space/space/space/space/space/space/space/space/space/space/space/space/omnibin/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .metrics import generate_binary_classification_report, ColorScheme
|
2 |
+
|
3 |
+
__version__ = "0.1.5"
|
4 |
+
__all__ = ["generate_binary_classification_report"]
|
space/space/space/space/space/space/space/space/space/space/space/space/omnibin/metrics.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
from tqdm import tqdm
|
6 |
+
import os
|
7 |
+
from sklearn.metrics import (
|
8 |
+
accuracy_score, recall_score, precision_score, f1_score, roc_auc_score,
|
9 |
+
average_precision_score, confusion_matrix, matthews_corrcoef, roc_curve,
|
10 |
+
precision_recall_curve
|
11 |
+
)
|
12 |
+
from sklearn.calibration import calibration_curve
|
13 |
+
from matplotlib.backends.backend_pdf import PdfPages
|
14 |
+
from enum import Enum
|
15 |
+
from .utils import (
|
16 |
+
ColorScheme, calculate_metrics_by_threshold, bootstrap_curves,
|
17 |
+
calculate_optimal_threshold, calculate_metrics_summary,
|
18 |
+
calculate_confidence_intervals, create_output_directories,
|
19 |
+
plot_roc_pr_curves, plot_metrics_threshold, plot_confusion_matrix,
|
20 |
+
plot_calibration, plot_metrics_summary, plot_prediction_distribution
|
21 |
+
)
|
22 |
+
|
23 |
+
def generate_binary_classification_report(y_true, y_scores, output_path="omnibin_report.pdf", n_bootstrap=1000, random_seed=42, dpi=300, color_scheme=ColorScheme.DEFAULT):
    """Generate a multi-page PDF report evaluating a binary classifier.

    Parameters
    ----------
    y_true : array-like of {0, 1}
        Ground-truth binary labels.
    y_scores : array-like of float in [0, 1]
        Predicted positive-class probabilities, same length as ``y_true``.
    output_path : str
        Destination PDF path; individual PNG copies of each plot are also
        written to a "plots" directory next to it.
    n_bootstrap : int
        Number of bootstrap resamples for confidence intervals/bands.
    random_seed : int or None
        Seed for NumPy's global RNG; ``None`` leaves the RNG untouched.
    dpi : int
        Resolution applied to every generated figure.
    color_scheme : ColorScheme
        Palette used consistently across all plots.

    Returns
    -------
    str
        ``output_path``, once the PDF has been written.

    Raises
    ------
    ValueError
        If inputs have mismatched lengths, ``y_true`` contains values other
        than 0/1, or ``y_scores`` falls outside [0, 1].
    """
    y_true = np.asarray(y_true)
    y_scores = np.asarray(y_scores)

    # Validate inputs up front; the test suite relies on these ValueErrors.
    if len(y_true) != len(y_scores):
        raise ValueError("y_true and y_scores must have the same length")
    if not np.all(np.isin(y_true, (0, 1))):
        raise ValueError("y_true must contain only binary labels 0 and 1")
    if np.any(y_scores < 0) or np.any(y_scores > 1):
        raise ValueError("y_scores must be probabilities in the range [0, 1]")

    # Set random seed for reproducibility of the bootstrap
    if random_seed is not None:
        np.random.seed(random_seed)

    # Set DPI for all figures
    plt.rcParams['figure.dpi'] = dpi

    # Get color scheme dict from the enum member
    colors = color_scheme.value

    # Calculate metrics and optimal threshold
    metrics_df = calculate_metrics_by_threshold(y_true, y_scores)
    best_thresh = calculate_optimal_threshold(y_true, y_scores)
    metrics_summary = calculate_metrics_summary(y_true, y_scores, best_thresh)
    conf_intervals = calculate_confidence_intervals(y_true, y_scores, best_thresh, n_bootstrap)

    # Create output directories (PDF parent + "plots" for PNG copies)
    plots_dir = create_output_directories(output_path)

    # Bootstrap confidence bands for the ROC and PR curves
    tpr_ci, precision_ci, common_fpr, common_recall = bootstrap_curves(y_true, y_scores, n_boot=n_bootstrap)

    with PdfPages(output_path) as pdf:
        # Generate all figures (each helper also saves its own PNG)
        plots = [
            plot_roc_pr_curves(y_true, y_scores, tpr_ci, precision_ci, common_fpr, common_recall, colors, dpi, plots_dir),
            plot_metrics_threshold(metrics_df, colors, dpi, plots_dir),
            plot_confusion_matrix(y_true, y_scores, best_thresh, colors, dpi, plots_dir),
            plot_calibration(y_true, y_scores, colors, dpi, plots_dir),
            plot_metrics_summary(metrics_summary, conf_intervals, dpi, plots_dir),
            plot_prediction_distribution(y_true, y_scores, best_thresh, colors, dpi, plots_dir)
        ]

        # Save all figures into the single PDF, releasing each afterwards
        for plot in plots:
            pdf.savefig(plot, dpi=dpi)
            plt.close(plot)

    return output_path
|
space/space/space/space/space/space/space/space/space/space/space/space/omnibin/utils.py
ADDED
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import seaborn as sns
|
5 |
+
from tqdm import tqdm
|
6 |
+
from sklearn.metrics import (
|
7 |
+
accuracy_score, recall_score, precision_score, f1_score, roc_auc_score,
|
8 |
+
average_precision_score, confusion_matrix, matthews_corrcoef, roc_curve,
|
9 |
+
precision_recall_curve
|
10 |
+
)
|
11 |
+
from sklearn.calibration import calibration_curve
|
12 |
+
from enum import Enum
|
13 |
+
import os
|
14 |
+
|
15 |
+
class ColorScheme(Enum):
    """Named color palettes used consistently across every report plot.

    Each member's value is a dict mapping a semantic role to a
    matplotlib-compatible color (or, for ``cmap``, a colormap name):

    - ``positive_class`` / ``negative_class``: histogram colors in the
      prediction-distribution plot
    - ``roc_curve`` / ``pr_curve``: curve line + CI band colors
    - ``threshold_line``: vertical optimal-threshold marker
    - ``calibration_curve`` / ``calibration_reference``: calibration plot
      curve and ideal diagonal
    - ``metrics_colors``: color cycle for the metrics-vs-threshold lines
    - ``cmap``: seaborn heatmap colormap for the confusion matrix
    """

    # Matplotlib "tab" palette — the default look.
    DEFAULT = {
        'positive_class': 'tab:blue',
        'negative_class': 'tab:orange',
        'roc_curve': 'tab:blue',
        'pr_curve': 'tab:blue',
        'threshold_line': 'black',
        'calibration_curve': 'tab:blue',
        'calibration_reference': 'gray',
        'metrics_colors': ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:orange', 'tab:brown', 'tab:pink'],
        'cmap': 'Blues'
    }

    # Greyscale palette for print-friendly output.
    MONOCHROME = {
        'positive_class': '#404040',
        'negative_class': '#808080',
        'roc_curve': '#000000',
        'pr_curve': '#000000',
        'threshold_line': '#000000',
        'calibration_curve': '#000000',
        'calibration_reference': '#808080',
        'metrics_colors': ['#000000', '#404040', '#606060', '#808080', '#A0A0A0', '#C0C0C0', '#E0E0E0'],
        'cmap': 'Greys'
    }

    # High-contrast palette with saturated accent colors.
    VIBRANT = {
        'positive_class': '#FF6B6B',
        'negative_class': '#4ECDC4',
        'roc_curve': '#FF6B6B',
        'pr_curve': '#4ECDC4',
        'threshold_line': '#2C3E50',
        'calibration_curve': '#FF6B6B',
        'calibration_reference': '#95A5A6',
        'metrics_colors': ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEEAD', '#D4A5A5', '#9B59B6'],
        'cmap': 'Greens'
    }
|
51 |
+
|
52 |
+
def calculate_metrics_by_threshold(y_true, y_scores):
    """Evaluate classification metrics on a 100-point threshold grid.

    Returns a DataFrame with one row per threshold and the columns
    Threshold, Accuracy, Sensitivity, Specificity, PPV, MCC, F1 Score.
    """
    column_names = [
        "Threshold", "Accuracy", "Sensitivity", "Specificity",
        "PPV", "MCC", "F1 Score"
    ]
    rows = []

    for threshold in tqdm(np.linspace(0, 1, 100), desc="Calculating metrics across thresholds"):
        predicted = (y_scores >= threshold).astype(int)
        rows.append([
            threshold,
            accuracy_score(y_true, predicted),
            recall_score(y_true, predicted),
            recall_score(y_true, predicted, pos_label=0),  # specificity
            precision_score(y_true, predicted, zero_division=0),
            matthews_corrcoef(y_true, predicted),
            f1_score(y_true, predicted),
        ])

    return pd.DataFrame(rows, columns=column_names)
|
71 |
+
|
72 |
+
def bootstrap_metric(metric_func, y_true, y_scores, n_boot=1000):
    """Bootstrap a 95% confidence interval for a scalar metric.

    Parameters
    ----------
    metric_func : callable
        ``metric_func(y_true, y_scores) -> float``, evaluated on each
        joint resample of the two arrays.
    y_true, y_scores : np.ndarray
        Labels and scores, resampled with replacement in lockstep.
    n_boot : int
        Number of bootstrap resamples.

    Returns
    -------
    np.ndarray
        ``[2.5th, 97.5th]`` percentiles of the bootstrap distribution.

    Raises
    ------
    ValueError
        If the metric could not be computed on any resample.
    """
    stats = []
    n = len(y_true)
    for _ in tqdm(range(n_boot), desc="Bootstrap iterations", leave=False):
        indices = np.random.choice(n, n, replace=True)
        try:
            stats.append(metric_func(y_true[indices], y_scores[indices]))
        except Exception:
            # A resample can be degenerate (e.g. only one class present);
            # skip it rather than aborting the whole bootstrap. Narrowed
            # from a bare `except:` so Ctrl-C / SystemExit still propagate.
            continue
    if not stats:
        # np.percentile([]) would raise a confusing IndexError here.
        raise ValueError("metric could not be computed on any bootstrap resample")
    return np.percentile(stats, [2.5, 97.5])
|
82 |
+
|
83 |
+
def bootstrap_curves(y_true, y_scores, n_boot=1000):
    """Bootstrap 95% confidence bands for the ROC and PR curves.

    Curves from each resample are interpolated onto common 100-point
    FPR / recall grids so that pointwise percentiles can be taken.

    Returns
    -------
    (tpr_ci, precision_ci, common_fpr, common_recall)
        ``tpr_ci`` and ``precision_ci`` are shape-(2, 100) arrays holding
        the 2.5th and 97.5th percentile curves on the corresponding grid.

    Raises
    ------
    ValueError
        If no resample produced valid curves.
    """
    tprs = []
    precisions = []

    common_fpr = np.linspace(0, 1, 100)
    common_recall = np.linspace(0, 1, 100)

    n = len(y_true)
    for _ in tqdm(range(n_boot), desc="Bootstrap iterations for curves", leave=False):
        indices = np.random.choice(n, n, replace=True)
        try:
            fpr, tpr, _ = roc_curve(y_true[indices], y_scores[indices])
            tpr_interp = np.interp(common_fpr, fpr, tpr)

            precision, recall, _ = precision_recall_curve(y_true[indices], y_scores[indices])
            # np.interp requires increasing x; PR output is not sorted by recall.
            sort_idx = np.argsort(recall)
            precision_interp = np.interp(common_recall, recall[sort_idx], precision[sort_idx])
        except Exception:
            # Degenerate resamples (single class) cannot produce a curve.
            # Narrowed from a bare `except:` so interrupts still propagate.
            continue
        # Append only after BOTH curves succeeded, so the two lists can
        # never fall out of sync (the original appended tprs first and
        # could desynchronize if the PR step failed).
        tprs.append(tpr_interp)
        precisions.append(precision_interp)

    if not tprs:
        raise ValueError("no bootstrap resample produced valid ROC/PR curves")

    tpr_ci = np.percentile(tprs, [2.5, 97.5], axis=0)
    precision_ci = np.percentile(precisions, [2.5, 97.5], axis=0)

    return tpr_ci, precision_ci, common_fpr, common_recall
|
116 |
+
|
117 |
+
def calculate_optimal_threshold(y_true, y_scores):
    """Return the score threshold maximizing Youden's J statistic (TPR - FPR)."""
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)
    best_index = np.argmax(tpr - fpr)
    return thresholds[best_index]
|
122 |
+
|
123 |
+
def calculate_metrics_summary(y_true, y_scores, best_thresh):
    """Compute headline metrics with predictions thresholded at best_thresh.

    Threshold-dependent metrics use ``y_scores >= best_thresh``; the two
    AUC metrics are computed on the raw scores.
    """
    predictions = (y_scores >= best_thresh).astype(int)

    summary = {}
    summary["Accuracy"] = accuracy_score(y_true, predictions)
    summary["Sensitivity"] = recall_score(y_true, predictions)
    summary["Specificity"] = recall_score(y_true, predictions, pos_label=0)
    summary["PPV"] = precision_score(y_true, predictions, zero_division=0)
    summary["MCC"] = matthews_corrcoef(y_true, predictions)
    summary["F1 Score"] = f1_score(y_true, predictions)
    summary["AUC-ROC"] = roc_auc_score(y_true, y_scores)
    summary["AUC-PR"] = average_precision_score(y_true, y_scores)
    return summary
|
137 |
+
|
138 |
+
def calculate_confidence_intervals(y_true, y_scores, best_thresh, n_bootstrap=1000):
    """Bootstrap 95% CIs for every summary metric at the given threshold.

    Returns a dict mapping metric name -> [2.5th, 97.5th] percentile array,
    with the same keys as ``calculate_metrics_summary``.
    """
    def _at_threshold(metric, **kwargs):
        # Wrap a label-based metric so it sees thresholded predictions.
        return lambda yt, ys: metric(yt, ys >= best_thresh, **kwargs)

    metric_functions = {
        "Accuracy": _at_threshold(accuracy_score),
        "Sensitivity": _at_threshold(recall_score),
        "Specificity": _at_threshold(recall_score, pos_label=0),
        "PPV": _at_threshold(precision_score, zero_division=0),
        "MCC": _at_threshold(matthews_corrcoef),
        "F1 Score": _at_threshold(f1_score),
        # Rank-based metrics operate on the raw scores directly.
        "AUC-ROC": roc_auc_score,
        "AUC-PR": average_precision_score,
    }

    intervals = {}
    for metric_name, metric_fn in metric_functions.items():
        intervals[metric_name] = bootstrap_metric(metric_fn, y_true, y_scores, n_boot=n_bootstrap)
    return intervals
|
155 |
+
|
156 |
+
def create_output_directories(output_path):
    """Ensure the report's directory and its "plots" sibling exist.

    Creates the parent directory of ``output_path`` (when it has one) and a
    "plots" directory alongside the PDF; returns the plots directory path.
    """
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    plots_dir = os.path.join(parent, "plots")
    os.makedirs(plots_dir, exist_ok=True)
    return plots_dir
|
166 |
+
|
167 |
+
def plot_roc_pr_curves(y_true, y_scores, tpr_ci, precision_ci, common_fpr, common_recall, colors, dpi, plots_dir):
    """Generate ROC and PR curves with confidence intervals.

    Draws the empirical ROC curve (left panel) and precision-recall curve
    (right panel), each with a shaded 95% bootstrap band evaluated on the
    common_fpr / common_recall grids. Saves a PNG copy to plots_dir and
    returns the current figure for the PDF writer.
    """
    plt.figure(figsize=(12, 5), dpi=dpi)

    # Left panel: ROC curve with bootstrap band.
    plt.subplot(1, 2, 1)
    fpr, tpr, _ = roc_curve(y_true, y_scores)
    plt.plot(fpr, tpr, label="ROC curve", color=colors['roc_curve'])
    plt.fill_between(common_fpr, tpr_ci[0], tpr_ci[1], alpha=0.3, color=colors['roc_curve'])
    plt.plot([0, 1], [0, 1], "k--")  # chance diagonal
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend()

    # Right panel: precision-recall curve with bootstrap band.
    plt.subplot(1, 2, 2)
    precision, recall, _ = precision_recall_curve(y_true, y_scores)
    plt.plot(recall, precision, label="PR curve", color=colors['pr_curve'])
    plt.fill_between(common_recall, precision_ci[0], precision_ci[1], alpha=0.3, color=colors['pr_curve'])
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall Curve")
    plt.legend()

    plt.savefig(os.path.join(plots_dir, "roc_pr.png"), dpi=dpi, bbox_inches='tight')
    return plt.gcf()
|
192 |
+
|
193 |
+
def plot_metrics_threshold(metrics_df, colors, dpi, plots_dir):
    """Plot every metric column of metrics_df against the threshold grid.

    Saves a PNG copy to plots_dir and returns the figure for the PDF.
    """
    fig = plt.figure(figsize=(10, 6), dpi=dpi)
    palette = colors['metrics_colors']
    for index, metric_name in enumerate(metrics_df.columns[1:]):
        line_color = palette[index % len(palette)]
        plt.plot(metrics_df["Threshold"], metrics_df[metric_name],
                 label=metric_name, color=line_color)
    plt.xlabel("Threshold")
    plt.ylabel("Metric Value")
    plt.title("Metrics Across Thresholds")
    plt.legend()

    fig.savefig(os.path.join(plots_dir, "metrics_threshold.png"), dpi=dpi, bbox_inches='tight')
    return fig
|
206 |
+
|
207 |
+
def plot_confusion_matrix(y_true, y_scores, best_thresh, colors, dpi, plots_dir):
    """Plot the confusion matrix for predictions thresholded at best_thresh.

    Saves a PNG copy to plots_dir and returns the figure for the PDF.
    """
    predictions = y_scores >= best_thresh
    matrix = confusion_matrix(y_true, predictions)

    fig = plt.figure(figsize=(5, 4), dpi=dpi)
    sns.heatmap(matrix, annot=True, fmt="d", cmap=colors['cmap'], cbar=False, annot_kws={"size": 12})
    plt.title("Confusion Matrix (Optimal Threshold)", fontsize=12)
    plt.xlabel("Predicted Label", fontsize=12)
    plt.ylabel("True Label", fontsize=12)

    fig.savefig(os.path.join(plots_dir, "confusion_matrix.png"), dpi=dpi, bbox_inches='tight')
    return fig
|
218 |
+
|
219 |
+
def plot_calibration(y_true, y_scores, colors, dpi, plots_dir):
    """Plot a 10-bin reliability diagram with the ideal diagonal.

    Saves a PNG copy to plots_dir and returns the figure for the PDF.
    """
    fig = plt.figure(figsize=(6, 6), dpi=dpi)
    observed, predicted = calibration_curve(y_true, y_scores, n_bins=10, strategy='uniform')
    plt.plot(predicted, observed, marker='o', label='Calibration curve', color=colors['calibration_curve'])
    # Perfectly calibrated reference line.
    plt.plot([0, 1], [0, 1], linestyle='--', color=colors['calibration_reference'])
    plt.xlabel('Predicted Probability')
    plt.ylabel('True Probability')
    plt.title('Calibration Plot')
    plt.legend()

    fig.savefig(os.path.join(plots_dir, "calibration.png"), dpi=dpi, bbox_inches='tight')
    return fig
|
232 |
+
|
233 |
+
def plot_metrics_summary(metrics_summary, conf_intervals, dpi, plots_dir):
    """Render the metric summary (value + 95% CI) as a table figure.

    Saves a PNG copy to plots_dir and returns the figure for the PDF.
    """
    fig, ax = plt.subplots(figsize=(8, 6), dpi=dpi)
    ax.axis("off")

    rows = []
    for metric_name, value in metrics_summary.items():
        low, high = conf_intervals[metric_name]
        rows.append([metric_name, f"{value:.3f}", f"[{low:.3f}, {high:.3f}]"])

    table = ax.table(cellText=rows, colLabels=["Metric", "Value", "95% CI"], loc="center")
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.2)
    ax.set_title("Performance Metrics at Optimal Threshold", fontweight="bold")

    fig.savefig(os.path.join(plots_dir, "metrics_summary.png"), dpi=dpi, bbox_inches='tight')
    return fig
|
249 |
+
|
250 |
+
def plot_prediction_distribution(y_true, y_scores, best_thresh, colors, dpi, plots_dir):
    """Histogram the scores of each class and mark the optimal threshold.

    Saves a PNG copy to plots_dir and returns the figure for the PDF.
    """
    fig = plt.figure(figsize=(10, 6), dpi=dpi)
    positive_scores = y_scores[y_true == 1]
    negative_scores = y_scores[y_true == 0]
    plt.hist(positive_scores, bins=50, alpha=0.5, label='Positive Class', color=colors['positive_class'])
    plt.hist(negative_scores, bins=50, alpha=0.5, label='Negative Class', color=colors['negative_class'])
    plt.axvline(x=best_thresh, color=colors['threshold_line'], linestyle='--',
                label=f'Optimal Threshold ({best_thresh:.3f})')
    plt.xlabel('Predicted Probability')
    plt.ylabel('Count')
    plt.title('Distribution of Predictions')
    plt.legend()

    fig.savefig(os.path.join(plots_dir, "prediction_distribution.png"), dpi=dpi, bbox_inches='tight')
    return fig
|
space/space/space/space/space/space/space/space/space/space/space/space/requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
numpy
|
3 |
+
gradio
|
4 |
+
matplotlib
|
5 |
+
seaborn
|
6 |
+
scikit-learn
|
space/space/space/space/space/space/space/space/space/space/space/space/scores.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
space/space/space/space/space/space/space/space/space/space/space/tests/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Unit tests for the omnibin package.
|
3 |
+
"""
|
space/space/space/space/space/space/space/space/space/space/space/tests/test_metrics.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from omnibin.metrics import generate_binary_classification_report
|
6 |
+
|
7 |
+
class TestMetrics(unittest.TestCase):
    """End-to-end tests for generate_binary_classification_report."""

    @classmethod
    def setUpClass(cls):
        """Set up synthetic labels/scores and a scratch output directory."""
        # Create synthetic test data
        np.random.seed(42)
        n_samples = 1000
        cls.y_true = np.random.binomial(1, 0.3, n_samples)
        cls.y_scores = np.random.beta(2, 5, n_samples)

        # Adjust scores to have some correlation with true labels
        cls.y_scores[cls.y_true == 1] = np.random.beta(5, 2, sum(cls.y_true == 1))

        # Create test output directory
        cls.test_output_dir = "test_outputs"
        os.makedirs(cls.test_output_dir, exist_ok=True)

    def test_report_generation(self):
        """Test the main report generation function"""
        output_path = os.path.join(self.test_output_dir, "test_report.pdf")

        # Generate report
        result_path = generate_binary_classification_report(
            y_true=self.y_true,
            y_scores=self.y_scores,
            output_path=output_path,
            n_bootstrap=100  # Use smaller number for testing
        )

        # Test that a non-empty PDF was created at the returned path
        self.assertTrue(os.path.exists(result_path))
        self.assertTrue(os.path.getsize(result_path) > 0)

        # Clean up
        os.remove(result_path)

    def test_input_validation(self):
        """Test input validation"""
        # Test with invalid y_true values
        with self.assertRaises(ValueError):
            generate_binary_classification_report(
                y_true=np.array([0, 1, 2]),  # Invalid label
                y_scores=np.array([0.1, 0.5, 0.9])
            )

        # Test with invalid y_scores values
        with self.assertRaises(ValueError):
            generate_binary_classification_report(
                y_true=np.array([0, 1, 0]),
                y_scores=np.array([-0.1, 1.5, 0.9])  # Values outside [0,1]
            )

        # Test with mismatched lengths
        with self.assertRaises(ValueError):
            generate_binary_classification_report(
                y_true=np.array([0, 1]),
                y_scores=np.array([0.1, 0.5, 0.9])
            )

    def test_bootstrap_consistency(self):
        """Test that bootstrap results are consistent"""
        output_path1 = os.path.join(self.test_output_dir, "test_report1.pdf")
        output_path2 = os.path.join(self.test_output_dir, "test_report2.pdf")

        # Generate two reports with same data and seed
        np.random.seed(42)
        generate_binary_classification_report(
            y_true=self.y_true,
            y_scores=self.y_scores,
            output_path=output_path1,
            n_bootstrap=100
        )

        np.random.seed(42)
        generate_binary_classification_report(
            y_true=self.y_true,
            y_scores=self.y_scores,
            output_path=output_path2,
            n_bootstrap=100
        )

        # Compare file sizes (they should be similar)
        size1 = os.path.getsize(output_path1)
        size2 = os.path.getsize(output_path2)
        self.assertAlmostEqual(size1, size2, delta=1000)  # Allow small differences due to PDF compression

        # Clean up
        os.remove(output_path1)
        os.remove(output_path2)

    def test_edge_cases(self):
        """Test edge cases"""
        # Test with all positive labels
        output_path = os.path.join(self.test_output_dir, "all_positive.pdf")
        generate_binary_classification_report(
            y_true=np.ones(100),
            y_scores=np.random.random(100),
            output_path=output_path,
            n_bootstrap=100
        )
        self.assertTrue(os.path.exists(output_path))
        os.remove(output_path)

        # Test with all negative labels
        output_path = os.path.join(self.test_output_dir, "all_negative.pdf")
        generate_binary_classification_report(
            y_true=np.zeros(100),
            y_scores=np.random.random(100),
            output_path=output_path,
            n_bootstrap=100
        )
        self.assertTrue(os.path.exists(output_path))
        os.remove(output_path)

        # Test with perfect predictions
        output_path = os.path.join(self.test_output_dir, "perfect.pdf")
        generate_binary_classification_report(
            y_true=np.array([0, 1, 0, 1]),
            y_scores=np.array([0.1, 0.9, 0.2, 0.8]),
            output_path=output_path,
            n_bootstrap=100
        )
        self.assertTrue(os.path.exists(output_path))
        os.remove(output_path)

    @classmethod
    def tearDownClass(cls):
        """Clean up test outputs, including nested directories.

        Report generation creates a "plots" subdirectory inside the
        output directory; a plain os.remove() on it would raise
        IsADirectoryError, so directories are emptied and removed
        explicitly.
        """
        if os.path.exists(cls.test_output_dir):
            for entry in os.listdir(cls.test_output_dir):
                entry_path = os.path.join(cls.test_output_dir, entry)
                if os.path.isdir(entry_path):
                    for nested in os.listdir(entry_path):
                        os.remove(os.path.join(entry_path, nested))
                    os.rmdir(entry_path)
                else:
                    os.remove(entry_path)
            os.rmdir(cls.test_output_dir)
|
139 |
+
|
140 |
+
# Allow running this test module directly: python tests/test_metrics.py
if __name__ == '__main__':
    unittest.main()
|
space/space/tests/test_metrics.py
CHANGED
@@ -2,6 +2,8 @@ import unittest
|
|
2 |
import os
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
|
|
|
|
5 |
from omnibin.metrics import generate_binary_classification_report
|
6 |
|
7 |
class TestMetrics(unittest.TestCase):
|
@@ -109,12 +111,15 @@ class TestMetrics(unittest.TestCase):
|
|
109 |
|
110 |
# Test with all negative labels
|
111 |
output_path = os.path.join(self.test_output_dir, "all_negative.pdf")
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
|
|
|
|
|
|
118 |
self.assertTrue(os.path.exists(output_path))
|
119 |
os.remove(output_path)
|
120 |
|
@@ -134,7 +139,13 @@ class TestMetrics(unittest.TestCase):
|
|
134 |
"""Clean up test outputs"""
|
135 |
if os.path.exists(cls.test_output_dir):
|
136 |
for file in os.listdir(cls.test_output_dir):
|
137 |
-
os.
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
os.rmdir(cls.test_output_dir)
|
139 |
|
140 |
if __name__ == '__main__':
|
|
|
2 |
import os
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
5 |
+
import warnings
|
6 |
+
from sklearn.exceptions import UndefinedMetricWarning
|
7 |
from omnibin.metrics import generate_binary_classification_report
|
8 |
|
9 |
class TestMetrics(unittest.TestCase):
|
|
|
111 |
|
112 |
# Test with all negative labels
|
113 |
output_path = os.path.join(self.test_output_dir, "all_negative.pdf")
|
114 |
+
with warnings.catch_warnings():
|
115 |
+
warnings.filterwarnings("ignore", category=UserWarning)
|
116 |
+
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
|
117 |
+
generate_binary_classification_report(
|
118 |
+
y_true=np.zeros(100),
|
119 |
+
y_scores=np.random.random(100),
|
120 |
+
output_path=output_path,
|
121 |
+
n_bootstrap=100
|
122 |
+
)
|
123 |
self.assertTrue(os.path.exists(output_path))
|
124 |
os.remove(output_path)
|
125 |
|
|
|
139 |
"""Clean up test outputs"""
|
140 |
if os.path.exists(cls.test_output_dir):
|
141 |
for file in os.listdir(cls.test_output_dir):
|
142 |
+
file_path = os.path.join(cls.test_output_dir, file)
|
143 |
+
if os.path.isfile(file_path):
|
144 |
+
os.remove(file_path)
|
145 |
+
elif os.path.isdir(file_path):
|
146 |
+
for subfile in os.listdir(file_path):
|
147 |
+
os.remove(os.path.join(file_path, subfile))
|
148 |
+
os.rmdir(file_path)
|
149 |
os.rmdir(cls.test_output_dir)
|
150 |
|
151 |
if __name__ == '__main__':
|