felipekitamura committed
Commit e586842 · 1 Parent(s): 24364c6

Automated update from GitHub

Files changed (30)
  1. space/.github/workflows/deploy.yml +44 -0
  2. space/.github/workflows/test.yml +26 -0
  3. space/space/space/.github/workflows/update-hf-space.yml +24 -1
  4. space/space/space/.gitignore +2 -1
  5. space/space/space/README.md +3 -1
  6. space/space/space/space/space/README.md +1 -1
  7. space/space/space/space/space/space/README.md +2 -0
  8. space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml +16 -0
  9. space/space/space/space/space/space/space/README.md +0 -15
  10. space/space/space/space/space/space/space/space/README.md +16 -14
  11. space/space/space/space/space/space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml +27 -0
  12. space/space/space/space/space/space/space/space/space/space/space/space/.gitignore +51 -0
  13. space/space/space/space/space/space/space/space/space/space/space/space/CHANGELOG.md +15 -0
  14. space/space/space/space/space/space/space/space/space/space/space/space/LICENSE +21 -0
  15. space/space/space/space/space/space/space/space/space/space/space/space/data/scores.csv +0 -0
  16. space/space/space/space/space/space/space/space/space/space/space/space/example.py +36 -0
  17. space/space/space/space/space/space/space/space/space/space/space/space/pyproject.toml +31 -0
  18. space/space/space/space/space/space/space/space/space/space/space/space/results/classification_report.pdf +0 -0
  19. space/space/space/space/space/space/space/space/space/space/space/space/space/.gitattributes +35 -0
  20. space/space/space/space/space/space/space/space/space/space/space/space/space/README.md +112 -0
  21. space/space/space/space/space/space/space/space/space/space/space/space/space/app.py +90 -0
  22. space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin-0.1.4-py3-none-any.whl +0 -0
  23. space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/__init__.py +4 -0
  24. space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/metrics.py +62 -0
  25. space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/utils.py +263 -0
  26. space/space/space/space/space/space/space/space/space/space/space/space/space/requirements.txt +6 -0
  27. space/space/space/space/space/space/space/space/space/space/space/space/space/scores.csv +0 -0
  28. space/space/space/space/space/space/space/space/space/space/space/space/tests/__init__.py +3 -0
  29. space/space/space/space/space/space/space/space/space/space/space/space/tests/test_metrics.py +141 -0
  30. space/space/space/tests/test_metrics.py +18 -7
space/.github/workflows/deploy.yml ADDED
@@ -0,0 +1,44 @@
+ name: Deploy to Hugging Face Space
+
+ on:
+   push:
+     branches:
+       - main
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout source repo
+         uses: actions/checkout@v3
+
+       - name: Clone HF Space repo
+         run: |
+           git config --global user.email "[email protected]"
+           git config --global user.name "kitamura-felipe"
+           git clone https://huggingface.co/spaces/felipekitamura/omnibin space
+           rsync -av --exclude='.git' ./ space/
+
+           echo '---' > temp_readme.md
+           echo 'title: Omnibin' >> temp_readme.md
+           echo 'emoji: ⚡' >> temp_readme.md
+           echo 'colorFrom: pink' >> temp_readme.md
+           echo 'colorTo: yellow' >> temp_readme.md
+           echo 'sdk: gradio' >> temp_readme.md
+           echo 'sdk_version: 5.29.0' >> temp_readme.md
+           echo 'app_file: app.py' >> temp_readme.md
+           echo 'pinned: false' >> temp_readme.md
+           echo 'license: mit' >> temp_readme.md
+           echo 'short_description: A Python package for generating comprehensive binary classi' >> temp_readme.md
+           echo '---' >> temp_readme.md
+           echo '' >> temp_readme.md
+           cat README.md >> temp_readme.md
+           mv temp_readme.md space/README.md
+
+           cd space
+           git add .
+           git commit -m "Automated update from GitHub" || echo "No changes"
+           git push https://USER:[email protected]/spaces/felipekitamura/omnibin HEAD:main
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
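The `run` block above assembles the Space's YAML front matter with a chain of `echo` calls and prepends it to the GitHub README before pushing. For reference, a minimal Python sketch of the same transformation (a hypothetical local helper, not part of this commit; the truncated `short_description` is reproduced exactly as committed):

```python
# Hypothetical local equivalent of the workflow's echo chain: build the Hugging
# Face Space front matter and prepend it to the README copied into space/.
FRONT_MATTER = """---
title: Omnibin
emoji: ⚡
colorFrom: pink
colorTo: yellow
sdk: gradio
sdk_version: 5.29.0
app_file: app.py
pinned: false
license: mit
short_description: A Python package for generating comprehensive binary classi
---

"""

with open("README.md", encoding="utf-8") as src:
    readme = src.read()

with open("space/README.md", "w", encoding="utf-8") as dst:
    dst.write(FRONT_MATTER + readme)
```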
space/.github/workflows/test.yml ADDED
@@ -0,0 +1,26 @@
+ name: Run Tests
+
+ on:
+   push:
+     branches: [main]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout code
+         uses: actions/checkout@v3
+
+       - name: Set up Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: '3.10'
+
+       - name: Install dependencies
+         run: |
+           pip install -r requirements.txt
+           pip install pytest
+
+       - name: Run tests
+         run: pytest tests/
space/space/space/.github/workflows/update-hf-space.yml CHANGED
@@ -3,11 +3,33 @@ name: Deploy to Hugging Face Space
  on:
    push:
      branches:
-       - main # or the branch you want to trigger deployment
+       - main

  jobs:
+   test:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout source repo
+         uses: actions/checkout@v3
+
+       - name: Set up Python
+         uses: actions/setup-python@v4
+         with:
+           python-version: '3.10'
+
+       - name: Install dependencies
+         run: |
+           python -m pip install --upgrade pip
+           pip install -r requirements.txt
+           pip install pytest
+
+       - name: Run tests
+         run: pytest tests/
+
    deploy:
      runs-on: ubuntu-latest
+     needs: test  # 👈 only runs if 'test' job passes

      steps:
        - name: Checkout source repo
@@ -35,6 +57,7 @@ jobs:
          echo '' >> temp_readme.md
          cat README.md >> temp_readme.md
          mv temp_readme.md space/README.md
+
          cd space
          git add .
          git commit -m "Automated update from GitHub" || echo "No changes"
space/space/space/.gitignore CHANGED
@@ -48,4 +48,5 @@ htmlcov/
  *.xlsx
  *.xls
  *.db
- *.sqlite3
+ *.sqlite3
+ plots/
space/space/space/README.md CHANGED
@@ -1,4 +1,6 @@
- [![CI/CD](https://github.com/kitamura-felipe/omnibin/actions/workflows/update-hf-space.yml/badge.svg)](https://github.com/kitamura-felipe/omnibin/actions/workflows/update-hf-space.yml)
+ [![Tests](https://github.com/kitamura-felipe/omnibin/actions/workflows/test.yml/badge.svg)](https://github.com/kitamura-felipe/omnibin/actions/workflows/test.yml)
+ [![Deploy](https://github.com/kitamura-felipe/omnibin/actions/workflows/deploy.yml/badge.svg)](https://github.com/kitamura-felipe/omnibin/actions/workflows/deploy.yml)
+

  # Omnibin

space/space/space/space/space/README.md CHANGED
@@ -1,4 +1,4 @@
- [![CI/CD](https://github.com/felipekitamura/omnibin/actions/workflows/update-hf-space.yml/badge.svg)](https://github.com/felipekitamura/omnibin/actions/workflows/update-hf-space.yml)
+ [![CI/CD](https://github.com/kitamura-felipe/omnibin/actions/workflows/update-hf-space.yml/badge.svg)](https://github.com/kitamura-felipe/omnibin/actions/workflows/update-hf-space.yml)

  # Omnibin

space/space/space/space/space/space/README.md CHANGED
@@ -1,3 +1,5 @@
+ [![CI/CD](https://github.com/felipekitamura/omnibin/actions/workflows/update-hf-space.yml/badge.svg)](https://github.com/felipekitamura/omnibin/actions/workflows/update-hf-space.yml)
+
  # Omnibin

  A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml CHANGED
@@ -19,6 +19,22 @@ jobs:
          git config --global user.name "kitamura-felipe"
          git clone https://huggingface.co/spaces/felipekitamura/omnibin space
          rsync -av --exclude='.git' ./ space/
+
+         echo '---' > temp_readme.md
+         echo 'title: Omnibin' >> temp_readme.md
+         echo 'emoji: ⚡' >> temp_readme.md
+         echo 'colorFrom: pink' >> temp_readme.md
+         echo 'colorTo: yellow' >> temp_readme.md
+         echo 'sdk: gradio' >> temp_readme.md
+         echo 'sdk_version: 5.29.0' >> temp_readme.md
+         echo 'app_file: app.py' >> temp_readme.md
+         echo 'pinned: false' >> temp_readme.md
+         echo 'license: mit' >> temp_readme.md
+         echo 'short_description: A Python package for generating comprehensive binary classi' >> temp_readme.md
+         echo '---' >> temp_readme.md
+         echo '' >> temp_readme.md
+         cat README.md >> temp_readme.md
+         mv temp_readme.md space/README.md
          cd space
          git add .
          git commit -m "Automated update from GitHub" || echo "No changes"
space/space/space/space/space/space/space/README.md CHANGED
@@ -97,18 +97,3 @@ Here are examples of the visualizations generated by Omnibin:

  ### Metrics Summary
  ![Metrics Summary](results/plots/metrics_summary.png)
-
-
-
- ---
- title: Omnibin
- emoji: ⚡
- colorFrom: pink
- colorTo: yellow
- sdk: gradio
- sdk_version: 5.29.0
- app_file: app.py
- pinned: false
- license: mit
- short_description: A Python package for generating comprehensive binary classi
- ---
space/space/space/space/space/space/space/space/README.md CHANGED
@@ -1,16 +1,3 @@
- ---
- title: Omnibin
- emoji: ⚡
- colorFrom: pink
- colorTo: yellow
- sdk: gradio
- sdk_version: 5.29.0
- app_file: app.py
- pinned: false
- license: mit
- short_description: A Python package for generating comprehensive binary classi
- ---
-
  # Omnibin

  A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
@@ -109,4 +96,19 @@ Here are examples of the visualizations generated by Omnibin:
  <img src="results/plots/prediction_distribution.png">

  ### Metrics Summary
- ![Metrics Summary](results/plots/metrics_summary.png)
+ ![Metrics Summary](results/plots/metrics_summary.png)
+
+
+
+ ---
+ title: Omnibin
+ emoji: ⚡
+ colorFrom: pink
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 5.29.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ short_description: A Python package for generating comprehensive binary classi
+ ---
space/space/space/space/space/space/space/space/space/space/space/space/.github/workflows/update-hf-space.yml ADDED
@@ -0,0 +1,27 @@
+ name: Deploy to Hugging Face Space
+
+ on:
+   push:
+     branches:
+       - main # or the branch you want to trigger deployment
+
+ jobs:
+   deploy:
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout source repo
+         uses: actions/checkout@v3
+
+       - name: Clone HF Space repo
+         run: |
+           git config --global user.email "[email protected]"
+           git config --global user.name "kitamura-felipe"
+           git clone https://huggingface.co/spaces/felipekitamura/omnibin space
+           rsync -av --exclude='.git' ./ space/
+           cd space
+           git add .
+           git commit -m "Automated update from GitHub" || echo "No changes"
+           git push https://USER:[email protected]/spaces/felipekitamura/omnibin HEAD:main
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
space/space/space/space/space/space/space/space/space/space/space/space/.gitignore ADDED
@@ -0,0 +1,51 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Distribution
+ dist/
+ build/
+ update.bat
+
+
+ # Coverage
+ .coverage
+ htmlcov/
+
+ # Results and data
+ *.xlsx
+ *.xls
+ *.db
+ *.sqlite3
space/space/space/space/space/space/space/space/space/space/space/space/CHANGELOG.md ADDED
@@ -0,0 +1,15 @@
+ # Changelog
+
+ All notable changes to this project will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [0.1.0] - 2024-03-19
+
+ ### Added
+ - Initial release
+ - Basic binary classification metrics and visualizations
+ - Comprehensive reporting functionality
+ - Confidence interval calculations
+ - Example usage and documentation
space/space/space/space/space/space/space/space/space/space/space/space/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Felipe Campos Kitamura
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
space/space/space/space/space/space/space/space/space/space/space/space/data/scores.csv ADDED
The diff for this file is too large to render.
 
space/space/space/space/space/space/space/space/space/space/space/space/example.py ADDED
@@ -0,0 +1,36 @@
+ import pandas as pd
+ import numpy as np
+ import os
+ from omnibin import generate_binary_classification_report, ColorScheme
+
+ # Define paths
+ RESULTS_DIR = os.path.join(os.path.dirname(__file__), "results")
+
+ # Ensure results directory exists
+ os.makedirs(RESULTS_DIR, exist_ok=True)
+
+ # Generate random data
+ data = pd.DataFrame({
+     'y_true': (y := np.random.choice([0, 1], 1000, p=[0.9, 0.1])),  # class prior: 90% negatives
+     'y_pred': np.where(
+         y,
+         np.random.beta(3, 1.5, 1000) * 0.9 + 0.1,  # Positive cases: less skewed towards 1.0
+         np.random.beta(1.5, 3, 1000) * 0.9 + 0.1   # Negative cases: less skewed towards 0.1
+     )
+ })
+
+ y_true = data['y_true'].values
+ y_scores = data['y_pred'].values
+
+ # Generate comprehensive classification report
+ report_path = generate_binary_classification_report(
+     y_true=y_true,
+     y_scores=y_scores,
+     output_path=os.path.join(RESULTS_DIR, "classification_report.pdf"),
+     n_bootstrap=1000,
+     random_seed=42,  # Set a fixed random seed for reproducibility
+     dpi=72,
+     color_scheme=ColorScheme.DEFAULT
+ )
+
+ print(f"Report generated and saved to: {report_path}")
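Note: the original line read `np.random.choice([0,1],1000,p:=[.9,.1])`, which passes the probability list as the positional `replace` argument; the fix above uses the `p=` keyword so the 90/10 class prior actually applies. The `beta(3, 1.5)` and `beta(1.5, 3)` draws push positive-class scores toward 1 and negative-class scores toward 0, so the report has real signal to plot. A standalone sanity check of that setup (an illustrative sketch using numpy's `Generator` API rather than the script's global random state):

```python
import numpy as np
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(42)
y_true = rng.choice([0, 1], 1000, p=[0.9, 0.1])  # ~10% positives
y_scores = np.where(
    y_true,
    rng.beta(3, 1.5, 1000) * 0.9 + 0.1,  # positives: mass near 1.0
    rng.beta(1.5, 3, 1000) * 0.9 + 0.1,  # negatives: mass near 0.1
)

print(f"Prevalence: {y_true.mean():.3f}")
print(f"AUC-ROC:    {roc_auc_score(y_true, y_scores):.3f}")  # well above 0.5
```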
space/space/space/space/space/space/space/space/space/space/space/space/pyproject.toml ADDED
@@ -0,0 +1,31 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "omnibin"
+ version = "0.1.5"
+ description = "A package for generating comprehensive binary classification reports with visualizations and confidence intervals"
+ readme = "README.md"
+ requires-python = ">=3.11"
+ license = "MIT"
+ authors = [
+     { name = "Felipe Campos Kitamura", email = "[email protected]" }
+ ]
+ classifiers = [
+     "Programming Language :: Python :: 3",
+     "License :: OSI Approved :: MIT License",
+     "Operating System :: OS Independent",
+ ]
+ dependencies = [
+     "numpy>=1.21.0",
+     "pandas>=1.3.0",
+     "scikit-learn>=1.0.0",
+     "matplotlib>=3.4.0",
+     "scipy>=1.7.0",
+     "seaborn>=0.11.0",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/kitamura-felipe/omnibin"
+ Repository = "https://github.com/kitamura-felipe/omnibin.git"
space/space/space/space/space/space/space/space/space/space/space/space/results/classification_report.pdf ADDED
Binary file (49.5 kB).
 
space/space/space/space/space/space/space/space/space/space/space/space/space/.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
space/space/space/space/space/space/space/space/space/space/space/space/space/README.md ADDED
@@ -0,0 +1,112 @@
+ ---
+ title: Omnibin
+ emoji: ⚡
+ colorFrom: pink
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 5.29.0
+ app_file: app.py
+ pinned: false
+ license: mit
+ short_description: A Python package for generating comprehensive binary classi
+ ---
+
+ # Omnibin
+
+ A Python package for generating comprehensive binary classification reports with visualizations and confidence intervals.
+
+ ## Try it Online
+
+ You can try Omnibin directly in your browser through our [Hugging Face Space](https://felipekitamura-omnibin.hf.space).
+
+ ## Installation
+
+ ```bash
+ pip install omnibin
+ ```
+
+ ## Usage
+
+ ```python
+ import pandas as pd
+ from omnibin import generate_binary_classification_report, ColorScheme
+
+ # Load your data
+ data = pd.read_csv("data/scores.csv")
+ y_true = data['y_true'].values
+ y_scores = data['y_pred'].values
+
+ # Generate comprehensive classification report
+ report_path = generate_binary_classification_report(
+     y_true=y_true,        # Array of true binary labels (0 or 1)
+     y_scores=y_scores,    # Array of predicted probabilities or scores
+     output_path="classification_report.pdf",  # Path to save the PDF report
+     n_bootstrap=1000,     # Number of bootstrap iterations for confidence intervals
+     random_seed=42,       # Random seed for reproducibility
+     dpi=300,              # DPI for plot resolution
+     color_scheme=ColorScheme.DEFAULT  # Color scheme for plots (DEFAULT, MONOCHROME, or VIBRANT)
+ )
+ ```
+
+ ## Input Format
+
+ The input data should be provided as:
+ - `y_true`: Array of true binary labels (0 or 1)
+ - `y_pred`: Array of predicted probabilities or scores
+
+ ## Features
+
+ - Generates a comprehensive PDF report with:
+   - ROC curve with confidence bands
+   - Precision-Recall curve with confidence bands
+   - Metrics vs. threshold plots
+   - Confusion matrix at optimal threshold
+   - Calibration plot
+   - Summary table with confidence intervals
+ - Calculates optimal threshold using Youden's J statistic
+ - Provides confidence intervals using bootstrapping
+ - Supports both probability and score-based predictions
+
+ ## Metrics Included
+
+ - Accuracy
+ - Sensitivity (Recall)
+ - Specificity
+ - Positive Predictive Value (Precision)
+ - Matthews Correlation Coefficient
+ - F1 Score
+ - AUC-ROC
+ - AUC-PR
+
+ All metrics include 95% confidence intervals calculated through bootstrapping.
+
+ ## Output
+
+ The package generates a PDF report containing:
+ 1. ROC and Precision-Recall curves with confidence bands
+ 2. Metrics plotted across different thresholds
+ 3. Confusion matrix at the optimal threshold
+ 4. Calibration plot
+ 5. Summary table with all metrics and their confidence intervals
+
+ ## Example
+
+ Here are examples of the visualizations generated by Omnibin:
+
+ ### ROC and Precision-Recall Curves
+ ![ROC and PR Curves](results/plots/roc_pr.png)
+
+ ### Metrics vs Threshold
+ <img src="results/plots/metrics_threshold.png">
+
+ ### Confusion Matrix
+ <img src="results/plots/confusion_matrix.png">
+
+ ### Calibration Plot
+ <img src="results/plots/calibration.png">
+
+ ### Prediction Distribution
+ <img src="results/plots/prediction_distribution.png">
+
+ ### Metrics Summary
+ ![Metrics Summary](results/plots/metrics_summary.png)
space/space/space/space/space/space/space/space/space/space/space/space/space/app.py ADDED
@@ -0,0 +1,90 @@
+ import gradio as gr
+ import pandas as pd
+ import os
+ import shutil
+ from omnibin import generate_binary_classification_report, ColorScheme
+
+ # Define results directory
+ RESULTS_DIR = "/tmp/results"
+
+ # Map string color schemes to enum values
+ COLOR_SCHEME_MAP = {
+     "DEFAULT": ColorScheme.DEFAULT,
+     "MONOCHROME": ColorScheme.MONOCHROME,
+     "VIBRANT": ColorScheme.VIBRANT
+ }
+
+ def process_csv(csv_file, n_bootstrap=1000, dpi=72, color_scheme="DEFAULT"):
+     # Convert string color scheme to enum
+     color_scheme_enum = COLOR_SCHEME_MAP[color_scheme]
+
+     # Read the CSV file
+     df = pd.read_csv(csv_file.name)
+
+     # Check if required columns exist
+     required_columns = ['y_true', 'y_pred']
+     if not all(col in df.columns for col in required_columns):
+         raise ValueError("CSV file must contain 'y_true' and 'y_pred' columns")
+
+     # Clean up results directory if it exists
+     if os.path.exists(RESULTS_DIR):
+         shutil.rmtree(RESULTS_DIR)
+
+     # Create fresh results directory
+     os.makedirs(RESULTS_DIR, exist_ok=True)
+
+     # Generate the report
+     report_path = generate_binary_classification_report(
+         y_true=df['y_true'].values,
+         y_scores=df['y_pred'].values,
+         output_path=os.path.join(RESULTS_DIR, "classification_report.pdf"),
+         n_bootstrap=n_bootstrap,
+         random_seed=42,
+         dpi=dpi,
+         color_scheme=color_scheme_enum
+     )
+
+     # Get paths to individual plots
+     plots_dir = os.path.join(RESULTS_DIR, "plots")
+     plot_paths = {
+         "ROC and PR Curves": os.path.join(plots_dir, "roc_pr.png"),
+         "Metrics vs Threshold": os.path.join(plots_dir, "metrics_threshold.png"),
+         "Confusion Matrix": os.path.join(plots_dir, "confusion_matrix.png"),
+         "Calibration Plot": os.path.join(plots_dir, "calibration.png"),
+         "Prediction Distribution": os.path.join(plots_dir, "prediction_distribution.png"),
+         "Metrics Summary": os.path.join(plots_dir, "metrics_summary.png")
+     }
+
+     # Return both the PDF and the plot images
+     return report_path, *plot_paths.values()
+
+ # Create the Gradio interface
+ iface = gr.Interface(
+     fn=process_csv,
+     inputs=[
+         gr.File(label="Upload CSV file with 'y_true' and 'y_pred' columns"),
+         gr.Number(label="Number of Bootstrap Iterations", value=1000, minimum=100, maximum=10000),
+         gr.Number(label="DPI", value=72, minimum=50, maximum=300),
+         gr.Dropdown(label="Color Scheme", choices=["DEFAULT", "MONOCHROME", "VIBRANT"], value="DEFAULT")
+     ],
+     outputs=[
+         gr.File(label="Classification Report PDF"),
+         gr.Image(label="ROC and PR Curves"),
+         gr.Image(label="Metrics vs Threshold"),
+         gr.Image(label="Confusion Matrix"),
+         gr.Image(label="Calibration Plot"),
+         gr.Image(label="Prediction Distribution"),
+         gr.Image(label="Metrics Summary")
+     ],
+     title="Binary Classification Report Generator",
+     description="Upload a CSV file containing 'y_true' and 'y_pred' columns to generate a binary classification report.\n\n"
+                 "'y_true': reference standard (0s or 1s).\n\n"
+                 "'y_pred': model prediction (continuous value between 0 and 1).\n\n"
+                 "This application takes approximately 35 seconds to generate the report.\n",
+
+     examples=[["scores.csv", 1000, 72, "DEFAULT"]],
+     cache_examples=False
+ )
+
+ if __name__ == "__main__":
+     iface.launch()
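Outside the Gradio UI, `process_csv` can be smoke-tested directly: it only touches the upload object's `.name` attribute, so any object carrying a path will do. A hypothetical local check (assuming a `scores.csv` with `y_true` and `y_pred` columns sits next to `app.py`):

```python
# Hypothetical smoke test for process_csv outside the Gradio UI. Importing app
# builds the Interface but does not launch it (launch() is guarded by __main__).
from types import SimpleNamespace

from app import process_csv

report_pdf, *plot_pngs = process_csv(
    SimpleNamespace(name="scores.csv"),  # mimics the uploaded-file object's .name
    n_bootstrap=200,                     # fewer iterations for a quick check
    dpi=72,
    color_scheme="DEFAULT",
)
print(report_pdf)    # /tmp/results/classification_report.pdf
print(plot_pngs[0])  # /tmp/results/plots/roc_pr.png
```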
space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin-0.1.4-py3-none-any.whl ADDED
Binary file (5.86 kB).
 
space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .metrics import generate_binary_classification_report, ColorScheme
+
+ __version__ = "0.1.5"
+ __all__ = ["generate_binary_classification_report", "ColorScheme"]
space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/metrics.py ADDED
@@ -0,0 +1,62 @@
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from tqdm import tqdm
+ import os
+ from sklearn.metrics import (
+     accuracy_score, recall_score, precision_score, f1_score, roc_auc_score,
+     average_precision_score, confusion_matrix, matthews_corrcoef, roc_curve,
+     precision_recall_curve
+ )
+ from sklearn.calibration import calibration_curve
+ from matplotlib.backends.backend_pdf import PdfPages
+ from enum import Enum
+ from .utils import (
+     ColorScheme, calculate_metrics_by_threshold, bootstrap_curves,
+     calculate_optimal_threshold, calculate_metrics_summary,
+     calculate_confidence_intervals, create_output_directories,
+     plot_roc_pr_curves, plot_metrics_threshold, plot_confusion_matrix,
+     plot_calibration, plot_metrics_summary, plot_prediction_distribution
+ )
+
+ def generate_binary_classification_report(y_true, y_scores, output_path="omnibin_report.pdf", n_bootstrap=1000, random_seed=42, dpi=300, color_scheme=ColorScheme.DEFAULT):
+     # Set random seed for reproducibility
+     if random_seed is not None:
+         np.random.seed(random_seed)
+
+     # Set DPI for all figures
+     plt.rcParams['figure.dpi'] = dpi
+
+     # Get color scheme
+     colors = color_scheme.value
+
+     # Calculate metrics and optimal threshold
+     metrics_df = calculate_metrics_by_threshold(y_true, y_scores)
+     best_thresh = calculate_optimal_threshold(y_true, y_scores)
+     metrics_summary = calculate_metrics_summary(y_true, y_scores, best_thresh)
+     conf_intervals = calculate_confidence_intervals(y_true, y_scores, best_thresh, n_bootstrap)
+
+     # Create output directories
+     plots_dir = create_output_directories(output_path)
+
+     # Calculate confidence intervals for curves
+     tpr_ci, precision_ci, common_fpr, common_recall = bootstrap_curves(y_true, y_scores, n_boot=n_bootstrap)
+
+     with PdfPages(output_path) as pdf:
+         # Generate and save all plots
+         plots = [
+             plot_roc_pr_curves(y_true, y_scores, tpr_ci, precision_ci, common_fpr, common_recall, colors, dpi, plots_dir),
+             plot_metrics_threshold(metrics_df, colors, dpi, plots_dir),
+             plot_confusion_matrix(y_true, y_scores, best_thresh, colors, dpi, plots_dir),
+             plot_calibration(y_true, y_scores, colors, dpi, plots_dir),
+             plot_metrics_summary(metrics_summary, conf_intervals, dpi, plots_dir),
+             plot_prediction_distribution(y_true, y_scores, best_thresh, colors, dpi, plots_dir)
+         ]
+
+         # Save all plots to PDF
+         for plot in plots:
+             pdf.savefig(plot, dpi=dpi)
+             plt.close(plot)
+
+     return output_path
space/space/space/space/space/space/space/space/space/space/space/space/space/omnibin/utils.py ADDED
@@ -0,0 +1,263 @@
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from tqdm import tqdm
+ from sklearn.metrics import (
+     accuracy_score, recall_score, precision_score, f1_score, roc_auc_score,
+     average_precision_score, confusion_matrix, matthews_corrcoef, roc_curve,
+     precision_recall_curve
+ )
+ from sklearn.calibration import calibration_curve
+ from enum import Enum
+ import os
+
+ class ColorScheme(Enum):
+     DEFAULT = {
+         'positive_class': 'tab:blue',
+         'negative_class': 'tab:orange',
+         'roc_curve': 'tab:blue',
+         'pr_curve': 'tab:blue',
+         'threshold_line': 'black',
+         'calibration_curve': 'tab:blue',
+         'calibration_reference': 'gray',
+         'metrics_colors': ['tab:blue', 'tab:red', 'tab:green', 'tab:purple', 'tab:orange', 'tab:brown', 'tab:pink'],
+         'cmap': 'Blues'
+     }
+
+     MONOCHROME = {
+         'positive_class': '#404040',
+         'negative_class': '#808080',
+         'roc_curve': '#000000',
+         'pr_curve': '#000000',
+         'threshold_line': '#000000',
+         'calibration_curve': '#000000',
+         'calibration_reference': '#808080',
+         'metrics_colors': ['#000000', '#404040', '#606060', '#808080', '#A0A0A0', '#C0C0C0', '#E0E0E0'],
+         'cmap': 'Greys'
+     }
+
+     VIBRANT = {
+         'positive_class': '#FF6B6B',
+         'negative_class': '#4ECDC4',
+         'roc_curve': '#FF6B6B',
+         'pr_curve': '#4ECDC4',
+         'threshold_line': '#2C3E50',
+         'calibration_curve': '#FF6B6B',
+         'calibration_reference': '#95A5A6',
+         'metrics_colors': ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEEAD', '#D4A5A5', '#9B59B6'],
+         'cmap': 'Greens'
+     }
+
+ def calculate_metrics_by_threshold(y_true, y_scores):
+     """Calculate various metrics across different thresholds."""
+     thresholds = np.linspace(0, 1, 100)
+     metrics_by_threshold = []
+
+     for t in tqdm(thresholds, desc="Calculating metrics across thresholds"):
+         y_pred = (y_scores >= t).astype(int)
+         acc = accuracy_score(y_true, y_pred)
+         sens = recall_score(y_true, y_pred)
+         spec = recall_score(y_true, y_pred, pos_label=0)
+         ppv = precision_score(y_true, y_pred, zero_division=0)
+         mcc = matthews_corrcoef(y_true, y_pred)
+         f1 = f1_score(y_true, y_pred)
+         metrics_by_threshold.append([t, acc, sens, spec, ppv, mcc, f1])
+
+     return pd.DataFrame(metrics_by_threshold, columns=[
+         "Threshold", "Accuracy", "Sensitivity", "Specificity",
+         "PPV", "MCC", "F1 Score"
+     ])
+
+ def bootstrap_metric(metric_func, y_true, y_scores, n_boot=1000):
+     """Calculate bootstrap confidence intervals for a given metric."""
+     stats = []
+     for _ in tqdm(range(n_boot), desc="Bootstrap iterations", leave=False):
+         indices = np.random.choice(range(len(y_true)), len(y_true), replace=True)
+         try:
+             stats.append(metric_func(y_true[indices], y_scores[indices]))
+         except Exception:  # skip resamples where the metric is undefined
+             continue
+     return np.percentile(stats, [2.5, 97.5])
+
+ def bootstrap_curves(y_true, y_scores, n_boot=1000):
+     """Calculate bootstrap confidence intervals for ROC and PR curves."""
+     tprs = []
+     fprs = []
+     precisions = []
+     recalls = []
+
+     base_fpr, base_tpr, _ = roc_curve(y_true, y_scores)
+     base_precision, base_recall, _ = precision_recall_curve(y_true, y_scores)
+
+     common_fpr = np.linspace(0, 1, 100)
+     common_recall = np.linspace(0, 1, 100)
+
+     for _ in tqdm(range(n_boot), desc="Bootstrap iterations for curves", leave=False):
+         indices = np.random.choice(range(len(y_true)), len(y_true), replace=True)
+         try:
+             fpr, tpr, _ = roc_curve(y_true[indices], y_scores[indices])
+             tpr_interp = np.interp(common_fpr, fpr, tpr)
+             tprs.append(tpr_interp)
+
+             precision, recall, _ = precision_recall_curve(y_true[indices], y_scores[indices])
+             sort_idx = np.argsort(recall)
+             recall = recall[sort_idx]
+             precision = precision[sort_idx]
+             precision_interp = np.interp(common_recall, recall, precision)
+             precisions.append(precision_interp)
+         except Exception:  # skip resamples where the curves are undefined
+             continue
+
+     tpr_ci = np.percentile(tprs, [2.5, 97.5], axis=0)
+     precision_ci = np.percentile(precisions, [2.5, 97.5], axis=0)
+
+     return tpr_ci, precision_ci, common_fpr, common_recall
+
+ def calculate_optimal_threshold(y_true, y_scores):
+     """Calculate the optimal threshold using ROC curve."""
+     fpr, tpr, roc_thresholds = roc_curve(y_true, y_scores)
+     j_scores = tpr - fpr
+     return roc_thresholds[np.argmax(j_scores)]
+
+ def calculate_metrics_summary(y_true, y_scores, best_thresh):
+     """Calculate summary metrics at the optimal threshold."""
+     y_pred_opt = (y_scores >= best_thresh).astype(int)
+
+     return {
+         "Accuracy": accuracy_score(y_true, y_pred_opt),
+         "Sensitivity": recall_score(y_true, y_pred_opt),
+         "Specificity": recall_score(y_true, y_pred_opt, pos_label=0),
+         "PPV": precision_score(y_true, y_pred_opt, zero_division=0),
+         "MCC": matthews_corrcoef(y_true, y_pred_opt),
+         "F1 Score": f1_score(y_true, y_pred_opt),
+         "AUC-ROC": roc_auc_score(y_true, y_scores),
+         "AUC-PR": average_precision_score(y_true, y_scores)
+     }
+
+ def calculate_confidence_intervals(y_true, y_scores, best_thresh, n_bootstrap=1000):
+     """Calculate confidence intervals for all metrics."""
+     metric_functions = {
+         "Accuracy": lambda yt, ys: accuracy_score(yt, ys >= best_thresh),
+         "Sensitivity": lambda yt, ys: recall_score(yt, ys >= best_thresh),
+         "Specificity": lambda yt, ys: recall_score(yt, ys >= best_thresh, pos_label=0),
+         "PPV": lambda yt, ys: precision_score(yt, ys >= best_thresh, zero_division=0),
+         "MCC": lambda yt, ys: matthews_corrcoef(yt, ys >= best_thresh),
+         "F1 Score": lambda yt, ys: f1_score(yt, ys >= best_thresh),
+         "AUC-ROC": lambda yt, ys: roc_auc_score(yt, ys),
+         "AUC-PR": lambda yt, ys: average_precision_score(yt, ys)
+     }
+
+     return {
+         name: bootstrap_metric(func, y_true, y_scores, n_boot=n_bootstrap)
+         for name, func in metric_functions.items()
+     }
+
+ def create_output_directories(output_path):
+     """Create necessary output directories for plots and PDF."""
+     output_dir = os.path.dirname(output_path)
+     if output_dir:
+         os.makedirs(output_dir, exist_ok=True)
+
+     plots_dir = os.path.join(output_dir, "plots")
+     os.makedirs(plots_dir, exist_ok=True)
+
+     return plots_dir
+
+ def plot_roc_pr_curves(y_true, y_scores, tpr_ci, precision_ci, common_fpr, common_recall, colors, dpi, plots_dir):
+     """Generate ROC and PR curves with confidence intervals."""
+     plt.figure(figsize=(12, 5), dpi=dpi)
+
+     plt.subplot(1, 2, 1)
+     fpr, tpr, _ = roc_curve(y_true, y_scores)
+     plt.plot(fpr, tpr, label="ROC curve", color=colors['roc_curve'])
+     plt.fill_between(common_fpr, tpr_ci[0], tpr_ci[1], alpha=0.3, color=colors['roc_curve'])
+     plt.plot([0, 1], [0, 1], "k--")
+     plt.xlabel("False Positive Rate")
+     plt.ylabel("True Positive Rate")
+     plt.title("ROC Curve")
+     plt.legend()
+
+     plt.subplot(1, 2, 2)
+     precision, recall, _ = precision_recall_curve(y_true, y_scores)
+     plt.plot(recall, precision, label="PR curve", color=colors['pr_curve'])
+     plt.fill_between(common_recall, precision_ci[0], precision_ci[1], alpha=0.3, color=colors['pr_curve'])
+     plt.xlabel("Recall")
+     plt.ylabel("Precision")
+     plt.title("Precision-Recall Curve")
+     plt.legend()
+
+     plt.savefig(os.path.join(plots_dir, "roc_pr.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
+
+ def plot_metrics_threshold(metrics_df, colors, dpi, plots_dir):
+     """Generate metrics vs threshold plot."""
+     plt.figure(figsize=(10, 6), dpi=dpi)
+     for i, col in enumerate(metrics_df.columns[1:]):
+         plt.plot(metrics_df["Threshold"], metrics_df[col], label=col,
+                  color=colors['metrics_colors'][i % len(colors['metrics_colors'])])
+     plt.xlabel("Threshold")
+     plt.ylabel("Metric Value")
+     plt.title("Metrics Across Thresholds")
+     plt.legend()
+
+     plt.savefig(os.path.join(plots_dir, "metrics_threshold.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
+
+ def plot_confusion_matrix(y_true, y_scores, best_thresh, colors, dpi, plots_dir):
+     """Generate confusion matrix plot."""
+     cm = confusion_matrix(y_true, y_scores >= best_thresh)
+     plt.figure(figsize=(5, 4), dpi=dpi)
+     sns.heatmap(cm, annot=True, fmt="d", cmap=colors['cmap'], cbar=False, annot_kws={"size": 12})
+     plt.title("Confusion Matrix (Optimal Threshold)", fontsize=12)
+     plt.xlabel("Predicted Label", fontsize=12)
+     plt.ylabel("True Label", fontsize=12)
+
+     plt.savefig(os.path.join(plots_dir, "confusion_matrix.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
+
+ def plot_calibration(y_true, y_scores, colors, dpi, plots_dir):
+     """Generate calibration plot."""
+     plt.figure(figsize=(6, 6), dpi=dpi)
+     prob_true, prob_pred = calibration_curve(y_true, y_scores, n_bins=10, strategy='uniform')
+     plt.plot(prob_pred, prob_true, marker='o', label='Calibration curve', color=colors['calibration_curve'])
+     plt.plot([0, 1], [0, 1], linestyle='--', color=colors['calibration_reference'])
+     plt.xlabel('Predicted Probability')
+     plt.ylabel('True Probability')
+     plt.title('Calibration Plot')
+     plt.legend()
+
+     plt.savefig(os.path.join(plots_dir, "calibration.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
+
+ def plot_metrics_summary(metrics_summary, conf_intervals, dpi, plots_dir):
+     """Generate metrics summary table plot."""
+     fig, ax = plt.subplots(figsize=(8, 6), dpi=dpi)
+     ax.axis("off")
+     table_data = [
+         [k, f"{v:.3f}", f"[{conf_intervals[k][0]:.3f}, {conf_intervals[k][1]:.3f}]"]
+         for k, v in metrics_summary.items()
+     ]
+     table = ax.table(cellText=table_data, colLabels=["Metric", "Value", "95% CI"], loc="center")
+     table.auto_set_font_size(False)
+     table.set_fontsize(10)
+     table.scale(1.2, 1.2)
+     ax.set_title("Performance Metrics at Optimal Threshold", fontweight="bold")
+
+     plt.savefig(os.path.join(plots_dir, "metrics_summary.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
+
+ def plot_prediction_distribution(y_true, y_scores, best_thresh, colors, dpi, plots_dir):
+     """Generate prediction distribution histogram."""
+     plt.figure(figsize=(10, 6), dpi=dpi)
+     plt.hist(y_scores[y_true == 1], bins=50, alpha=0.5, label='Positive Class', color=colors['positive_class'])
+     plt.hist(y_scores[y_true == 0], bins=50, alpha=0.5, label='Negative Class', color=colors['negative_class'])
+     plt.axvline(x=best_thresh, color=colors['threshold_line'], linestyle='--',
+                 label=f'Optimal Threshold ({best_thresh:.3f})')
+     plt.xlabel('Predicted Probability')
+     plt.ylabel('Count')
+     plt.title('Distribution of Predictions')
+     plt.legend()
+
+     plt.savefig(os.path.join(plots_dir, "prediction_distribution.png"), dpi=dpi, bbox_inches='tight')
+     return plt.gcf()
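`bootstrap_metric` and `bootstrap_curves` both follow the percentile-bootstrap recipe: resample `(y_true, y_scores)` pairs with replacement, recompute the statistic on each resample, and take the 2.5th and 97.5th percentiles as a 95% confidence interval. The `except Exception` (originally a bare `except:`, tightened here) guards resamples where the statistic is undefined, for example AUC on a resample that drew only one class. A condensed standalone sketch of the same idea, with an explicit one-class guard instead of the try/except:

```python
import numpy as np
from sklearn.metrics import roc_auc_score

def bootstrap_auc_ci(y_true, y_scores, n_boot=1000, seed=42):
    """Percentile-bootstrap 95% CI for AUC-ROC (illustrative sketch)."""
    rng = np.random.default_rng(seed)
    stats = []
    for _ in range(n_boot):
        idx = rng.integers(0, len(y_true), len(y_true))  # resample with replacement
        if np.unique(y_true[idx]).size < 2:
            continue  # AUC is undefined when a resample contains a single class
        stats.append(roc_auc_score(y_true[idx], y_scores[idx]))
    return np.percentile(stats, [2.5, 97.5])
```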
space/space/space/space/space/space/space/space/space/space/space/space/space/requirements.txt ADDED
@@ -0,0 +1,6 @@
+ pandas
+ numpy
+ gradio
+ matplotlib
+ seaborn
+ scikit-learn
space/space/space/space/space/space/space/space/space/space/space/space/space/scores.csv ADDED
The diff for this file is too large to render.
 
space/space/space/space/space/space/space/space/space/space/space/space/tests/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """
+ Unit tests for the omnibin package.
+ """
space/space/space/space/space/space/space/space/space/space/space/space/tests/test_metrics.py ADDED
@@ -0,0 +1,141 @@
+ import unittest
+ import os
+ import pandas as pd
+ import numpy as np
+ from omnibin.metrics import generate_binary_classification_report
+
+ class TestMetrics(unittest.TestCase):
+     @classmethod
+     def setUpClass(cls):
+         """Set up test data"""
+         # Create synthetic test data
+         np.random.seed(42)
+         n_samples = 1000
+         cls.y_true = np.random.binomial(1, 0.3, n_samples)
+         cls.y_scores = np.random.beta(2, 5, n_samples)
+
+         # Adjust scores to have some correlation with true labels
+         cls.y_scores[cls.y_true == 1] = np.random.beta(5, 2, sum(cls.y_true == 1))
+
+         # Create test output directory
+         cls.test_output_dir = "test_outputs"
+         os.makedirs(cls.test_output_dir, exist_ok=True)
+
+     def test_report_generation(self):
+         """Test the main report generation function"""
+         output_path = os.path.join(self.test_output_dir, "test_report.pdf")
+
+         # Generate report
+         result_path = generate_binary_classification_report(
+             y_true=self.y_true,
+             y_scores=self.y_scores,
+             output_path=output_path,
+             n_bootstrap=100  # Use smaller number for testing
+         )
+
+         # Test that file was created
+         self.assertTrue(os.path.exists(result_path))
+         self.assertTrue(os.path.getsize(result_path) > 0)
+
+         # Clean up
+         os.remove(result_path)
+
+     def test_input_validation(self):
+         """Test input validation"""
+         # Test with invalid y_true values
+         with self.assertRaises(ValueError):
+             generate_binary_classification_report(
+                 y_true=np.array([0, 1, 2]),  # Invalid label
+                 y_scores=np.array([0.1, 0.5, 0.9])
+             )
+
+         # Test with invalid y_scores values
+         with self.assertRaises(ValueError):
+             generate_binary_classification_report(
+                 y_true=np.array([0, 1, 0]),
+                 y_scores=np.array([-0.1, 1.5, 0.9])  # Values outside [0,1]
+             )
+
+         # Test with mismatched lengths
+         with self.assertRaises(ValueError):
+             generate_binary_classification_report(
+                 y_true=np.array([0, 1]),
+                 y_scores=np.array([0.1, 0.5, 0.9])
+             )
+
+     def test_bootstrap_consistency(self):
+         """Test that bootstrap results are consistent"""
+         output_path1 = os.path.join(self.test_output_dir, "test_report1.pdf")
+         output_path2 = os.path.join(self.test_output_dir, "test_report2.pdf")
+
+         # Generate two reports with same data and seed
+         np.random.seed(42)
+         generate_binary_classification_report(
+             y_true=self.y_true,
+             y_scores=self.y_scores,
+             output_path=output_path1,
+             n_bootstrap=100
+         )
+
+         np.random.seed(42)
+         generate_binary_classification_report(
+             y_true=self.y_true,
+             y_scores=self.y_scores,
+             output_path=output_path2,
+             n_bootstrap=100
+         )
+
+         # Compare file sizes (they should be similar)
+         size1 = os.path.getsize(output_path1)
+         size2 = os.path.getsize(output_path2)
+         self.assertAlmostEqual(size1, size2, delta=1000)  # Allow small differences due to PDF compression
+
+         # Clean up
+         os.remove(output_path1)
+         os.remove(output_path2)
+
+     def test_edge_cases(self):
+         """Test edge cases"""
+         # Test with all positive labels
+         output_path = os.path.join(self.test_output_dir, "all_positive.pdf")
+         generate_binary_classification_report(
+             y_true=np.ones(100),
+             y_scores=np.random.random(100),
+             output_path=output_path,
+             n_bootstrap=100
+         )
+         self.assertTrue(os.path.exists(output_path))
+         os.remove(output_path)
+
+         # Test with all negative labels
+         output_path = os.path.join(self.test_output_dir, "all_negative.pdf")
+         generate_binary_classification_report(
+             y_true=np.zeros(100),
+             y_scores=np.random.random(100),
+             output_path=output_path,
+             n_bootstrap=100
+         )
+         self.assertTrue(os.path.exists(output_path))
+         os.remove(output_path)
+
+         # Test with perfect predictions
+         output_path = os.path.join(self.test_output_dir, "perfect.pdf")
+         generate_binary_classification_report(
+             y_true=np.array([0, 1, 0, 1]),
+             y_scores=np.array([0.1, 0.9, 0.2, 0.8]),
+             output_path=output_path,
+             n_bootstrap=100
+         )
+         self.assertTrue(os.path.exists(output_path))
+         os.remove(output_path)
+
+     @classmethod
+     def tearDownClass(cls):
+         """Clean up test outputs"""
+         if os.path.exists(cls.test_output_dir):
+             for file in os.listdir(cls.test_output_dir):
+                 os.remove(os.path.join(cls.test_output_dir, file))
+             os.rmdir(cls.test_output_dir)
+
+ if __name__ == '__main__':
+     unittest.main()
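The suite is written with `unittest.TestCase`, while the workflows invoke `pytest tests/`; pytest collects `unittest` classes natively, so both entry points run the same tests. A sketch of the pytest-free equivalent:

```python
# Runs the same suite without pytest, using the standard-library runner.
import unittest

suite = unittest.defaultTestLoader.discover("tests", pattern="test_*.py")
unittest.TextTestRunner(verbosity=2).run(suite)
```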
space/space/space/tests/test_metrics.py CHANGED
@@ -2,6 +2,8 @@ import unittest
  import os
  import pandas as pd
  import numpy as np
+ import warnings
+ from sklearn.exceptions import UndefinedMetricWarning
  from omnibin.metrics import generate_binary_classification_report

  class TestMetrics(unittest.TestCase):
@@ -109,12 +111,15 @@ class TestMetrics(unittest.TestCase):

          # Test with all negative labels
          output_path = os.path.join(self.test_output_dir, "all_negative.pdf")
-         generate_binary_classification_report(
-             y_true=np.zeros(100),
-             y_scores=np.random.random(100),
-             output_path=output_path,
-             n_bootstrap=100
-         )
+         with warnings.catch_warnings():
+             warnings.filterwarnings("ignore", category=UserWarning)
+             warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
+             generate_binary_classification_report(
+                 y_true=np.zeros(100),
+                 y_scores=np.random.random(100),
+                 output_path=output_path,
+                 n_bootstrap=100
+             )
          self.assertTrue(os.path.exists(output_path))
          os.remove(output_path)

@@ -134,7 +139,13 @@ class TestMetrics(unittest.TestCase):
          """Clean up test outputs"""
          if os.path.exists(cls.test_output_dir):
              for file in os.listdir(cls.test_output_dir):
-                 os.remove(os.path.join(cls.test_output_dir, file))
+                 file_path = os.path.join(cls.test_output_dir, file)
+                 if os.path.isfile(file_path):
+                     os.remove(file_path)
+                 elif os.path.isdir(file_path):
+                     for subfile in os.listdir(file_path):
+                         os.remove(os.path.join(file_path, subfile))
+                     os.rmdir(file_path)
          os.rmdir(cls.test_output_dir)

  if __name__ == '__main__':