Add BiRefNet v2
- app.py +11 -4
- image_processing_pipeline.py +10 -5
- utils/upload_to_dataset.py +55 -31
app.py
CHANGED
@@ -22,6 +22,7 @@ from db import (
     fill_database_once,
     compute_votes_per_model
 )
+from utils.birefnet import iterate_over_directory as birefnet_iterate
 
 # Load environment variables
 load_dotenv()
@@ -32,7 +33,7 @@ google_analytics_tracking_id = os.getenv("GOOGLE_ANALYTICS_TRACKING_ID")
 logging.basicConfig(level=logging.INFO)
 
 # Load datasets and initialize database
-dataset = load_dataset("bgsys/background-removal-…
+dataset = load_dataset("bgsys/background-removal-arena_v0_clothing_checkered", split='train')
 fill_database_once()
 
 # Directory setup for JSON dataset
@@ -64,7 +65,7 @@ def update_rankings_table():
     model_vote_counts = compute_votes_per_model()
     try:
         # Create a list of models to iterate over
-        models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0"]
+        models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0", "BiRefNet v2"]
         rankings = []
 
         for model in models:
@@ -104,8 +105,14 @@ def select_new_image(last_used_indices):
     sample = dataset[random_index]
     input_image = sample['original_image']
 
-    segmented_images = [sample.get(key) for key in [
-        …
+    segmented_images = [sample.get(key) for key in [
+        'clipdrop_image', 'bria_image', 'photoroom_image',
+        'removebg_image', 'birefnet_image'
+    ]]
+    segmented_sources = [
+        'Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
+        'RemoveBG', 'BiRefNet v2'
+    ]
 
     if segmented_images.count(None) > 2:
         logging.error("Not enough segmented images found for: %s. Resampling another image.", sample['original_filename'])
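The hunk above adds the birefnet_image column to the two parallel lists but does not show how they are consumed afterwards. A minimal sketch of the kind of pairing and filtering that plausibly follows; the helper name pick_two_candidates and the random selection step are assumptions, not code from this commit:

import random

def pick_two_candidates(segmented_images, segmented_sources):
    # Keep only the models that actually produced an output for this sample.
    available = [(img, src) for img, src in zip(segmented_images, segmented_sources) if img is not None]
    # Hypothetical selection step: draw two distinct results to show side by side in the arena.
    (img_a, src_a), (img_b, src_b) = random.sample(available, 2)
    return img_a, src_a, img_b, src_b

This mirrors the guard in the diff: if more than two entries are None, fewer than the needed candidates remain and the app resamples another image.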
image_processing_pipeline.py
CHANGED
@@ -14,6 +14,7 @@ from utils.clipdrop import iterate_over_directory as clipdrop_iterate
 from utils.upload_to_dataset import upload_to_dataset
 from utils.resize_processed_images import process_images as downsize_processed_images
 from utils.add_checkered_background import process_directory as add_checkered_background_process
+from utils.birefnet import process_directory as birefnet_iterate
 
 def check_env_variables():
     """Check if the necessary environment variables are loaded."""
@@ -22,7 +23,11 @@ def check_env_variables():
 
     load_dotenv()
 
-    required_keys = […
+    required_keys = [
+        'REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY',
+        'BRIA_API_TOKEN', 'CLIPDROP_API_KEY',
+        'FAL_KEY'
+    ]
     missing_keys = [key for key in required_keys if not os.getenv(key)]
 
     if missing_keys:
@@ -86,24 +91,24 @@ def main():
         "removebg": os.path.join(bg_removed_dir, "removebg"),
         "photoroom": os.path.join(bg_removed_dir, "photoroom"),
         "bria": os.path.join(bg_removed_dir, "bria"),
-        "clipdrop": os.path.join(bg_removed_dir, "clipdrop")
+        "clipdrop": os.path.join(bg_removed_dir, "clipdrop"),
+        "birefnet": os.path.join(bg_removed_dir, "birefnet")
     }
 
     for dir_path in bg_removal_dirs.values():
        os.makedirs(dir_path, exist_ok=True)
 
     # Use ThreadPoolExecutor to parallelize API calls
-    with ThreadPoolExecutor(max_workers=…
+    with ThreadPoolExecutor(max_workers=5) as executor:
         executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
         executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
         executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
         executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
-
+        executor.submit(birefnet_iterate, input_resized_dir, bg_removal_dirs["birefnet"])
 
     print("Adding checkered background...")
     add_checkered_background_process(bg_removed_dir, checkered_bg_dir)
 
-
     if args.dataset_name:
         upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
     else:
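utils/birefnet.py itself is not part of this diff, yet both app.py and the pipeline import from it, and FAL_KEY is now a required environment variable, which suggests a fal.ai-hosted BiRefNet endpoint. A minimal sketch of what that module might contain; the endpoint id, the response shape, and the equivalence of the two imported names are assumptions rather than facts from the commit:

import os
import requests
import fal_client  # reads FAL_KEY from the environment

def process_directory(input_dir, output_dir):
    """Run BiRefNet background removal on every image in input_dir (sketch)."""
    for filename in os.listdir(input_dir):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            continue
        image_url = fal_client.upload_file(os.path.join(input_dir, filename))
        # Endpoint id and result structure are assumptions, not taken from the commit.
        result = fal_client.subscribe("fal-ai/birefnet", arguments={"image_url": image_url})
        output_url = result["image"]["url"]
        out_path = os.path.join(output_dir, os.path.splitext(filename)[0] + ".png")
        with open(out_path, "wb") as f:
            f.write(requests.get(output_url).content)

# app.py imports iterate_over_directory while the pipeline imports process_directory;
# the aliasing in both files suggests they refer to the same routine.
iterate_over_directory = process_directory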
utils/upload_to_dataset.py
CHANGED
@@ -6,16 +6,22 @@ import pandas as pd
 import argparse
 from PIL import Image as PILImage
 import sys
+import logging
 
 def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
+    """Upload images to a Hugging Face dataset including BiRefNet results."""
+
+    logging.info(f"Starting dataset upload from {original_images_dir}")
+
     # Define the dataset features with dedicated columns for each model
     features = Features({
-        "original_image": Image(),
-        "clipdrop_image": Image(),
-        "bria_image": Image(),
-        "photoroom_image": Image(),
-        "removebg_image": Image(),
-        "…
+        "original_image": Image(),
+        "clipdrop_image": Image(),
+        "bria_image": Image(),
+        "photoroom_image": Image(),
+        "removebg_image": Image(),
+        "birefnet_image": Image(),
+        "original_filename": Value("string")
     })
 
     # Load image paths and metadata
@@ -23,7 +29,8 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "clipdrop_image": None,
         "bria_image": None,
         "photoroom_image": None,
-        "removebg_image": None
+        "removebg_image": None,
+        "birefnet_image": None
     })
 
     # Walk into the original images folder
@@ -35,16 +42,15 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         data[f]["original_filename"] = f
 
         # Check for corresponding images in processed directories
-        for source in ["clipdrop", "bria", "photoroom", "removebg"]:
-            …
-            for ext in ['.png', '.jpg']:
+        for source in ["clipdrop", "bria", "photoroom", "removebg", "birefnet"]:
+            for ext in ['.png', '.jpg', '.jpeg', '.webp']:
                 processed_image_filename = os.path.splitext(f)[0] + ext
                 source_image_path = os.path.join(processed_images_dir, source, processed_image_filename)
 
                 if os.path.exists(source_image_path):
                     data[f][f"{source}_image"] = source_image_path
-                    break
-
+                    break
+
     # Convert the data to a dictionary of lists
     dataset_dict = {
         "original_image": [],
@@ -52,35 +58,47 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "bria_image": [],
         "photoroom_image": [],
         "removebg_image": [],
+        "birefnet_image": [],
         "original_filename": []
     }
 
     errors = []
+    processed_count = 0
+    skipped_count = 0
 
     for filename, entry in data.items():
         if "original_image" in entry:
-            # Check if all images have the same size
             try:
                 original_size = PILImage.open(entry["original_image"]).size
-                …
+                valid_entry = True
+
+                for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image", "birefnet_image"]:
                     if entry[source] is not None:
-                        …
-                        …
-                        …
-                        …
-                        …
+                        try:
+                            processed_size = PILImage.open(entry[source]).size
+                            if processed_size != original_size:
+                                errors.append(f"Size mismatch for {filename}: {source}")
+                                valid_entry = False
+                        except Exception as e:
+                            errors.append(f"Error with {filename}: {source}")
+                            valid_entry = False
 
-                …
-                …
-                …
-                …
-                …
-                …
+                if valid_entry:
+                    for key in dataset_dict.keys():
+                        if key in entry:
+                            dataset_dict[key].append(entry[key])
+                    processed_count += 1
+                else:
+                    skipped_count += 1
+
+            except Exception as e:
+                errors.append(f"Error processing {filename}")
+                skipped_count += 1
 
     if errors:
-        …
-        …
-        …
+        logging.warning(f"Encountered {len(errors)} errors during processing")
+
+    logging.info(f"Processed: {processed_count}, Skipped: {skipped_count}, Total: {processed_count + skipped_count}")
 
     # Save the data dictionary to a CSV file for inspection
     df = pd.DataFrame.from_dict(dataset_dict)
@@ -90,14 +108,20 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
     dataset = Dataset.from_dict(dataset_dict, features=features)
 
     if dry_run:
-        …
-        print(df.head())  # Display the first few rows for inspection
+        logging.info("Dry run completed - dataset not pushed")
     else:
-        …
+        logging.info(f"Pushing dataset to {dataset_name}")
         api = HfApi()
         dataset.push_to_hub(dataset_name, token=api.token, private=True)
+        logging.info("Upload completed successfully")
 
 if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
     parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
     parser.add_argument("original_images_dir", type=str, help="Directory containing the original images.")
     parser.add_argument("processed_images_dir", type=str, help="Directory containing the processed images with subfolders for each model.")