tdurbor committed
Commit 8436088 · Parent: c098942

Add BiRefNet v2

Files changed (3)
  1. app.py +11 -4
  2. image_processing_pipeline.py +10 -5
  3. utils/upload_to_dataset.py +55 -31
app.py CHANGED
@@ -22,6 +22,7 @@ from db import (
     fill_database_once,
     compute_votes_per_model
 )
+from utils.birefnet import iterate_over_directory as birefnet_iterate
 
 # Load environment variables
 load_dotenv()
@@ -32,7 +33,7 @@ google_analytics_tracking_id = os.getenv("GOOGLE_ANALYTICS_TRACKING_ID")
 logging.basicConfig(level=logging.INFO)
 
 # Load datasets and initialize database
-dataset = load_dataset("bgsys/background-removal-arena-green_v0_clothing_checkered", split='train')
+dataset = load_dataset("bgsys/background-removal-arena_v0_clothing_checkered", split='train')
 fill_database_once()
 
 # Directory setup for JSON dataset
@@ -64,7 +65,7 @@ def update_rankings_table():
         model_vote_counts = compute_votes_per_model()
         try:
             # Create a list of models to iterate over
-            models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0"]
+            models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0", "BiRefNet v2"]
             rankings = []
 
             for model in models:
@@ -104,8 +105,14 @@ def select_new_image(last_used_indices):
         sample = dataset[random_index]
         input_image = sample['original_image']
 
-        segmented_images = [sample.get(key) for key in ['clipdrop_image', 'bria_image', 'photoroom_image', 'removebg_image']]
-        segmented_sources = ['Clipdrop', 'BRIA RMBG 2.0', 'Photoroom', 'RemoveBG']
+        segmented_images = [sample.get(key) for key in [
+            'clipdrop_image', 'bria_image', 'photoroom_image',
+            'removebg_image', 'birefnet_image'
+        ]]
+        segmented_sources = [
+            'Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
+            'RemoveBG', 'BiRefNet v2'
+        ]
 
         if segmented_images.count(None) > 2:
             logging.error("Not enough segmented images found for: %s. Resampling another image.", sample['original_filename'])
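
Downstream of this hunk, select_new_image pairs each segmented output with its model label and resamples when too many outputs are missing. A minimal sketch of that pairing, for illustration only (this helper is not part of the commit):

# Hypothetical helper (not in this commit): pair each segmented image with its
# source label and drop models that produced no output for this sample,
# mirroring how select_new_image tolerates up to two missing outputs.
def paired_outputs(sample):
    keys = ['clipdrop_image', 'bria_image', 'photoroom_image',
            'removebg_image', 'birefnet_image']
    sources = ['Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
               'RemoveBG', 'BiRefNet v2']
    segmented_images = [sample.get(key) for key in keys]
    if segmented_images.count(None) > 2:
        return []  # too few candidates; the caller resamples another image
    return [(src, img) for src, img in zip(sources, segmented_images) if img is not None]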
image_processing_pipeline.py CHANGED
@@ -14,6 +14,7 @@ from utils.clipdrop import iterate_over_directory as clipdrop_iterate
 from utils.upload_to_dataset import upload_to_dataset
 from utils.resize_processed_images import process_images as downsize_processed_images
 from utils.add_checkered_background import process_directory as add_checkered_background_process
+from utils.birefnet import process_directory as birefnet_iterate
 
 def check_env_variables():
     """Check if the necessary environment variables are loaded."""
@@ -22,7 +23,11 @@ def check_env_variables():
 
     load_dotenv()
 
-    required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN', 'CLIPDROP_API_KEY']
+    required_keys = [
+        'REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY',
+        'BRIA_API_TOKEN', 'CLIPDROP_API_KEY',
+        'FAL_KEY'
+    ]
     missing_keys = [key for key in required_keys if not os.getenv(key)]
 
     if missing_keys:
@@ -86,24 +91,24 @@ def main():
         "removebg": os.path.join(bg_removed_dir, "removebg"),
         "photoroom": os.path.join(bg_removed_dir, "photoroom"),
         "bria": os.path.join(bg_removed_dir, "bria"),
-        "clipdrop": os.path.join(bg_removed_dir, "clipdrop")
+        "clipdrop": os.path.join(bg_removed_dir, "clipdrop"),
+        "birefnet": os.path.join(bg_removed_dir, "birefnet")
     }
 
     for dir_path in bg_removal_dirs.values():
         os.makedirs(dir_path, exist_ok=True)
 
     # Use ThreadPoolExecutor to parallelize API calls
-    with ThreadPoolExecutor(max_workers=4) as executor:
+    with ThreadPoolExecutor(max_workers=5) as executor:
         executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
         executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
         executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
         executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
-
+        executor.submit(birefnet_iterate, input_resized_dir, bg_removal_dirs["birefnet"])
 
     print("Adding checkered background...")
     add_checkered_background_process(bg_removed_dir, checkered_bg_dir)
 
-
     if args.dataset_name:
         upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
     else:
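
The utils/birefnet module imported here is not among the three changed files, and the new FAL_KEY requirement suggests BiRefNet v2 runs through fal.ai. A rough sketch of what such a module could look like; the fal_client calls are real, but the endpoint id and response shape are assumptions, not confirmed by this commit:

# utils/birefnet.py -- illustrative sketch only, not part of this diff.
import os
import logging

import fal_client   # reads FAL_KEY from the environment
import requests


def process_directory(input_dir, output_dir):
    """Run BiRefNet v2 on every image in input_dir and save PNGs to output_dir."""
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            continue
        output_path = os.path.join(output_dir, os.path.splitext(filename)[0] + '.png')
        if os.path.exists(output_path):
            continue  # skip images that were already processed
        try:
            image_url = fal_client.upload_file(os.path.join(input_dir, filename))
            result = fal_client.subscribe(
                "fal-ai/birefnet/v2",            # assumed endpoint id
                arguments={"image_url": image_url},
            )
            result_url = result["image"]["url"]  # assumed response shape
            with open(output_path, "wb") as f:
                f.write(requests.get(result_url, timeout=60).content)
        except Exception as e:
            logging.error("BiRefNet failed for %s: %s", filename, e)


# app.py imports the same entry point under the name iterate_over_directory.
iterate_over_directory = process_directory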
utils/upload_to_dataset.py CHANGED
@@ -6,16 +6,22 @@ import pandas as pd
 import argparse
 from PIL import Image as PILImage
 import sys
+import logging
 
 def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
+    """Upload images to a Hugging Face dataset including BiRefNet results."""
+
+    logging.info(f"Starting dataset upload from {original_images_dir}")
+
     # Define the dataset features with dedicated columns for each model
     features = Features({
-        "original_image": Image(),  # Original image feature
-        "clipdrop_image": Image(),  # Clipdrop segmented image
-        "bria_image": Image(),  # Bria segmented image
-        "photoroom_image": Image(),  # Photoroom segmented image
-        "removebg_image": Image(),  # RemoveBG segmented image
-        "original_filename": Value("string")  # Original filename
+        "original_image": Image(),
+        "clipdrop_image": Image(),
+        "bria_image": Image(),
+        "photoroom_image": Image(),
+        "removebg_image": Image(),
+        "birefnet_image": Image(),
+        "original_filename": Value("string")
     })
 
     # Load image paths and metadata
@@ -23,7 +29,8 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "clipdrop_image": None,
         "bria_image": None,
         "photoroom_image": None,
-        "removebg_image": None
+        "removebg_image": None,
+        "birefnet_image": None
     })
 
     # Walk into the original images folder
@@ -35,16 +42,15 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
             data[f]["original_filename"] = f
 
             # Check for corresponding images in processed directories
-            for source in ["clipdrop", "bria", "photoroom", "removebg"]:
-                # Check for processed images ending in .png or .jpg
-                for ext in ['.png', '.jpg']:
+            for source in ["clipdrop", "bria", "photoroom", "removebg", "birefnet"]:
+                for ext in ['.png', '.jpg', '.jpeg', '.webp']:
                     processed_image_filename = os.path.splitext(f)[0] + ext
                     source_image_path = os.path.join(processed_images_dir, source, processed_image_filename)
 
                     if os.path.exists(source_image_path):
                         data[f][f"{source}_image"] = source_image_path
-                        break  # Stop checking other extensions if a file is found
-
+                        break
+
     # Convert the data to a dictionary of lists
     dataset_dict = {
         "original_image": [],
@@ -52,35 +58,47 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "bria_image": [],
         "photoroom_image": [],
         "removebg_image": [],
+        "birefnet_image": [],
         "original_filename": []
     }
 
     errors = []
+    processed_count = 0
+    skipped_count = 0
 
     for filename, entry in data.items():
        if "original_image" in entry:
-            # Check if all images have the same size
            try:
                original_size = PILImage.open(entry["original_image"]).size
-                for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image"]:
+                valid_entry = True
+
+                for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image", "birefnet_image"]:
                    if entry[source] is not None:
-                        processed_size = PILImage.open(entry[source]).size
-                        if processed_size != original_size:
-                            errors.append(f"Size mismatch for {filename}: {source} image size {processed_size} does not match original size {original_size}.")
-            except Exception as e:
-                errors.append(f"Error processing {filename}: {e}")
+                        try:
+                            processed_size = PILImage.open(entry[source]).size
+                            if processed_size != original_size:
+                                errors.append(f"Size mismatch for {filename}: {source}")
+                                valid_entry = False
+                        except Exception as e:
+                            errors.append(f"Error with {filename}: {source}")
+                            valid_entry = False
 
-            dataset_dict["original_image"].append(entry["original_image"])
-            dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
-            dataset_dict["bria_image"].append(entry["bria_image"])
-            dataset_dict["photoroom_image"].append(entry["photoroom_image"])
-            dataset_dict["removebg_image"].append(entry["removebg_image"])
-            dataset_dict["original_filename"].append(filename)
+                if valid_entry:
+                    for key in dataset_dict.keys():
+                        if key in entry:
+                            dataset_dict[key].append(entry[key])
+                    processed_count += 1
+                else:
+                    skipped_count += 1
+
+            except Exception as e:
+                errors.append(f"Error processing {filename}")
+                skipped_count += 1
 
     if errors:
-        for error in errors:
-            print(error)
-        sys.exit(1)
+        logging.warning(f"Encountered {len(errors)} errors during processing")
+
+    logging.info(f"Processed: {processed_count}, Skipped: {skipped_count}, Total: {processed_count + skipped_count}")
 
     # Save the data dictionary to a CSV file for inspection
     df = pd.DataFrame.from_dict(dataset_dict)
@@ -90,14 +108,20 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
     dataset = Dataset.from_dict(dataset_dict, features=features)
 
     if dry_run:
-        print("Dry run: Dataset prepared but not pushed to Hugging Face Hub.")
-        print(df.head())  # Display the first few rows for inspection
+        logging.info("Dry run completed - dataset not pushed")
     else:
-        # Push the dataset to Hugging Face Hub in a private way
+        logging.info(f"Pushing dataset to {dataset_name}")
         api = HfApi()
         dataset.push_to_hub(dataset_name, token=api.token, private=True)
+        logging.info("Upload completed successfully")
 
 if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
     parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
     parser.add_argument("original_images_dir", type=str, help="Directory containing the original images.")
     parser.add_argument("processed_images_dir", type=str, help="Directory containing the processed images with subfolders for each model.")
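
For context, a dry-run invocation of the upload script might look as follows. The two positional directories match the visible add_argument calls; the dataset argument is inferred from the function signature and the pipeline call above, so treat its exact form (positional vs. flag) as an assumption:

# Assumed invocation -- verify argument names against the full argparse setup.
python utils/upload_to_dataset.py data/resized data/checkered my-user/background-removal-arena-test

When dry_run is set, the dataset is assembled and the inspection CSV is written, but nothing is pushed to the Hub.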