tdurbor committed
Commit 8436088 · Parent: c098942

Add BiRefNet v2

Files changed (3)
  1. app.py +11 -4
  2. image_processing_pipeline.py +10 -5
  3. utils/upload_to_dataset.py +55 -31
app.py CHANGED
@@ -22,6 +22,7 @@ from db import (
     fill_database_once,
     compute_votes_per_model
 )
+from utils.birefnet import iterate_over_directory as birefnet_iterate
 
 # Load environment variables
 load_dotenv()
@@ -32,7 +33,7 @@ google_analytics_tracking_id = os.getenv("GOOGLE_ANALYTICS_TRACKING_ID")
 logging.basicConfig(level=logging.INFO)
 
 # Load datasets and initialize database
-dataset = load_dataset("bgsys/background-removal-arena-green_v0_clothing_checkered", split='train')
+dataset = load_dataset("bgsys/background-removal-arena_v0_clothing_checkered", split='train')
 fill_database_once()
 
 # Directory setup for JSON dataset
@@ -64,7 +65,7 @@ def update_rankings_table():
         model_vote_counts = compute_votes_per_model()
         try:
             # Create a list of models to iterate over
-            models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0"]
+            models = ["Clipdrop", "Photoroom", "RemoveBG", "BRIA RMBG 2.0", "BiRefNet v2"]
             rankings = []
 
             for model in models:
@@ -104,8 +105,14 @@ def select_new_image(last_used_indices):
         sample = dataset[random_index]
         input_image = sample['original_image']
 
-        segmented_images = [sample.get(key) for key in ['clipdrop_image', 'bria_image', 'photoroom_image', 'removebg_image']]
-        segmented_sources = ['Clipdrop', 'BRIA RMBG 2.0', 'Photoroom', 'RemoveBG']
+        segmented_images = [sample.get(key) for key in [
+            'clipdrop_image', 'bria_image', 'photoroom_image',
+            'removebg_image', 'birefnet_image'
+        ]]
+        segmented_sources = [
+            'Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
+            'RemoveBG', 'BiRefNet v2'
+        ]
 
         if segmented_images.count(None) > 2:
             logging.error("Not enough segmented images found for: %s. Resampling another image.", sample['original_filename'])
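
Downstream of this hunk, select_new_image pairs each segmented output with its model label and resamples when too many outputs are missing. A minimal sketch of that pairing, for illustration only (this helper is not part of the commit):

# Hypothetical helper (not in this commit): pair each segmented image with its
# source label and drop models that produced no output for this sample,
# mirroring how select_new_image tolerates up to two missing outputs.
def paired_outputs(sample):
    keys = ['clipdrop_image', 'bria_image', 'photoroom_image',
            'removebg_image', 'birefnet_image']
    sources = ['Clipdrop', 'BRIA RMBG 2.0', 'Photoroom',
               'RemoveBG', 'BiRefNet v2']
    segmented_images = [sample.get(key) for key in keys]
    if segmented_images.count(None) > 2:
        return []  # too few candidates; the caller resamples another image
    return [(src, img) for src, img in zip(sources, segmented_images) if img is not None]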
image_processing_pipeline.py CHANGED
@@ -14,6 +14,7 @@ from utils.clipdrop import iterate_over_directory as clipdrop_iterate
 from utils.upload_to_dataset import upload_to_dataset
 from utils.resize_processed_images import process_images as downsize_processed_images
 from utils.add_checkered_background import process_directory as add_checkered_background_process
+from utils.birefnet import process_directory as birefnet_iterate
 
 def check_env_variables():
     """Check if the necessary environment variables are loaded."""
@@ -22,7 +23,11 @@ def check_env_variables():
 
     load_dotenv()
 
-    required_keys = ['REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY', 'BRIA_API_TOKEN', 'CLIPDROP_API_KEY']
+    required_keys = [
+        'REMOVEBG_API_KEY', 'PHOTOROOM_API_KEY',
+        'BRIA_API_TOKEN', 'CLIPDROP_API_KEY',
+        'FAL_KEY'
+    ]
     missing_keys = [key for key in required_keys if not os.getenv(key)]
 
     if missing_keys:
@@ -86,24 +91,24 @@ def main():
         "removebg": os.path.join(bg_removed_dir, "removebg"),
         "photoroom": os.path.join(bg_removed_dir, "photoroom"),
         "bria": os.path.join(bg_removed_dir, "bria"),
-        "clipdrop": os.path.join(bg_removed_dir, "clipdrop")
+        "clipdrop": os.path.join(bg_removed_dir, "clipdrop"),
+        "birefnet": os.path.join(bg_removed_dir, "birefnet")
     }
 
     for dir_path in bg_removal_dirs.values():
         os.makedirs(dir_path, exist_ok=True)
 
     # Use ThreadPoolExecutor to parallelize API calls
-    with ThreadPoolExecutor(max_workers=4) as executor:
+    with ThreadPoolExecutor(max_workers=5) as executor:
         executor.submit(removebg_iterate, input_resized_dir, bg_removal_dirs["removebg"])
         executor.submit(photoroom_iterate, input_resized_dir, bg_removal_dirs["photoroom"])
         executor.submit(bria_iterate, input_resized_dir, bg_removal_dirs["bria"])
         executor.submit(clipdrop_iterate, input_resized_dir, bg_removal_dirs["clipdrop"])
-
+        executor.submit(birefnet_iterate, input_resized_dir, bg_removal_dirs["birefnet"])
 
     print("Adding checkered background...")
     add_checkered_background_process(bg_removed_dir, checkered_bg_dir)
 
-
     if args.dataset_name:
         upload_to_dataset(input_resized_dir, checkered_bg_dir, args.dataset_name, dry_run=not args.push_dataset)
     else:
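
The utils/birefnet module imported here is not among the three changed files, and the new FAL_KEY requirement suggests BiRefNet v2 runs through fal.ai. A rough sketch of what such a module could look like; the fal_client calls are real, but the endpoint id and response shape are assumptions, not confirmed by this commit:

# utils/birefnet.py -- illustrative sketch only, not part of this diff.
import os
import logging

import fal_client   # reads FAL_KEY from the environment
import requests


def process_directory(input_dir, output_dir):
    """Run BiRefNet v2 on every image in input_dir and save PNGs to output_dir."""
    os.makedirs(output_dir, exist_ok=True)
    for filename in os.listdir(input_dir):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.webp')):
            continue
        output_path = os.path.join(output_dir, os.path.splitext(filename)[0] + '.png')
        if os.path.exists(output_path):
            continue  # skip images that were already processed
        try:
            image_url = fal_client.upload_file(os.path.join(input_dir, filename))
            result = fal_client.subscribe(
                "fal-ai/birefnet/v2",            # assumed endpoint id
                arguments={"image_url": image_url},
            )
            result_url = result["image"]["url"]  # assumed response shape
            with open(output_path, "wb") as f:
                f.write(requests.get(result_url, timeout=60).content)
        except Exception as e:
            logging.error("BiRefNet failed for %s: %s", filename, e)


# app.py imports the same entry point under the name iterate_over_directory.
iterate_over_directory = process_directory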
utils/upload_to_dataset.py CHANGED
@@ -6,16 +6,22 @@ import pandas as pd
 import argparse
 from PIL import Image as PILImage
 import sys
+import logging
 
 def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
+    """Upload images to a Hugging Face dataset including BiRefNet results."""
+
+    logging.info(f"Starting dataset upload from {original_images_dir}")
+
     # Define the dataset features with dedicated columns for each model
     features = Features({
-        "original_image": Image(),  # Original image feature
-        "clipdrop_image": Image(),  # Clipdrop segmented image
-        "bria_image": Image(),  # Bria segmented image
-        "photoroom_image": Image(),  # Photoroom segmented image
-        "removebg_image": Image(),  # RemoveBG segmented image
-        "original_filename": Value("string")  # Original filename
+        "original_image": Image(),
+        "clipdrop_image": Image(),
+        "bria_image": Image(),
+        "photoroom_image": Image(),
+        "removebg_image": Image(),
+        "birefnet_image": Image(),
+        "original_filename": Value("string")
     })
 
     # Load image paths and metadata
@@ -23,7 +29,8 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "clipdrop_image": None,
         "bria_image": None,
         "photoroom_image": None,
-        "removebg_image": None
+        "removebg_image": None,
+        "birefnet_image": None
     })
 
     # Walk into the original images folder
@@ -35,16 +42,15 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
             data[f]["original_filename"] = f
 
             # Check for corresponding images in processed directories
-            for source in ["clipdrop", "bria", "photoroom", "removebg"]:
-                # Check for processed images ending in .png or .jpg
-                for ext in ['.png', '.jpg']:
+            for source in ["clipdrop", "bria", "photoroom", "removebg", "birefnet"]:
+                for ext in ['.png', '.jpg', '.jpeg', '.webp']:
                     processed_image_filename = os.path.splitext(f)[0] + ext
                     source_image_path = os.path.join(processed_images_dir, source, processed_image_filename)
 
                     if os.path.exists(source_image_path):
                         data[f][f"{source}_image"] = source_image_path
-                        break  # Stop checking other extensions if a file is found
-
+                        break
+
     # Convert the data to a dictionary of lists
     dataset_dict = {
         "original_image": [],
@@ -52,35 +58,47 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
         "bria_image": [],
         "photoroom_image": [],
         "removebg_image": [],
+        "birefnet_image": [],
         "original_filename": []
     }
 
     errors = []
+    processed_count = 0
+    skipped_count = 0
 
     for filename, entry in data.items():
        if "original_image" in entry:
-            # Check if all images have the same size
            try:
                original_size = PILImage.open(entry["original_image"]).size
-                for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image"]:
+                valid_entry = True
+
+                for source in ["clipdrop_image", "bria_image", "photoroom_image", "removebg_image", "birefnet_image"]:
                    if entry[source] is not None:
-                        processed_size = PILImage.open(entry[source]).size
-                        if processed_size != original_size:
-                            errors.append(f"Size mismatch for {filename}: {source} image size {processed_size} does not match original size {original_size}.")
-            except Exception as e:
-                errors.append(f"Error processing {filename}: {e}")
+                        try:
+                            processed_size = PILImage.open(entry[source]).size
+                            if processed_size != original_size:
+                                errors.append(f"Size mismatch for {filename}: {source}")
+                                valid_entry = False
+                        except Exception as e:
+                            errors.append(f"Error with {filename}: {source}")
+                            valid_entry = False
 
-            dataset_dict["original_image"].append(entry["original_image"])
-            dataset_dict["clipdrop_image"].append(entry["clipdrop_image"])
-            dataset_dict["bria_image"].append(entry["bria_image"])
-            dataset_dict["photoroom_image"].append(entry["photoroom_image"])
-            dataset_dict["removebg_image"].append(entry["removebg_image"])
-            dataset_dict["original_filename"].append(filename)
+                if valid_entry:
+                    for key in dataset_dict.keys():
+                        if key in entry:
+                            dataset_dict[key].append(entry[key])
+                    processed_count += 1
+                else:
+                    skipped_count += 1
+
+            except Exception as e:
+                errors.append(f"Error processing {filename}")
+                skipped_count += 1
 
     if errors:
-        for error in errors:
-            print(error)
-        sys.exit(1)
+        logging.warning(f"Encountered {len(errors)} errors during processing")
+
+    logging.info(f"Processed: {processed_count}, Skipped: {skipped_count}, Total: {processed_count + skipped_count}")
 
     # Save the data dictionary to a CSV file for inspection
     df = pd.DataFrame.from_dict(dataset_dict)
@@ -90,14 +108,20 @@ def upload_to_dataset(original_images_dir, processed_images_dir, dataset_name, dry_run=False):
     dataset = Dataset.from_dict(dataset_dict, features=features)
 
     if dry_run:
-        print("Dry run: Dataset prepared but not pushed to Hugging Face Hub.")
-        print(df.head())  # Display the first few rows for inspection
+        logging.info("Dry run completed - dataset not pushed")
     else:
-        # Push the dataset to Hugging Face Hub in a private way
+        logging.info(f"Pushing dataset to {dataset_name}")
         api = HfApi()
         dataset.push_to_hub(dataset_name, token=api.token, private=True)
+        logging.info("Upload completed successfully")
 
 if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
     parser = argparse.ArgumentParser(description="Upload images to a Hugging Face dataset.")
     parser.add_argument("original_images_dir", type=str, help="Directory containing the original images.")
     parser.add_argument("processed_images_dir", type=str, help="Directory containing the processed images with subfolders for each model.")
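
For context, a dry-run invocation of the upload script might look as follows. The two positional directories match the visible add_argument calls; the dataset argument is inferred from the function signature and the pipeline call above, so treat its exact form (positional vs. flag) as an assumption:

# Assumed invocation -- verify argument names against the full argparse setup.
python utils/upload_to_dataset.py data/resized data/checkered my-user/background-removal-arena-test

When dry_run is set, the dataset is assembled and the inspection CSV is written, but nothing is pushed to the Hub.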