BiRefNet_sota_rembkgd

Running on Zero

App Files Files Community

ZhengPeng7 committed on Aug 29, 2024

Commit

741bf59

verified ·

1 Parent(s): 9e40d13

Add tab of batch inference with saving function.

Browse files

Files changed (1) hide show

app.py +100 -41

app.py CHANGED Viewed

@@ -57,15 +57,37 @@ birefnet = AutoModelForImageSegmentation.from_pretrained('/'.join(('zhengpeng7',
 birefnet.to(device)
 birefnet.eval()
 @spaces.GPU
-def predict(image, resolution, weights_file):
-    assert (image is not None), 'AssertionError: image cannot be None.'
-    if isinstance(image, str):
-        response = requests.get(image)
-        image_data = BytesIO(response.content)
-        image = np.array(Image.open(image_data))
     global birefnet
     # Load BiRefNet with chosen weights
     _weights_file = '/'.join(('zhengpeng7', usage_to_weights_file[weights_file] if weights_file is not None else usage_to_weights_file['General']))
@@ -74,33 +96,63 @@ def predict(image, resolution, weights_file):
     birefnet.to(device)
     birefnet.eval()
-    resolution = f"{image.shape[1]}x{image.shape[0]}" if resolution == '' else resolution
-    resolution = [int(int(reso)//32*32) for reso in resolution.strip().split('x')]
-    image_shape = image.shape[:2]
-    image_pil = array_to_pil_image(image, tuple(resolution))
-    # Preprocess the image
-    image_preprocessor = ImagePreprocessor(resolution=tuple(resolution))
-    image_proc = image_preprocessor.proc(image_pil)
-    image_proc = image_proc.unsqueeze(0)
-    # Perform the prediction
-    with torch.no_grad():
-        scaled_pred_tensor = birefnet(image_proc.to(device))[-1].sigmoid()
-    if device == 'cuda':
-        scaled_pred_tensor = scaled_pred_tensor.cpu()
-    # Resize the prediction to match the original image shape
-    pred = torch.nn.functional.interpolate(scaled_pred_tensor, size=image_shape, mode='bilinear', align_corners=True).squeeze().numpy()
-    # Apply the prediction mask to the original image
-    image_pil = image_pil.resize(pred.shape[::-1])
-    pred = np.repeat(np.expand_dims(pred, axis=-1), 3, axis=-1)
-    image_pred = (pred * np.array(image_pil)).astype(np.uint8)
-    torch.cuda.empty_cache()
     return image, image_pred
@@ -118,6 +170,11 @@ examples_url = [
 for idx_example_url, example_url in enumerate(examples_url):
     examples_url[idx_example_url].append('1024x1024')
 tab_image = gr.Interface(
     fn=predict,
     inputs=[
@@ -128,10 +185,7 @@ tab_image = gr.Interface(
     outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
     examples=examples,
     api_name="image",
-    description=('Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n)'
-                 ' The resolution used in our training was `1024x1024`, thus the suggested resolution to obtain good results!\n'
-                 ' Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n'
-                 ' We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access.'),
 )
 tab_text = gr.Interface(
@@ -144,15 +198,20 @@ tab_text = gr.Interface(
     outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
     examples=examples_url,
     api_name="text",
-    description=('Upload a URL, our model will extract a highly accurate segmentation of the subject in it.\n)'
-                 ' The resolution used in our training was `1024x1024`, thus the suggested resolution to obtain good results!\n'
-                 ' Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n'
-                 ' We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access.'),
 )
 demo = gr.TabbedInterface(
-    [tab_image, tab_text],
-    ["image", "text"],
     title="BiRefNet demo for subject extraction (general / salient / camouflaged / portrait).",
 )

 birefnet.to(device)
 birefnet.eval()
+    # for idx, image_path in enumerate(images):
+    #     im = load_img(image_path, output_type="pil")
+    #     if im is None:
+    #         continue
+    #     im = im.convert("RGB")
+    #     image_size = im.size
+    #     input_images = transform_image(im).unsqueeze(0).to("cpu")
+    #     with torch.no_grad():
+    #         preds = birefnet(input_images)[-1].sigmoid().cpu()
+    #     pred = preds[0].squeeze()
+    #     pred_pil = transforms.ToPILImage()(pred)
+    #     mask = pred_pil.resize(image_size)
+    #     im.putalpha(mask)
+    #     output_file_path = os.path.join(save_dir, f"output_image_batch_{idx + 1}.png")
+    #     im.save(output_file_path)
+    #     output_paths.append(output_file_path)
+    # zip_file_path = os.path.join(save_dir, "processed_images.zip")
+    # with zipfile.ZipFile(zip_file_path, 'w') as zipf:
+    #     for file in output_paths:
+    #         zipf.write(file, os.path.basename(file))
+    # return output_paths, zip_file_path
 @spaces.GPU
+def predict(images, resolution='1024x1024', weights_file=None):
+    """Remove the background from one image or from a batch of images.
+
+    Args:
+        images: Either a single input (an image array, or a string holding a
+            URL or a local file path) or a list of such inputs. A list selects
+            batch mode, in which every prediction is also written to disk.
+        resolution: Inference size given as two integers joined by 'x'
+            (e.g. '1024x1024'); each value is rounded down to a multiple of
+            32. Anything that cannot be parsed falls back to 1024x1024.
+        weights_file: Key into `usage_to_weights_file`; None selects
+            'General'.
+
+    Returns:
+        Batch mode: (list of saved PNG paths, path of a zip of those PNGs).
+        Otherwise: (input image, masked image as a uint8 array).
+
+    Raises:
+        AssertionError: If `images` is None.
+    """
+    assert (images is not None), 'AssertionError: images cannot be None.'
     global birefnet
     # Load BiRefNet with chosen weights
     _weights_file = '/'.join(('zhengpeng7', usage_to_weights_file[weights_file] if weights_file is not None else usage_to_weights_file['General']))
     birefnet.to(device)
     birefnet.eval()
+    try:
+        resolution = [int(reso) // 32 * 32 for reso in resolution.strip().split('x')]
+        # Exactly two positive sizes are required; e.g. '1024' or '10x10' are rejected here.
+        if len(resolution) != 2 or min(resolution) <= 0:
+            raise ValueError(resolution)
+    except (AttributeError, TypeError, ValueError):
+        resolution = [1024, 1024]
+        print('Invalid resolution input. Automatically changed to 1024x1024.')
+    is_batch = isinstance(images, list)
+    if is_batch:
+        # For tab_batch: predictions are saved so they can be listed and zipped.
+        save_dir = 'preds-BiRefNet'
+        os.makedirs(save_dir, exist_ok=True)
+        save_paths = []
+    else:
+        images = [images]
+    for idx_image, image_src in enumerate(images):
+        if isinstance(image_src, str):
+            if os.path.isfile(image_src):
+                # Uploaded files arrive as local paths, not URLs.
+                image = np.array(Image.open(image_src))
+            else:
+                response = requests.get(image_src)
+                image_data = BytesIO(response.content)
+                image = np.array(Image.open(image_data))
+        else:
+            image = image_src
+        image_shape = image.shape[:2]
+        image_pil = array_to_pil_image(image, tuple(resolution))
+        # Preprocess the image
+        image_preprocessor = ImagePreprocessor(resolution=tuple(resolution))
+        image_proc = image_preprocessor.proc(image_pil)
+        image_proc = image_proc.unsqueeze(0)
+        # Perform the prediction
+        with torch.no_grad():
+            scaled_pred_tensor = birefnet(image_proc.to(device))[-1].sigmoid()
+        if device == 'cuda':
+            scaled_pred_tensor = scaled_pred_tensor.cpu()
+        # Resize the prediction to match the original image shape
+        pred = torch.nn.functional.interpolate(scaled_pred_tensor, size=image_shape, mode='bilinear', align_corners=True).squeeze().numpy()
+        # Apply the prediction mask to the original image
+        image_pil = image_pil.resize(pred.shape[::-1])
+        pred = np.repeat(np.expand_dims(pred, axis=-1), 3, axis=-1)
+        image_pred = (pred * np.array(image_pil)).astype(np.uint8)
+        torch.cuda.empty_cache()
+        if is_batch:
+            # Name the output after the source file; non-string inputs get an index-based name.
+            if isinstance(image_src, str):
+                stem = os.path.splitext(os.path.basename(image_src))[0]
+            else:
+                stem = 'image_{}'.format(idx_image)
+            save_file_path = os.path.join(save_dir, "{}.png".format(stem))
+            # Saved through PIL (already used above for decoding) so the array's channel order is kept as-is.
+            Image.fromarray(image_pred).save(save_file_path)
+            save_paths.append(save_file_path)
+    if is_batch:
+        zip_file_path = os.path.join(save_dir, "{}.zip".format(save_dir))
+        with zipfile.ZipFile(zip_file_path, 'w') as zipf:
+            for file in save_paths:
+                zipf.write(file, os.path.basename(file))
+        # Matches tab_batch's outputs: a gallery of files plus one downloadable archive.
+        return save_paths, zip_file_path
     return image, image_pred
 for idx_example_url, example_url in enumerate(examples_url):
     examples_url[idx_example_url].append('1024x1024')
+# Shared description text passed to every tab; tab-specific credits are appended where it is used.
+descriptions = ('Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n'
+                ' The resolution used in our training was `1024x1024`, thus the suggested resolution to obtain good results!\n'
+                ' Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n'
+                ' We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access.')
 tab_image = gr.Interface(
     fn=predict,
     inputs=[
     outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
     examples=examples,
     api_name="image",
+    description=descriptions,
 )
 tab_text = gr.Interface(
     outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
     examples=examples_url,
     api_name="text",
+    description=descriptions+'\nTab-URL is partially modified from https://huggingface.co/spaces/not-lain/background-removal, thanks to this great work!',
+)
+# Batch tab: takes several uploaded files and returns a gallery plus a downloadable file.
+# `predict` takes (images, resolution, weights_file), so one input component is declared per argument.
+tab_batch = gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.File(label="Upload multiple images", type="filepath", file_count="multiple"),
+        gr.Textbox(lines=1, placeholder="Type the resolution (`WxH`) you want, e.g., `1024x1024`.", label="resolution"),
+        gr.Radio(list(usage_to_weights_file), value="General", label="Weights", info="Choose the weights you want."),
+    ],
+    outputs=[gr.Gallery(label="BiRefNet's predictions"), gr.File(label="Download masked images.")],
+    api_name="batch",
+    description=descriptions+'\nTab-batch is partially modified from https://huggingface.co/spaces/NegiTurkey/Multi_Birefnetfor_Background_Removal, thanks to this great work!',
 )
 # Combine the image, text (URL) and batch interfaces into a single tabbed app;
 # the second list gives the tab labels in the same order as the interfaces.
 demo = gr.TabbedInterface(
+    [tab_image, tab_text, tab_batch],
+    ['image', 'text', 'batch'],
     title="BiRefNet demo for subject extraction (general / salient / camouflaged / portrait).",
 )