ghostsInTheMachine committed
Commit 509862d · verified · 1 Parent(s): 012b840

Update app.py

Files changed (1)
app.py  +234  -99
app.py CHANGED
@@ -1,15 +1,21 @@
- import os
- import cv2
- import numpy as np
- import torch
  import gradio as gr
  import spaces
- from gradio.themes.base import Base
- from gradio.themes.utils import colors, fonts, sizes
- from PIL import Image, ImageOps
  from transformers import AutoModelForImageSegmentation
  from torchvision import transforms

  class WhiteTheme(Base):
      def __init__(
          self,
@@ -59,89 +65,180 @@ class WhiteTheme(Base):
              shadow_drop="none"
          )

- torch.set_float32_matmul_precision('high')
- torch.jit.script = lambda f: f
-
  device = "cuda" if torch.cuda.is_available() else "cpu"

- def refine_foreground(image, mask, r=90):
-     if mask.size != image.size:
-         mask = mask.resize(image.size)
-     image = np.array(image) / 255.0
-     mask = np.array(mask) / 255.0
-     estimated_foreground = FB_blur_fusion_foreground_estimator_2(image, mask, r=r)
-     image_masked = Image.fromarray((estimated_foreground * 255.0).astype(np.uint8))
-     return image_masked
-
- def FB_blur_fusion_foreground_estimator_2(image, alpha, r=90):
-     alpha = alpha[:, :, None]
-     F, blur_B = FB_blur_fusion_foreground_estimator(
-         image, image, image, alpha, r)
-     return FB_blur_fusion_foreground_estimator(image, F, blur_B, alpha, r=6)[0]
-
- def FB_blur_fusion_foreground_estimator(image, F, B, alpha, r=90):
-     if isinstance(image, Image.Image):
-         image = np.array(image) / 255.0
-     blurred_alpha = cv2.blur(alpha, (r, r))[:, :, None]
-     blurred_FA = cv2.blur(F * alpha, (r, r))
-     blurred_F = blurred_FA / (blurred_alpha + 1e-5)
-     blurred_B1A = cv2.blur(B * (1 - alpha), (r, r))
-     blurred_B = blurred_B1A / ((1 - blurred_alpha) + 1e-5)
-     F = blurred_F + alpha * (image - alpha * blurred_F - (1 - alpha) * blurred_B)
-     F = np.clip(F, 0, 1)
-     return F, blurred_B
-
- class ImagePreprocessor():
-     def __init__(self, resolution=(1024, 1024)) -> None:
-         self.transform_image = transforms.Compose([
-             transforms.Resize(resolution),
-             transforms.ToTensor(),
-             transforms.Normalize([0.485, 0.456, 0.406],
-                                  [0.229, 0.224, 0.225]),
-         ])
-
-     def proc(self, image: Image.Image) -> torch.Tensor:
-         image = self.transform_image(image)
-         return image
-
- # Load the model
- birefnet = AutoModelForImageSegmentation.from_pretrained(
-     'zhengpeng7/BiRefNet-matting', trust_remote_code=True)
  birefnet.to(device)
- birefnet.eval()
-
- def remove_background_wrapper(image):
-     if image is None:
-         raise gr.Error("Please upload an image.")
-     image_ori = Image.fromarray(image).convert('RGB')
-     foreground, background, pred_pil, reverse_mask = remove_background(image_ori)
-     return foreground, background, pred_pil, reverse_mask
-
- @spaces.GPU
- def remove_background(image_ori):
-     original_size = image_ori.size
-     image_preprocessor = ImagePreprocessor(resolution=(1024, 1024))
-     image_proc = image_preprocessor.proc(image_ori)
-     image_proc = image_proc.unsqueeze(0)
-
-     with torch.no_grad():
-         preds = birefnet(image_proc.to(device))[-1].sigmoid().cpu()
-         pred = preds[0].squeeze()
-
-     pred_pil = transforms.ToPILImage()(pred)
-     pred_pil = pred_pil.resize(original_size, Image.BICUBIC)
-
-     reverse_mask = ImageOps.invert(pred_pil)
-
-     foreground = image_ori.copy()
-     foreground.putalpha(pred_pil)
-
-     background = image_ori.copy()
-     background.putalpha(reverse_mask)
-
-     torch.cuda.empty_cache()
-
-     return foreground, background, pred_pil, reverse_mask

  # Custom CSS for styling
  custom_css = """
@@ -204,6 +301,7 @@ custom_css = """
  }
  """

  with gr.Blocks(css=custom_css, theme=WhiteTheme()) as demo:
      gr.HTML('''
      <div class="title-container">
@@ -213,7 +311,7 @@ with gr.Blocks(css=custom_css, theme=WhiteTheme()) as demo:
      </div>
      <script>
      (function() {
-         const text = "image";
          const typedTextSpan = document.getElementById("typed-text");
          let charIndex = 0;

@@ -230,20 +328,57 @@ with gr.Blocks(css=custom_css, theme=WhiteTheme()) as demo:
      </script>
      ''')

-     # Interface setup with input and output
      with gr.Row():
          with gr.Column():
-             image_input = gr.Image(type="numpy", sources=['upload'], label="Upload Image")
-             btn = gr.Button("Process Image", elem_id="submit-button")

          with gr.Column():
-             output_foreground = gr.Image(type="pil", label="Foreground")
-             output_background = gr.Image(type="pil", label="Background")
-             output_foreground_mask = gr.Image(type="pil", label="Foreground Mask")
-             output_background_mask = gr.Image(type="pil", label="Background Mask")
-
-     # Link the button to the processing function
-     btn.click(fn=remove_background_wrapper, inputs=image_input, outputs=[
-         output_foreground, output_background, output_foreground_mask, output_background_mask])

  demo.launch(debug=True)
 
  import gradio as gr
+ import torch
  import spaces
  from transformers import AutoModelForImageSegmentation
  from torchvision import transforms
+ import moviepy.editor as mp
+ from PIL import Image
+ import numpy as np
+ import tempfile
+ import time
+ import os
+ import shutil
+ import ffmpeg
+ from concurrent.futures import ThreadPoolExecutor
+ from gradio.themes.base import Base
+ from gradio.themes.utils import colors, fonts

+ # Custom Theme Definition
  class WhiteTheme(Base):
      def __init__(
          self,
              shadow_drop="none"
          )

+ # Set precision and device
+ torch.set_float32_matmul_precision("medium")
  device = "cuda" if torch.cuda.is_available() else "cpu"

+ # Load models
+ print("Loading models...")
+ birefnet = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
  birefnet.to(device)
+ birefnet_lite = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True)
+ birefnet_lite.to(device)
+ print("Models loaded successfully!")
+
+ # Image transformation
+ transform_image = transforms.Compose([
+     transforms.Resize((1024, 1024)),
+     transforms.ToTensor(),
+     transforms.Normalize([0.485, 0.456, 0.406],
+                          [0.229, 0.224, 0.225]),
+ ])
+
+ def process_frame(frame, fast_mode=True):
+     """
+     Process a single frame through the BiRefNet model.
+     Maintains original resolution throughout processing.
+     Returns a PIL Image with alpha channel.
+     """
+     try:
+         # Preserve original resolution for final output
+         image_ori = Image.fromarray(frame).convert('RGB')
+         original_size = image_ori.size
+
+         # Transform for model input while maintaining aspect ratio
+         input_images = transform_image(image_ori).unsqueeze(0).to(device)
+
+         # Select model based on mode
+         model = birefnet_lite if fast_mode else birefnet
+
+         with torch.no_grad():
+             preds = model(input_images)[-1].sigmoid().cpu()
+             pred = preds[0].squeeze()
+
+         # Resize mask back to original resolution
+         pred_pil = transforms.ToPILImage()(pred)
+         pred_pil = pred_pil.resize(original_size, Image.BICUBIC)
+
+         # Create foreground with transparency
+         foreground = image_ori.copy()
+         foreground.putalpha(pred_pil)
+
+         return foreground
+     except Exception as e:
+         print(f"Error processing frame: {e}")
+         return None
+
+ @spaces.GPU(duration=300)  # 5-minute duration for processing
+ def process_video(video_path, fps=0, fast_mode=True, max_workers=6):
+     """
+     Process video to create transparent MOV file using ProRes 4444.
+     Maintains original resolution and framerate if fps=0.
+     """
+     temp_dir = None
+     try:
+         start_time = time.time()
+         video = mp.VideoFileClip(video_path)
+
+         # Use original video FPS if not specified
+         if fps == 0:
+             fps = video.fps
+
+         frames = list(video.iter_frames(fps=fps))
+         total_frames = len(frames)
+
+         print(f"Processing {total_frames} frames at {fps} FPS...")
+
+         # Create temporary directory for PNG sequence
+         temp_dir = tempfile.mkdtemp()
+         png_dir = os.path.join(temp_dir, "frames")
+         os.makedirs(png_dir, exist_ok=True)
+
+         # Prepare to collect processed frames for live preview
+         processed_frames = []
+
+         # Process frames with parallel execution
+         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+             futures = [executor.submit(process_frame, frame, fast_mode) for frame in frames]
+             for i, future in enumerate(futures):
+                 try:
+                     result = future.result()
+                     if result:
+                         # Save frame as PNG with transparency
+                         frame_path = os.path.join(png_dir, f"frame_{i:06d}.png")
+                         result.save(frame_path, "PNG")
+
+                         # Collect processed frames for live preview
+                         processed_frames.append(np.array(result))
+
+                         # Update live preview
+                         elapsed_time = time.time() - start_time
+                         yield processed_frames[-1], None, None, None, f"Processing frame {i+1}/{total_frames}... Elapsed time: {elapsed_time:.2f} seconds"
+
+                     if (i + 1) % 10 == 0:
+                         print(f"Processed {i+1}/{total_frames} frames")
+                 except Exception as e:
+                     print(f"Error processing frame {i+1}: {e}")
+
+         print("Creating output files...")
+         # Create permanent output directory
+         output_dir = os.path.join(os.path.dirname(video_path), "output")
+         os.makedirs(output_dir, exist_ok=True)
+
+         # Create ZIP file of PNG sequence
+         zip_filename = f"frames_{int(time.time())}.zip"
+         zip_path = os.path.join(output_dir, zip_filename)
+         shutil.make_archive(zip_path[:-4], 'zip', png_dir)
+
+         # Create MOV file with ProRes 4444
+         print("Creating ProRes 4444 MOV...")
+         mov_filename = f"video_{int(time.time())}.mov"
+         mov_path = os.path.join(output_dir, mov_filename)
+
+         try:
+             # FFmpeg settings for high-quality ProRes 4444
+             stream = ffmpeg.input(
+                 os.path.join(png_dir, 'frame_%06d.png'),
+                 pattern_type='sequence',
+                 framerate=fps
+             )
+
+             # ProRes 4444 settings for maximum quality with alpha
+             stream = ffmpeg.output(
+                 stream,
+                 mov_path,
+                 vcodec='prores_ks',       # ProRes codec
+                 pix_fmt='yuva444p10le',   # 10-bit 4:4:4:4 pixel format with alpha
+                 profile='4444',           # ProRes 4444 profile for alpha support
+                 alpha_bits=16,            # Maximum alpha bit depth
+                 qscale=1,                 # Highest quality setting
+                 vendor='ap10',            # Standard ProRes vendor tag
+                 bits_per_mb=8000,         # High bitrate for quality
+                 threads=max_workers       # Parallel processing
+             )
+
+             # Run FFmpeg command
+             ffmpeg.run(stream, overwrite_output=True, capture_stdout=True, capture_stderr=True)
+             print("MOV video created successfully!")
+
+         except ffmpeg.Error as e:
+             print(f"Error creating MOV video: {e.stderr.decode() if e.stderr else str(e)}")
+             mov_path = None
+
+         print("Processing complete!")
+         # Yield the final outputs
+         yield None, zip_path, mov_path, None, f"Processing complete! Total time: {time.time() - start_time:.2f} seconds"
+
+     except Exception as e:
+         print(f"Error: {e}")
+         yield None, None, None, None, f"Error processing video: {e}"
+     finally:
+         # Clean up temporary directory
+         if temp_dir and os.path.exists(temp_dir):
+             try:
+                 shutil.rmtree(temp_dir)
+             except Exception as e:
+                 print(f"Error cleaning up temp directory: {e}")
+
+ @spaces.GPU(duration=300)  # Match process_video duration
+ def process_wrapper(video, fps=0, fast_mode=True, max_workers=6):
+     if video is None:
+         raise gr.Error("Please upload a video.")
+     try:
+         for outputs in process_video(video, fps, fast_mode, max_workers):
+             yield outputs
+     except Exception as e:
+         raise gr.Error(f"Error processing video: {str(e)}")

  # Custom CSS for styling
  custom_css = """
 
  }
  """

+ # Gradio Interface
  with gr.Blocks(css=custom_css, theme=WhiteTheme()) as demo:
      gr.HTML('''
      <div class="title-container">

      </div>
      <script>
      (function() {
+         const text = "video";
          const typedTextSpan = document.getElementById("typed-text");
          let charIndex = 0;

      </script>
      ''')

 
331
  with gr.Row():
332
  with gr.Column():
333
+ video_input = gr.Video(
334
+ label="Upload Video",
335
+ interactive=True,
336
+ show_label=True,
337
+ height=360,
338
+ width=640
339
+ )
340
+ with gr.Row():
341
+ fps_slider = gr.Slider(
342
+ minimum=0,
343
+ maximum=60,
344
+ step=1,
345
+ value=0,
346
+ label="Output FPS (0 will inherit the original fps value)",
347
+ )
348
+ fast_mode_checkbox = gr.Checkbox(
349
+ label="Fast Mode (Use BiRefNet_lite)",
350
+ value=True
351
+ )
352
+ max_workers_slider = gr.Slider(
353
+ minimum=1,
354
+ maximum=32,
355
+ step=1,
356
+ value=6,
357
+ label="Max Workers",
358
+ info="Determines how many frames to process in parallel"
359
+ )
360
+ btn = gr.Button("Process Video", elem_id="submit-button")
361
 
362
  with gr.Column():
363
+ preview_image = gr.Image(label="Live Preview", show_label=True)
364
+ output_foreground_zip = gr.File(label="Download PNG Sequence (ZIP)")
365
+ output_foreground_video = gr.File(label="Download Video (ProRes 4444 MOV with transparency)")
366
+ output_background = gr.Video(label="Background (Coming Soon)")
367
+ time_textbox = gr.Textbox(label="Status", interactive=False)
368
+
369
+ gr.Markdown("""
370
+ ### Output Information
371
+ - MOV file uses ProRes 4444 codec for professional-grade alpha channel
372
+ - Original resolution and framerate are maintained
373
+ - PNG sequence provided for maximum compatibility
374
+ """)
375
+
376
+ btn.click(
377
+ fn=process_wrapper,
378
+ inputs=[video_input, fps_slider, fast_mode_checkbox, max_workers_slider],
379
+ outputs=[preview_image, output_foreground_zip, output_foreground_video,
380
+ output_background, time_textbox]
381
+ )
382
 
383
+ if __name__ == "__main__":
384
  demo.launch(debug=True)
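
For reference, a minimal sketch (not part of the commit) of driving the new process_video generator directly, without the Gradio UI. It assumes app.py is importable from the working directory, that the spaces.GPU decorator degrades to a no-op outside a GPU Space, and that "input.mp4" is a placeholder path; the five-item tuples mirror the yields in the diff (preview frame, PNG-sequence ZIP, ProRes 4444 MOV, background placeholder, status text).

# Hypothetical driver script; importing app also loads both BiRefNet models.
from app import process_video

zip_path = mov_path = None
for preview, zip_out, mov_out, _background, status in process_video(
        "input.mp4", fps=0, fast_mode=True, max_workers=4):
    print(status)                    # per-frame progress, then the completion message
    zip_path = zip_out or zip_path   # set only on the final yield
    mov_path = mov_out or mov_path   # set only on the final yield

print("PNG sequence ZIP:", zip_path)
print("ProRes 4444 MOV:", mov_path)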