import numpy as np import torch import torch.nn.functional as F from torchvision.transforms.functional import normalize import gradio as gr from gradio_imageslider import ImageSlider from briarmbg import BriaRMBG import PIL from PIL import Image from typing import Tuple import cv2 # Load the model net = BriaRMBG.from_pretrained("briaai/RMBG-1.4") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") net.to(device) net.eval() def resize_image(image): image = image.convert('RGB') model_input_size = (1024, 1024) image = image.resize(model_input_size, Image.BILINEAR) return image def process_image(image): # prepare input orig_image = Image.fromarray(image) w, h = orig_im_size = orig_image.size image = resize_image(orig_image) im_np = np.array(image) im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1) im_tensor = torch.unsqueeze(im_tensor, 0) im_tensor = torch.divide(im_tensor, 255.0) im_tensor = normalize(im_tensor, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0]) if torch.cuda.is_available(): im_tensor = im_tensor.cuda() # inference result = net(im_tensor) # post process result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode='bilinear'), 0) ma = torch.max(result) mi = torch.min(result) result = (result - mi) / (ma - mi) # image to pil result_array = (result * 255).cpu().data.numpy().astype(np.uint8) pil_mask = Image.fromarray(np.squeeze(result_array)) # add the mask on the original image as alpha channel new_im = orig_image.copy() new_im.putalpha(pil_mask) return new_im def process_video(video_path): cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise ValueError("Error opening video file") # Get video properties width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) fourcc = cv2.VideoWriter_fourcc(*'mp4v') out = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height), isColor=True) while cap.isOpened(): ret, frame = cap.read() if not ret: break # Convert frame to PIL Image frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) pil_image = Image.fromarray(frame) # Process the frame processed_image = process_image(frame) # Convert back to OpenCV format processed_frame = cv2.cvtColor(np.array(processed_image), cv2.COLOR_RGBA2BGRA) # Write the frame to the output video out.write(processed_frame) cap.release() out.release() return 'output.mp4' def process_input(input_data): if isinstance(input_data, str): # Assuming video path is provided as a string return process_video(input_data) else: # Assuming image is provided as numpy array return process_image(input_data) gr.Markdown("## BRIA RMBG 1.4") gr.HTML('''
This is a demo for BRIA RMBG 1.4 that using BRIA RMBG-1.4 image matting model as backbone.
''') title = "Background Removal" description = r"""Background removal model developed by BRIA.AI, trained on a carefully selected dataset and is available as an open-source model for non-commercial use.