STUDIO

Running on Zero

App Files Files Community

openfree committed on May 28

Commit

8a835ac

verified ·

1 Parent(s): 62ec1ca

Update app.py

Browse files

Files changed (1) hide show

app.py +272 -11

app.py CHANGED Viewed

@@ -16,6 +16,13 @@ from einops import rearrange
 from scipy.io import wavfile
 from transformers import pipeline
 # 환경 변수 설정으로 torch.load 체크 우회 (임시 해결책)
 os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
@@ -45,7 +52,29 @@ from mmaudio.model.networks import MMAudio, get_my_mmaudio
 from mmaudio.model.sequence_config import SequenceConfig
 from mmaudio.model.utils.features_utils import FeaturesUtils
-# ControlNet 모델 로드
 try:
     from controlnet_union import ControlNetModel_Union
     from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
@@ -94,18 +123,14 @@ except Exception as e:
     logging.error(f"Failed to load outpainting models: {str(e)}")
     OUTPAINT_MODEL_LOADED = False
-# MMAudio 모델 설정
 if torch.cuda.is_available():
-    device = torch.device("cuda")
-    torch.backends.cuda.matmul.allow_tf32 = True
-    torch.backends.cudnn.allow_tf32 = True
-    torch.backends.cudnn.benchmark = True
 else:
     device = torch.device("cpu")
-dtype = torch.bfloat16
-# MMAudio 모델 초기화
 try:
     model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
     model_mmaudio.download_if_needed()
@@ -155,7 +180,7 @@ VIDEO_API_URL = "http://211.233.58.201:7875"
 # 로깅 설정
 logging.basicConfig(level=logging.INFO)
-# Image size presets
 IMAGE_PRESETS = {
     "커스텀": {"width": 1024, "height": 1024},
     "1:1 정사각형": {"width": 1024, "height": 1024},
@@ -172,6 +197,7 @@ IMAGE_PRESETS = {
     "LinkedIn 배너": {"width": 1584, "height": 396},
 }
 def update_dimensions(preset):
     if preset in IMAGE_PRESETS:
         return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]
@@ -431,6 +457,113 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int
                duration_sec=seq_cfg.duration)
     return video_save_path
 # CSS
 css = """
 :root {
@@ -456,7 +589,7 @@ css = """
     padding: 20px !important;
     margin-bottom: 20px !important;
 }
-#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn {
     background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
     font-size: 1.1rem !important;
     padding: 12px 24px !important;
@@ -652,6 +785,110 @@ with demo:
                         if not MMAUDIO_MODEL_LOADED:
                             gr.Markdown("⚠️ MMAudio 모델을 로드하지 못했습니다. 이 기능은 사용할 수 없습니다.")
     # 이벤트 연결 - 첫 번째 탭
     size_preset.change(update_dimensions, [size_preset], [width, height])
@@ -689,5 +926,29 @@ with demo:
         [audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
         [output_video_with_audio]
     )
 demo.launch()

 from scipy.io import wavfile
 from transformers import pipeline
+# 비디오 배경제거를 위한 추가 import
+from transformers import AutoModelForImageSegmentation
+from torchvision import transforms
+from moviepy import VideoFileClip, vfx, concatenate_videoclips, ImageSequenceClip
+import time
+from concurrent.futures import ThreadPoolExecutor
 # 환경 변수 설정으로 torch.load 체크 우회 (임시 해결책)
 os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
 from mmaudio.model.sequence_config import SequenceConfig
 from mmaudio.model.utils.features_utils import FeaturesUtils
+# Keep all settings and initialization from the existing code
+torch.set_float32_matmul_precision("medium")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the BiRefNet models (full and lite variants) used for background removal
+try:
+    birefnet = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
+    birefnet.to(device)
+    birefnet_lite = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True)
+    birefnet_lite.to(device)
+    transform_image = transforms.Compose([  # preprocessing applied to each image before it is fed to the model
+        transforms.Resize((768, 768)),
+        transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # presumably ImageNet mean/std -- TODO confirm
+    ])
+    BIREFNET_MODEL_LOADED = True
+except Exception as e:  # any failure only disables the feature via the flag below
+    logging.error(f"Failed to load BiRefNet models: {str(e)}")
+    BIREFNET_MODEL_LOADED = False
+# ControlNet 모델 로드 (기존 코드)
 try:
     from controlnet_union import ControlNetModel_Union
     from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
     logging.error(f"Failed to load outpainting models: {str(e)}")
     OUTPAINT_MODEL_LOADED = False
+# MMAudio model settings (existing code)
 if torch.cuda.is_available():
+    dtype = torch.bfloat16
 else:
     device = torch.device("cpu")
+    dtype = torch.float32
+# MMAudio 모델 초기화 (기존 코드)
 try:
     model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
     model_mmaudio.download_if_needed()
 # 로깅 설정
 logging.basicConfig(level=logging.INFO)
+# Image size presets (기존 코드)
 IMAGE_PRESETS = {
     "커스텀": {"width": 1024, "height": 1024},
     "1:1 정사각형": {"width": 1024, "height": 1024},
     "LinkedIn 배너": {"width": 1584, "height": 396},
 }
+# 기존 함수들 모두 유지
 def update_dimensions(preset):
     if preset in IMAGE_PRESETS:
         return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]
                duration_sec=seq_cfg.duration)
     return video_save_path
+# 비디오 배경제거 관련 함수들
+def process_bg_image(image, bg, fast_mode=False):
+    """단일 이미지 배경 처리"""
+    if not BIREFNET_MODEL_LOADED:
+        return image
+    image_size = image.size
+    input_images = transform_image(image).unsqueeze(0).to(device)
+    model = birefnet_lite if fast_mode else birefnet
+    with torch.no_grad():
+        preds = model(input_images)[-1].sigmoid().cpu()
+    pred = preds[0].squeeze()
+    pred_pil = transforms.ToPILImage()(pred)
+    mask = pred_pil.resize(image_size)
+    if isinstance(bg, str) and bg.startswith("#"):
+        color_rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5))
+        background = Image.new("RGBA", image_size, color_rgb + (255,))
+    elif isinstance(bg, Image.Image):
+        background = bg.convert("RGBA").resize(image_size)
+    else:
+        background = Image.open(bg).convert("RGBA").resize(image_size)
+    image = Image.composite(image, background, mask)
+    return image
+def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color):
+    """비디오 프레임 처리"""
+    try:
+        pil_image = Image.fromarray(frame)
+        if bg_type == "색상":
+            processed_image = process_bg_image(pil_image, color, fast_mode)
+        elif bg_type == "이미지":
+            processed_image = process_bg_image(pil_image, bg, fast_mode)
+        elif bg_type == "비디오":
+            background_frame = background_frames[bg_frame_index]
+            bg_frame_index += 1
+            background_image = Image.fromarray(background_frame)
+            processed_image = process_bg_image(pil_image, background_image, fast_mode)
+        else:
+            processed_image = pil_image
+        return np.array(processed_image), bg_frame_index
+    except Exception as e:
+        print(f"Error processing frame: {e}")
+        return frame, bg_frame_index
+@spaces.GPU
+def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
+                     fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
+    """비디오 배경 처리 메인 함수"""
+    if not BIREFNET_MODEL_LOADED:
+        yield gr.update(visible=False), gr.update(visible=True), "BiRefNet 모델을 로드하지 못했습니다."
+        yield None, None, "BiRefNet 모델을 로드하지 못했습니다."
+        return
+    try:
+        start_time = time.time()
+        video = VideoFileClip(vid)
+        if fps == 0:
+            fps = video.fps
+        audio = video.audio
+        frames = list(video.iter_frames(fps=fps))
+        processed_frames = []
+        yield gr.update(visible=True), gr.update(visible=False), f"처리 시작... 경과 시간: 0초"
+        if bg_type == "비디오":
+            background_video = VideoFileClip(bg_video)
+            if background_video.duration < video.duration:
+                if video_handling == "slow_down":
+                    background_video = background_video.fx(vfx.speedx, factor=video.duration / background_video.duration)
+                else:  # video_handling == "loop"
+                    background_video = concatenate_videoclips([background_video] * int(video.duration / background_video.duration + 1))
+            background_frames = list(background_video.iter_frames(fps=fps))
+        else:
+            background_frames = None
+        bg_frame_index = 0
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [executor.submit(process_video_frame, frames[i], bg_type, bg_image, fast_mode,
+                                     bg_frame_index + i, background_frames, color) for i in range(len(frames))]
+            for i, future in enumerate(futures):
+                result, _ = future.result()
+                processed_frames.append(result)
+                elapsed_time = time.time() - start_time
+                yield result, None, f"프레임 {i+1}/{len(frames)} 처리 중... 경과 시간: {elapsed_time:.2f}초"
+        processed_video = ImageSequenceClip(processed_frames, fps=fps)
+        processed_video = processed_video.with_audio(audio)
+        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
+            temp_filepath = temp_file.name
+            processed_video.write_videofile(temp_filepath, codec="libx264")
+        elapsed_time = time.time() - start_time
+        yield gr.update(visible=False), gr.update(visible=True), f"처리 완료! 경과 시간: {elapsed_time:.2f}초"
+        yield processed_frames[-1], temp_filepath, f"처리 완료! 경과 시간: {elapsed_time:.2f}초"
+    except Exception as e:
+        print(f"Error: {e}")
+        elapsed_time = time.time() - start_time
+        yield gr.update(visible=False), gr.update(visible=True), f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
+        yield None, f"비디오 처리 오류: {e}", f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
 # CSS
 css = """
 :root {
     padding: 20px !important;
     margin-bottom: 20px !important;
 }
+#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn {
     background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
     font-size: 1.1rem !important;
     padding: 12px 24px !important;
                         if not MMAUDIO_MODEL_LOADED:
                             gr.Markdown("⚠️ MMAudio 모델을 로드하지 못했습니다. 이 기능은 사용할 수 없습니다.")
+        # Fourth tab: video background removal/compositing
+        with gr.Tab("비디오 배경제거/합성", elem_classes="tabitem"):
+            with gr.Row(equal_height=True):
+                # Input column
+                with gr.Column(scale=1):
+                    with gr.Group(elem_classes="panel-box"):
+                        gr.Markdown("### 🎥 비디오 업로드")
+                        bg_video_input = gr.Video(
+                            label="입력 비디오",
+                            interactive=True
+                        )
+                    with gr.Group(elem_classes="panel-box"):
+                        gr.Markdown("### 🎨 배경 설정")
+                        bg_type = gr.Radio(
+                            ["색상", "이미지", "비디오"],
+                            label="배경 유형",
+                            value="색상",
+                            interactive=True
+                        )
+                        color_picker = gr.ColorPicker(
+                            label="배경 색상",
+                            value="#00FF00",
+                            visible=True,
+                            interactive=True
+                        )
+                        bg_image_input = gr.Image(
+                            label="배경 이미지",
+                            type="filepath",
+                            visible=False,
+                            interactive=True
+                        )
+                        bg_video_bg = gr.Video(
+                            label="배경 비디오",
+                            visible=False,
+                            interactive=True
+                        )
+                        with gr.Column(visible=False) as video_handling_options:
+                            video_handling_radio = gr.Radio(
+                                ["slow_down", "loop"],
+                                label="비디오 처리 방식",
+                                value="slow_down",
+                                interactive=True,
+                                info="slow_down: 배경 비디오를 느리게 재생, loop: 배경 비디오를 반복"
+                            )
+                    with gr.Group(elem_classes="panel-box"):
+                        gr.Markdown("### ⚙️ 처리 설정")
+                        fps_slider = gr.Slider(
+                            minimum=0,
+                            maximum=60,
+                            step=1,
+                            value=0,
+                            label="출력 FPS (0 = 원본 FPS 유지)",
+                            interactive=True
+                        )
+                        fast_mode_checkbox = gr.Checkbox(
+                            label="빠른 모드 (BiRefNet_lite 사용)",
+                            value=True,
+                            interactive=True
+                        )
+                        max_workers_slider = gr.Slider(
+                            minimum=1,
+                            maximum=32,
+                            step=1,
+                            value=10,
+                            label="최대 워커 수",
+                            info="병렬로 처리할 프레임 수",
+                            interactive=True
+                        )
+                        bg_remove_btn = gr.Button("🎬 배경 변경", variant="primary", elem_id="bg-remove-btn")
+                        if not BIREFNET_MODEL_LOADED:  # checked once, while the layout is being built
+                            gr.Markdown("⚠️ BiRefNet 모델을 로드하지 못했습니다. 이 기능은 사용할 수 없습니다.")
+                # Output column
+                with gr.Column(scale=1):
+                    with gr.Group(elem_classes="panel-box"):
+                        gr.Markdown("### 🎬 처리 결과")
+                        stream_image = gr.Image(label="실시간 스트리밍", visible=False)
+                        output_bg_video = gr.Video(label="최종 비디오")
+                        time_textbox = gr.Textbox(label="경과 시간", interactive=False)
+                        gr.Markdown("""
+                        ### ℹ️ 사용 방법
+                        1. 비디오를 업로드하세요
+                        2. 원하는 배경 유형을 선택하세요
+                        3. 설정을 조정하고 '배경 변경' 버튼을 클릭하세요
+                        **참고**: GPU 제한으로 한 번에 약 200프레임까지 처리 가능합니다.
+                        긴 비디오는 작은 조각으로 나누어 처리하세요.
+                        """)
     # 이벤트 연결 - 첫 번째 탭
     size_preset.change(update_dimensions, [size_preset], [width, height])
         [audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
         [output_video_with_audio]
     )
+    # 이벤트 연결 - 네 번째 탭
+    def update_bg_visibility(bg_type):
+        if bg_type == "색상":
+            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+        elif bg_type == "이미지":
+            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
+        elif bg_type == "비디오":
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
+        else:
+            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+    bg_type.change(  # re-evaluate which background inputs are visible when the selection changes
+        update_bg_visibility,
+        inputs=bg_type,
+        outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options]
+    )
+    bg_remove_btn.click(  # run the background replacement; input order follows process_video_bg's parameters
+        process_video_bg,
+        inputs=[bg_video_input, bg_type, bg_image_input, bg_video_bg, color_picker,
+                fps_slider, video_handling_radio, fast_mode_checkbox, max_workers_slider],
+        outputs=[stream_image, output_bg_video, time_textbox]
+    )
 demo.launch()