Update app.py
app.py
CHANGED
@@ -27,9 +27,12 @@ from concurrent.futures import ThreadPoolExecutor
 os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
 
 # Spaces GPU
-try:
+import os
+IS_SPACES = os.environ.get("SPACE_ID") is not None
+
+if IS_SPACES:
     import spaces
-except:
+else:
     # Dummy decorator for when the GPU decorator is unavailable
     class spaces:
         @staticmethod
@@ -38,6 +41,15 @@ except:
             return func
         return decorator
 
+# Simple GPU initialization function (required in the Spaces environment)
+@spaces.GPU(duration=1)
+def gpu_warmup():
+    """GPU warmup function - needed to use the GPU in the Spaces environment"""
+    if torch.cuda.is_available():
+        dummy = torch.zeros(1).cuda()
+        del dummy
+    return "GPU ready"
+
 # MMAudio imports
 try:
     import mmaudio
@@ -54,7 +66,16 @@ from mmaudio.model.utils.features_utils import FeaturesUtils
 
 # Keep all settings and initialization from the original code
 torch.set_float32_matmul_precision("medium")
-
+
+# Make the device setup more explicit
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    torch_dtype = torch.float16
+else:
+    device = torch.device("cpu")
+    torch_dtype = torch.float32
+
+logging.info(f"Using device: {device}")
 
 # Load the BiRefNet model
 try:
@@ -100,21 +121,21 @@ try:
     )
 
     model = result[0]
-    model = model.to(device=
+    model = model.to(device=device, dtype=torch.float16 if device.type == "cuda" else torch.float32)
 
     # Load the VAE
     vae = AutoencoderKL.from_pretrained(
-        "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-    ).to(
+        "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
+    ).to(device)
 
     # Load the pipeline
     pipe = StableDiffusionXLFillPipeline.from_pretrained(
         "SG161222/RealVisXL_V5.0_Lightning",
-        torch_dtype=torch.float16,
+        torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
         vae=vae,
         controlnet=model,
-        variant="fp16",
-    ).to(
+        variant="fp16" if device.type == "cuda" else None,
+    ).to(device)
 
     pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 
@@ -125,10 +146,9 @@ except Exception as e:
 
 # MMAudio model settings (original code)
 if torch.cuda.is_available():
-
+    mmaudio_dtype = torch.bfloat16
 else:
-
-    dtype = torch.float32
+    mmaudio_dtype = torch.float32
 
 # MMAudio model initialization (original code)
 try:
@@ -151,7 +171,7 @@ try:
     def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
         with torch.cuda.device(device):
             seq_cfg = model_mmaudio.seq_cfg
-            net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device,
+            net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
             net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
             logging.info(f'Loaded weights from {model_mmaudio.model_path}')
 
@@ -162,7 +182,7 @@ try:
                 mode=model_mmaudio.mode,
                 bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
                 need_vae_encoder=False
-            ).to(device,
+            ).to(device, mmaudio_dtype).eval()
 
             return net, feature_utils, seq_cfg
 
@@ -348,7 +368,7 @@ def preview_outpaint(image, width, height, overlap_percentage, alignment):
 
     return preview
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=120)
 def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
     """Run image outpainting"""
     if image is None:
@@ -371,13 +391,13 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
     final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
 
     # Run on the GPU
-    with torch.autocast(device_type=
+    with torch.autocast(device_type=device.type, dtype=torch.float16 if device.type == "cuda" else torch.float32):
         (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
-        ) = pipe.encode_prompt(final_prompt,
+        ) = pipe.encode_prompt(final_prompt, str(device), True)
 
         # Generation process
         for generated_image in pipe(
@@ -419,7 +439,7 @@ def translate_prompt(text):
         logging.error(f"Translation error: {e}")
         return text
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 @torch.inference_mode()
 def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                    cfg_strength: float, duration: float):
@@ -504,7 +524,7 @@ def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, backgroun
         print(f"Error processing frame: {e}")
         return frame, bg_frame_index
 
-@spaces.GPU
+@spaces.GPU(duration=300)
 def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
                      fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
     """Main video background-processing function"""
@@ -564,7 +584,7 @@ def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color=
         yield gr.update(visible=False), gr.update(visible=True), f"Video processing error: {e}. Elapsed time: {elapsed_time:.2f}s"
         yield None, None, f"Video processing error: {e}. Elapsed time: {elapsed_time:.2f}s"
 
-@spaces.GPU
+@spaces.GPU(duration=180)
 def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
     """Merge multiple videos and add audio"""
     if not video_files:
@@ -1150,10 +1170,21 @@ with demo:
 
 # GPU initialization (needed in the Spaces environment)
 try:
-    if torch.cuda.is_available():
+    if IS_SPACES and torch.cuda.is_available():
+        # Run the GPU warmup in the Spaces environment
+        gpu_warmup()
+        logging.info("GPU warmed up successfully")
+    elif torch.cuda.is_available():
        dummy_gpu_init()
-except:
-
+except Exception as e:
+    logging.warning(f"GPU initialization warning: {e}")
 
 if __name__ == "__main__":
+    # Extra GPU check in the Spaces environment
+    if IS_SPACES:
+        try:
+            gpu_warmup()
+        except:
+            pass
+
     demo.launch()
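Note: the fallback branch above keeps only fragments of the dummy `spaces` class visible (the first hunk elides its middle lines). A minimal sketch of what such a no-op stand-in can look like, assuming the only API the app touches is `spaces.GPU`, used both bare (`@spaces.GPU`, old code) and parameterized (`@spaces.GPU(duration=120)`, new code) - this is a hypothetical completion, not the commit's exact code:

```python
# Hypothetical no-op stand-in for the `spaces` module (sketch only; the
# commit's actual class body is elided in the diff above).
class spaces:
    @staticmethod
    def GPU(*args, **kwargs):
        # Bare form: @spaces.GPU hands the wrapped function in directly.
        if len(args) == 1 and callable(args[0]) and not kwargs:
            return args[0]
        # Parameterized form: @spaces.GPU(duration=...) must return a decorator.
        def decorator(func):
            return func
        return decorator
```

On actual Spaces hardware the real `spaces.GPU(duration=...)` decorator requests a ZeroGPU allocation for up to the given number of seconds per call, which is presumably why the long-running video functions ask for 180-300 seconds while `gpu_warmup` asks for 1.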