Update app.py
app.py
CHANGED
@@ -27,9 +27,12 @@ from concurrent.futures import ThreadPoolExecutor
 os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
 
 # Spaces GPU
-try:
+import os
+IS_SPACES = os.environ.get("SPACE_ID") is not None
+
+if IS_SPACES:
     import spaces
-except:
+else:
     # Dummy decorator for when the GPU decorator is unavailable
     class spaces:
         @staticmethod
@@ -38,6 +41,15 @@ except:
             return func
         return decorator
 
+# Simple GPU initialization function (required in the Spaces environment)
+@spaces.GPU(duration=1)
+def gpu_warmup():
+    """GPU warmup function - needed to use the GPU in the Spaces environment"""
+    if torch.cuda.is_available():
+        dummy = torch.zeros(1).cuda()
+        del dummy
+    return "GPU ready"
+
 # MMAudio imports
 try:
     import mmaudio
@@ -54,7 +66,16 @@ from mmaudio.model.utils.features_utils import FeaturesUtils
 
 # Keep all settings and initialization from the original code
 torch.set_float32_matmul_precision("medium")
-
+
+# Make the device setup more explicit
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    torch_dtype = torch.float16
+else:
+    device = torch.device("cpu")
+    torch_dtype = torch.float32
+
+logging.info(f"Using device: {device}")
 
 # Load the BiRefNet model
 try:
@@ -100,21 +121,21 @@ try:
     )
 
     model = result[0]
-    model = model.to(device=
+    model = model.to(device=device, dtype=torch.float16 if device.type == "cuda" else torch.float32)
 
     # Load the VAE
     vae = AutoencoderKL.from_pretrained(
-        "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-    ).to(
+        "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
+    ).to(device)
 
     # Load the pipeline
     pipe = StableDiffusionXLFillPipeline.from_pretrained(
         "SG161222/RealVisXL_V5.0_Lightning",
-        torch_dtype=torch.float16,
+        torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
         vae=vae,
         controlnet=model,
-        variant="fp16",
-    ).to(
+        variant="fp16" if device.type == "cuda" else None,
+    ).to(device)
 
     pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 
@@ -125,10 +146,9 @@ except Exception as e:
 
 # MMAudio model settings (original code)
 if torch.cuda.is_available():
-
+    mmaudio_dtype = torch.bfloat16
 else:
-
-    dtype = torch.float32
+    mmaudio_dtype = torch.float32
 
 # MMAudio model initialization (original code)
 try:
@@ -151,7 +171,7 @@ try:
     def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
         with torch.cuda.device(device):
             seq_cfg = model_mmaudio.seq_cfg
-            net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device,
+            net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
             net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
             logging.info(f'Loaded weights from {model_mmaudio.model_path}')
 
@@ -162,7 +182,7 @@ try:
                 mode=model_mmaudio.mode,
                 bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
                 need_vae_encoder=False
-            ).to(device,
+            ).to(device, mmaudio_dtype).eval()
 
             return net, feature_utils, seq_cfg
 
@@ -348,7 +368,7 @@ def preview_outpaint(image, width, height, overlap_percentage, alignment):
 
     return preview
 
-@spaces.GPU(duration=
+@spaces.GPU(duration=120)
 def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
     """Run image outpainting"""
     if image is None:
@@ -371,13 +391,13 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
     final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
 
     # Run on the GPU
-    with torch.autocast(device_type=
+    with torch.autocast(device_type=device.type, dtype=torch.float16 if device.type == "cuda" else torch.float32):
         (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
-        ) = pipe.encode_prompt(final_prompt,
+        ) = pipe.encode_prompt(final_prompt, str(device), True)
 
         # Generation process
         for generated_image in pipe(
@@ -419,7 +439,7 @@ def translate_prompt(text):
         logging.error(f"Translation error: {e}")
         return text
 
-@spaces.GPU
+@spaces.GPU(duration=120)
 @torch.inference_mode()
 def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                    cfg_strength: float, duration: float):
@@ -504,7 +524,7 @@ def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, backgroun
         print(f"Error processing frame: {e}")
         return frame, bg_frame_index
 
-@spaces.GPU
+@spaces.GPU(duration=300)
 def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
                      fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
     """Main video background-processing function"""
@@ -564,7 +584,7 @@ def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color=
         yield gr.update(visible=False), gr.update(visible=True), f"Video processing error: {e}. Elapsed time: {elapsed_time:.2f}s"
         yield None, None, f"Video processing error: {e}. Elapsed time: {elapsed_time:.2f}s"
 
-@spaces.GPU
+@spaces.GPU(duration=180)
 def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
     """Merge multiple videos and add audio"""
     if not video_files:
@@ -1150,10 +1170,21 @@ with demo:
 
 # GPU initialization (needed in the Spaces environment)
 try:
-    if torch.cuda.is_available():
+    if IS_SPACES and torch.cuda.is_available():
+        # Run the GPU warmup in the Spaces environment
+        gpu_warmup()
+        logging.info("GPU warmed up successfully")
+    elif torch.cuda.is_available():
        dummy_gpu_init()
-except:
-
+except Exception as e:
+    logging.warning(f"GPU initialization warning: {e}")
 
 if __name__ == "__main__":
+    # Extra GPU check in the Spaces environment
+    if IS_SPACES:
+        try:
+            gpu_warmup()
+        except:
+            pass
+
     demo.launch()
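Note: the fallback branch above keeps only fragments of the dummy `spaces` class visible (the first hunk elides its middle lines). A minimal sketch of what such a no-op stand-in can look like, assuming the only API the app touches is `spaces.GPU`, used both bare (`@spaces.GPU`, old code) and parameterized (`@spaces.GPU(duration=120)`, new code) - this is a hypothetical completion, not the commit's exact code:

```python
# Hypothetical no-op stand-in for the `spaces` module (sketch only; the
# commit's actual class body is elided in the diff above).
class spaces:
    @staticmethod
    def GPU(*args, **kwargs):
        # Bare form: @spaces.GPU hands the wrapped function in directly.
        if len(args) == 1 and callable(args[0]) and not kwargs:
            return args[0]
        # Parameterized form: @spaces.GPU(duration=...) must return a decorator.
        def decorator(func):
            return func
        return decorator
```

On actual Spaces hardware the real `spaces.GPU(duration=...)` decorator requests a ZeroGPU allocation for up to the given number of seconds per call, which is presumably why the long-running video functions ask for 180-300 seconds while `gpu_warmup` asks for 1.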