openfree committed on
Commit
63f5669
·
verified ·
1 Parent(s): 40f5b5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -23
app.py CHANGED
@@ -27,9 +27,12 @@ from concurrent.futures import ThreadPoolExecutor
27
  os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
28
 
29
  # Spaces GPU
30
- try:
 
 
 
31
  import spaces
32
- except:
33
  # GPU 데코레이터가 없을 때를 위한 더미 데코레이터
34
  class spaces:
35
  @staticmethod
@@ -38,6 +41,15 @@ except:
38
  return func
39
  return decorator
40
 
 
 
 
 
 
 
 
 
 
41
  # MMAudio imports
42
  try:
43
  import mmaudio
@@ -54,7 +66,16 @@ from mmaudio.model.utils.features_utils import FeaturesUtils
54
 
55
  # 기존 코드의 모든 설정과 초기화 부분 유지
56
  torch.set_float32_matmul_precision("medium")
57
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
 
 
 
 
 
 
58
 
59
  # BiRefNet 모델 로드
60
  try:
@@ -100,21 +121,21 @@ try:
100
  )
101
 
102
  model = result[0]
103
- model = model.to(device="cuda", dtype=torch.float16)
104
 
105
  # VAE 로드
106
  vae = AutoencoderKL.from_pretrained(
107
- "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
108
- ).to("cuda")
109
 
110
  # 파이프라인 로드
111
  pipe = StableDiffusionXLFillPipeline.from_pretrained(
112
  "SG161222/RealVisXL_V5.0_Lightning",
113
- torch_dtype=torch.float16,
114
  vae=vae,
115
  controlnet=model,
116
- variant="fp16",
117
- ).to("cuda")
118
 
119
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
120
 
@@ -125,10 +146,9 @@ except Exception as e:
125
 
126
  # MMAudio 모델 설정 (기존 코드)
127
  if torch.cuda.is_available():
128
- dtype = torch.bfloat16
129
  else:
130
- device = torch.device("cpu")
131
- dtype = torch.float32
132
 
133
  # MMAudio 모델 초기화 (기존 코드)
134
  try:
@@ -151,7 +171,7 @@ try:
151
  def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
152
  with torch.cuda.device(device):
153
  seq_cfg = model_mmaudio.seq_cfg
154
- net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, dtype).eval()
155
  net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
156
  logging.info(f'Loaded weights from {model_mmaudio.model_path}')
157
 
@@ -162,7 +182,7 @@ try:
162
  mode=model_mmaudio.mode,
163
  bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
164
  need_vae_encoder=False
165
- ).to(device, dtype).eval()
166
 
167
  return net, feature_utils, seq_cfg
168
 
@@ -348,7 +368,7 @@ def preview_outpaint(image, width, height, overlap_percentage, alignment):
348
 
349
  return preview
350
 
351
- @spaces.GPU(duration=24)
352
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
353
  """이미지 아웃페인팅 실행"""
354
  if image is None:
@@ -371,13 +391,13 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
371
  final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
372
 
373
  # GPU에서 실행
374
- with torch.autocast(device_type="cuda", dtype=torch.float16):
375
  (
376
  prompt_embeds,
377
  negative_prompt_embeds,
378
  pooled_prompt_embeds,
379
  negative_pooled_prompt_embeds,
380
- ) = pipe.encode_prompt(final_prompt, "cuda", True)
381
 
382
  # 생성 프로세스
383
  for generated_image in pipe(
@@ -419,7 +439,7 @@ def translate_prompt(text):
419
  logging.error(f"Translation error: {e}")
420
  return text
421
 
422
- @spaces.GPU
423
  @torch.inference_mode()
424
  def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
425
  cfg_strength: float, duration: float):
@@ -504,7 +524,7 @@ def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, backgroun
504
  print(f"Error processing frame: {e}")
505
  return frame, bg_frame_index
506
 
507
- @spaces.GPU
508
  def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
509
  fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
510
  """비디오 배경 처리 메인 함수"""
@@ -564,7 +584,7 @@ def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color=
564
  yield gr.update(visible=False), gr.update(visible=True), f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
565
  yield None, None, f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
566
 
567
- @spaces.GPU
568
  def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
569
  """여러 비디오를 병합하고 오디오를 추가하는 함수"""
570
  if not video_files:
@@ -1150,10 +1170,21 @@ with demo:
1150
 
1151
  # GPU 초기화 (Spaces 환경에서 필요)
1152
  try:
1153
- if torch.cuda.is_available():
 
 
 
 
1154
  dummy_gpu_init()
1155
- except:
1156
- pass
1157
 
1158
  if __name__ == "__main__":
 
 
 
 
 
 
 
1159
  demo.launch()
 
27
  os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
28
 
29
  # Spaces GPU
30
+ import os
31
+ IS_SPACES = os.environ.get("SPACE_ID") is not None
32
+
33
+ if IS_SPACES:
34
  import spaces
35
+ else:
36
  # GPU 데코레이터가 없을 때를 위한 더미 데코레이터
37
  class spaces:
38
  @staticmethod
 
41
  return func
42
  return decorator
43
 
44
+ # GPU 초기화를 위한 간단한 함수 (Spaces 환경에서 필수)
45
+ @spaces.GPU(duration=1)
46
+ def gpu_warmup():
47
+ """GPU 워밍업 함수 - Spaces 환경에서 GPU 사용을 위해 필요"""
48
+ if torch.cuda.is_available():
49
+ dummy = torch.zeros(1).cuda()
50
+ del dummy
51
+ return "GPU ready"
52
+
53
  # MMAudio imports
54
  try:
55
  import mmaudio
 
66
 
67
  # 기존 코드의 모든 설정과 초기화 부분 유지
68
  torch.set_float32_matmul_precision("medium")
69
+
70
+ # Device 설정을 더 명확하게
71
+ if torch.cuda.is_available():
72
+ device = torch.device("cuda")
73
+ torch_dtype = torch.float16
74
+ else:
75
+ device = torch.device("cpu")
76
+ torch_dtype = torch.float32
77
+
78
+ logging.info(f"Using device: {device}")
79
 
80
  # BiRefNet 모델 로드
81
  try:
 
121
  )
122
 
123
  model = result[0]
124
+ model = model.to(device=device, dtype=torch.float16 if device.type == "cuda" else torch.float32)
125
 
126
  # VAE 로드
127
  vae = AutoencoderKL.from_pretrained(
128
+ "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 if device.type == "cuda" else torch.float32
129
+ ).to(device)
130
 
131
  # 파이프라인 로드
132
  pipe = StableDiffusionXLFillPipeline.from_pretrained(
133
  "SG161222/RealVisXL_V5.0_Lightning",
134
+ torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
135
  vae=vae,
136
  controlnet=model,
137
+ variant="fp16" if device.type == "cuda" else None,
138
+ ).to(device)
139
 
140
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
141
 
 
146
 
147
  # MMAudio 모델 설정 (기존 코드)
148
  if torch.cuda.is_available():
149
+ mmaudio_dtype = torch.bfloat16
150
  else:
151
+ mmaudio_dtype = torch.float32
 
152
 
153
  # MMAudio 모델 초기화 (기존 코드)
154
  try:
 
171
  def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
172
  with torch.cuda.device(device):
173
  seq_cfg = model_mmaudio.seq_cfg
174
+ net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
175
  net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
176
  logging.info(f'Loaded weights from {model_mmaudio.model_path}')
177
 
 
182
  mode=model_mmaudio.mode,
183
  bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
184
  need_vae_encoder=False
185
+ ).to(device, mmaudio_dtype).eval()
186
 
187
  return net, feature_utils, seq_cfg
188
 
 
368
 
369
  return preview
370
 
371
+ @spaces.GPU(duration=120)
372
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
373
  """이미지 아웃페인팅 실행"""
374
  if image is None:
 
391
  final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
392
 
393
  # GPU에서 실행
394
+ with torch.autocast(device_type=device.type, dtype=torch.float16 if device.type == "cuda" else torch.float32):
395
  (
396
  prompt_embeds,
397
  negative_prompt_embeds,
398
  pooled_prompt_embeds,
399
  negative_pooled_prompt_embeds,
400
+ ) = pipe.encode_prompt(final_prompt, str(device), True)
401
 
402
  # 생성 프로세스
403
  for generated_image in pipe(
 
439
  logging.error(f"Translation error: {e}")
440
  return text
441
 
442
+ @spaces.GPU(duration=120)
443
  @torch.inference_mode()
444
  def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
445
  cfg_strength: float, duration: float):
 
524
  print(f"Error processing frame: {e}")
525
  return frame, bg_frame_index
526
 
527
+ @spaces.GPU(duration=300)
528
  def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
529
  fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
530
  """비디오 배경 처리 메인 함수"""
 
584
  yield gr.update(visible=False), gr.update(visible=True), f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
585
  yield None, None, f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
586
 
587
+ @spaces.GPU(duration=180)
588
  def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
589
  """여러 비디오를 병합하고 오디오를 추가하는 함수"""
590
  if not video_files:
 
1170
 
1171
  # GPU 초기화 (Spaces 환경에서 필요)
1172
  try:
1173
+ if IS_SPACES and torch.cuda.is_available():
1174
+ # Spaces 환경에서 GPU 워밍업 실행
1175
+ gpu_warmup()
1176
+ logging.info("GPU warmed up successfully")
1177
+ elif torch.cuda.is_available():
1178
  dummy_gpu_init()
1179
+ except Exception as e:
1180
+ logging.warning(f"GPU initialization warning: {e}")
1181
 
1182
  if __name__ == "__main__":
1183
+ # Spaces 환경에서 추가 GPU 체크
1184
+ if IS_SPACES:
1185
+ try:
1186
+ gpu_warmup()
1187
+ except:
1188
+ pass
1189
+
1190
  demo.launch()