Spaces:

fffiloni
/

RB-Modulation

Sleeping

App Files Files Community

fffiloni commited on Aug 31, 2024

Commit

04793a7

verified ·

1 Parent(s): 64bb281

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -89

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import sys
 import os
 from pathlib import Path
 # Add the StableCascade and CSD directories to the Python path
 app_dir = Path(__file__).parent
@@ -27,12 +28,29 @@ from gdf.schedulers import CosineSchedule
 from gdf import VPScaler, CosineTNoiseCond, DDPMSampler, P2LossWeight, AdaptiveLossWeight
 from gdf.targets import EpsilonTarget
 # Device configuration
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(device)
 # Flag for low VRAM usage
-low_vram = False
 # Function definition for low VRAM usage
 if low_vram:
@@ -53,84 +71,12 @@ if low_vram:
                 print(f"Change device of '{attr_name}' to {device}")
                 attr_value.to(device)
-        torch.cuda.empty_cache()
-# Stage C model configuration
-config_file = 'third_party/StableCascade/configs/inference/stage_c_3b.yaml'
-with open(config_file, "r", encoding="utf-8") as file:
-    loaded_config = yaml.safe_load(file)
-core = WurstCoreCRBM(config_dict=loaded_config, device=device, training=False)
-# Stage B model configuration
-config_file_b = 'third_party/StableCascade/configs/inference/stage_b_3b.yaml'
-with open(config_file_b, "r", encoding="utf-8") as file:
-    config_file_b = yaml.safe_load(file)
-core_b = WurstCoreB(config_dict=config_file_b, device=device, training=False)
-# Setup extras and models for Stage C
-extras = core.setup_extras_pre()
-gdf_rbm = RBM(
-    schedule=CosineSchedule(clamp_range=[0.0001, 0.9999]),
-    input_scaler=VPScaler(), target=EpsilonTarget(),
-    noise_cond=CosineTNoiseCond(),
-    loss_weight=AdaptiveLossWeight(),
-)
-sampling_configs = {
-    "cfg": 5,
-    "sampler": DDPMSampler(gdf_rbm),
-    "shift": 1,
-    "timesteps": 20
-}
-extras = core.Extras(
-    gdf=gdf_rbm,
-    sampling_configs=sampling_configs,
-    transforms=extras.transforms,
-    effnet_preprocess=extras.effnet_preprocess,
-    clip_preprocess=extras.clip_preprocess
-)
-models = core.setup_models(extras)
-models.generator.eval().requires_grad_(False)
-# Setup extras and models for Stage B
-extras_b = core_b.setup_extras_pre()
-models_b = core_b.setup_models(extras_b, skip_clip=True)
-models_b = WurstCoreB.Models(
-    **{**models_b.to_dict(), 'tokenizer': models.tokenizer, 'text_model': models.text_model}
-)
-models_b.generator.bfloat16().eval().requires_grad_(False)
-# Off-load old generator (low VRAM mode)
-if low_vram:
-    models.generator.to("cpu")
-    torch.cuda.empty_cache()
-# Load and configure new generator
-generator_rbm = StageCRBM()
-for param_name, param in load_or_fail(core.config.generator_checkpoint_path).items():
-    set_module_tensor_to_device(generator_rbm, param_name, "cpu", value=param)
-generator_rbm = generator_rbm.to(getattr(torch, core.config.dtype)).to(device)
-generator_rbm = core.load_model(generator_rbm, 'generator')
-# Create models_rbm instance
-models_rbm = core.Models(
-    effnet=models.effnet,
-    previewer=models.previewer,
-    generator=generator_rbm,
-    generator_ema=models.generator_ema,
-    tokenizer=models.tokenizer,
-    text_model=models.text_model,
-    image_model=models.image_model
-)
-models_rbm.generator.eval().requires_grad_(False)
 def infer(style_description, ref_style_file, caption):
     height=1024
     width=1024
@@ -166,19 +112,22 @@ def infer(style_description, ref_style_file, caption):
         models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
     # Stage C reverse process.
-    sampling_c = extras.gdf.sample(
-        models_rbm.generator, conditions, stage_c_latent_shape,
-        unconditions, device=device,
-        **extras.sampling_configs,
-        x0_style_forward=x0_style_forward,
-        apply_pushforward=False, tau_pushforward=8,
-        num_iter=3, eta=0.1, tau=20, eval_csd=True,
-        extras=extras, models=models_rbm,
-        lam_style=1, lam_txt_alignment=1.0,
-        use_ddim_sampler=True,
-    )
-    for (sampled_c, _, _) in tqdm(sampling_c, total=extras.sampling_configs['timesteps']):
-        sampled_c = sampled_c
     # Stage B reverse process.
     with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
@@ -203,6 +152,8 @@ def infer(style_description, ref_style_file, caption):
     sampled_image = T.ToPILImage()(sampled.squeeze(0))  # Convert tensor to PIL image
     sampled_image.save(output_file)  # Save the image
     return output_file  # Return the path to the saved image
 import gradio as gr

 import sys
 import os
 from pathlib import Path
+import gc
 # Add the StableCascade and CSD directories to the Python path
 app_dir = Path(__file__).parent
 from gdf import VPScaler, CosineTNoiseCond, DDPMSampler, P2LossWeight, AdaptiveLossWeight
 from gdf.targets import EpsilonTarget
+# Enable mixed precision
+torch.backends.cuda.matmul.allow_tf32 = True
+torch.backends.cudnn.allow_tf32 = True
 # Device configuration
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(device)
 # Flag for low VRAM usage
+low_vram = True  # Set to True to enable low VRAM optimizations
+# Function to clear GPU cache
+def clear_gpu_cache():
+    torch.cuda.empty_cache()
+    gc.collect()
+# Function to move model to CPU
+def to_cpu(model):
+    return model.cpu()
+# Function to move model to GPU
+def to_gpu(model):
+    return model.cuda()
 # Function definition for low VRAM usage
 if low_vram:
                 print(f"Change device of '{attr_name}' to {device}")
                 attr_value.to(device)
+        clear_gpu_cache()
+# ... (rest of your setup code remains the same)
 def infer(style_description, ref_style_file, caption):
+    clear_gpu_cache()  # Clear cache before inference
     height=1024
     width=1024
         models_to(models_rbm, device="cpu", excepts=["generator", "previewer"])
     # Stage C reverse process.
+    with torch.cuda.amp.autocast():  # Use mixed precision
+        sampling_c = extras.gdf.sample(
+            models_rbm.generator, conditions, stage_c_latent_shape,
+            unconditions, device=device,
+            **extras.sampling_configs,
+            x0_style_forward=x0_style_forward,
+            apply_pushforward=False, tau_pushforward=8,
+            num_iter=3, eta=0.1, tau=20, eval_csd=True,
+            extras=extras, models=models_rbm,
+            lam_style=1, lam_txt_alignment=1.0,
+            use_ddim_sampler=True,
+        )
+        for (sampled_c, _, _) in tqdm(sampling_c, total=extras.sampling_configs['timesteps']):
+            sampled_c = sampled_c
+    clear_gpu_cache()  # Clear cache between stages
     # Stage B reverse process.
     with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.bfloat16):
     sampled_image = T.ToPILImage()(sampled.squeeze(0))  # Convert tensor to PIL image
     sampled_image.save(output_file)  # Save the image
+    clear_gpu_cache()  # Clear cache after inference
     return output_file  # Return the path to the saved image
 import gradio as gr