sd_base:
  symbol: sd
  find_unused_parameters: true
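# The blocks below inherit from sd_base (directly or transitively) via
# super_cfg. The merge semantics belong to this repo's config helper, but the
# apparent behavior is that a child inherits every key of its parent and
# overrides the ones it redefines (an assumption read off the blocks below,
# e.g. sd_t2i_noema only flips use_ema).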
sd_autoencoder:
  type: autoencoderkl
  args:
    embed_dim: 4
    monitor: val/rec_loss
    ddconfig:
      double_z: true
      z_channels: 4
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 2, 4, 4]
      num_res_blocks: 2
      attn_resolutions: []
      dropout: 0.0
    lossconfig:
      target: torch.nn.Identity
  pth: pretrained/kl-f8.pth
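# sd_autoencoder above is the standard Stable Diffusion KL autoencoder:
# ch_mult [1, 2, 4, 4] gives three downsampling stages (8x spatial reduction,
# matching the kl-f8.pth checkpoint name) and z_channels: 4 yields 4-channel
# latents, so a 512x512x3 image encodes to a 4x64x64 latent. double_z: true
# makes the encoder output 2 * z_channels channels to parameterize the mean
# and log-variance of the latent Gaussian. lossconfig is torch.nn.Identity
# because the weights load pretrained from pth and no reconstruction loss is
# trained here.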
sd_t2i:
  super_cfg: sd_base
  type: sd_t2i
  args:
    first_stage_config: MODEL(sd_autoencoder)
    cond_stage_config: MODEL(clip_text_frozen)
    unet_config: MODEL(openai_unet_sd)
    beta_linear_start: 0.00085
    beta_linear_end: 0.012
    num_timesteps_cond: 1
    timesteps: 1000
    scale_factor: 0.18215
    use_ema: true
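# sd_t2i above uses the stock Stable Diffusion v1 diffusion hyperparameters:
# a linear beta schedule from 0.00085 to 0.012 over 1000 timesteps, and a
# latent scale_factor of 0.18215 that rescales autoencoder latents to roughly
# unit variance before diffusion. A minimal sketch of the schedule, assuming
# the code follows LDM's make_beta_schedule('linear', ...):
#
#   betas = linspace(0.00085 ** 0.5, 0.012 ** 0.5, 1000) ** 2
#
# use_ema: true means sampling runs on the exponential-moving-average copy of
# the UNet weights.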
sd_t2i_noema:
  super_cfg: sd
  args:
    use_ema: false
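# Note: super_cfg here is "sd", which matches the symbol of sd_base rather
# than any top-level key in this file. Whether the loader also resolves
# symbols as parents, or this was meant to read sd_t2i (as the *_noema child
# below, which only overrides cond_stage_config, would suggest), depends on
# the repo's config helper.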
#####################
# sd with full clip #
#####################
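# The variants in this section swap the conditioning encoder from the
# text-transformer-only CLIP above to the full CLIP model. Judging by the
# names (an assumption, since the MODEL() targets are defined elsewhere),
# clip_frozen_encode_text_noproj skips CLIP's final text projection to stay
# compatible with checkpoints trained on the original SD conditioning, while
# clip_frozen_encode_text applies the projection.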
sd_t2i_fullclip_backward_compatible:
  super_cfg: sd_t2i
  args:
    cond_stage_config: MODEL(clip_frozen_encode_text_noproj)
sd_t2i_fullclip_backward_compatible_noema:
  super_cfg: sd_t2i_noema
  args:
    cond_stage_config: MODEL(clip_frozen_encode_text_noproj)
sd_t2i_fullclip:
  super_cfg: sd_t2i
  args:
    cond_stage_config: MODEL(clip_frozen_encode_text)
sd_variation:
  super_cfg: sd_t2i
  type: sd_variation
  args:
    cond_stage_config: MODEL(clip_vision_frozen_justin)
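# sd_variation swaps the text encoder for a frozen CLIP vision encoder, so the
# model conditions on image embeddings and generates variations of an input
# image instead of text-to-image samples. The "justin" suffix presumably
# refers to the CLIP image-conditioning setup from Justin Pinkney's
# sd-image-variations (an inference from the name; the MODEL() target is
# defined elsewhere).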