diff --git a/checkpoints/put_checkpoints_here b/checkpoints/put_checkpoints_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/checkpoints/realvisxl_lightning.safetensors b/checkpoints/realvisxl_lightning.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8811ddfd563b11aff76596dbbf0eec81b5f1be12 --- /dev/null +++ b/checkpoints/realvisxl_lightning.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a48d3e2025448f011c27f4667145286910696d744c50c8ba3c2fb31dc98ea1 +size 6939220250 diff --git a/clip/clip_l.safetensors b/clip/clip_l.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e224fcda070dc105fd83c64ed2074f47a1b0ff7b --- /dev/null +++ b/clip/clip_l.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd +size 246144152 diff --git a/clip/put_clip_or_text_encoder_models_here b/clip/put_clip_or_text_encoder_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/clip/t5xxl_fp16.safetensors b/clip/t5xxl_fp16.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53df2732c884758d2a898585551df37ea652a416 --- /dev/null +++ b/clip/t5xxl_fp16.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e480b09fae049a72d2a8c5fbccb8d3e92febeb233bbe9dfe7256958a9167635 +size 9787841024 diff --git a/clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors b/clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..590c45d8aba49bfa35bc489af660f732aab16568 --- /dev/null +++ b/clip_vision/CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ca9667da1ca9e0b0f75e46bb030f7e011f44f86cbfb8d5a36590fcd7507b030 +size 2528373448 diff --git a/clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors b/clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e0b50327f653c896fb49fcdb6ab7fd3e56f84b0 --- /dev/null +++ b/clip_vision/CLIP-ViT-bigG-14-laion2B-39B-b160k.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657723e09f46a7c3957df651601029f66b1748afb12b419816330f16ed45d64d +size 3689912664 diff --git a/clip_vision/put_clip_vision_models_here b/clip_vision/put_clip_vision_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/configs/anything_v3.yaml b/configs/anything_v3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34 --- /dev/null +++ b/configs/anything_v3.yaml @@ -0,0 +1,73 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: 
[ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. ] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: "hidden" + layer_idx: -2 diff --git a/configs/v1-inference.yaml b/configs/v1-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4effe569e897369918625f9d8be5603a0e6a0d6 --- /dev/null +++ b/configs/v1-inference.yaml @@ -0,0 +1,70 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/configs/v1-inference_clip_skip_2.yaml b/configs/v1-inference_clip_skip_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bcfe584ae73d60e2c7a6f89b3f7befbd487ea34 --- /dev/null +++ b/configs/v1-inference_clip_skip_2.yaml @@ -0,0 +1,73 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: "hidden" + layer_idx: -2 diff --git a/configs/v1-inference_clip_skip_2_fp16.yaml b/configs/v1-inference_clip_skip_2_fp16.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7eca31c7b5e571c2b1348e94ed9d69978ebd2d52 --- /dev/null +++ b/configs/v1-inference_clip_skip_2_fp16.yaml @@ -0,0 +1,74 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + params: + layer: "hidden" + layer_idx: -2 diff --git a/configs/v1-inference_fp16.yaml b/configs/v1-inference_fp16.yaml new file mode 100644 index 0000000000000000000000000000000000000000..147f42b17b835cc839338156f99e8f971df5c1aa --- /dev/null +++ b/configs/v1-inference_fp16.yaml @@ -0,0 +1,71 @@ +model: + base_learning_rate: 1.0e-04 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 10000 ] + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder diff --git a/configs/v1-inpainting-inference.yaml b/configs/v1-inpainting-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45f3f82d461cd8c6109f26ec3b1da75366eda0b0 --- /dev/null +++ b/configs/v1-inpainting-inference.yaml @@ -0,0 +1,71 @@ +model: + base_learning_rate: 7.5e-05 + target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false # Note: different from the one we trained before + conditioning_key: hybrid # important + monitor: val/loss_simple_ema + scale_factor: 0.18215 + finetune_keys: null + + scheduler_config: # 10000 warmup steps + target: ldm.lr_scheduler.LambdaLinearScheduler + params: + warm_up_steps: [ 2500 ] # NOTE for resuming. use 10000 if starting from scratch + cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases + f_start: [ 1.e-6 ] + f_max: [ 1. ] + f_min: [ 1. 
] + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + image_size: 32 # unused + in_channels: 9 # 4 data + 4 downscaled image + 1 mask + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_heads: 8 + use_spatial_transformer: True + transformer_depth: 1 + context_dim: 768 + use_checkpoint: True + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenCLIPEmbedder + diff --git a/configs/v2-inference-v.yaml b/configs/v2-inference-v.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ec8dfbfefe94ae8522c93017668fea78d580acf --- /dev/null +++ b/configs/v2-inference-v.yaml @@ -0,0 +1,68 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + parameterization: "v" + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/configs/v2-inference-v_fp32.yaml b/configs/v2-inference-v_fp32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d5c9b9cb29ca162ade44a7c922f59e75d7d57813 --- /dev/null +++ b/configs/v2-inference-v_fp32.yaml @@ -0,0 +1,68 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + parameterization: "v" + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: False + image_size: 32 # unused + in_channels: 
4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/configs/v2-inference.yaml b/configs/v2-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..152c4f3c2b36c3b246a9cb10eb8166134b0d2e1c --- /dev/null +++ b/configs/v2-inference.yaml @@ -0,0 +1,67 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/configs/v2-inference_fp32.yaml b/configs/v2-inference_fp32.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d03231f3f2c2e8ef8fbe0d781e5f3d65409ef3a --- /dev/null +++ b/configs/v2-inference_fp32.yaml @@ -0,0 +1,67 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: False + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 
2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/configs/v2-inpainting-inference.yaml b/configs/v2-inpainting-inference.yaml new file mode 100644 index 0000000000000000000000000000000000000000..32a9471d71b828c51bcbbabfe34c5f6c8282c803 --- /dev/null +++ b/configs/v2-inpainting-inference.yaml @@ -0,0 +1,158 @@ +model: + base_learning_rate: 5.0e-05 + target: ldm.models.diffusion.ddpm.LatentInpaintDiffusion + params: + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: hybrid + scale_factor: 0.18215 + monitor: val/loss_simple_ema + finetune_keys: null + use_ema: False + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + image_size: 32 # unused + in_channels: 9 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [ ] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" + + +data: + target: ldm.data.laion.WebDataModuleFromConfig + params: + tar_base: null # for concat as in LAION-A + p_unsafe_threshold: 0.1 + filter_word_list: "data/filters.yaml" + max_pwatermark: 0.45 + batch_size: 8 + num_workers: 6 + multinode: True + min_size: 512 + train: + shards: + - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-0/{00000..18699}.tar -" + - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-1/{00000..18699}.tar -" + - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-2/{00000..18699}.tar -" + - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-3/{00000..18699}.tar -" + - "pipe:aws s3 cp s3://stability-aws/laion-a-native/part-4/{00000..18699}.tar -" #{00000-94333}.tar" + shuffle: 10000 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 512 + interpolation: 3 + - target: torchvision.transforms.RandomCrop + params: + size: 512 + postprocess: + target: ldm.data.laion.AddMask + params: + mode: "512train-large" + p_drop: 0.25 + # NOTE use enough shards to avoid empty validation loops in workers + validation: + 
shards: + - "pipe:aws s3 cp s3://deep-floyd-s3/datasets/laion_cleaned-part5/{93001..94333}.tar - " + shuffle: 0 + image_key: jpg + image_transforms: + - target: torchvision.transforms.Resize + params: + size: 512 + interpolation: 3 + - target: torchvision.transforms.CenterCrop + params: + size: 512 + postprocess: + target: ldm.data.laion.AddMask + params: + mode: "512train-large" + p_drop: 0.25 + +lightning: + find_unused_parameters: True + modelcheckpoint: + params: + every_n_train_steps: 5000 + + callbacks: + metrics_over_trainsteps_checkpoint: + params: + every_n_train_steps: 10000 + + image_logger: + target: main.ImageLogger + params: + enable_autocast: False + disabled: False + batch_frequency: 1000 + max_images: 4 + increase_log_steps: False + log_first_step: False + log_images_kwargs: + use_ema_scope: False + inpaint: False + plot_progressive_rows: False + plot_diffusion_rows: False + N: 4 + unconditional_guidance_scale: 5.0 + unconditional_guidance_label: [""] + ddim_steps: 50 # todo check these out for depth2img, + ddim_eta: 0.0 # todo check these out for depth2img, + + trainer: + benchmark: True + val_check_interval: 5000000 + num_sanity_val_steps: 0 + accumulate_grad_batches: 1 diff --git a/controlnet/put_controlnets_and_t2i_here b/controlnet/put_controlnets_and_t2i_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/diffusers/put_diffusers_models_here b/diffusers/put_diffusers_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/diffusion_models/flux1-dev.safetensors b/diffusion_models/flux1-dev.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19debd50d8f431809f22ec7c20cafdda4a861e78 --- /dev/null +++ b/diffusion_models/flux1-dev.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4610115bb0c89560703c892c59ac2742fa821e60ef5871b33493ba544683abd7 +size 23802932552 diff --git a/diffusion_models/put_diffusion_model_files_here b/diffusion_models/put_diffusion_model_files_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/embeddings/put_embeddings_or_textual_inversion_concepts_here b/embeddings/put_embeddings_or_textual_inversion_concepts_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/face_parsing/config.json b/face_parsing/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec0bfdcb824a07a97703061444af609eefa5c968 --- /dev/null +++ b/face_parsing/config.json @@ -0,0 +1,111 @@ +{ + "_name_or_path": "jonathandinu/face-parsing", + "architectures": [ + "SegformerForSemanticSegmentation" + ], + "attention_probs_dropout_prob": 0.0, + "classifier_dropout_prob": 0.1, + "decoder_hidden_size": 768, + "depths": [ + 3, + 6, + 40, + 3 + ], + "downsampling_rates": [ + 1, + 4, + 8, + 16 + ], + "drop_path_rate": 0.1, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.0, + "hidden_sizes": [ + 64, + 128, + 320, + 512 + ], + "id2label": { + "0": "background", + "1": "skin", + "2": "nose", + "3": "eye_g", + "4": "l_eye", + "5": "r_eye", + "6": "l_brow", + "7": "r_brow", + "8": "l_ear", + "9": "r_ear", + "10": "mouth", + "11": "u_lip", + "12": "l_lip", + "13": "hair", + "14": "hat", + "15": "ear_r", + "16": "neck_l", + "17": "neck", + "18": "cloth" + }, + "image_size": 224, + "initializer_range": 0.02, 
+ "label2id": { + "background": 0, + "skin": 1, + "nose": 2, + "eye_g": 3, + "l_eye": 4, + "r_eye": 5, + "l_brow": 6, + "r_brow": 7, + "l_ear": 8, + "r_ear": 9, + "mouth": 10, + "u_lip": 11, + "l_lip": 12, + "hair": 13, + "hat": 14, + "ear_r": 15, + "neck_l": 16, + "neck": 17, + "cloth": 18 + }, + "layer_norm_eps": 1e-06, + "mlp_ratios": [ + 4, + 4, + 4, + 4 + ], + "model_type": "segformer", + "num_attention_heads": [ + 1, + 2, + 5, + 8 + ], + "num_channels": 3, + "num_encoder_blocks": 4, + "patch_sizes": [ + 7, + 3, + 3, + 3 + ], + "reshape_last_stage": true, + "semantic_loss_ignore_index": 255, + "sr_ratios": [ + 8, + 4, + 2, + 1 + ], + "strides": [ + 4, + 2, + 2, + 2 + ], + "transformers_version": "4.37.0.dev0" +} diff --git a/face_parsing/model.safetensors b/face_parsing/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e30381f99433a6a415b0c2816579b9557e71a48 --- /dev/null +++ b/face_parsing/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2bec795a8c243db71bd95be538fd62559003566466c71237e45c99b920f4b62 +size 338580732 diff --git a/face_parsing/preprocessor_config.json b/face_parsing/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..89faa86b52097b90ef95c2cc85eb6c298a24a57e --- /dev/null +++ b/face_parsing/preprocessor_config.json @@ -0,0 +1,23 @@ +{ + "do_normalize": true, + "do_reduce_labels": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.485, + 0.456, + 0.406 + ], + "image_processor_type": "SegformerFeatureExtractor", + "image_std": [ + 0.229, + 0.224, + 0.225 + ], + "resample": 2, + "rescale_factor": 0.00392156862745098, + "size": { + "height": 512, + "width": 512 + } +} diff --git a/facerestore_models/GFPGANv1.3.pth b/facerestore_models/GFPGANv1.3.pth new file mode 100644 index 0000000000000000000000000000000000000000..1da748a3ef84ff85dd2c77c836f222aae22b007e --- /dev/null +++ b/facerestore_models/GFPGANv1.3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c953a88f2727c85c3d9ae72e2bd4846bbaf59fe6972ad94130e23e7017524a70 +size 348632874 diff --git a/facerestore_models/GFPGANv1.4.pth b/facerestore_models/GFPGANv1.4.pth new file mode 100644 index 0000000000000000000000000000000000000000..afedb5c7e826056840c9cc183f2c6f0186fd17ba --- /dev/null +++ b/facerestore_models/GFPGANv1.4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cd4703ab14f4d01fd1383a8a8b266f9a5833dacee8e6a79d3bf21a1b6be5ad +size 348632874 diff --git a/facerestore_models/GPEN-BFR-1024.onnx b/facerestore_models/GPEN-BFR-1024.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c62bc9e8d12595f0b91894e12351403fa83b4776 --- /dev/null +++ b/facerestore_models/GPEN-BFR-1024.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec8892093d7b99828acde97bf231fb0964d3fb11b43f3b0951e36ef1e192a3e +size 285101993 diff --git a/facerestore_models/GPEN-BFR-2048.onnx b/facerestore_models/GPEN-BFR-2048.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5f96247e9d808aed49cf03142dfb27a43fc15ce0 --- /dev/null +++ b/facerestore_models/GPEN-BFR-2048.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0229ff43f979c360bd19daa9cd0ce893722d59f41a41822b9223ebbe4f89b3e +size 285469146 diff --git a/facerestore_models/GPEN-BFR-512.onnx b/facerestore_models/GPEN-BFR-512.onnx new file mode 100644 index 
0000000000000000000000000000000000000000..d2111b516504e5ae7bcaeee49192bcac9ca52690 --- /dev/null +++ b/facerestore_models/GPEN-BFR-512.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf80acb8e91ba8852e3f012505be2c3b6cd6b3eed5ec605e3db87863c4e74d4e +size 284244491 diff --git a/facerestore_models/codeformer-v0.1.0.pth b/facerestore_models/codeformer-v0.1.0.pth new file mode 100644 index 0000000000000000000000000000000000000000..edd450da13c5ff890f70d726c992af569813f6af --- /dev/null +++ b/facerestore_models/codeformer-v0.1.0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7 +size 376637898 diff --git a/gligen/put_gligen_models_here b/gligen/put_gligen_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/hypernetworks/put_hypernetworks_here b/hypernetworks/put_hypernetworks_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/insightface/inswapper_128.onnx b/insightface/inswapper_128.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cb672b799d74fdf7ab8b172a1b1d78411f6400f5 --- /dev/null +++ b/insightface/inswapper_128.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af +size 554253681 diff --git a/insightface/models/buffalo_l/1k3d68.onnx b/insightface/models/buffalo_l/1k3d68.onnx new file mode 100644 index 0000000000000000000000000000000000000000..221aa2f02a6faccddb2723529e1f93c7db2edbdc --- /dev/null +++ b/insightface/models/buffalo_l/1k3d68.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc +size 143607619 diff --git a/insightface/models/buffalo_l/2d106det.onnx b/insightface/models/buffalo_l/2d106det.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cdb163d88b5f51396855ebc795e0114322c98b6b --- /dev/null +++ b/insightface/models/buffalo_l/2d106det.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf +size 5030888 diff --git a/insightface/models/buffalo_l/det_10g.onnx b/insightface/models/buffalo_l/det_10g.onnx new file mode 100644 index 0000000000000000000000000000000000000000..aa586e034379fa5ea5babc8aa73d47afcd0fa6c2 --- /dev/null +++ b/insightface/models/buffalo_l/det_10g.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91 +size 16923827 diff --git a/insightface/models/buffalo_l/genderage.onnx b/insightface/models/buffalo_l/genderage.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fcf638481cea978e99ddabd914ccd3b70c8401cb --- /dev/null +++ b/insightface/models/buffalo_l/genderage.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb +size 1322532 diff --git a/insightface/models/buffalo_l/w600k_r50.onnx b/insightface/models/buffalo_l/w600k_r50.onnx new file mode 100644 index 0000000000000000000000000000000000000000..571d2bb9ffd76399b23260620b9101b20bcc4e99 --- /dev/null +++ b/insightface/models/buffalo_l/w600k_r50.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43 +size 174383860 diff --git a/ipadapter/ip-adapter-faceid-portrait_sdxl_unnorm.bin b/ipadapter/ip-adapter-faceid-portrait_sdxl_unnorm.bin new file mode 100644 index 0000000000000000000000000000000000000000..fef531fa8dba6d532b2249bad7f2894e77175f44 --- /dev/null +++ b/ipadapter/ip-adapter-faceid-portrait_sdxl_unnorm.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:220bb86e205393a3d0411631cb473caddbf35fd371be2905ca9008818170db55 +size 1009523411 diff --git a/ipadapter/ip-adapter-plus-face_sdxl_vit-h.safetensors b/ipadapter/ip-adapter-plus-face_sdxl_vit-h.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b61b565e315fa0704a5c4e4d37ec8f9f7ac3a1af --- /dev/null +++ b/ipadapter/ip-adapter-plus-face_sdxl_vit-h.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677ad8860204f7d0bfba12d29e6c31ded9beefdf3e4bbd102518357d31a292c1 +size 847517512 diff --git a/loras/ip-adapter-faceid_sdxl_lora.safetensors b/loras/ip-adapter-faceid_sdxl_lora.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d02749c27bbb928770944f59caf7a4d58e495c2 --- /dev/null +++ b/loras/ip-adapter-faceid_sdxl_lora.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcf93d6e8dc8dd18f5f9e51c8306f369486ed0aa0780ade9961308aff7f0d64 +size 371842896 diff --git a/loras/katerina_pytorch_lora_weights.safetensors b/loras/katerina_pytorch_lora_weights.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..639dd5c50a5b97fa486404c2c13a13e5836a52e3 --- /dev/null +++ b/loras/katerina_pytorch_lora_weights.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e890585b401e9389666612c233e73a9f2a60871ac06053009a6d3b03e57cfc0 +size 130958088 diff --git a/loras/nsfw_FLUXTASTIC_lora.safetensors b/loras/nsfw_FLUXTASTIC_lora.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31cfc62e06c9e171c71b185eb0d5bfa5575d8d05 --- /dev/null +++ b/loras/nsfw_FLUXTASTIC_lora.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ee3e21f23f4a58f74d2feea17a1e4260fdc1216c549a2e0205c5147b1085d4 +size 687476504 diff --git a/loras/put_loras_here b/loras/put_loras_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/photomaker/put_photomaker_models_here b/photomaker/put_photomaker_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/sams/sam_vit_b_01ec64.pth b/sams/sam_vit_b_01ec64.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab7d111e57bd052a76fe669986560e3555e9c8f6 --- /dev/null +++ b/sams/sam_vit_b_01ec64.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912 +size 375042383 diff --git a/style_models/put_t2i_style_model_here b/style_models/put_t2i_style_model_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ultralytics/bbox/face_yolov8m.pt b/ultralytics/bbox/face_yolov8m.pt new file mode 100644 index 0000000000000000000000000000000000000000..3581945a1f3342c5c48d0b7b339b56dd1676008a --- /dev/null +++ b/ultralytics/bbox/face_yolov8m.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f02b8a23e6f12bd2c1b1f6714f66f984c728fa41ed749d033e7d6dea511ef70c +size 52026019 diff --git a/ultralytics/bbox/hand_yolov8s.pt b/ultralytics/bbox/hand_yolov8s.pt new file mode 100644 index 0000000000000000000000000000000000000000..21091d538a48b1afd5e9910e2d3863a1d4974799 --- /dev/null +++ b/ultralytics/bbox/hand_yolov8s.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4faf8d17286ace2c3d3346c6d0d4a0c8d62404955263a7ae95c1dd7eb877af +size 22507707 diff --git a/ultralytics/segm/person_yolov8m-seg.pt b/ultralytics/segm/person_yolov8m-seg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ae56d795a72bb6b19938117adf3c50ee70c21fb --- /dev/null +++ b/ultralytics/segm/person_yolov8m-seg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d881ec50b831f546e37977081b18f4e3bf65664aec163f97a311b0955499795 +size 54827683 diff --git a/unet/put_unet_files_here b/unet/put_unet_files_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/upscale_models/put_esrgan_and_other_upscale_models_here b/upscale_models/put_esrgan_and_other_upscale_models_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/vae/ae.safetensors b/vae/ae.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71f11a92800c4a93cead7cebc556531926ecfc33 --- /dev/null +++ b/vae/ae.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afc8e28272cd15db3919bacdb6918ce9c1ed22e96cb12c4d5ed0fba823529e38 +size 335304388 diff --git a/vae/put_vae_here b/vae/put_vae_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here b/vae_approx/put_taesd_encoder_pth_and_taesd_decoder_pth_here new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
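
Note: this change consists almost entirely of Git LFS pointer files (a `version` line, an `oid sha256:...` line, and a `size` line) alongside plain YAML/JSON configs. As a sketch only, the snippet below shows one way to confirm that a blob fetched out-of-band matches the oid/size recorded in one of these pointers; the paths and the choice of pointer are illustrative assumptions, not part of the repository.

```python
# Sketch (not part of this diff): verify a downloaded model file against the
# metadata in its Git LFS pointer, e.g. the pointer added at
# checkpoints/realvisxl_lightning.safetensors. Paths are hypothetical.
import hashlib
from pathlib import Path


def read_lfs_pointer(pointer_path: Path) -> dict:
    """Parse an LFS pointer file (version / oid sha256:<hex> / size <bytes>)."""
    fields = {}
    for line in pointer_path.read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return {
        "version": fields["version"],
        "oid": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }


def verify_blob(blob_path: Path, pointer: dict, chunk_size: int = 1 << 20) -> bool:
    """Check the real file's byte size and SHA-256 digest against the pointer."""
    if blob_path.stat().st_size != pointer["size"]:
        return False
    digest = hashlib.sha256()
    with blob_path.open("rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest() == pointer["oid"]


if __name__ == "__main__":
    # The pointer text as checked in (before `git lfs pull` replaces it with the
    # blob) versus a manually downloaded copy of the same file.
    pointer = read_lfs_pointer(Path("checkpoints/realvisxl_lightning.safetensors"))
    print(verify_blob(Path("/tmp/realvisxl_lightning.safetensors"), pointer))
```

After `git lfs pull`, the working-tree copy is the resolved blob rather than the pointer, so read the pointer text from the diff or via `git show` if you need it afterwards.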