ersdd
/

temp

Model card Files Files and versions Community

ersdd commited on Jan 13

Commit

3aeed18

verified ·

1 Parent(s): fa6bf01

Upload 2 files

Browse files

Files changed (2) hide show

cldm.yaml +106 -0
swinir.yaml +22 -0

cldm.yaml ADDED Viewed

	@@ -0,0 +1,106 @@

+target: model.cldm.ControlLDM
+params:
+  linear_start: 0.00085
+  linear_end: 0.0120
+  num_timesteps_cond: 1
+  log_every_t: 200
+  timesteps: 1000
+  first_stage_key: "jpg"
+  cond_stage_key: "txt"
+  control_key: "hint"
+  image_size: 64
+  channels: 4
+  cond_stage_trainable: false
+  conditioning_key: crossattn
+  monitor: val/loss_simple_ema
+  scale_factor: 0.18215
+  use_ema: False
+  sd_locked: True
+  only_mid_control: False
+  # Learning rate.
+  learning_rate: 1e-4
+  control_stage_config:
+    target: model.cldm.ControlNet
+    params:
+      use_checkpoint: True
+      image_size: 32 # unused
+      in_channels: 4
+      hint_channels: 4
+      model_channels: 320
+      attention_resolutions: [ 4, 2, 1 ]
+      num_res_blocks: 2
+      channel_mult: [ 1, 2, 4, 4 ]
+      num_head_channels: 64 # need to fix for flash-attn
+      use_spatial_transformer: True
+      use_linear_in_transformer: True
+      transformer_depth: 1
+      context_dim: 1024
+      legacy: False
+  unet_config:
+    target: model.cldm.ControlledUnetModel
+    params:
+      use_checkpoint: True
+      image_size: 32 # unused
+      in_channels: 4
+      out_channels: 4
+      model_channels: 320
+      attention_resolutions: [ 4, 2, 1 ]
+      num_res_blocks: 2
+      channel_mult: [ 1, 2, 4, 4 ]
+      num_head_channels: 64 # need to fix for flash-attn
+      use_spatial_transformer: True
+      use_linear_in_transformer: True
+      transformer_depth: 1
+      context_dim: 1024
+      legacy: False
+  first_stage_config:
+    target: ldm.models.autoencoder.AutoencoderKL
+    params:
+      embed_dim: 4
+      monitor: val/rec_loss
+      ddconfig:
+        #attn_type: "vanilla-xformers"
+        double_z: true
+        z_channels: 4
+        resolution: 256
+        in_channels: 3
+        out_ch: 3
+        ch: 128
+        ch_mult:
+        - 1
+        - 2
+        - 4
+        - 4
+        num_res_blocks: 2
+        attn_resolutions: []
+        dropout: 0.0
+      lossconfig:
+        target: torch.nn.Identity
+  cond_stage_config:
+    target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
+    params:
+      freeze: True
+      layer: "penultimate"
+  preprocess_config:
+    target: model.swinir.SwinIR
+    params:
+      img_size: 64
+      patch_size: 1
+      in_chans: 3
+      embed_dim: 180
+      depths: [6, 6, 6, 6, 6, 6, 6, 6]
+      num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
+      window_size: 8
+      mlp_ratio: 2
+      sf: 8
+      img_range: 1.0
+      upsampler: "nearest+conv"
+      resi_connection: "1conv"
+      unshuffle: True
+      unshuffle_scale: 8

swinir.yaml ADDED Viewed

	@@ -0,0 +1,22 @@

+target: model.swinir.SwinIR
+params:
+  img_size: 64
+  patch_size: 1
+  in_chans: 3
+  embed_dim: 180
+  depths: [6, 6, 6, 6, 6, 6, 6, 6]
+  num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
+  window_size: 8
+  mlp_ratio: 2
+  sf: 8
+  img_range: 1.0
+  upsampler: "nearest+conv"
+  resi_connection: "1conv"
+  unshuffle: True
+  unshuffle_scale: 8
+  hq_key: jpg
+  lq_key: hint
+  # Learning rate.
+  learning_rate: 1e-4
+  weight_decay: 0