ersdd committed on
Commit
3aeed18
·
verified ·
1 Parent(s): fa6bf01

Upload 2 files

Browse files
Files changed (2) hide show
  1. cldm.yaml +106 -0
  2. swinir.yaml +22 -0
cldm.yaml ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target: model.cldm.ControlLDM
2
+ params:
3
+ linear_start: 0.00085
4
+ linear_end: 0.0120
5
+ num_timesteps_cond: 1
6
+ log_every_t: 200
7
+ timesteps: 1000
8
+ first_stage_key: "jpg"
9
+ cond_stage_key: "txt"
10
+ control_key: "hint"
11
+ image_size: 64
12
+ channels: 4
13
+ cond_stage_trainable: false
14
+ conditioning_key: crossattn
15
+ monitor: val/loss_simple_ema
16
+ scale_factor: 0.18215
17
+ use_ema: False
18
+
19
+ sd_locked: True
20
+ only_mid_control: False
21
+ # Learning rate.
22
+ learning_rate: 1e-4
23
+
24
+ control_stage_config:
25
+ target: model.cldm.ControlNet
26
+ params:
27
+ use_checkpoint: True
28
+ image_size: 32 # unused
29
+ in_channels: 4
30
+ hint_channels: 4
31
+ model_channels: 320
32
+ attention_resolutions: [ 4, 2, 1 ]
33
+ num_res_blocks: 2
34
+ channel_mult: [ 1, 2, 4, 4 ]
35
+ num_head_channels: 64 # need to fix for flash-attn
36
+ use_spatial_transformer: True
37
+ use_linear_in_transformer: True
38
+ transformer_depth: 1
39
+ context_dim: 1024
40
+ legacy: False
41
+
42
+ unet_config:
43
+ target: model.cldm.ControlledUnetModel
44
+ params:
45
+ use_checkpoint: True
46
+ image_size: 32 # unused
47
+ in_channels: 4
48
+ out_channels: 4
49
+ model_channels: 320
50
+ attention_resolutions: [ 4, 2, 1 ]
51
+ num_res_blocks: 2
52
+ channel_mult: [ 1, 2, 4, 4 ]
53
+ num_head_channels: 64 # need to fix for flash-attn
54
+ use_spatial_transformer: True
55
+ use_linear_in_transformer: True
56
+ transformer_depth: 1
57
+ context_dim: 1024
58
+ legacy: False
59
+
60
+ first_stage_config:
61
+ target: ldm.models.autoencoder.AutoencoderKL
62
+ params:
63
+ embed_dim: 4
64
+ monitor: val/rec_loss
65
+ ddconfig:
66
+ #attn_type: "vanilla-xformers"
67
+ double_z: true
68
+ z_channels: 4
69
+ resolution: 256
70
+ in_channels: 3
71
+ out_ch: 3
72
+ ch: 128
73
+ ch_mult:
74
+ - 1
75
+ - 2
76
+ - 4
77
+ - 4
78
+ num_res_blocks: 2
79
+ attn_resolutions: []
80
+ dropout: 0.0
81
+ lossconfig:
82
+ target: torch.nn.Identity
83
+
84
+ cond_stage_config:
85
+ target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
86
+ params:
87
+ freeze: True
88
+ layer: "penultimate"
89
+
90
+ preprocess_config:
91
+ target: model.swinir.SwinIR
92
+ params:
93
+ img_size: 64
94
+ patch_size: 1
95
+ in_chans: 3
96
+ embed_dim: 180
97
+ depths: [6, 6, 6, 6, 6, 6, 6, 6]
98
+ num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
99
+ window_size: 8
100
+ mlp_ratio: 2
101
+ sf: 8
102
+ img_range: 1.0
103
+ upsampler: "nearest+conv"
104
+ resi_connection: "1conv"
105
+ unshuffle: True
106
+ unshuffle_scale: 8
swinir.yaml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target: model.swinir.SwinIR
2
+ params:
3
+ img_size: 64
4
+ patch_size: 1
5
+ in_chans: 3
6
+ embed_dim: 180
7
+ depths: [6, 6, 6, 6, 6, 6, 6, 6]
8
+ num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
9
+ window_size: 8
10
+ mlp_ratio: 2
11
+ sf: 8
12
+ img_range: 1.0
13
+ upsampler: "nearest+conv"
14
+ resi_connection: "1conv"
15
+ unshuffle: True
16
+ unshuffle_scale: 8
17
+
18
+ hq_key: jpg
19
+ lq_key: hint
20
+ # Learning rate.
21
+ learning_rate: 1e-4
22
+ weight_decay: 0