MultiMatrix committed on
Commit
b708a95
·
verified ·
1 Parent(s): 6a38f1b

Upload 7 files

Browse files
configs/inference/bsrnet.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ target: model.RRDBNet
2
+ params:
3
+ in_nc: 3
4
+ out_nc: 3
5
+ nf: 64
6
+ nb: 23
7
+ gc: 32
8
+ sf: 4
configs/inference/cldm.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target: model.ControlLDM
2
+ params:
3
+ latent_scale_factor: 0.18215
4
+ unet_cfg:
5
+ use_checkpoint: True
6
+ image_size: 32 # unused
7
+ in_channels: 4
8
+ out_channels: 4
9
+ model_channels: 320
10
+ attention_resolutions: [ 4, 2, 1 ]
11
+ num_res_blocks: 2
12
+ channel_mult: [ 1, 2, 4, 4 ]
13
+ num_head_channels: 64 # need to fix for flash-attn
14
+ use_spatial_transformer: True
15
+ use_linear_in_transformer: True
16
+ transformer_depth: 1
17
+ context_dim: 1024
18
+ legacy: False
19
+ vae_cfg:
20
+ embed_dim: 4
21
+ ddconfig:
22
+ double_z: true
23
+ z_channels: 4
24
+ resolution: 256
25
+ in_channels: 3
26
+ out_ch: 3
27
+ ch: 128
28
+ ch_mult:
29
+ - 1
30
+ - 2
31
+ - 4
32
+ - 4
33
+ num_res_blocks: 2
34
+ attn_resolutions: []
35
+ dropout: 0.0
36
+ clip_cfg:
37
+ embed_dim: 1024
38
+ vision_cfg:
39
+ image_size: 224
40
+ layers: 32
41
+ width: 1280
42
+ head_width: 80
43
+ patch_size: 14
44
+ text_cfg:
45
+ context_length: 77
46
+ vocab_size: 49408
47
+ width: 1024
48
+ heads: 16
49
+ layers: 24
50
+ layer: "penultimate"
51
+ controlnet_cfg:
52
+ use_checkpoint: True
53
+ image_size: 32 # unused
54
+ in_channels: 4
55
+ hint_channels: 4
56
+ model_channels: 320
57
+ attention_resolutions: [ 4, 2, 1 ]
58
+ num_res_blocks: 2
59
+ channel_mult: [ 1, 2, 4, 4 ]
60
+ num_head_channels: 64 # need to fix for flash-attn
61
+ use_spatial_transformer: True
62
+ use_linear_in_transformer: True
63
+ transformer_depth: 1
64
+ context_dim: 1024
65
+ legacy: False
configs/inference/diffusion.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ target: model.Diffusion
2
+ params:
3
+ linear_start: 0.00085
4
+ linear_end: 0.0120
5
+ timesteps: 1000
configs/inference/scunet.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ target: model.SCUNet
2
+ params:
3
+ in_nc: 3
4
+ config: [4,4,4,4,4,4,4]
5
+ dim: 64
configs/inference/swinir.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ target: model.SwinIR
2
+ params:
3
+ img_size: 64
4
+ patch_size: 1
5
+ in_chans: 3
6
+ embed_dim: 180
7
+ depths: [6, 6, 6, 6, 6, 6, 6, 6]
8
+ num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
9
+ window_size: 8
10
+ mlp_ratio: 2
11
+ sf: 8
12
+ img_range: 1.0
13
+ upsampler: "nearest+conv"
14
+ resi_connection: "1conv"
15
+ unshuffle: True
16
+ unshuffle_scale: 8
configs/train/train_stage1.yaml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ swinir:
3
+ target: model.swinir.SwinIR
4
+ params:
5
+ img_size: 64
6
+ patch_size: 1
7
+ in_chans: 3
8
+ embed_dim: 180
9
+ depths: [6, 6, 6, 6, 6, 6, 6, 6]
10
+ num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
11
+ window_size: 8
12
+ mlp_ratio: 2
13
+ sf: 8
14
+ img_range: 1.0
15
+ upsampler: "nearest+conv"
16
+ resi_connection: "1conv"
17
+ unshuffle: True
18
+ unshuffle_scale: 8
19
+
20
+ dataset:
21
+ train:
22
+ target: dataset.codeformer.CodeformerDataset
23
+ params:
24
+ # training file list path
25
+ file_list:
26
+ file_backend_cfg:
27
+ target: dataset.file_backend.HardDiskBackend
28
+ out_size: 512
29
+ crop_type: center
30
+ blur_kernel_size: 41
31
+ kernel_list: ['iso', 'aniso']
32
+ kernel_prob: [0.5, 0.5]
33
+ blur_sigma: [0.1, 12]
34
+ downsample_range: [1, 12]
35
+ noise_range: [0, 15]
36
+ jpeg_range: [30, 100]
37
+ val:
38
+ target: dataset.codeformer.CodeformerDataset
39
+ params:
40
+ # validation file list path
41
+ file_list:
42
+ file_backend_cfg:
43
+ target: dataset.file_backend.HardDiskBackend
44
+ out_size: 512
45
+ crop_type: center
46
+ blur_kernel_size: 41
47
+ kernel_list: ['iso', 'aniso']
48
+ kernel_prob: [0.5, 0.5]
49
+ blur_sigma: [0.1, 12]
50
+ downsample_range: [1, 12]
51
+ noise_range: [0, 15]
52
+ jpeg_range: [30, 100]
53
+
54
+ train:
55
+ # experiment directory path
56
+ exp_dir:
57
+ learning_rate: 1e-4
58
+ # total batch size
59
+ batch_size: 96
60
+ num_workers:
61
+ train_steps: 150000
62
+ log_every: 50
63
+ ckpt_every: 10000
64
+ image_every: 1000
65
+ val_every: 1000
66
+ resume: ~
configs/train/train_stage2.yaml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ cldm:
3
+ target: model.cldm.ControlLDM
4
+ params:
5
+ latent_scale_factor: 0.18215
6
+ unet_cfg:
7
+ use_checkpoint: True
8
+ image_size: 32 # unused
9
+ in_channels: 4
10
+ out_channels: 4
11
+ model_channels: 320
12
+ attention_resolutions: [ 4, 2, 1 ]
13
+ num_res_blocks: 2
14
+ channel_mult: [ 1, 2, 4, 4 ]
15
+ num_head_channels: 64 # need to fix for flash-attn
16
+ use_spatial_transformer: True
17
+ use_linear_in_transformer: True
18
+ transformer_depth: 1
19
+ context_dim: 1024
20
+ legacy: False
21
+ vae_cfg:
22
+ embed_dim: 4
23
+ ddconfig:
24
+ double_z: true
25
+ z_channels: 4
26
+ resolution: 256
27
+ in_channels: 3
28
+ out_ch: 3
29
+ ch: 128
30
+ ch_mult:
31
+ - 1
32
+ - 2
33
+ - 4
34
+ - 4
35
+ num_res_blocks: 2
36
+ attn_resolutions: []
37
+ dropout: 0.0
38
+ clip_cfg:
39
+ embed_dim: 1024
40
+ vision_cfg:
41
+ image_size: 224
42
+ layers: 32
43
+ width: 1280
44
+ head_width: 80
45
+ patch_size: 14
46
+ text_cfg:
47
+ context_length: 77
48
+ vocab_size: 49408
49
+ width: 1024
50
+ heads: 16
51
+ layers: 24
52
+ layer: "penultimate"
53
+ controlnet_cfg:
54
+ use_checkpoint: True
55
+ image_size: 32 # unused
56
+ in_channels: 4
57
+ hint_channels: 4
58
+ model_channels: 320
59
+ attention_resolutions: [ 4, 2, 1 ]
60
+ num_res_blocks: 2
61
+ channel_mult: [ 1, 2, 4, 4 ]
62
+ num_head_channels: 64 # need to fix for flash-attn
63
+ use_spatial_transformer: True
64
+ use_linear_in_transformer: True
65
+ transformer_depth: 1
66
+ context_dim: 1024
67
+ legacy: False
68
+
69
+ swinir:
70
+ target: model.swinir.SwinIR
71
+ params:
72
+ img_size: 64
73
+ patch_size: 1
74
+ in_chans: 3
75
+ embed_dim: 180
76
+ depths: [6, 6, 6, 6, 6, 6, 6, 6]
77
+ num_heads: [6, 6, 6, 6, 6, 6, 6, 6]
78
+ window_size: 8
79
+ mlp_ratio: 2
80
+ sf: 8
81
+ img_range: 1.0
82
+ upsampler: "nearest+conv"
83
+ resi_connection: "1conv"
84
+ unshuffle: True
85
+ unshuffle_scale: 8
86
+
87
+ diffusion:
88
+ target: model.gaussian_diffusion.Diffusion
89
+ params:
90
+ linear_start: 0.00085
91
+ linear_end: 0.0120
92
+ timesteps: 1000
93
+
94
+ dataset:
95
+ train:
96
+ target: dataset.codeformer.CodeformerDataset
97
+ params:
98
+ # training file list path
99
+ file_list:
100
+ file_backend_cfg:
101
+ target: dataset.file_backend.HardDiskBackend
102
+ out_size: 512
103
+ crop_type: center
104
+ blur_kernel_size: 41
105
+ kernel_list: ['iso', 'aniso']
106
+ kernel_prob: [0.5, 0.5]
107
+ blur_sigma: [0.1, 12]
108
+ downsample_range: [1, 12]
109
+ noise_range: [0, 15]
110
+ jpeg_range: [30, 100]
111
+
112
+ train:
113
+ # pretrained sd v2.1 path
114
+ sd_path:
115
+ # experiment directory path
116
+ exp_dir:
117
+ # stage 1 swinir path
118
+ swinir_path:
119
+ learning_rate: 1e-4
120
+ # ImageNet 1k (1.3M images)
121
+ # batch size = 192, lr = 1e-4, total training steps = 25k
122
+ # Our filtered laion2b-en (15M images)
123
+ # batch size = 256, lr = 1e-4 (first 30k), 1e-5 (next 50k), total training steps = 80k
124
+ batch_size: 256
125
+ num_workers:
126
+ train_steps: 30000
127
+ log_every: 50
128
+ ckpt_every: 10000
129
+ image_every: 1000
130
+ resume: ~