chaojiemao committed on
Commit
a09443c
·
verified ·
1 Parent(s): 11b2b49

modify 1024.yaml

Browse files
Files changed (1) hide show
  1. config/models/ace_0.6b_1024.yaml +171 -15
config/models/ace_0.6b_1024.yaml CHANGED
@@ -1,5 +1,5 @@
1
- NAME: ACE_0.6B_1024
2
- IS_DEFAULT: False
3
  DEFAULT_PARAS:
4
  PARAS:
5
  #
@@ -9,14 +9,18 @@ DEFAULT_PARAS:
9
  TASK:
10
  PROMPT: ""
11
  NEGATIVE_PROMPT: ""
12
- OUTPUT_HEIGHT: 512
13
- OUTPUT_WIDTH: 512
14
  SAMPLER: ddim
15
- SAMPLE_STEPS: 20
16
  GUIDE_SCALE: 4.5
17
  GUIDE_RESCALE: 0.5
18
  SEED: -1
19
  TAR_INDEX: 0
 
 
 
 
20
  OUTPUT:
21
  LATENT:
22
  IMAGES:
@@ -39,12 +43,12 @@ DEFAULT_PARAS:
39
  #
40
  COND_STAGE_MODEL:
41
  FUNCTION:
42
- - NAME: encode_list
43
  DTYPE: bfloat16
44
  INPUT: ["PROMPT"]
45
  #
46
  MODEL:
47
- NAME: LdmACE
48
  PRETRAINED_MODEL:
49
  IGNORE_KEYS: [ ]
50
  SCALE_FACTOR: 0.18215
@@ -55,7 +59,7 @@ MODEL:
55
  USE_TEXT_POS_EMBEDDINGS: True
56
  #
57
  DIFFUSION:
58
- NAME: ACEDiffusion
59
  PREDICTION_TYPE: eps
60
  MIN_SNR_GAMMA:
61
  NOISE_SCHEDULER:
@@ -65,8 +69,8 @@ MODEL:
65
  BETA_MAX: 0.02
66
  #
67
  DIFFUSION_MODEL:
68
- NAME: DiTACE
69
- PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/dit/ace_0.6b_1024px.pth
70
  IGNORE_KEYS: [ ]
71
  PATCH_SIZE: 2
72
  IN_CHANNELS: 4
@@ -78,7 +82,7 @@ MODEL:
78
  DROP_PATH: 0.0
79
  WINDOW_DIZE: 0
80
  Y_CHANNELS: 4096
81
- MAX_SEQ_LEN: 1024
82
  QK_NORM: True
83
  USE_GRAD_CHECKPOINT: True
84
  ATTENTION_BACKEND: flash_attn
@@ -86,7 +90,7 @@ MODEL:
86
  FIRST_STAGE_MODEL:
87
  NAME: AutoencoderKL
88
  EMBED_DIM: 4
89
- PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/vae/vae.bin
90
  IGNORE_KEYS: []
91
  #
92
  ENCODER:
@@ -117,11 +121,163 @@ MODEL:
117
  TANH_OUT: False
118
  #
119
  COND_STAGE_MODEL:
120
- NAME: ACETextEmbedder
121
- PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/text_encoder/t5-v1_1-xxl/
122
- TOKENIZER_PATH: hf://scepter-studio/ACE-0.6B-1024px@models/tokenizer/t5-v1_1-xxl
123
  LENGTH: 120
124
  T5_DTYPE: bfloat16
125
  ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
126
  CLEAN: whitespace
127
  USE_GRAD: False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NAME: ACE_0.6B_1024_REFINER
2
+ IS_DEFAULT: True
3
  DEFAULT_PARAS:
4
  PARAS:
5
  #
 
9
  TASK:
10
  PROMPT: ""
11
  NEGATIVE_PROMPT: ""
12
+ OUTPUT_HEIGHT: 1024
13
+ OUTPUT_WIDTH: 1024
14
  SAMPLER: ddim
15
+ SAMPLE_STEPS: 50
16
  GUIDE_SCALE: 4.5
17
  GUIDE_RESCALE: 0.5
18
  SEED: -1
19
  TAR_INDEX: 0
20
+ REFINER_SCALE: 0.2
21
+ USE_ACE: True
22
+ #REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
23
+ REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
24
  OUTPUT:
25
  LATENT:
26
  IMAGES:
 
43
  #
44
  COND_STAGE_MODEL:
45
  FUNCTION:
46
+ - NAME: encode_list_of_list
47
  DTYPE: bfloat16
48
  INPUT: ["PROMPT"]
49
  #
50
  MODEL:
51
+ NAME: LatentDiffusionACE
52
  PRETRAINED_MODEL:
53
  IGNORE_KEYS: [ ]
54
  SCALE_FACTOR: 0.18215
 
59
  USE_TEXT_POS_EMBEDDINGS: True
60
  #
61
  DIFFUSION:
62
+ NAME: BaseDiffusion
63
  PREDICTION_TYPE: eps
64
  MIN_SNR_GAMMA:
65
  NOISE_SCHEDULER:
 
69
  BETA_MAX: 0.02
70
  #
71
  DIFFUSION_MODEL:
72
+ NAME: ACE
73
+ PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/dit/ace_0.6b_1024px.pth
74
  IGNORE_KEYS: [ ]
75
  PATCH_SIZE: 2
76
  IN_CHANNELS: 4
 
82
  DROP_PATH: 0.0
83
  WINDOW_DIZE: 0
84
  Y_CHANNELS: 4096
85
+ MAX_SEQ_LEN: 4096
86
  QK_NORM: True
87
  USE_GRAD_CHECKPOINT: True
88
  ATTENTION_BACKEND: flash_attn
 
90
  FIRST_STAGE_MODEL:
91
  NAME: AutoencoderKL
92
  EMBED_DIM: 4
93
+ PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/vae/vae.bin
94
  IGNORE_KEYS: []
95
  #
96
  ENCODER:
 
121
  TANH_OUT: False
122
  #
123
  COND_STAGE_MODEL:
124
+ NAME: T5EmbedderHF
125
+ PRETRAINED_MODEL: ms://iic/ACE-0.6B-1024px@models/text_encoder/t5-v1_1-xxl/
126
+ TOKENIZER_PATH: ms://iic/ACE-0.6B-1024px@models/tokenizer/t5-v1_1-xxl
127
  LENGTH: 120
128
  T5_DTYPE: bfloat16
129
  ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
130
  CLEAN: whitespace
131
  USE_GRAD: False
132
+
133
+ ACE_PROMPT: [
134
+ "A cute cartoon rabbit holding a whiteboard that says 'ACE Refiner', standing in a sunny meadow filled with flowers, with a big smile and bright colors.",
135
+ "A beautiful young woman with long flowing hair, wearing a summer dress, holding a whiteboard that reads 'ACE Refiner' while sitting on a park bench surrounded by cherry blossoms.",
136
+ "An adorable cartoon cat wearing oversized glasses, holding a whiteboard that says 'ACE Refiner', perched on a stack of colorful books in a cozy library setting.",
137
+ "A charming girl with pigtails, wearing a cute school uniform, enthusiastically holding a whiteboard that has 'ACE Refiner' written on it, in a bright and cheerful classroom full of educational posters.",
138
+ "A friendly cartoon dog with floppy ears, sitting in front of a doghouse, proudly holding a whiteboard that says 'ACE Refiner', with a playful expression and a blue sky in the background.",
139
+ "A cute anime girl with big expressive eyes, dressed in a colorful outfit, holding a whiteboard that reads 'ACE Refiner' in a fantastical landscape filled with mythical creatures.",
140
+ "A vibrant cartoon fox holding a whiteboard that says 'ACE Refiner', standing on a rock by a sparkling stream, surrounded by lush greenery and butterflies.",
141
+ "A stylish young woman in a business outfit, smiling as she holds a whiteboard written with 'ACE Refiner', in a modern office filled with plants and natural light.",
142
+ "A cute cartoon unicorn holding a sparkling whiteboard that says 'ACE Refiner', frolicking in a magical forest, with rainbows and stars in the background.",
143
+ "A happy family, consisting of a cute little girl and her playful puppy, holding a whiteboard that says 'ACE Refiner', together in their backyard on a sunny day."
144
+ ]
145
+ REFINER_MODEL:
146
+ NAME: ""
147
+ IS_DEFAULT: False
148
+ DEFAULT_PARAS:
149
+ PARAS:
150
+ RESOLUTIONS: [ [ 1024, 1024 ] ]
151
+ INPUT:
152
+ INPUT_IMAGE:
153
+ INPUT_MASK:
154
+ TASK:
155
+ PROMPT: ""
156
+ NEGATIVE_PROMPT: ""
157
+ OUTPUT_HEIGHT: 1024
158
+ OUTPUT_WIDTH: 1024
159
+ SAMPLER: flow_euler
160
+ SAMPLE_STEPS: 30
161
+ GUIDE_SCALE: 3.5
162
+ GUIDE_RESCALE:
163
+ OUTPUT:
164
+ LATENT:
165
+ IMAGES:
166
+ SEED:
167
+ MODULES_PARAS:
168
+ FIRST_STAGE_MODEL:
169
+ FUNCTION:
170
+ - NAME: encode
171
+ DTYPE: bfloat16
172
+ INPUT: [ "IMAGE" ]
173
+ - NAME: decode
174
+ DTYPE: bfloat16
175
+ INPUT: [ "LATENT" ]
176
+ PARAS:
177
+ SCALE_FACTOR: 1.5305
178
+ SHIFT_FACTOR: 0.0609
179
+ SIZE_FACTOR: 8
180
+ DIFFUSION_MODEL:
181
+ FUNCTION:
182
+ - NAME: forward
183
+ DTYPE: bfloat16
184
+ INPUT: [ "SAMPLE_STEPS", "SAMPLE", "GUIDE_SCALE" ]
185
+ COND_STAGE_MODEL:
186
+ FUNCTION:
187
+ - NAME: encode
188
+ DTYPE: bfloat16
189
+ INPUT: [ "PROMPT" ]
190
+
191
+ MODEL:
192
+ DIFFUSION:
193
+ NAME: DiffusionFluxRF
194
+ PREDICTION_TYPE: raw
195
+ NOISE_SCHEDULER:
196
+ NAME: FlowMatchSigmaScheduler
197
+ WEIGHTING_SCHEME: logit_normal
198
+ SHIFT: 3.0
199
+ LOGIT_MEAN: 0.0
200
+ LOGIT_STD: 1.0
201
+ MODE_SCALE: 1.29
202
+ DIFFUSION_MODEL:
203
+ NAME: FluxMR
204
+ PRETRAINED_MODEL: ms://AI-ModelScope/FLUX.1-dev@flux1-dev.safetensors
205
+ IN_CHANNELS: 64
206
+ OUT_CHANNELS: 64
207
+ HIDDEN_SIZE: 3072
208
+ NUM_HEADS: 24
209
+ AXES_DIM: [ 16, 56, 56 ]
210
+ THETA: 10000
211
+ VEC_IN_DIM: 768
212
+ GUIDANCE_EMBED: True
213
+ CONTEXT_IN_DIM: 4096
214
+ MLP_RATIO: 4.0
215
+ QKV_BIAS: True
216
+ DEPTH: 19
217
+ DEPTH_SINGLE_BLOCKS: 38
218
+ USE_GRAD_CHECKPOINT: True
219
+ ATTN_BACKEND: flash_attn
220
+ #
221
+ FIRST_STAGE_MODEL:
222
+ NAME: AutoencoderKLFlux
223
+ EMBED_DIM: 16
224
+ PRETRAINED_MODEL: ms://AI-ModelScope/FLUX.1-dev@ae.safetensors
225
+ IGNORE_KEYS: [ ]
226
+ BATCH_SIZE: 8
227
+ USE_CONV: False
228
+ SCALE_FACTOR: 0.3611
229
+ SHIFT_FACTOR: 0.1159
230
+ #
231
+ ENCODER:
232
+ NAME: Encoder
233
+ USE_CHECKPOINT: False
234
+ CH: 128
235
+ OUT_CH: 3
236
+ NUM_RES_BLOCKS: 2
237
+ IN_CHANNELS: 3
238
+ ATTN_RESOLUTIONS: [ ]
239
+ CH_MULT: [ 1, 2, 4, 4 ]
240
+ Z_CHANNELS: 16
241
+ DOUBLE_Z: True
242
+ DROPOUT: 0.0
243
+ RESAMP_WITH_CONV: True
244
+ #
245
+ DECODER:
246
+ NAME: Decoder
247
+ USE_CHECKPOINT: False
248
+ CH: 128
249
+ OUT_CH: 3
250
+ NUM_RES_BLOCKS: 2
251
+ IN_CHANNELS: 3
252
+ ATTN_RESOLUTIONS: [ ]
253
+ CH_MULT: [ 1, 2, 4, 4 ]
254
+ Z_CHANNELS: 16
255
+ DROPOUT: 0.0
256
+ RESAMP_WITH_CONV: True
257
+ GIVE_PRE_END: False
258
+ TANH_OUT: False
259
+ #
260
+ COND_STAGE_MODEL:
261
+ NAME: T5PlusClipFluxEmbedder
262
+ T5_MODEL:
263
+ NAME: HFEmbedder
264
+ HF_MODEL_CLS: T5EncoderModel
265
+ MODEL_PATH: ms://AI-ModelScope/FLUX.1-dev@text_encoder_2/
266
+ HF_TOKENIZER_CLS: T5Tokenizer
267
+ TOKENIZER_PATH: ms://AI-ModelScope/FLUX.1-dev@tokenizer_2/
268
+ MAX_LENGTH: 512
269
+ OUTPUT_KEY: last_hidden_state
270
+ D_TYPE: bfloat16
271
+ BATCH_INFER: False
272
+ CLEAN: whitespace
273
+ CLIP_MODEL:
274
+ NAME: HFEmbedder
275
+ HF_MODEL_CLS: CLIPTextModel
276
+ MODEL_PATH: ms://AI-ModelScope/FLUX.1-dev@text_encoder/
277
+ HF_TOKENIZER_CLS: CLIPTokenizer
278
+ TOKENIZER_PATH: ms://AI-ModelScope/FLUX.1-dev@tokenizer/
279
+ MAX_LENGTH: 77
280
+ OUTPUT_KEY: pooler_output
281
+ D_TYPE: bfloat16
282
+ BATCH_INFER: True
283
+ CLEAN: whitespace