Spaces:

Swarmeta-AI
/

Twig-V0-Alpha-Demo-CPU

Runtime error

App Files Files Community

Twig-V0-Alpha-Demo-CPU / configs /sana_controlnet_config /Sana_600M_img1024_controlnet.yaml

zzc0208

Upload 265 files

f1f9265 verified 11 days ago

raw

history blame contribute delete

3.24 kB

	data:
	data_dir: [data/data_public/controlnet_data]
	image_size: 1024
	caption_proportion:
	prompt: 1
	external_caption_suffixes: []
	external_clipscore_suffixes: []
	clip_thr_temperature: 0.1
	clip_thr: 25.0
	load_text_feat: false
	load_vae_feat: false
	transform: default_train
	type: SanaWebDatasetMSControl
	sort_dataset: false
	# model config
	model:
	model: SanaMSControlNet_600M_P1_D28
	image_size: 1024
	mixed_precision: fp16
	fp32_attention: true
	load_from: hf://Efficient-Large-Model/Sana_600M_1024px/checkpoint/Sana_600M_1024px.pth
	resume_from:
	aspect_ratio_type: ASPECT_RATIO_1024
	multi_scale: true
	attn_type: linear
	ffn_type: glumbconv
	mlp_acts:
	- silu
	- silu
	-
	mlp_ratio: 2.5
	use_pe: false
	qk_norm: false
	class_dropout_prob: 0.1
	# VAE setting
	vae:
	vae_type: AutoencoderDC
	vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0-diffusers
	scale_factor: 0.41407
	vae_latent_dim: 32
	vae_downsample_rate: 32
	sample_posterior: true
	# text encoder
	text_encoder:
	text_encoder_name: gemma-2-2b-it
	y_norm: true
	y_norm_scale_factor: 0.01
	model_max_length: 300
	# CHI
	chi_prompt:
	- 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:'
	- '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.'
	- '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.'
	- 'Here are examples of how to transform or refine prompts:'
	- '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.'
	- '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.'
	- 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:'
	- 'User Prompt: '
	# Sana schedule Flow
	scheduler:
	predict_v: true
	noise_schedule: linear_flow
	pred_sigma: false
	flow_shift: 4.0
	# logit-normal timestep
	weighting_scheme: logit_normal
	logit_mean: 0.0
	logit_std: 1.0
	vis_sampler: flow_dpm-solver
	# training setting
	train:
	num_workers: 10
	seed: 1
	train_batch_size: 16
	num_epochs: 100
	gradient_accumulation_steps: 1
	grad_checkpointing: true
	gradient_clip: 0.1
	optimizer:
	betas:
	- 0.9
	- 0.999
	- 0.9999
	eps:
	- 1.0e-30
	- 1.0e-16
	lr: 0.0001
	type: CAMEWrapper
	weight_decay: 0.0
	lr_schedule: constant
	lr_schedule_args:
	num_warmup_steps: 30
	local_save_vis: true # if save log image locally
	visualize: true
	eval_sampling_steps: 500
	log_interval: 20
	save_model_epochs: 5
	save_model_steps: 500
	work_dir: output/debug
	online_metric: false
	eval_metric_step: 2000
	online_metric_dir: metric_helper
	controlnet:
	control_signal_type: "scribble"