{
  "architectures": [
    "HARTForT2I"
  ],
  "attn_drop_rate": 0.0,
  "attn_l2_norm": false,
  "attn_type": "llama",
  "cond_drop_rate": 0.1,
  "context_dim": 1536,
  "context_norm_scale": 1.0,
  "context_token": 300,
  "depth": 24,
  "diff_depth": 6,
  "diff_width": 1024,
  "diffusion_batch_mul": 4,
  "diffusion_head_repeats": 1,
  "disable_aln": true,
  "drop_path_rate": 0.10000000000000002,
  "drop_rate": 0.0,
  "embed_dim": 1536,
  "flash_if_available": true,
  "fused_if_available": true,
  "mlp_ratio": 4.0,
  "mlp_type": "llama",
  "model_type": "hart_transformer_t2i",
  "norm_eps": 1e-06,
  "num_heads": 24,
  "num_sampling_steps": "8",
  "patch_nums": [
    1,
    2,
    3,
    4,
    5,
    7,
    9,
    12,
    16,
    21,
    27,
    36,
    48,
    64
  ],
  "sampler": "iddpm",
  "sep_aln_pooling_mode": "max",
  "shared_aln": false,
  "torch_dtype": "float32",
  "transformers_version": "4.42.2",
  "use_context_norm": true,
  "use_cross_attn": false,
  "use_timestep_embed": true
}