Spaces: Running on Zero
BlobNetModel( | |
(conv_in): Conv2d(1029, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_proj): Timesteps() | |
(time_embedding): TimestepEmbedding( | |
(linear_1): Linear(in_features=320, out_features=1280, bias=True) | |
(act): SiLU() | |
(linear_2): Linear(in_features=1280, out_features=1280, bias=True) | |
) | |
(down_blocks): ModuleList( | |
(0): CrossAttnDownBlock2D( | |
(attentions): ModuleList( | |
(0-1): 2 x Transformer2DModel( | |
(norm): GroupNorm(32, 320, eps=1e-06, affine=True) | |
(proj_in): Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((320,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=320, out_features=320, bias=False) | |
(to_k): Linear(in_features=320, out_features=320, bias=False) | |
(to_v): Linear(in_features=320, out_features=320, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=320, out_features=320, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((320,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=320, out_features=2560, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=1280, out_features=320, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0-1): 2 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 320, eps=1e-05, affine=True) | |
(conv1): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True) | |
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
) | |
) | |
(downsamplers): ModuleList( | |
(0): Downsample2D( | |
(conv): Conv2d(320, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) | |
) | |
) | |
) | |
(1): CrossAttnDownBlock2D( | |
(attentions): ModuleList( | |
(0-1): 2 x Transformer2DModel( | |
(norm): GroupNorm(32, 640, eps=1e-06, affine=True) | |
(proj_in): Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=640, out_features=640, bias=False) | |
(to_k): Linear(in_features=640, out_features=640, bias=False) | |
(to_v): Linear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0): ResnetBlock2D( | |
(norm1): GroupNorm(32, 320, eps=1e-05, affine=True) | |
(conv1): Conv2d(320, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True) | |
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(320, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(1): ResnetBlock2D( | |
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(conv1): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True) | |
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
) | |
) | |
(downsamplers): ModuleList( | |
(0): Downsample2D( | |
(conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) | |
) | |
) | |
) | |
(2): CrossAttnDownBlock2D( | |
(attentions): ModuleList( | |
(0-1): 2 x Transformer2DModel( | |
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True) | |
(proj_in): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_k): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_v): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0): ResnetBlock2D( | |
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(conv1): Conv2d(640, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(640, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(1): ResnetBlock2D( | |
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(conv1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
) | |
) | |
(downsamplers): ModuleList( | |
(0): Downsample2D( | |
(conv): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) | |
) | |
) | |
) | |
(3): DownBlock2D( | |
(resnets): ModuleList( | |
(0-1): 2 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(conv1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
) | |
) | |
) | |
) | |
(blobnet_down_blocks): ModuleList( | |
(0-3): 4 x Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
(4-6): 3 x Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
(7-11): 5 x Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(blobnet_mid_block): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
(mid_block): UNetMidBlock2DCrossAttn( | |
(attentions): ModuleList( | |
(0): Transformer2DModel( | |
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True) | |
(proj_in): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_k): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_v): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0-1): 2 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(conv1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
) | |
) | |
) | |
(up_blocks): ModuleList( | |
(0): UpBlock2D( | |
(resnets): ModuleList( | |
(0-2): 3 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 2560, eps=1e-05, affine=True) | |
(conv1): Conv2d(2560, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(2560, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(upsamplers): ModuleList( | |
(0): Upsample2D( | |
(conv): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
) | |
) | |
) | |
(1): CrossAttnUpBlock2D( | |
(attentions): ModuleList( | |
(0-2): 3 x Transformer2DModel( | |
(norm): GroupNorm(32, 1280, eps=1e-06, affine=True) | |
(proj_in): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_k): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_v): Linear(in_features=1280, out_features=1280, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=1280, out_features=1280, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((1280,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=1280, out_features=10240, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=5120, out_features=1280, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0-1): 2 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 2560, eps=1e-05, affine=True) | |
(conv1): Conv2d(2560, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(2560, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(2): ResnetBlock2D( | |
(norm1): GroupNorm(32, 1920, eps=1e-05, affine=True) | |
(conv1): Conv2d(1920, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=1280, bias=True) | |
(norm2): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(1920, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(upsamplers): ModuleList( | |
(0): Upsample2D( | |
(conv): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
) | |
) | |
) | |
(2): CrossAttnUpBlock2D( | |
(attentions): ModuleList( | |
(0-2): 3 x Transformer2DModel( | |
(norm): GroupNorm(32, 640, eps=1e-06, affine=True) | |
(proj_in): Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=640, out_features=640, bias=False) | |
(to_k): Linear(in_features=640, out_features=640, bias=False) | |
(to_v): Linear(in_features=640, out_features=640, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=640, out_features=640, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((640,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=640, out_features=5120, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=2560, out_features=640, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0): ResnetBlock2D( | |
(norm1): GroupNorm(32, 1920, eps=1e-05, affine=True) | |
(conv1): Conv2d(1920, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True) | |
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(1920, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(1): ResnetBlock2D( | |
(norm1): GroupNorm(32, 1280, eps=1e-05, affine=True) | |
(conv1): Conv2d(1280, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True) | |
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(1280, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(2): ResnetBlock2D( | |
(norm1): GroupNorm(32, 960, eps=1e-05, affine=True) | |
(conv1): Conv2d(960, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=640, bias=True) | |
(norm2): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(960, 640, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(upsamplers): ModuleList( | |
(0): Upsample2D( | |
(conv): Conv2d(640, 640, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
) | |
) | |
) | |
(3): CrossAttnUpBlock2D( | |
(attentions): ModuleList( | |
(0-2): 3 x Transformer2DModel( | |
(norm): GroupNorm(32, 320, eps=1e-06, affine=True) | |
(proj_in): Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
(transformer_blocks): ModuleList( | |
(0): BasicTransformerBlock( | |
(norm1): LayerNorm((320,), eps=1e-05, elementwise_affine=True) | |
(attn1): Attention( | |
(to_q): Linear(in_features=320, out_features=320, bias=False) | |
(to_k): Linear(in_features=320, out_features=320, bias=False) | |
(to_v): Linear(in_features=320, out_features=320, bias=False) | |
(to_out): ModuleList( | |
(0): Linear(in_features=320, out_features=320, bias=True) | |
(1): Dropout(p=0.0, inplace=False) | |
) | |
) | |
(norm3): LayerNorm((320,), eps=1e-05, elementwise_affine=True) | |
(ff): FeedForward( | |
(net): ModuleList( | |
(0): GEGLU( | |
(proj): Linear(in_features=320, out_features=2560, bias=True) | |
) | |
(1): Dropout(p=0.0, inplace=False) | |
(2): Linear(in_features=1280, out_features=320, bias=True) | |
) | |
) | |
) | |
) | |
(proj_out): Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
(resnets): ModuleList( | |
(0): ResnetBlock2D( | |
(norm1): GroupNorm(32, 960, eps=1e-05, affine=True) | |
(conv1): Conv2d(960, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True) | |
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
(1-2): 2 x ResnetBlock2D( | |
(norm1): GroupNorm(32, 640, eps=1e-05, affine=True) | |
(conv1): Conv2d(640, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(time_emb_proj): Linear(in_features=1280, out_features=320, bias=True) | |
(norm2): GroupNorm(32, 320, eps=1e-05, affine=True) | |
(dropout): Dropout(p=0.0, inplace=False) | |
(conv2): Conv2d(320, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) | |
(nonlinearity): SiLU() | |
(conv_shortcut): Conv2d(640, 320, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |
) | |
) | |
(blobnet_up_blocks): ModuleList( | |
(0-7): 8 x Conv2d(1280, 1280, kernel_size=(1, 1), stride=(1, 1)) | |
(8-11): 4 x Conv2d(640, 640, kernel_size=(1, 1), stride=(1, 1)) | |
(12-14): 3 x Conv2d(320, 320, kernel_size=(1, 1), stride=(1, 1)) | |
) | |
) | |