caixiaoshun committed
Commit dca1999 · 1 Parent(s): 2c73586

Delete unused files

src/models/components/cnn.py DELETED
@@ -1,26 +0,0 @@
- import torch
- from torch import nn
-
-
- class CNN(nn.Module):
-     def __init__(self, dim=32):
-         super(CNN, self).__init__()
-         self.conv1 = nn.Conv2d(1, dim, 5)
-         self.conv2 = nn.Conv2d(dim, dim * 2, 5)
-         self.fc1 = nn.Linear(dim * 2 * 4 * 4, 10)
-
-     def forward(self, x):
-         x = torch.relu(self.conv1(x))
-         x = torch.max_pool2d(x, 2)
-         x = torch.relu(self.conv2(x))
-         x = torch.max_pool2d(x, 2)
-         x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
-         x = self.fc1(x)
-         return x
-
-
- if __name__ == "__main__":
-     input = torch.randn(2, 1, 28, 28)
-     model = CNN()
-     output = model(input)
-     assert output.shape == (2, 10)
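Note (editor's sketch, not part of the commit): the hard-coded nn.Linear(dim * 2 * 4 * 4, 10) only matches 28×28 inputs. A minimal check of the shape arithmetic, assuming the layers above:

    # conv1 (5x5, no padding): 28 -> 24; 2x2 max-pool: 24 -> 12
    # conv2 (5x5, no padding): 12 -> 8;  2x2 max-pool: 8  -> 4
    side = 28
    side = (side - 5 + 1) // 2  # after conv1 + pool: 12
    side = (side - 5 + 1) // 2  # after conv2 + pool: 4
    assert side == 4  # hence fc1 takes (dim * 2) * 4 * 4 input features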
src/models/components/lnn.py DELETED
@@ -1,23 +0,0 @@
- import torch
- from torch import nn
-
-
- class LNN(nn.Module):
-     # A fully connected network for handwritten digit recognition;
-     # the dim argument controls the hidden-layer width.
-     def __init__(self, dim=32):
-         super(LNN, self).__init__()
-         self.fc1 = nn.Linear(28 * 28, dim)
-         self.fc2 = nn.Linear(dim, 10)
-
-     def forward(self, x):
-         x = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
-         x = torch.relu(self.fc1(x))
-         x = self.fc2(x)
-         return x
-
-
- if __name__ == "__main__":
-     input = torch.randn(2, 1, 28, 28)
-     model = LNN()
-     output = model(input)
-     assert output.shape == (2, 10)
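Note (editor's sketch, not part of the commit): the view call assumes a 4-D (N, C, H, W) batch whose per-sample size equals 28 * 28; x.flatten(1) expresses the same thing. A minimal check:

    import torch
    x = torch.randn(2, 1, 28, 28)
    flat = x.view(-1, x.shape[1] * x.shape[2] * x.shape[3])
    assert flat.shape == (2, 784)
    assert torch.equal(flat, x.flatten(1))  # equivalent flattening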
src/models/components/unet.py DELETED
@@ -1,63 +0,0 @@
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
- class UNet(nn.Module):
-     def __init__(self, in_channels, out_channels):
-         super(UNet, self).__init__()
-
-         def conv_block(in_channels, out_channels):
-             return nn.Sequential(
-                 nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
-                 nn.ReLU(inplace=True),
-                 nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
-                 nn.ReLU(inplace=True)
-             )
-
-         self.encoder1 = conv_block(in_channels, 64)
-         self.encoder2 = conv_block(64, 128)
-         self.encoder3 = conv_block(128, 256)
-         self.encoder4 = conv_block(256, 512)
-         self.bottleneck = conv_block(512, 1024)
-
-         self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
-         self.decoder4 = conv_block(1024, 512)
-         self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
-         self.decoder3 = conv_block(512, 256)
-         self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
-         self.decoder2 = conv_block(256, 128)
-         self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
-         self.decoder1 = conv_block(128, 64)
-
-         self.final = nn.Conv2d(64, out_channels, kernel_size=1)
-
-     def forward(self, x):
-         enc1 = self.encoder1(x)
-         enc2 = self.encoder2(F.max_pool2d(enc1, kernel_size=2, stride=2))
-         enc3 = self.encoder3(F.max_pool2d(enc2, kernel_size=2, stride=2))
-         enc4 = self.encoder4(F.max_pool2d(enc3, kernel_size=2, stride=2))
-         bottleneck = self.bottleneck(F.max_pool2d(enc4, kernel_size=2, stride=2))
-
-         dec4 = self.upconv4(bottleneck)
-         dec4 = torch.cat((dec4, enc4), dim=1)
-         dec4 = self.decoder4(dec4)
-         dec3 = self.upconv3(dec4)
-         dec3 = torch.cat((dec3, enc3), dim=1)
-         dec3 = self.decoder3(dec3)
-         dec2 = self.upconv2(dec3)
-         dec2 = torch.cat((dec2, enc2), dim=1)
-         dec2 = self.decoder2(dec2)
-         dec1 = self.upconv1(dec2)
-         dec1 = torch.cat((dec1, enc1), dim=1)
-         dec1 = self.decoder1(dec1)
-
-         return self.final(dec1)
-
- if __name__ == "__main__":
-     model = UNet(in_channels=3, out_channels=7)
-     fake_img = torch.rand(size=(2, 3, 224, 224))
-     print(fake_img.shape)
-     # torch.Size([2, 3, 224, 224])
-     out = model(fake_img)
-     print(out.shape)
-     # torch.Size([2, 7, 224, 224])
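Note (editor's sketch, not part of the commit): the four 2x2 poolings shrink H and W by a factor of 16, and each torch.cat needs the upsampled decoder map to match the stored encoder map exactly, so inputs must have H and W divisible by 16 (224 = 16 * 14 works):

    for size in (224, 256, 512):
        assert size % 16 == 0  # valid input sides for this UNet
    assert 230 % 16 != 0       # e.g. 230x230 would break the skip concatenations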
src/models/components/vae.py DELETED
@@ -1,152 +0,0 @@
- import torch
- from torch import nn
- import torch.nn.functional as F
- from contextlib import contextmanager
- from typing import List, Dict
- from src.plugin.taming_transformers.taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
-
- from src.plugin.ldm.modules.diffusionmodules.model import Encoder, Decoder
- from src.plugin.ldm.modules.distributions.distributions import DiagonalGaussianDistribution
-
- import matplotlib.pyplot as plt
-
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
-
- class AutoencoderKL(nn.Module):
-     def __init__(
-         self,
-         double_z: bool = True,
-         z_channels: int = 3,
-         resolution: int = 512,
-         in_channels: int = 3,
-         out_ch: int = 3,
-         ch: int = 128,
-         ch_mult: List = [1, 2, 4, 4],
-         num_res_blocks: int = 2,
-         attn_resolutions: List = [],
-         dropout: float = 0.0,
-         embed_dim: int = 3,
-         ckpt_path: str = None,
-         ignore_keys: List = [],
-     ):
-         super(AutoencoderKL, self).__init__()
-         ddconfig = {
-             "double_z": double_z,
-             "z_channels": z_channels,
-             "resolution": resolution,
-             "in_channels": in_channels,
-             "out_ch": out_ch,
-             "ch": ch,
-             "ch_mult": ch_mult,
-             "num_res_blocks": num_res_blocks,
-             "attn_resolutions": attn_resolutions,
-             "dropout": dropout
-         }
-         self.encoder = Encoder(**ddconfig)
-         self.decoder = Decoder(**ddconfig)
-         assert ddconfig["double_z"]
-         self.quant_conv = nn.Conv2d(
-             2 * ddconfig["z_channels"], 2 * embed_dim, 1)
-         self.post_quant_conv = nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
-         self.embed_dim = embed_dim
-         if ckpt_path is not None:
-             self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
-
-     def init_from_ckpt(self, path, ignore_keys=list()):
-         sd = torch.load(path, map_location="cpu")["state_dict"]
-         keys = list(sd.keys())
-         for k in keys:
-             for ik in ignore_keys:
-                 if k.startswith(ik):
-                     print(f"Deleting key {k} from state_dict.")
-                     del sd[k]
-         self.load_state_dict(sd, strict=False)
-         print(f"Restored from {path}")
-
-     def encode(self, x):
-         h = self.encoder(x)  # B, C, h, w
-         moments = self.quant_conv(h)  # B, 6, h, w
-         posterior = DiagonalGaussianDistribution(moments)
-         return posterior  # the posterior distribution
-
-     def decode(self, z):
-         z = self.post_quant_conv(z)
-         dec = self.decoder(z)
-         return dec
-
-     def forward(self, input, sample_posterior=True):
-         posterior = self.encode(input)  # Gaussian posterior
-         if sample_posterior:
-             z = posterior.sample()  # sample from the posterior
-         else:
-             z = posterior.mode()
-         dec = self.decode(z)
-         last_layer_weight = self.decoder.conv_out.weight
-         return dec, posterior, last_layer_weight
-
-
- if __name__ == '__main__':
-     # Test the input and output shapes of the model
-     model = AutoencoderKL()
-     x = torch.randn(1, 3, 512, 512)
-     dec, posterior, last_layer_weight = model(x)
-
-     assert dec.shape == (1, 3, 512, 512)
-     assert posterior.sample().shape == posterior.mode().shape == (1, 3, 64, 64)
-     assert last_layer_weight.shape == (3, 128, 3, 3)
-
-     # Plot the latent space and the reconstruction from the pretrained model
-     model = AutoencoderKL(ckpt_path="/mnt/chongqinggeminiceph1fs/geminicephfs/wx-mm-spr-xxxx/zouxuechao/Collaborative-Diffusion/outputs/512_vae/2024-06-27T06-02-04_512_vae/checkpoints/epoch=000036.ckpt")
-     model.eval()
-     image_path = "data/celeba/image/image_512_downsampled_from_hq_1024/0.jpg"
-
-     from PIL import Image
-     import numpy as np
-     from src.data.components.celeba import DalleTransformerPreprocessor
-     from src.data.components.celeba import CelebA
-     image = Image.open(image_path).convert('RGB')
-     image = np.array(image).astype(np.uint8)
-     import copy
-     original = copy.deepcopy(image)
-     transform = DalleTransformerPreprocessor(size=512, phase='test')
-     image = transform(image=image)['image']
-     image = image.astype(np.float32) / 127.5 - 1.0
-     image = torch.from_numpy(image).permute(2, 0, 1).unsqueeze(0)
-
-     dec, posterior, last_layer_weight = model(image)
-
-     # original image
-     plt.subplot(1, 3, 1)
-     plt.imshow(original)
-     plt.title("Original")
-     plt.axis("off")
-
-     # sampled image from the latent space
-     plt.subplot(1, 3, 2)
-     x = model.decode(posterior.sample())
-     x = (x + 1) / 2
-     x = x.squeeze(0).permute(1, 2, 0).cpu()
-     x = x.detach().numpy()
-     x = x.clip(0, 1)
-     x = (x * 255).astype(np.uint8)
-     plt.imshow(x)
-     plt.title("Sampled")
-     plt.axis("off")
-
-     # reconstructed image
-     plt.subplot(1, 3, 3)
-     x = dec
-     x = (x + 1) / 2
-     x = x.squeeze(0).permute(1, 2, 0).cpu()
-     x = x.detach().numpy()
-     x = x.clip(0, 1)
-     x = (x * 255).astype(np.uint8)
-     plt.imshow(x)
-     plt.title("Reconstructed")
-     plt.axis("off")
-
-     plt.tight_layout()
-     plt.savefig("vae_reconstruction.png")
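Note (editor's sketch, not part of the commit): with the default ch_mult = [1, 2, 4, 4] the LDM-style Encoder has four resolution levels and downsamples by 2 ** (4 - 1) = 8, which is consistent with the assert above that a 512x512 input yields a 3x64x64 latent:

    resolution, num_levels = 512, 4  # defaults of AutoencoderKL above
    latent_side = resolution // 2 ** (num_levels - 1)
    assert latent_side == 64  # matches posterior.sample().shape == (1, 3, 64, 64)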