Muhammad Naufal Rizqullah committed · Commit 21a662b · Parent(s): 36cfe0b
first commit
Files changed:
- .gitignore +30 -0
- LICENSE +21 -0
- app.py +44 -0
- config/__init__.py +0 -0
- config/core.py +34 -0
- models/__init__.py +0 -0
- models/base.py +181 -0
- models/lightning.py +138 -0
- requirements.txt +3 -0
- utility/__init__.py +0 -0
- utility/helper.py +60 -0
- weights/epoch=999-step=96000.ckpt +3 -0
- weights/source.txt +2 -0
.gitignore
ADDED
@@ -0,0 +1,30 @@
.idea
.ipynb_checkpoints
.mypy_cache
.vscode
__pycache__
.pytest_cache
htmlcov
dist
site
.coverage
coverage.xml
.netlify
test.db
log.txt
Pipfile.lock
env3.*
env
docs_build
site_build
venv
docs.zip
archive.zip

# vim temporary files
*~
.*.sw?
.cache

# macOS
.DS_Store
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Muhammad Naufal Rizqullah

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
app.py
ADDED
@@ -0,0 +1,44 @@
import torch
from PIL import Image
import numpy as np
import gradio as gr

from config.core import config
from utility.helper import load_model_weights, init_generator_model, get_selected_value

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = init_generator_model()
model = load_model_weights(config.CKPT_PATH, model, DEVICE, "generator")
model.eval()

def inference(choice):
    z = torch.randn(1, config.INPUT_Z_DIM, 1, 1).to(DEVICE)
    label = torch.tensor([get_selected_value(choice)], device=DEVICE)

    image_tensor = model(z, label)

    image_tensor = (image_tensor + 1) / 2  # Shift and scale to 0 to 1
    image_unflat = image_tensor.detach().cpu().squeeze(0)  # Remove batch dimension
    image = image_unflat.permute(1, 2, 0)  # Permute to (H, W, C)

    # Convert image to numpy array
    image_array = image.numpy()

    # Scale values to 0-255 range
    image_array = (image_array * 255).astype(np.uint8)

    # Convert numpy array to PIL Image
    image = Image.fromarray(image_array)

    return image

demo = gr.Interface(
    fn=inference,
    inputs=gr.Dropdown(choices=list(config.OPTIONS_MAPPING.keys()), label="Select an option to generate images"),
    outputs=gr.Image(),
    title="Shoe, Sandal, Boot - Conditional GAN",
    description="Conditional WGAN-GP",
)

demo.launch()
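The generator's Tanh output lies in [-1, 1], so inference() shifts it to [0, 1] and then to 8-bit before handing it to Gradio. A minimal sketch of that post-processing on a dummy tensor (illustrative only, not part of the commit):

import torch
import numpy as np
from PIL import Image

# Dummy generator output in [-1, 1] with shape (1, 3, 128, 128)
fake = torch.tanh(torch.randn(1, 3, 128, 128))

img = (fake + 1) / 2                      # [-1, 1] -> [0, 1]
img = img.detach().cpu().squeeze(0)       # drop batch dim -> (3, 128, 128)
img = img.permute(1, 2, 0).numpy()        # (H, W, C) layout for PIL
img = (img * 255).astype(np.uint8)        # [0, 1] -> [0, 255]

Image.fromarray(img).save("sample.png")   # same conversion app.py returns to Gradio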
config/__init__.py
ADDED
File without changes
config/core.py
ADDED
@@ -0,0 +1,34 @@
from pydantic import BaseSettings

class Config(BaseSettings):
    IMAGE_CHANNEL: int = 3
    NUM_CLASSES: int = 3
    IMAGE_SIZE: int = 128
    FEATURES_DISCRIMINATOR: int = 64
    FEATURES_GENERATOR: int = 64
    EMBED_SIZE: int = 64
    INPUT_Z_DIM: int = 64
    BATCH_SIZE: int = 128
    DISPLAY_STEP: int = 500
    MAX_SAMPLES: int = 3000

    LEARNING_RATE: float = 0.0002
    BETA_1: float = 0.5
    BETA_2: float = 0.999
    C_LAMBDA: int = 10

    NUM_EPOCH: int = 200 * 5

    CRITIC_REPEAT: int = 3

    LOAD_CHECKPOINT: bool = True
    PATH_DATASET: str = ""
    CKPT_PATH: str = "./weights/epoch=999-step=96000.ckpt"

    OPTIONS_MAPPING: dict = {
        "Boot": 0,
        "Sandal": 1,
        "Shoe": 2
    }

config = Config()
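Because Config subclasses pydantic's BaseSettings, any field can be overridden through environment variables instead of editing the file. A small sketch of that behavior (assuming pydantic v1, where BaseSettings is importable from pydantic; the variable name used here is just an example):

import os

os.environ["CKPT_PATH"] = "./weights/other.ckpt"   # hypothetical override

from config.core import Config

cfg = Config()
print(cfg.CKPT_PATH)        # "./weights/other.ckpt" instead of the default
print(cfg.OPTIONS_MAPPING)  # {"Boot": 0, "Sandal": 1, "Shoe": 2}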
models/__init__.py
ADDED
File without changes
models/base.py
ADDED
@@ -0,0 +1,181 @@
import torch
import torch.nn as nn

class Discriminator(nn.Module):
    """Discriminator model for Conditional GAN.

    Args:
        num_classes (int): Number of classes in the dataset.
        image_size (int): Size of the input images (assumes square images).
        features_discriminator (int): Number of feature maps in the first layer of the discriminator.
        image_channel (int): Number of channels in the input image.

    Attributes:
        disc (nn.Sequential): The sequential layers that define the discriminator.
        embed (nn.Embedding): Embedding layer to encode labels into an image-like format.
    """

    def __init__(self, num_classes=3, image_size=128, features_discriminator=128, image_channel=3):
        super().__init__()

        self.num_classes = num_classes
        self.image_size = image_size

        label_channel = 1

        self.disc = nn.Sequential(
            self._block_discriminator(image_channel + label_channel, features_discriminator, kernel_size=4, stride=2, padding=1),
            self._block_discriminator(features_discriminator, features_discriminator, kernel_size=4, stride=2, padding=1),
            self._block_discriminator(features_discriminator, features_discriminator * 2, kernel_size=4, stride=2, padding=1),
            self._block_discriminator(features_discriminator * 2, features_discriminator * 4, kernel_size=4, stride=2, padding=1),
            self._block_discriminator(features_discriminator * 4, features_discriminator * 4, kernel_size=4, stride=2, padding=1),
            self._block_discriminator(features_discriminator * 4, 1, kernel_size=4, stride=1, padding=0, final_layer=True)
        )

        self.embed = nn.Embedding(num_classes, image_size * image_size)

    def forward(self, image, label):
        """Forward pass for the discriminator.

        Args:
            image (torch.Tensor): Batch of input images.
            label (torch.Tensor): Corresponding labels for the images.

        Returns:
            torch.Tensor: Discriminator output.
        """
        # Embed label into an image-like format
        embedding = self.embed(label)
        embedding = embedding.view(
            label.shape[0],
            1,
            self.image_size,
            self.image_size
        )  # Reshape into a 1-channel image

        data = torch.cat([image, embedding], dim=1)  # Concatenate image with the label channel

        x = self.disc(data)

        return x.view(len(x), -1)

    def _block_discriminator(self, input_channels, output_channels, kernel_size=3, stride=2, padding=0, final_layer=False):
        """Creates a convolutional block for the discriminator.

        Args:
            input_channels (int): Number of input channels for the convolutional layer.
            output_channels (int): Number of output channels for the convolutional layer.
            kernel_size (int): Size of the kernel for the convolutional layer.
            stride (int): Stride of the convolutional layer.
            padding (int): Padding for the convolutional layer.
            final_layer (bool): If True, this is the final layer, which doesn't include normalization or activation.

        Returns:
            nn.Sequential: Sequential block for the discriminator.
        """
        if not final_layer:
            return nn.Sequential(
                nn.Conv2d(input_channels, output_channels, kernel_size, stride, padding),
                nn.InstanceNorm2d(output_channels, affine=True),
                nn.LeakyReLU(0.2)
            )
        else:
            return nn.Sequential(
                nn.Conv2d(input_channels, output_channels, kernel_size, stride, padding),
            )

class Generator(nn.Module):
    """Generator model for Conditional GAN.

    Args:
        embed_size (int): Size of the embedding vector for the labels.
        num_classes (int): Number of classes in the dataset.
        image_size (int): Size of the output images (assumes square images).
        features_generator (int): Number of feature maps in the first layer of the generator.
        input_dim (int): Dimensionality of the noise vector.
        image_channel (int): Number of channels in the output image.

    Attributes:
        gen (nn.Sequential): The sequential layers that define the generator.
        embed (nn.Embedding): Embedding layer to encode labels.
    """

    def __init__(self, embed_size=128, num_classes=3, image_size=128, features_generator=128, input_dim=128, image_channel=3):
        super(Generator, self).__init__()

        self.gen = nn.Sequential(
            self._block(input_dim + embed_size, features_generator * 2, first_double_up=True),
            self._block(features_generator * 2, features_generator * 4, first_double_up=False, final_layer=False),
            self._block(features_generator * 4, features_generator * 4, first_double_up=False, final_layer=False),
            self._block(features_generator * 4, features_generator * 4, first_double_up=False, final_layer=False),
            self._block(features_generator * 4, features_generator * 2, first_double_up=False, final_layer=False),
            self._block(features_generator * 2, features_generator, first_double_up=False, final_layer=False),
            self._block(features_generator, image_channel, first_double_up=False, use_double=False, final_layer=True),
        )

        self.image_size = image_size
        self.embed_size = embed_size

        self.embed = nn.Embedding(num_classes, embed_size)

    def forward(self, noise, labels):
        """Forward pass for the generator.

        Args:
            noise (torch.Tensor): Batch of input noise vectors.
            labels (torch.Tensor): Corresponding labels for the noise vectors.

        Returns:
            torch.Tensor: Generated images.
        """
        embedding_label = self.embed(labels).unsqueeze(2).unsqueeze(3)  # Reshape to (batch_size, embed_size, 1, 1)

        noise = noise.view(noise.size(0), noise.size(1), 1, 1)  # Reshape to (batch_size, z_dim, 1, 1)

        x = torch.cat([noise, embedding_label], dim=1)

        return self.gen(x)

    def _block(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1,
               first_double_up=False, use_double=True, final_layer=False):
        """Creates a convolutional block for the generator.

        Args:
            in_channels (int): Number of input channels for the convolutional layer.
            out_channels (int): Number of output channels for the convolutional layer.
            kernel_size (int): Size of the kernel for the convolutional layer.
            stride (int): Stride of the convolutional layer.
            padding (int): Padding for the convolutional layer.
            first_double_up (bool): If True, the first layer uses a different upsampling strategy.
            use_double (bool): If True, the block includes an upsampling layer.
            final_layer (bool): If True, this is the final layer, which uses Tanh activation.

        Returns:
            nn.Sequential: Sequential block for the generator.
        """
        layers = []

        if not final_layer:
            # Add first convolutional layer
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2))

            # Add second convolutional layer
            layers.append(nn.Conv2d(out_channels, out_channels, kernel_size, stride, padding))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2))
        else:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding))
            layers.append(nn.Tanh())

        if use_double:
            if first_double_up:
                layers.append(nn.ConvTranspose2d(out_channels, out_channels, 4, 1, 0))
            else:
                layers.append(nn.ConvTranspose2d(out_channels, out_channels, 4, 2, 1))

            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.LeakyReLU(0.2))

        return nn.Sequential(*layers)
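With the configuration used by this Space (embed_size=64, z_dim=64, features=64), the generator upsamples a 1x1 latent to a 128x128 RGB image (one 1-to-4 transposed-conv step followed by five doublings), while the critic collapses a 128x128 input to a single score. A quick shape-check sketch, not part of the commit:

import torch
from models.base import Generator, Discriminator

gen = Generator(embed_size=64, num_classes=3, image_size=128,
                features_generator=64, input_dim=64)
critic = Discriminator(num_classes=3, image_size=128, features_discriminator=64)

z = torch.randn(4, 64)              # batch of latent vectors
labels = torch.randint(0, 3, (4,))  # class ids: Boot / Sandal / Shoe

fake = gen(z, labels)
print(fake.shape)                   # torch.Size([4, 3, 128, 128])
print(critic(fake, labels).shape)   # torch.Size([4, 1])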
models/lightning.py
ADDED
@@ -0,0 +1,138 @@
import torch
import torch.optim as optim
import lightning as L
from .base import Discriminator, Generator

class ConditionalWGAN_GP(L.LightningModule):
    """Conditional WGAN-GP implementation using PyTorch Lightning.

    Attributes:
        image_size: Size of the generated images.
        critic_repeats: Number of critic iterations per generator iteration.
        c_lambda: Gradient penalty lambda hyperparameter.
        generator: The generator model.
        critic: The discriminator (critic) model.
        critic_losses: List to store critic loss values.
        generator_losses: List to store generator loss values.
        curr_step: The current training step.
        fixed_latent_space: Fixed latent vectors for generating consistent images.
        fixed_label: Fixed labels corresponding to the latent vectors.
    """

    def __init__(self, image_size, learning_rate, z_dim, embed_size, num_classes,
                 critic_repeats, feature_gen, feature_critic, c_lambda, beta_1,
                 beta_2, display_step):
        """Initializes the Conditional WGAN-GP model.

        Args:
            image_size: Size of the generated images.
            learning_rate: Learning rate for the optimizers.
            z_dim: Dimension of the latent space.
            embed_size: Size of the embedding for the labels.
            num_classes: Number of classes for the conditional generation.
            critic_repeats: Number of critic iterations per generator iteration.
            feature_gen: Number of features for the generator.
            feature_critic: Number of features for the critic.
            c_lambda: Gradient penalty lambda hyperparameter.
            beta_1: Beta1 parameter for the Adam optimizer.
            beta_2: Beta2 parameter for the Adam optimizer.
            display_step: Step interval for displaying generated images.
        """
        super().__init__()

        self.automatic_optimization = False

        self.image_size = image_size
        self.critic_repeats = critic_repeats
        self.c_lambda = c_lambda

        self.generator = Generator(
            embed_size=embed_size,
            num_classes=num_classes,
            image_size=image_size,
            features_generator=feature_gen,
            input_dim=z_dim,
        )

        self.critic = Discriminator(
            num_classes=num_classes,
            image_size=image_size,
            features_discriminator=feature_critic,
        )

        self.critic_losses = []
        self.generator_losses = []
        self.curr_step = 0

        self.fixed_latent_space = torch.randn(25, z_dim, 1, 1)
        self.fixed_label = torch.tensor([i % num_classes for i in range(25)])

        self.save_hyperparameters()

    def configure_optimizers(self):
        """Configures the optimizers for the generator and critic.

        Returns:
            A tuple of two Adam optimizers, one for the generator and one for the critic.
        """
        optimizer_g = optim.Adam(
            self.generator.parameters(),
            lr=self.hparams.learning_rate,
            betas=(self.hparams.beta_1, self.hparams.beta_2),
        )
        optimizer_c = optim.Adam(
            self.critic.parameters(),
            lr=self.hparams.learning_rate,
            betas=(self.hparams.beta_1, self.hparams.beta_2),
        )

        return optimizer_g, optimizer_c

    def on_load_checkpoint(self, checkpoint):
        """Loads necessary variables from a checkpoint.

        Args:
            checkpoint: The checkpoint dictionary.
        """
        if self.current_epoch != 0:
            self.critic_losses = checkpoint['critic_losses']
            self.generator_losses = checkpoint['generator_losses']
            self.curr_step = checkpoint['curr_step']
            self.fixed_latent_space = checkpoint['fixed_latent_space']
            self.fixed_label = checkpoint['fixed_label']

    def on_save_checkpoint(self, checkpoint):
        """Saves necessary variables to a checkpoint.

        Args:
            checkpoint: The checkpoint dictionary.
        """
        checkpoint['critic_losses'] = self.critic_losses
        checkpoint['generator_losses'] = self.generator_losses
        checkpoint['curr_step'] = self.curr_step
        checkpoint['fixed_latent_space'] = self.fixed_latent_space
        checkpoint['fixed_label'] = self.fixed_label

    def forward(self, noise, labels):
        """Generates an image given noise and labels.

        Args:
            noise: Latent noise vector.
            labels: Class labels for conditional generation.

        Returns:
            Generated image tensor.
        """
        return self.generator(noise, labels)

    def predict_step(self, noise, labels):
        """Predicts an image given noise and labels.

        Args:
            noise: Latent noise vector.
            labels: Class labels for conditional generation.

        Returns:
            Generated image tensor.
        """
        return self.generator(noise, labels)
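This commit ships only the inference-relevant parts of the LightningModule; the training_step is not included. Since automatic_optimization is disabled and c_lambda is stored, a manual-optimization loop of this kind would typically rely on the standard WGAN-GP gradient penalty, sketched below (an illustration of the general technique, not the author's training code):

import torch

def gradient_penalty(critic, real, fake, labels, device):
    """Standard WGAN-GP penalty on interpolates between real and fake images."""
    batch_size = real.size(0)
    eps = torch.rand(batch_size, 1, 1, 1, device=device)
    interpolates = (eps * real + (1 - eps) * fake).requires_grad_(True)

    scores = critic(interpolates, labels)
    grads = torch.autograd.grad(
        outputs=scores, inputs=interpolates,
        grad_outputs=torch.ones_like(scores),
        create_graph=True, retain_graph=True,
    )[0]

    grads = grads.view(batch_size, -1)
    return ((grads.norm(2, dim=1) - 1) ** 2).mean()

# The critic loss would then combine the Wasserstein estimate with the penalty:
# loss_c = -(critic(real, y).mean() - critic(fake, y).mean()) + c_lambda * gradient_penalty(...)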
requirements.txt
ADDED
@@ -0,0 +1,3 @@
torch==2.1.2
pytorch-lightning==2.3.3
python-multipart
utility/__init__.py
ADDED
File without changes
utility/helper.py
ADDED
@@ -0,0 +1,60 @@
import torch

from config.core import config
from models.base import Generator

def load_model_weights(checkpoint_path, model, device, prefix):
    """
    Load specific weights from a PyTorch Lightning checkpoint into a model.

    Parameters:
        checkpoint_path (str): Path to the checkpoint file.
        model (torch.nn.Module): The model instance to load weights into.
        device (torch.device): Device to map the checkpoint onto.
        prefix (str): The prefix in the checkpoint's state_dict keys to filter by and remove.

    Returns:
        model (torch.nn.Module): The model with loaded weights.
    """
    # Load the checkpoint
    checkpoint = torch.load(checkpoint_path, map_location=device)

    # Extract and modify the state_dict keys to match the model's keys
    model_weights = {k.replace(f"{prefix}.", ""): v for k, v in checkpoint["state_dict"].items() if k.startswith(f"{prefix}.")}

    # Load the weights into the model
    model.load_state_dict(model_weights)

    return model

def init_generator_model():
    """
    Initializes and returns the Generator model.

    Args:
        None.

    Returns:
        Generator: The initialized Generator model.
    """
    model = Generator(
        embed_size=config.EMBED_SIZE,
        num_classes=config.NUM_CLASSES,
        image_size=config.IMAGE_SIZE,
        features_generator=config.FEATURES_GENERATOR,
        input_dim=config.INPUT_Z_DIM,
        image_channel=config.IMAGE_CHANNEL
    )
    return model

def get_selected_value(label):
    """
    Get the selected value based on the display label.

    Args:
        label (str): The display label.

    Returns:
        int: The selected value corresponding to the display label.
    """
    # Get the selected value from the options mapping based on the display label.
    return config.OPTIONS_MAPPING[label]
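load_model_weights works because a Lightning checkpoint stores the whole module's state_dict with submodule prefixes such as "generator." and "critic."; filtering on the prefix and stripping it recovers a state_dict the bare Generator accepts. A toy illustration of that key filtering, with hypothetical keys:

state_dict = {
    "generator.embed.weight": "...",    # kept, prefix stripped
    "generator.gen.0.0.weight": "...",  # kept, prefix stripped
    "critic.disc.0.0.weight": "...",    # dropped (different prefix)
}

prefix = "generator"
filtered = {k.replace(f"{prefix}.", ""): v
            for k, v in state_dict.items() if k.startswith(f"{prefix}.")}

print(list(filtered))  # ['embed.weight', 'gen.0.0.weight']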
weights/epoch=999-step=96000.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:93409ee79fbe7ecfbcd95fe775a7625408e088624c0e80153e14c234c93d8132
size 116330608
weights/source.txt
ADDED
@@ -0,0 +1,2 @@
Using weights from Kaggle after training for 600 epochs:
- https://www.kaggle.com/datasets/dimensioncore/conditional-gan-part-2/versions/1020