Muhammad Naufal Rizqullah committed on
Commit e61c431 · 1 Parent(s): 439c972

Experiment 2
config/core.py CHANGED
@@ -2,15 +2,16 @@ from pydantic_settings import BaseSettings
 
 class Config(BaseSettings):
     IMAGE_CHANNEL: int = 3
+    LABEL_CHANNEL: int = 3
     NUM_CLASSES: int = 3
     IMAGE_SIZE: int = 128
-    FEATURES_DISCRIMINATOR: int = 64
-    FEATURES_GENERATOR: int = 64
-    EMBED_SIZE: int = 64
-    INPUT_Z_DIM: int = 64
-    BATCH_SIZE: int = 128
+    FEATURES_DISCRIMINATOR: int = 64 * 2
+    FEATURES_GENERATOR: int = 64 * 2
+    EMBED_SIZE: int = 30 + 20
+    INPUT_Z_DIM: int = 64 * 2
+    BATCH_SIZE: int = 20
     DISPLAY_STEP: int = 500
-    MAX_SAMPLES: int = 3000
+    MAX_SAMPLES: int = 2500
 
     LEARNING_RATE: float = 0.0002
     BETA_1: float = 0.5
@@ -22,8 +23,8 @@ class Config(BaseSettings):
     CRITIC_REPEAT: int = 3
 
     LOAD_CHECKPOINT: bool = True
-    PATH_DATASET: str = ""
-    CKPT_PATH: str = "./weights/epoch=957-step=1164300.ckpt"
+    PATH_DATASET: str = "/kaggle/input/shoe-vs-sandal-vs-boot-dataset-15k-images/Shoe vs Sandal vs Boot Dataset"
+    CKPT_PATH: str = "./weights/epoch=299-step=450000.ckpt"
 
     OPTIONS_MAPPING: dict = {
         "Boot": 0,
data/dataloader.py ADDED
@@ -0,0 +1,52 @@
+import torch
+import lightning as L
+import torchvision.transforms as T
+import os
+
+from config.core import config
+from torch.utils.data import DataLoader
+from torchvision.datasets import ImageFolder
+from utility.helper import PadToSquare
+
+class ShoeSandalBoot(L.LightningDataModule):
+    def __init__(
+        self,
+        dataset_directory,
+        image_size=config.IMAGE_SIZE,
+        batch_size=config.BATCH_SIZE,
+        max_samples=None
+    ):
+        super().__init__()
+
+        self.data_dir = dataset_directory
+        self.bs = batch_size
+        self.max_samples = max_samples  # to limit the dataset size
+
+        self.transforms = T.Compose([
+            # T.Resize(size=(image_size, image_size)),
+            PadToSquare(image_size),
+            T.ToTensor(),
+            T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+        ])
+
+        self.ssb = None
+
+    def prepare_data(self):
+        pass
+
+    def setup(self, stage):
+        if stage == "fit":
+            dataset = ImageFolder(
+                root=self.data_dir,
+                transform=self.transforms
+            )
+
+            # Optionally limit the dataset size
+            if self.max_samples:
+                print(f"[INFO] Dataset is Limited to {self.max_samples} Samples")
+                self.ssb = torch.utils.data.Subset(dataset, range(min(len(dataset), self.max_samples)))
+            else:
+                self.ssb = dataset
+
+    def train_dataloader(self):
+        return DataLoader(self.ssb, batch_size=self.bs, num_workers=os.cpu_count(), shuffle=True)
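
A minimal smoke test of the datamodule, as a hedged sketch (the dataset path and the "fit" stage string come from the config above; the shapes assume BATCH_SIZE=20 and IMAGE_SIZE=128):

# Hypothetical usage sketch; PATH_DATASET comes from config/core.py above.
from config.core import config
from data.dataloader import ShoeSandalBoot

dm = ShoeSandalBoot(
    dataset_directory=config.PATH_DATASET,
    image_size=config.IMAGE_SIZE,
    batch_size=config.BATCH_SIZE,
    max_samples=config.MAX_SAMPLES,
)
dm.setup(stage="fit")
images, labels = next(iter(dm.train_dataloader()))
print(images.shape, labels.shape)  # expected: torch.Size([20, 3, 128, 128]) torch.Size([20])
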
models/discriminator.py CHANGED
@@ -2,63 +2,78 @@ import torch
 import torch.nn as nn
 
 from .base import WSConv2d, ConvBlock
+from config.core import config
 
 
 class Discriminator(nn.Module):
-    def __init__(self, num_classes=3, image_size=128, features_discriminator=128, image_channel=3):
+    def __init__(self, num_classes=3, embed_size=128, image_size=128, features_discriminator=128, image_channel=3, label_channel=3):
         super().__init__()
 
         self.num_classes = num_classes
         self.image_size = image_size
-        label_channel = 1
+
+        self.embed_size = embed_size
+        self.label_channel = label_channel
 
         self.disc = nn.Sequential(
-            self._block_discriminator(image_channel + label_channel, features_discriminator, kernel_size=4, stride=2,
-                                      padding=1),
-            self._block_discriminator(features_discriminator, features_discriminator, kernel_size=4, stride=2,
-                                      padding=1),
-            self._block_discriminator(features_discriminator, features_discriminator * 2, kernel_size=4, stride=2,
-                                      padding=1),
-            self._block_discriminator(features_discriminator * 2, features_discriminator * 4, kernel_size=4, stride=2,
-                                      padding=1),
-            self._block_discriminator(features_discriminator * 4, features_discriminator * 4, kernel_size=4, stride=2,
-                                      padding=1),
-            self._block_discriminator(features_discriminator * 4, 1, kernel_size=4, stride=1, padding=0,
-                                      final_layer=True)
+            self._block_discriminator(image_channel + label_channel, features_discriminator, kernel_size=4, stride=2, padding=1),
+            self._block_discriminator(features_discriminator, features_discriminator, kernel_size=4, stride=2, padding=1),
+            self._block_discriminator(features_discriminator, features_discriminator * 2, kernel_size=4, stride=2, padding=1),
+            self._block_discriminator(features_discriminator * 2, features_discriminator * 4, kernel_size=4, stride=2, padding=1),
+            self._block_discriminator(features_discriminator * 4, features_discriminator * 4, kernel_size=4, stride=2, padding=1),
+            self._block_discriminator(features_discriminator * 4, 1, kernel_size=4, stride=1, padding=0, final_layer=True)
         )
 
-        self.embed = nn.Embedding(num_classes, image_size * image_size)
+        self.embed = nn.Embedding(num_classes, embed_size)
+        self.embed_linear = nn.Linear(embed_size, label_channel * image_size * image_size)
 
     def forward(self, image, label):
         embedding = self.embed(label)
-        embedding = embedding.view(
+
+        linear_embedding = self.embed_linear(embedding)
+
+        embedding_layer = linear_embedding.view(
             label.shape[0],
-            1,
+            self.label_channel,
             self.image_size,
             self.image_size
         )
 
-        data = torch.cat([image, embedding], dim=1)
+        data = torch.cat([image, embedding_layer], dim=1)
 
         x = self.disc(data)
 
         return x.view(len(x), -1)
 
     def _block_discriminator(
         self,
         input_channels,
         output_channels,
         kernel_size=3,
         stride=2,
         padding=0,
         final_layer=False
     ):
         if not final_layer:
             return nn.Sequential(
                 ConvBlock(input_channels, output_channels),
-                WSConv2d(output_channels, output_channels, kernel_size, stride, padding)
+                WSConv2d(output_channels, output_channels, kernel_size, stride, padding),
             )
         else:
             return WSConv2d(input_channels, output_channels, kernel_size, stride, padding)
+
+
+def test():
+    sample = torch.randn(1, 3, 128, 128)
+    label = torch.tensor([1])
+
+    model = Discriminator(
+        num_classes=config.NUM_CLASSES,
+        embed_size=config.EMBED_SIZE,
+        image_size=config.IMAGE_SIZE,
+        features_discriminator=config.FEATURES_DISCRIMINATOR,
+        image_channel=config.IMAGE_CHANNEL,
+        label_channel=config.LABEL_CHANNEL
+    )
+
+    preds = model(sample, label)
+    print(preds.shape)
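
To make the new conditioning path concrete: the label index is embedded to a 50-dim vector (EMBED_SIZE = 30 + 20), projected by the linear layer to 3 * 128 * 128 values, reshaped into a 3-channel label map, and concatenated with the RGB image, so the first conv block sees 6 input channels. A minimal standalone sketch of just that path, using the config values from this commit (the layers below are throwaway stand-ins, not the repo's modules):

import torch
import torch.nn as nn

# Hypothetical shape walk-through of the label path, mirroring Discriminator.forward.
num_classes, embed_size, label_channel, image_size = 3, 50, 3, 128

embed = nn.Embedding(num_classes, embed_size)
embed_linear = nn.Linear(embed_size, label_channel * image_size * image_size)

label = torch.tensor([2, 0])  # batch of 2 labels
label_map = embed_linear(embed(label)).view(-1, label_channel, image_size, image_size)
image = torch.randn(2, 3, image_size, image_size)

data = torch.cat([image, label_map], dim=1)
print(label_map.shape, data.shape)  # torch.Size([2, 3, 128, 128]) torch.Size([2, 6, 128, 128])
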
models/generator.py CHANGED
@@ -2,40 +2,42 @@ import torch
 import torch.nn as nn
 
 from .base import WSConv2d, ConvBlock, PixelNorm
+from config.core import config
 
 
 class Generator(nn.Module):
     def __init__(self, embed_size=128, num_classes=3, image_size=128, features_generator=128, input_dim=128, image_channel=3):
         super().__init__()
 
         self.gen = nn.Sequential(
             self._block(input_dim + embed_size, features_generator * 2, first_double_up=True),
             self._block(features_generator * 2, features_generator * 4, first_double_up=False, final_layer=False),
             self._block(features_generator * 4, features_generator * 4, first_double_up=False, final_layer=False),
             self._block(features_generator * 4, features_generator * 4, first_double_up=False, final_layer=False),
             self._block(features_generator * 4, features_generator * 2, first_double_up=False, final_layer=False),
             self._block(features_generator * 2, features_generator, first_double_up=False, final_layer=False),
             self._block(features_generator, image_channel, first_double_up=False, use_double=False, final_layer=True),
         )
 
         self.image_size = image_size
         self.embed_size = embed_size
 
         self.embed = nn.Embedding(num_classes, embed_size)
+        self.embed_linear = nn.Linear(embed_size, embed_size)
 
     def forward(self, noise, labels):
-        embedding_label = self.embed(labels).unsqueeze(2).unsqueeze(
-            3)  # Add height and width channel; N x Noise_dim x 1 x 1
-
-        # Noise is 4 channel, or 2 channel. later will decide
-        noise = noise.view(noise.size(0), noise.size(1), 1, 1)  # Reshape to (batch_size, z_dim, 1, 1)
-
-        x = torch.cat([noise, embedding_label], dim=1)
-
+        embedding_label = self.embed(labels)
+        linear_embedding_label = self.embed_linear(embedding_label).unsqueeze(2).unsqueeze(3)
+
+        noise = noise.view(noise.size(0), noise.size(1), 1, 1)
+
+        x = torch.cat([noise, linear_embedding_label], dim=1)
+
         return self.gen(x)
 
     def _block(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1,
                first_double_up=False, use_double=True, final_layer=False):
         layers = []
 
         if not final_layer:
@@ -55,3 +57,17 @@ class Generator(nn.Module):
 
         return nn.Sequential(*layers)
 
+
+def test():
+    sample = torch.randn(1, config.INPUT_Z_DIM, 1, 1)
+    label = torch.tensor([1])
+
+    model = Generator(
+        embed_size=config.EMBED_SIZE,
+        num_classes=config.NUM_CLASSES,
+        image_size=config.IMAGE_SIZE,
+        features_generator=config.FEATURES_GENERATOR,
+        input_dim=config.INPUT_Z_DIM,
+    )
+
+    preds = model(sample, label)
+    print(preds.shape)
requirements.txt CHANGED
@@ -3,4 +3,6 @@ pytorch-lightning
 python-multipart
 fastapi
 pydantic
-pydantic-settings
+pydantic-settings
+opencv-python==4.10.0
+imageio==2.33.1
training/train.py ADDED
@@ -0,0 +1,196 @@
+import torch
+import lightning as L
+import torch.optim as optim
+
+from models.generator import Generator
+from models.discriminator import Discriminator
+from utility.helper import initialize_weights, plot_images_from_tensor
+from utility.wgan_gp import gradient_penalty, calculate_generator_loss, calculate_critic_loss
+
+
+class ConditionalWGAN_GP(L.LightningModule):
+    def __init__(self, image_channel, label_channel, image_size, learning_rate, z_dim, embed_size, num_classes, critic_repeats, feature_gen, feature_critic, c_lambda, beta_1, beta_2, display_step):
+        super().__init__()
+
+        self.automatic_optimization = False
+
+        self.image_size = image_size
+        self.critic_repeats = critic_repeats
+        self.c_lambda = c_lambda
+
+        self.generator = Generator(
+            embed_size=embed_size,
+            num_classes=num_classes,
+            image_size=image_size,
+            features_generator=feature_gen,
+            input_dim=z_dim,
+        )
+
+        self.critic = Discriminator(
+            num_classes=num_classes,
+            embed_size=embed_size,
+            image_size=image_size,
+            features_discriminator=feature_critic,
+            image_channel=image_channel,
+            label_channel=label_channel,
+        )
+
+        self.critic_losses = []
+        self.generator_losses = []
+        self.curr_step = 0
+
+        self.fixed_latent_space = torch.randn(25, z_dim, 1, 1)
+        self.fixed_label = torch.tensor([i % num_classes for i in range(25)])
+
+        self.save_hyperparameters()
+
+    def configure_optimizers(self):
+        # READ: https://lightning.ai/docs/pytorch/stable/common/optimization.html#use-multiple-optimizers-like-gans
+        # READ: https://lightning.ai/docs/pytorch/stable/model/manual_optimization.html
+        # READ: https://lightning.ai/docs/pytorch/stable/model/build_model_advanced.html
+        # READ: https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.core.LightningModule.html#lightning.pytorch.core.LightningModule.backward
+        # READ: https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#manual-backward
+        optimizer_G = optim.Adam(self.generator.parameters(), lr=self.hparams.learning_rate, betas=(self.hparams.beta_1, self.hparams.beta_2))
+        optimizer_C = optim.Adam(self.critic.parameters(), lr=self.hparams.learning_rate, betas=(self.hparams.beta_1, self.hparams.beta_2))
+
+        return optimizer_G, optimizer_C
+
+    def on_load_checkpoint(self, checkpoint):
+        # List of keys that we expect to load from the checkpoint
+        keys_to_load = ['critic_losses', 'generator_losses', 'curr_step', 'fixed_latent_space', 'fixed_label']
+
+        # Iterate over the keys and load them if they exist in the checkpoint
+        for key in keys_to_load:
+            if key in checkpoint:
+                setattr(self, key, checkpoint[key])
+
+    def on_save_checkpoint(self, checkpoint):
+        # Save the necessary variables to the checkpoint
+        checkpoint['critic_losses'] = self.critic_losses
+        checkpoint['generator_losses'] = self.generator_losses
+        checkpoint['curr_step'] = self.curr_step
+        checkpoint['fixed_latent_space'] = self.fixed_latent_space
+        checkpoint['fixed_label'] = self.fixed_label
+
+    def on_train_start(self):
+        if self.current_epoch == 0:
+            self.generator.apply(initialize_weights)
+            self.critic.apply(initialize_weights)
+
+    def training_step(self, batch, batch_idx):
+        # Get the optimizers
+        opt_generator, opt_critic = self.optimizers()
+
+        # Get data and labels
+        X, labels = batch
+
+        # Get the current batch size
+        batch_size = X.shape[0]
+
+        ##############################
+        # Train Critic ###############
+        ##############################
+        mean_critic_loss_for_this_iteration = 0
+
+        for _ in range(self.critic_repeats):
+            # Clean the gradients
+            opt_critic.zero_grad()
+
+            # Generate the noise.
+            noise = torch.randn(batch_size, self.hparams.z_dim, device=self.device)
+
+            # Generate fake images.
+            fake = self.generator(noise, labels)
+
+            # Get the Critic's predictions on the reals and fakes
+            critic_fake_pred = self.critic(fake.detach(), labels)
+            critic_real_pred = self.critic(X, labels)
+
+            # Calculate the Critic loss using WGAN-GP
+
+            # Generate epsilon for the interpolated images.
+            epsilon = torch.rand(batch_size, 1, 1, 1, device=self.device, requires_grad=True)
+
+            # Calculate the gradient penalty for the Critic model
+            gp = gradient_penalty(self.critic, labels, X, fake.detach(), epsilon)
+
+            # Calculate the full WGAN-GP loss for the Critic
+            critic_loss = calculate_critic_loss(
+                critic_fake_pred, critic_real_pred, gp, self.c_lambda
+            )
+
+            # Keep track of the average critic loss in this batch
+            mean_critic_loss_for_this_iteration += critic_loss.item() / self.critic_repeats
+
+            # Update the gradients of the Critic
+            # self.manual_backward(critic_loss, retain_graph=True)
+            self.manual_backward(critic_loss)  # no need for retain_graph=True here because fake.detach() already cuts the generator out of this backward pass; use retain_graph=True if not detaching
+
+            # Update the optimizer
+            opt_critic.step()
+
+        ##############################
+        # Train Generator ############
+        ##############################
+
+        # Clean the gradients
+        opt_generator.zero_grad()
+
+        # Generate the noise.
+        noise = torch.randn(batch_size, self.hparams.z_dim, device=self.device)
+
+        # Generate fake images.
+        fake = self.generator(noise, labels)
+
+        # Get the Critic's predictions on the fakes from the generator
+        generator_fake_predictions = self.critic(fake, labels)
+
+        # Calculate the loss for the Generator
+        generator_loss = calculate_generator_loss(generator_fake_predictions)
+
+        # Update the gradients of the Generator
+        self.manual_backward(generator_loss)
+
+        # Update the optimizer
+        opt_generator.step()
+
+        ##############################
+        # Visualization ##############
+        ##############################
+
+        if self.curr_step % self.hparams.display_step == 0 and self.curr_step > 0:
+            VISUALIZE = True
+            if VISUALIZE:
+                with torch.no_grad():
+                    fake_images_fixed = self.generator(
+                        self.fixed_latent_space.to(self.device),
+                        self.fixed_label.to(self.device)
+                    )
+
+                path_save = f"/kaggle/working/generates/generated-{self.curr_step}-step.png"
+                plot_images_from_tensor(fake_images_fixed, size=(3, self.image_size, self.image_size), show=False, save_path=path_save)
+                plot_images_from_tensor(X, size=(3, self.image_size, self.image_size), show=False)
+
+                print(f" ==== Critic Loss: {mean_critic_loss_for_this_iteration} ==== ")
+                print(f" ==== Generator Loss: {generator_loss.item()} ==== ")
+
+        self.curr_step += 1
+
+        ##############################
+        # Logging ####################
+        ##############################
+        # Log the Critic and Generator losses
+        self.log("critic_loss", mean_critic_loss_for_this_iteration, on_step=False, on_epoch=True, prog_bar=True)
+        self.log("generator_loss", generator_loss.item(), on_step=False, on_epoch=True, prog_bar=True)
+
+        # Store them in lists so they can be used later for visualization
+        self.critic_losses.append(mean_critic_loss_for_this_iteration)
+        self.generator_losses.append(generator_loss.item())
+
+    def forward(self, noise, labels):
+        return self.generator(noise, labels)
+
+    def predict_step(self, noise, labels):
+        return self.generator(noise, labels)
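
The commit does not include a launcher script, so here is a hedged sketch of how this LightningModule and the datamodule above might be wired together on Kaggle. Everything taken from config/core.py is named explicitly; the Trainer arguments, the c_lambda value, and the BETA_2 key are assumptions, not part of this commit.

# Hypothetical training entry point; assumes the modules defined in this commit.
import lightning as L
from config.core import config
from data.dataloader import ShoeSandalBoot
from training.train import ConditionalWGAN_GP

model = ConditionalWGAN_GP(
    image_channel=config.IMAGE_CHANNEL,
    label_channel=config.LABEL_CHANNEL,
    image_size=config.IMAGE_SIZE,
    learning_rate=config.LEARNING_RATE,
    z_dim=config.INPUT_Z_DIM,
    embed_size=config.EMBED_SIZE,
    num_classes=config.NUM_CLASSES,
    critic_repeats=config.CRITIC_REPEAT,
    feature_gen=config.FEATURES_GENERATOR,
    feature_critic=config.FEATURES_DISCRIMINATOR,
    c_lambda=10,           # assumed; the config key for this is not shown in the hunks above
    beta_1=config.BETA_1,
    beta_2=config.BETA_2,  # assumed to exist alongside BETA_1 in the unshown part of Config
    display_step=config.DISPLAY_STEP,
)

dm = ShoeSandalBoot(config.PATH_DATASET, max_samples=config.MAX_SAMPLES)
trainer = L.Trainer(max_epochs=300, accelerator="auto")
trainer.fit(model, datamodule=dm, ckpt_path=config.CKPT_PATH if config.LOAD_CHECKPOINT else None)
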
utility/helper.py CHANGED
@@ -1,7 +1,15 @@
 import torch
+import torch.nn as nn
+import cv2
+import imageio
+import os
+import matplotlib.pyplot as plt
 
 from config.core import config
 from models.generator import Generator
+from PIL import Image
+from torchvision.utils import make_grid
+
 
 def load_model_weights(checkpoint_path, model, device, prefix):
     """
@@ -61,3 +69,156 @@ def get_selected_value(label):
     """
     # Get the selected value from the options mapping based on the display label.
     return config.OPTIONS_MAPPING[label]
+
+
+def initialize_weights(model):
+    """
+    Initializes the weights of a model using a normal distribution.
+
+    Args:
+        model: The model to be initialized.
+
+    Returns:
+        None
+    """
+
+    for m in model.modules():
+        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d, nn.InstanceNorm2d)):
+            nn.init.normal_(m.weight.data, 0.0, 0.02)
+
+
+def plot_images_from_tensor(image_tensor, num_images=25, size=(1, 28, 28), nrow=5, show=True, save_path=None):
+    image_tensor = (image_tensor + 1) / 2
+    image_unflat = image_tensor.detach().cpu()
+    image_grid = make_grid(image_unflat[:num_images], nrow=nrow)
+    plt.imshow(image_grid.permute(1, 2, 0).squeeze())
+    plt.axis('off')
+    if save_path:
+        os.makedirs(os.path.dirname(save_path), exist_ok=True)
+        plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
+    if show:
+        plt.show()
+    else:
+        plt.close()
+
+
+def create_video(image_folder, video_name, fps, appearance_duration=None):
+    """
+    Creates a video from a sequence of images with customizable appearance duration.
+
+    Args:
+        image_folder (str): The path to the folder containing the images.
+        video_name (str): The name of the output video file.
+        fps (int): The frames per second of the video.
+        appearance_duration (int, optional): The desired appearance duration for each image in milliseconds.
+            If None, the default duration based on the frame rate is used.
+
+    Example:
+        image_folder = '/path/to/image/folder'
+        video_name = 'output_video.mp4'
+        fps = 12
+        appearance_duration = 200  # Appearance duration of 200 ms for each image
+
+        create_video(image_folder, video_name, fps, appearance_duration)
+    """
+
+    # Get a list of all image files in the folder
+    image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
+
+    # Sort the image files based on the step number
+    image_files = sorted(image_files, key=lambda x: int(x.split('-')[1].split('.')[0]))
+
+    # Load the first image to get the video size
+    image = cv2.imread(os.path.join(image_folder, image_files[0]))
+    height, width, layers = image.shape
+
+    # Create a VideoWriter object
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Specify the video codec
+    video = cv2.VideoWriter(video_name, fourcc, fps, (width, height))
+
+    # Write each image to the video with customizable appearance duration
+    for image_file in image_files:
+        image = cv2.imread(os.path.join(image_folder, image_file))
+        video.write(image)
+
+        if appearance_duration is not None:
+            # Calculate the number of frames for the desired appearance duration
+            num_frames = appearance_duration * fps // 1000
+            for _ in range(num_frames):
+                video.write(image)
+
+    # Release the video writer
+    video.release()
+
+
+def create_gif(image_folder, gif_name, fps, appearance_duration=None):
+    """
+    Creates a GIF from a sequence of images sorted by step number, with customizable appearance duration.
+
+    Args:
+        image_folder (str): The path to the folder containing the images.
+        gif_name (str): The name of the output GIF file.
+        fps (int): The frames per second of the GIF.
+        appearance_duration (int, optional): The desired appearance duration for each image in milliseconds.
+            If None, the default duration based on the frame rate is used.
+
+    Example:
+        image_folder = '/path/to/image/folder'
+        gif_name = 'output_animation.gif'
+        fps = 12
+        appearance_duration = 300  # Appearance duration of 300 ms for each image
+
+        create_gif(image_folder, gif_name, fps, appearance_duration)
+    """
+
+    # Get a list of all image files in the folder
+    image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
+
+    # Sort the image files based on the step number
+    image_files = sorted(image_files, key=lambda x: int(x.split('-')[1].split('.')[0]))
+
+    # Load the images into a list
+    images = []
+    for file in image_files:
+        images.append(imageio.imread(os.path.join(image_folder, file)))
+
+    # Create a list to store the repeated images
+    repeated_images = []
+
+    # Repeat each image for the desired duration
+    if appearance_duration is not None:
+        for image in images:
+            repeated_images.extend([image] * (appearance_duration * fps // 1000))
+    else:
+        repeated_images = images  # Default appearance duration (based on fps)
+
+    # Save the repeated images as a GIF
+    imageio.mimsave(gif_name, repeated_images, fps=fps)
+
+
+class PadToSquare:
+    """Pad an image to a square of the given size with a white background.
+
+    Args:
+        size (int): The target size for the output image.
+    """
+
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, img):
+        """Pad the input image to the target size with a white background.
+
+        Args:
+            img (PIL.Image.Image): The input image.
+
+        Returns:
+            PIL.Image.Image: The padded image.
+        """
+        # Create a white canvas
+        white_canvas = Image.new('RGB', (self.size, self.size), (255, 255, 255))
+
+        # Calculate the position to paste the image onto the white canvas
+        left = (self.size - img.width) // 2
+        top = (self.size - img.height) // 2
+
+        # Paste the image onto the canvas
+        white_canvas.paste(img, (left, top))
+
+        return white_canvas
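
Since training_step saves a PNG grid to /kaggle/working/generates/ every DISPLAY_STEP steps, these helpers can turn that folder into a progress animation after training. A hedged usage sketch (the output file names below are arbitrary; the input file names follow the generated-{step}-step.png pattern used in training/train.py):

# Hypothetical post-training usage of the helpers above.
from utility.helper import create_gif, create_video

image_folder = "/kaggle/working/generates"  # written by training_step
create_gif(image_folder, "training_progress.gif", fps=12, appearance_duration=300)
create_video(image_folder, "training_progress.mp4", fps=12)
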
utility/wgan_gp.py ADDED
@@ -0,0 +1,77 @@
+import torch
+
+# Gradient Penalty Calculation - Calculate Gradient of Critic Score
+def gradient_penalty(critic, labels, real_images, fake_images, epsilon):
+    """
+    This function calculates the gradient penalty for the WGAN-GP loss function.
+
+    Parameters:
+        critic (nn.Module): The critic model
+        labels (torch.tensor): The labels for the images
+        real_images (torch.tensor): The real images
+        fake_images (torch.tensor): The fake images
+        epsilon (torch.tensor): The interpolation parameter
+
+    Returns:
+        gradient_penalty (torch.tensor): The gradient penalty for the critic model
+    """
+
+    # Create the interpolated images as a weighted combination of real and fake images
+    interpolated_images = real_images * epsilon + fake_images * (1 - epsilon)
+
+    mixed_scores = critic(interpolated_images, labels)
+
+    create_real_label = torch.ones_like(mixed_scores)
+
+    gradient = torch.autograd.grad(
+        inputs=interpolated_images,
+        outputs=mixed_scores,
+        grad_outputs=create_real_label,
+        create_graph=True,
+        retain_graph=True,
+    )[0]
+
+    # Reshape each image in the batch into a 1D tensor (flatten the images)
+    gradient = gradient.view(len(gradient), -1)
+
+    # Calculate the L2 norm of the gradients
+    gradient_norm = gradient.norm(2, dim=1)
+
+    # Calculate the penalty as the mean squared distance of the norms from 1
+    gradient_penalty = torch.mean((gradient_norm - 1) ** 2)
+
+    return gradient_penalty
+
+
+# Critic Loss Calculation
+def calculate_critic_loss(critic_fake_prediction, critic_real_prediction, gradient_penalty, critic_lambda):
+    """
+    Calculates the critic loss: the negated difference between the mean real score and the mean fake score, plus the weighted gradient penalty.
+
+    Parameters:
+        critic_fake_prediction (torch.tensor): The critic predictions for the fake images
+        critic_real_prediction (torch.tensor): The critic predictions for the real images
+        gradient_penalty (torch.tensor): The gradient penalty for the critic model
+        critic_lambda (float): The coefficient for the gradient penalty
+
+    Returns:
+        critic_loss (torch.tensor): The critic loss
+    """
+    critic_loss = (
+        -(torch.mean(critic_real_prediction) - torch.mean(critic_fake_prediction)) + critic_lambda * gradient_penalty
+    )
+
+    return critic_loss
+
+
+# Generator Loss Calculation
+def calculate_generator_loss(critic_fake_prediction):
+    """
+    Calculates the generator loss: the negated mean of the critic predictions for the fake images.
+
+    Parameters:
+        critic_fake_prediction (torch.tensor): The critic predictions for the fake images
+
+    Returns:
+        generator_loss (torch.tensor): The generator loss
+    """
+    generator_loss = -1.0 * torch.mean(critic_fake_prediction)
+    return generator_loss
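
A quick, self-contained sanity check of gradient_penalty and calculate_critic_loss with a throwaway critic (the ToyCritic below is purely illustrative and not part of the repo); the penalty should come out as a non-negative scalar:

import torch
import torch.nn as nn
from utility.wgan_gp import gradient_penalty, calculate_critic_loss

class ToyCritic(nn.Module):
    # Minimal stand-in with the same (image, label) call signature as Discriminator.
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 1, kernel_size=4, stride=4)

    def forward(self, image, label):
        return self.conv(image).view(len(image), -1).mean(dim=1, keepdim=True)

critic = ToyCritic()
real = torch.randn(4, 3, 128, 128)
fake = torch.randn(4, 3, 128, 128)
labels = torch.randint(0, 3, (4,))
epsilon = torch.rand(4, 1, 1, 1, requires_grad=True)

gp = gradient_penalty(critic, labels, real, fake, epsilon)
loss = calculate_critic_loss(critic(fake, labels), critic(real, labels), gp, critic_lambda=10)
print(gp.item() >= 0, loss.shape)  # True, torch.Size([])
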
weights/{epoch=957-step=1164300.ckpt → epoch=299-step=450000.ckpt} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1602e9cb80fcfbc39ed2c5c74873744a546cbab346730aaa0f25b02140bc1d2f
-size 157799852
+oid sha256:8db76fefe85e1e6077bf8aa7532699d379ea0dc1f0d1b6130ebac9ca3b814303
+size 640723340
weights/source.txt CHANGED
@@ -1,2 +1,2 @@
-using a weight from kaggle after training 957 epoch:
-- https://www.kaggle.com/datasets/dimensioncore/conditional-gan-part-2/versions/2397
+- Notebook: [VERSION 19] https://www.kaggle.com/code/dimensioncore/57894-conditional-gan-try-part-2?scriptVersionId=197222851
+- Checkpoint: [VERSION 3016] https://www.kaggle.com/datasets/dimensioncore/conditional-gan-part-2/versions/3016