Muhammad Naufal Rizqullah committed
Commit ae0af75 · 1 Parent(s): b2c027e

first commit

.gitignore ADDED
@@ -0,0 +1,30 @@
+ .idea
+ .ipynb_checkpoints
+ .mypy_cache
+ .vscode
+ __pycache__
+ .pytest_cache
+ htmlcov
+ dist
+ site
+ .coverage
+ coverage.xml
+ .netlify
+ test.db
+ log.txt
+ Pipfile.lock
+ env3.*
+ env
+ docs_build
+ site_build
+ venv
+ docs.zip
+ archive.zip
+
+ # vim temporary files
+ *~
+ .*.sw?
+ .cache
+
+ # macOS
+ .DS_Store
CHANGELOG.md ADDED
@@ -0,0 +1,30 @@
+ # Changelog
+
+ All notable changes to this project will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [Unreleased] - 2024-10-03
+ ### Plan
+ Make the core modular, so that training on Kaggle only requires pulling the repo, setting a dataset config (Kaggle paths), and running training / inference from a script with arguments.
+
+ ## [Unreleased] - 2024-10-18
+ Built up the app so it can run a demo. After that, testing showed the model output looks poor, so it may be back to training again.
+
+ Strangely, the training results look fine when inspecting the generated images directly (visualization), but the interface performs badly.
+
+ ## [0.0.1] - 2024-10-19
+ After reviewing the dataset, the earlier problem turned out to be that training used 1024x1024 close-up face images, so an input that contains a face plus part of the body confuses the model. Inputs similar to the provided examples are required for good results.
+ ### Feature
+ - Turn a close-up face image into a comic style.
+
+ ### Changed
+ - The examples were replaced so users get some inspiration for the input.
+ - Two outputs: first the original image after the transform, second the image returned by the model.
+
+ ### Removed
+ - Old examples.
+
+ ### Fixed
+ - Resize did not produce 256x256 because the size was not given as a tuple; with a single value only one edge is resized (the aspect ratio is kept).
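For context on the Fixed entry above, a minimal sketch (not part of the commit) of the torchvision `Resize` behavior it refers to; the input size is illustrative:

```python
from PIL import Image
import torchvision.transforms as T

img = Image.new("RGB", (1024, 768))   # illustrative input size (width, height)

# A single int resizes only the shorter edge and keeps the aspect ratio,
# so the result is not square:
print(T.Resize(256)(img).size)        # (341, 256)

# A (height, width) tuple forces the exact shape the model expects:
print(T.Resize((256, 256))(img).size) # (256, 256)
```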
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Muhammad Naufal Rizqullah
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,77 @@
+ import torch
+ import numpy as np
+ import gradio as gr
+ import os
+
+ from PIL import Image
+ import torchvision.transforms as T
+ from config.core import config
+ from utility.helper import load_model_weights, init_generator_model
+
+ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
+ model = init_generator_model()
+ model = load_model_weights(
+     model=model,
+     checkpoint_path=config.CKPT_PATH,
+     device=device,
+     prefix="gen",
+ )
+
+ # Transformation
+ transform_face = T.Compose([
+     T.Resize((config.IMAGE_SIZE, config.IMAGE_SIZE)),
+     T.ToTensor(),
+     T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+ ])
+
+ def inference(image: Image):
+     # Transform the input image and add a batch dimension
+     img = transform_face(image)
+     img_un = img.unsqueeze(0)
+
+     image_transform = img_un * 0.5 + 0.5  # Undo the Tanh-range normalization
+     im_detach = image_transform.detach().cpu().squeeze(0)
+     im_permute = im_detach.permute(1, 2, 0)
+     im_array = im_permute.numpy()
+
+     # Scale values to 0-255 range
+     im_array = (im_array * 255).astype(np.uint8)
+
+     # Convert numpy array to PIL Image
+     im_pil = Image.fromarray(im_array)
+
+     # Run the model on the transformed image
+     model.eval()
+     with torch.inference_mode():
+         c2f = model(img_un)
+
+     c2f = c2f * 0.5 + 0.5  # Undo the Tanh-range normalization
+     image_unflat = c2f.detach().cpu().squeeze(0)  # Remove batch dimension
+     image = image_unflat.permute(1, 2, 0)  # Permute to (H, W, C)
+
+     # Convert image to numpy array
+     image_array = image.numpy()
+
+     # Scale values to 0-255 range
+     image_array = (image_array * 255).astype(np.uint8)
+
+     # Convert numpy array to PIL Image
+     image = Image.fromarray(image_array)
+
+     return im_pil, image
+
+ demo = gr.Interface(
+     fn=inference,
+     inputs=gr.Image(type="pil"),
+     outputs=[
+         gr.Image(label="Original after Transform"),
+         gr.Image(label="Converted by Model")
+     ],
+     title="Pix2Pix Face to Comic",
+     description="An implementation of Pix2Pix from scratch in PyTorch",
+     examples=[f"data/examples/{i}" for i in os.listdir("data/examples") if i.endswith(('.png', '.jpg', '.jpeg', '.gif'))]
+ )
+
+ demo.launch()
+
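A possible follow-up, not part of this commit: `app.py` selects a `device` but never moves the model or the input batch onto it, so inference always runs on CPU. A hedged sketch of what that move could look like, reusing the names from `app.py` above:

```python
import torch

# Sketch only: assumes `model`, `device`, and a batched tensor `img_un` as defined in app.py.
model = model.to(device)  # place the generator on the GPU when one is available

def run_generator(img_un: torch.Tensor) -> torch.Tensor:
    model.eval()
    with torch.inference_mode():
        c2f = model(img_un.to(device))  # keep input and weights on the same device
    return c2f.cpu()  # move back to CPU for the numpy/PIL post-processing
```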
config/__init__.py ADDED
File without changes
config/core.py ADDED
@@ -0,0 +1,27 @@
+ import torch
+ from pydantic_settings import BaseSettings
+
+ class Config(BaseSettings):
+     PATH_FACE: str = "/kaggle/input/comic-faces-paired-synthetic-v2/face2comics_v2.0.0_by_Sxela/face2comics_v2.0.0_by_Sxela/faces"
+     PATH_COMIC: str = "/kaggle/input/comic-faces-paired-synthetic-v2/face2comics_v2.0.0_by_Sxela/face2comics_v2.0.0_by_Sxela/comics"
+     PATH_OUTPUT: str = "/kaggle/working/generates"
+
+     IMAGE_CHANNELS: int = 3
+
+     FEATURE_DISCRIMINATOR: list = [64, 128, 256, 512]
+     FEATURE_GENERATOR: int = 64
+
+     IMAGE_SIZE: int = 256
+     BATCH_SIZE: int = 128
+     DISPLAY_STEP: int = 500
+     MAX_SAMPLES: int = 5000
+
+     LEARNING_RATE: float = 2e-4
+     L1_LAMBDA: int = 100
+     NUM_EPOCH: int = 500
+
+     LOAD_CHECKPOINT: bool = False
+     CKPT_PATH: str = "weights/epoch=266-step=42186.ckpt"  # forward slash so the path also resolves on Linux
+
+
+ config = Config()
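Because `Config` subclasses pydantic-settings' `BaseSettings`, any field can also be overridden through environment variables instead of editing this file. A small sketch; the values are only examples:

```python
import os

# pydantic-settings matches environment variables to field names (case-insensitive),
# so exporting these before the import changes the resulting config object.
os.environ["BATCH_SIZE"] = "16"
os.environ["MAX_SAMPLES"] = "1000"

from config.core import config
print(config.BATCH_SIZE, config.MAX_SAMPLES)  # -> 16 1000
```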
data/dataloader.py ADDED
@@ -0,0 +1,68 @@
+ import lightning as L
+ import torchvision.transforms as T
+ import os
+
+ from torch.utils.data import DataLoader, Subset
+ from data.dataset import FaceToComicDataset
+
+ class FaceToComicDataModule(L.LightningDataModule):
+     def __init__(
+         self,
+         face_path,
+         comic_path,
+         image_size=(128, 128),
+         batch_size=32,
+         max_samples=None
+     ):
+         super().__init__()
+
+         self.face_dir = face_path
+         self.comic_dir = comic_path
+         self.image_size = image_size
+         self.batch_size = batch_size
+         self.max_samples = max_samples
+
+         self.transform_face = T.Compose([
+             T.Resize(self.image_size),
+             T.ToTensor(),
+             T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+         ])
+
+         self.transform_comic = T.Compose([
+             T.Resize(self.image_size),
+             T.ToTensor(),
+             T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+         ])
+
+         self.face2comic = None
+
+     def prepare_data(self):
+         # No need to download or prepare data, as it's already present in the directories
+         pass
+
+     def setup(self, stage=None):
+         if stage == "fit" or stage is None:
+             dataset = FaceToComicDataset(
+                 face_path=self.face_dir,
+                 comic_path=self.comic_dir,
+                 transform_face=self.transform_face,
+                 transform_comic=self.transform_comic
+             )
+
+             # To Limit Dataset
+             if self.max_samples:
+                 print(f"[INFO] Dataset is Limited to {self.max_samples} Samples")
+                 self.face2comic = Subset(dataset, range(min(len(dataset), self.max_samples)))
+             else:
+                 self.face2comic = dataset
+
+     def train_dataloader(self):
+         return DataLoader(self.face2comic, batch_size=self.batch_size, num_workers=os.cpu_count(), shuffle=True)
+
+     def val_dataloader(self):
+         # Implement if you need validation during training
+         pass
+
+     def test_dataloader(self):
+         # Implement if you need testing after training
+         pass
data/dataset.py ADDED
@@ -0,0 +1,39 @@
+ import os
+ from PIL import Image
+
+ from torch.utils.data import Dataset
+
+ class FaceToComicDataset(Dataset):
+     def __init__(self, face_path, comic_path, transform_face=None, transform_comic=None):
+         super().__init__()
+         self.face_dir = face_path
+         self.comic_dir = comic_path
+
+         self.face_list_files = os.listdir(self.face_dir)
+         self.comic_list_files = os.listdir(self.comic_dir)
+
+         # Create a dictionary for quick lookup of comic files
+         self.comic_dict = {comic_file: idx for idx, comic_file in enumerate(self.comic_list_files)}
+
+         # Filter out files that don't have a corresponding pair (keep only files that have a pair)
+         self.face_list_files = [f for f in self.face_list_files if f in self.comic_list_files]
+
+         self.transform_face = transform_face
+         self.transform_comic = transform_comic
+
+     def __getitem__(self, index):
+         face_file = self.face_list_files[index]
+         comic_file = self.comic_list_files[self.comic_dict[face_file]]
+
+         face_image = Image.open(os.path.join(self.face_dir, face_file))
+         comic_image = Image.open(os.path.join(self.comic_dir, comic_file))
+
+         if self.transform_face:
+             face_image = self.transform_face(face_image)
+         if self.transform_comic:
+             comic_image = self.transform_comic(comic_image)
+
+         return face_image, comic_image
+
+     def __len__(self):
+         return len(self.face_list_files)
data/examples/100.jpg ADDED
data/examples/1001.jpg ADDED
data/examples/1020.jpg ADDED
data/examples/1021.jpg ADDED
models/__init__.py ADDED
File without changes
models/base.py ADDED
@@ -0,0 +1,35 @@
+ import torch.nn as nn
+
+
+ class Block(nn.Module):
+     def __init__(self, in_channels, out_channels, down=True, act="relu", use_dropout=False):
+         super().__init__()
+
+         self.conv = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, 4, 2, 1, bias=False, padding_mode="reflect")
+             if down
+             else nn.ConvTranspose2d(in_channels, out_channels, 4, 2, 1, bias=False),
+             nn.BatchNorm2d(out_channels),
+             nn.ReLU() if act == "relu" else nn.LeakyReLU(0.2),
+         )
+
+         self.use_dropout = use_dropout
+         self.dropout = nn.Dropout(0.5)
+         self.down = down
+
+     def forward(self, x):
+         x = self.conv(x)
+         return self.dropout(x) if self.use_dropout else x  # apply dropout only when enabled
+
+ class BlockCNN(nn.Module):
+     def __init__(self, in_channels, out_channels, stride=2):
+         super().__init__()
+
+         self.conv = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, 4, stride, bias=False, padding_mode="reflect"),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2),
+         )
+
+     def forward(self, x):
+         return self.conv(x)
models/discriminator.py ADDED
@@ -0,0 +1,48 @@
+ import torch
+ import torch.nn as nn
+
+ from models.base import BlockCNN
+
+
+ class Discriminator(nn.Module):
+     def __init__(self, in_channels=3, features=[64, 128, 256, 512], kernel_size=4, activation_slope=0.2):
+         super().__init__()
+
+         self.initial = nn.Sequential(
+             nn.Conv2d(
+                 in_channels * 2,
+                 features[0],
+                 kernel_size,
+                 stride=2,
+                 padding=1,
+                 padding_mode="reflect",
+             ),
+             nn.LeakyReLU(activation_slope),
+         )
+
+         layers = []
+         in_channels = features[0]
+         for feature in features[1:]:
+             layers.append(
+                 BlockCNN(in_channels, feature, stride=1 if feature == features[-1] else 2)
+             )
+             in_channels = feature
+
+         layers.append(
+             nn.Conv2d(
+                 in_channels, 1, kernel_size=kernel_size, stride=1, padding=1, padding_mode="reflect"
+             )
+         )
+
+         self.model = nn.Sequential(*layers)
+
+     def forward(self, x, y):
+         x = torch.cat([x, y], dim=1)
+         x = self.initial(x)
+         return self.model(x)
+
+ def test():
+     # Test Case for Discriminator Model
+     x = torch.randn((1, 3, 256, 256))
+     disc = Discriminator()
+     print(f"Discriminator Output Shape: {disc(x, x).shape}")
models/generator.py ADDED
@@ -0,0 +1,68 @@
+ import torch
+ import torch.nn as nn
+
+ from models.base import Block
+
+
+ class Generator(nn.Module):
+     def __init__(self, in_channels=3, features=64):
+         super().__init__()
+
+         self.initial_down = nn.Sequential(
+             nn.Conv2d(in_channels, features, 4, 2, 1, padding_mode="reflect"),
+             nn.LeakyReLU(0.2),
+         )
+
+         self.down1 = Block(features, features * 2, down=True, act="leaky", use_dropout=False)
+         self.down2 = Block(features * 2, features * 4, down=True, act="leaky", use_dropout=False)
+         self.down3 = Block(features * 4, features * 8, down=True, act="leaky", use_dropout=False)
+         self.down4 = Block(features * 8, features * 8, down=True, act="leaky", use_dropout=False)
+         self.down5 = Block(features * 8, features * 8, down=True, act="leaky", use_dropout=False)
+         self.down6 = Block(features * 8, features * 8, down=True, act="leaky", use_dropout=False)
+
+         self.bottleneck = nn.Sequential(
+             nn.Conv2d(features * 8, features * 8, 4, 2, 1),
+             nn.ReLU()
+         )
+
+         self.up1 = Block(features * 8, features * 8, down=False, act="relu", use_dropout=True)
+         self.up2 = Block(features * 8 * 2, features * 8, down=False, act="relu", use_dropout=True)
+         self.up3 = Block(features * 8 * 2, features * 8, down=False, act="relu", use_dropout=True)
+         self.up4 = Block(features * 8 * 2, features * 8, down=False, act="relu", use_dropout=False)
+         self.up5 = Block(features * 8 * 2, features * 4, down=False, act="relu", use_dropout=False)
+         self.up6 = Block(features * 4 * 2, features * 2, down=False, act="relu", use_dropout=False)
+         self.up7 = Block(features * 2 * 2, features, down=False, act="relu", use_dropout=False)
+
+         self.final_up = nn.Sequential(
+             nn.ConvTranspose2d(features * 2, in_channels, kernel_size=4, stride=2, padding=1),
+             nn.Tanh(),
+         )
+
+     def forward(self, x):
+         d1 = self.initial_down(x)
+         d2 = self.down1(d1)
+         d3 = self.down2(d2)
+         d4 = self.down3(d3)
+         d5 = self.down4(d4)
+         d6 = self.down5(d5)
+         d7 = self.down6(d6)
+
+         bottleneck = self.bottleneck(d7)
+
+         up1 = self.up1(bottleneck)
+         up2 = self.up2(torch.cat([up1, d7], 1))
+         up3 = self.up3(torch.cat([up2, d6], 1))
+         up4 = self.up4(torch.cat([up3, d5], 1))
+         up5 = self.up5(torch.cat([up4, d4], 1))
+         up6 = self.up6(torch.cat([up5, d3], 1))
+         up7 = self.up7(torch.cat([up6, d2], 1))
+
+         final_up = self.final_up(torch.cat([up7, d1], 1))
+
+         return final_up
+
+ def test():
+     # Test Case for Generator Model
+     x = torch.randn((1, 3, 256, 256))
+     gen = Generator()
+     print(f"Generator Output Shape: {gen(x).shape}")
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ torch
+ pytorch-lightning
+ python-multipart
+ fastapi
+ pydantic
+ pydantic-settings
+ opencv-python==4.10.0
+ imageio==2.33.1
train.py ADDED
@@ -0,0 +1,82 @@
+ import argparse
+ import lightning as L
+
+ from config.core import config
+ from training.model import Pix2Pix
+ from training.callbacks import MyCustomSavingCallback
+ from data.dataloader import FaceToComicDataModule
+
+
+ # Add argparser for config params
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--load_checkpoint", action='store_true', help="Load checkpoint if this flag is set. If not set, start training from scratch.")
+ parser.add_argument("--no_load_checkpoint", action='store_false', dest='load_checkpoint', help="Do not load checkpoint. If set, start training from scratch.")
+
+ parser.add_argument("--ckpt_path", type=str, default=config.CKPT_PATH, help="Path to checkpoint file. If load_checkpoint is set, this path will be used to load the checkpoint.")
+ parser.add_argument("--learning_rate", type=float, default=config.LEARNING_RATE, help="Learning rate for Adam optimizer.")
+ parser.add_argument("--l1_lambda", type=int, default=config.L1_LAMBDA, help="Scale factor for L1 loss.")
+ parser.add_argument("--features_discriminator", type=int, nargs='+', default=config.FEATURE_DISCRIMINATOR, help="List of feature sizes for the discriminator network.")
+ parser.add_argument("--features_generator", type=int, default=config.FEATURE_GENERATOR, help="Feature size for the generator network.")
+ parser.add_argument("--display_step", type=int, default=config.DISPLAY_STEP, help="Interval of epochs to display loss and save examples.")
+ parser.add_argument("--num_epoch", type=int, default=config.NUM_EPOCH, help="Number of epochs to train for.")
+ parser.add_argument("--path_face", type=str, default=config.PATH_FACE, help="Path to folder containing face images.")
+ parser.add_argument("--path_comic", type=str, default=config.PATH_COMIC, help="Path to folder containing comic images.")
+ parser.add_argument("--image_size", type=int, default=config.IMAGE_SIZE, help="Size of input images.")
+ parser.add_argument("--batch_size", type=int, default=config.BATCH_SIZE, help="Batch size for training.")
+ parser.add_argument("--max_samples", type=int, default=config.MAX_SAMPLES, help="Maximum number of samples to use for training. If set to None, all samples will be used.")
+
+ args = parser.parse_args()
+
+ config.LOAD_CHECKPOINT = args.load_checkpoint if args.load_checkpoint is not None else config.LOAD_CHECKPOINT
+ config.CKPT_PATH = args.ckpt_path
+ config.LEARNING_RATE = args.learning_rate
+ config.L1_LAMBDA = args.l1_lambda
+ config.FEATURE_DISCRIMINATOR = args.features_discriminator
+ config.FEATURE_GENERATOR = args.features_generator
+ config.DISPLAY_STEP = args.display_step
+ config.NUM_EPOCH = args.num_epoch
+ config.PATH_FACE = args.path_face
+ config.PATH_COMIC = args.path_comic
+ config.IMAGE_SIZE = args.image_size
+ config.BATCH_SIZE = args.batch_size
+ config.MAX_SAMPLES = args.max_samples
+
+ # Initialize the Lightning model
+ model = Pix2Pix(
+     in_channels=3,
+     learning_rate=config.LEARNING_RATE,
+     l1_lambda=config.L1_LAMBDA,
+     features_discriminator=config.FEATURE_DISCRIMINATOR,
+     features_generator=config.FEATURE_GENERATOR,
+     display_step=config.DISPLAY_STEP,
+ )
+
+ # Setup Trainer
+ n_log = None
+
+ trainer = L.Trainer(
+     accelerator="auto",
+     devices="auto",
+     strategy="auto",
+     log_every_n_steps=n_log,
+     max_epochs=config.NUM_EPOCH,
+     callbacks=[MyCustomSavingCallback()],
+     default_root_dir="/kaggle/working/",
+     precision="16-mixed",
+     # fast_dev_run=True
+ )
+
+ # Lightning DataModule
+ dm = FaceToComicDataModule(
+     face_path=config.PATH_FACE,
+     comic_path=config.PATH_COMIC,
+     image_size=(config.IMAGE_SIZE, config.IMAGE_SIZE),
+     batch_size=config.BATCH_SIZE,
+     max_samples=None
+ )
+
+ # Training
+ if config.LOAD_CHECKPOINT:
+     trainer.fit(model, datamodule=dm, ckpt_path=config.CKPT_PATH)
+ else:
+     trainer.fit(model, datamodule=dm)
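As a usage sketch, the flags defined in `train.py` could be combined like this (dataset paths fall back to the defaults in `config/core.py`, so the values shown are only examples):

```
python train.py --batch_size 16 --num_epoch 100 --max_samples 5000 --no_load_checkpoint
```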
training/__init__.py ADDED
File without changes
training/callbacks.py ADDED
@@ -0,0 +1,10 @@
+ from lightning.pytorch.callbacks import Callback
+ from utility.helper import update_version_kaggle_dataset
+
+ class MyCustomSavingCallback(Callback):
+     def __init__(self):
+         super().__init__()
+
+     def on_save_checkpoint(self, trainer, pl_module, checkpoint):
+         super().on_save_checkpoint(trainer, pl_module, checkpoint)
+         update_version_kaggle_dataset()
training/model.py ADDED
@@ -0,0 +1,102 @@
+ import torch
+ import torch.nn as nn
+ import lightning as L
+ import torch.optim as optim
+
+ from models.generator import Generator
+ from models.discriminator import Discriminator
+ from utility.helper import save_some_examples
+
+
+ class Pix2Pix(L.LightningModule):
+     def __init__(self, in_channels, learning_rate, l1_lambda, features_generator, features_discriminator, display_step):
+         super().__init__()
+
+         self.automatic_optimization = False
+
+         self.gen = Generator(
+             in_channels=in_channels,
+             features=features_generator
+         )
+         self.disc = Discriminator(
+             in_channels=in_channels,
+             features=features_discriminator
+         )
+
+         self.loss_fn = nn.BCEWithLogitsLoss()
+
+         self.discriminator_losses = []
+         self.generator_losses = []
+         self.curr_step = 0
+
+         self.bce = nn.BCEWithLogitsLoss()
+         self.l1_loss = nn.L1Loss()
+
+         self.save_hyperparameters()
+
+
+     def configure_optimizers(self):
+         optimizer_G = optim.Adam(self.gen.parameters(), lr=self.hparams.learning_rate, betas=(0.5, 0.999))
+         optimizer_D = optim.Adam(self.disc.parameters(), lr=self.hparams.learning_rate, betas=(0.5, 0.999))
+
+         return optimizer_G, optimizer_D
+
+     def on_load_checkpoint(self, checkpoint):
+         # List of keys that you expect to load from the checkpoint
+         keys_to_load = ['discriminator_losses', 'generator_losses', 'curr_step']
+
+         # Iterate over the keys and load them if they exist in the checkpoint
+         for key in keys_to_load:
+             if key in checkpoint:
+                 setattr(self, key, checkpoint[key])
+
+     def on_save_checkpoint(self, checkpoint):
+         # Save the current state of the model
+         checkpoint['discriminator_losses'] = self.discriminator_losses
+         checkpoint['generator_losses'] = self.generator_losses
+         checkpoint['curr_step'] = self.curr_step
+
+     def training_step(self, batch, batch_idx):
+         # Get the Optimizers
+         opt_generator, opt_discriminator = self.optimizers()
+
+         X, y = batch
+
+         # Train Discriminator
+         y_fake = self.gen(X)
+         D_real = self.disc(X, y)
+         D_fake = self.disc(X, y_fake.detach())
+
+         D_real_loss = self.loss_fn(D_real, torch.ones_like(D_real))
+         D_fake_loss = self.loss_fn(D_fake, torch.zeros_like(D_fake))
+         D_loss = (D_real_loss + D_fake_loss) / 2
+
+         opt_discriminator.zero_grad()
+         self.manual_backward(D_loss)
+         opt_discriminator.step()
+
+         self.log("D_loss", D_loss.item(), on_step=False, on_epoch=True, prog_bar=True)
+         self.discriminator_losses.append(D_loss.item())
+
+         # Train Generator
+         D_fake = self.disc(X, y_fake)
+         G_fake_loss = self.bce(D_fake, torch.ones_like(D_fake))
+
+         L1 = self.l1_loss(y_fake, y) * self.hparams.l1_lambda
+         G_loss = G_fake_loss + L1
+
+         opt_generator.zero_grad()
+         self.manual_backward(G_loss)
+         opt_generator.step()
+
+         self.log("G_loss", G_loss.item(), on_step=False, on_epoch=True, prog_bar=True)
+         self.generator_losses.append(G_loss.item())
+
+         self.log("Current_Step", self.curr_step, on_step=False, on_epoch=True, prog_bar=True)
+
+         # Visualize
+         if self.curr_step % self.hparams.display_step == 0 and self.curr_step > 0:
+             save_some_examples(self.gen, batch, self.current_epoch)
+
+         self.curr_step += 1
+
utility/__init__.py ADDED
File without changes
utility/helper.py ADDED
@@ -0,0 +1,208 @@
+ import torch
+ import torch.nn as nn
+ import cv2
+ import imageio
+ import os
+ import subprocess
+
+ from config.core import config
+ from models.generator import Generator
+ from torchvision.utils import save_image
+
+
+ def save_some_examples(generator_model, batch, epoch, folder_path=config.PATH_OUTPUT, num_images=15):
+     """
+     Save some examples of the generator's output.
+
+     Parameters:
+         generator_model (nn.Module): The generator model.
+         batch (tuple): The batch of input and target images as a tuple of tensors.
+         epoch (int): The current epoch.
+         folder_path (str): The folder path to save the examples to. Defaults to config.PATH_OUTPUT.
+         num_images (int): The number of images to save. Defaults to 15.
+     """
+
+     # Ensure the folder exists
+     os.makedirs(folder_path, exist_ok=True)
+
+     x, y = batch  # Unpack the batch
+
+     # Limit the number of images to the specified num_images
+     x = x[:num_images]
+     y = y[:num_images]
+
+     generator_model.eval()
+
+     with torch.inference_mode():
+         y_fake = generator_model(x)
+         y_fake = y_fake * 0.5 + 0.5  # Remove normalization by tanh
+
+         # Create 3x5 grid for generated images
+         save_image(y_fake, folder_path + f"/y_gen_{epoch}.png", nrow=5)  # Save Generated Image
+
+         # Create 3x5 grid for input images
+         save_image(x * 0.5 + 0.5, folder_path + f"/input_{epoch}.png", nrow=5)  # Save Real Image
+
+     generator_model.train()
+
+ def update_version_kaggle_dataset():
+     # Make Metadata json
+     subprocess.run(['kaggle', 'datasets', 'init'], check=True)
+
+     # Write new metadata
+     with open('/kaggle/working/dataset-metadata.json', 'w') as json_fid:
+         json_fid.write(f'{{\n  "title": "Update Logs Pix2Pix",\n  "id": "muhammadnaufal/pix2pix",\n  "licenses": [{{"name": "CC0-1.0"}}]}}')
+
+     # Push new version
+     subprocess.run(['kaggle', 'datasets', 'version', '-m', 'Updated Dataset', '--quiet', '--dir-mode', 'tar'], check=True)
+
+
+ def init_generator_model():
+     """
+     Initializes and returns the Generator model.
+
+     Args:
+         None.
+
+     Returns:
+         Generator: The initialized Generator model.
+     """
+     model = Generator(
+         in_channels=config.IMAGE_CHANNELS,
+         features=config.FEATURE_GENERATOR,
+     )
+
+     return model
+
+
+ def load_model_weights(checkpoint_path, model, device, prefix):
+     """
+     Load specific weights from a PyTorch Lightning checkpoint into a model.
+
+     Parameters:
+         checkpoint_path (str): Path to the checkpoint file.
+         model (torch.nn.Module): The model instance to load weights into.
+         prefix (str): The prefix in the checkpoint's state_dict keys to filter by and remove.
+
+     Returns:
+         model (torch.nn.Module): The model with loaded weights.
+     """
+     # Load the checkpoint
+     checkpoint = torch.load(checkpoint_path, map_location=device)
+
+     # Extract and modify the state_dict keys to match the model's keys
+     model_weights = {k.replace(f"{prefix}.", ""): v for k, v in checkpoint["state_dict"].items() if k.startswith(f"{prefix}.")}
+
+     # Load the weights into the model
+     model.load_state_dict(model_weights)
+
+     return model
+
+
+ def initialize_weights(model):
+     """
+     Initializes the weights of a model using a normal distribution.
+
+     Args:
+         model: The model to be initialized.
+
+     Returns:
+         None
+     """
+
+     for m in model.modules():
+         if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.BatchNorm2d, nn.InstanceNorm2d)):
+             nn.init.normal_(m.weight.data, 0.0, 0.02)
+
+
+ def create_video(image_folder, video_name, fps, appearance_duration=None):
+     """
+     Creates a video from a sequence of images with customizable appearance duration.
+
+     Args:
+         image_folder (str): The path to the folder containing the images.
+         video_name (str): The name of the output video file.
+         fps (int): The frames per second of the video.
+         appearance_duration (int, optional): The desired appearance duration for each image in milliseconds.
+             If None, the default duration based on frame rate is used.
+
+     Example:
+         image_folder = '/path/to/image/folder'
+         video_name = 'output_video.mp4'
+         fps = 12
+         appearance_duration = 200  # Appearance duration of 200ms for each image
+
+         create_video(image_folder, video_name, fps, appearance_duration)
+     """
+
+     # Get a list of all image files in the folder
+     image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
+
+     # Sort the image files based on the step number
+     image_files = sorted(image_files, key=lambda x: int(x.split('-')[1].split('.')[0]))
+
+     # Load the first image to get the video size
+     image = cv2.imread(os.path.join(image_folder, image_files[0]))
+     height, width, layers = image.shape
+
+     # Create a VideoWriter object
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Specify the video codec
+     video = cv2.VideoWriter(video_name, fourcc, fps, (width, height))
+
+     # Write each image to the video with customizable appearance duration
+     for image_file in image_files:
+         image = cv2.imread(os.path.join(image_folder, image_file))
+         video.write(image)
+
+         if appearance_duration is not None:
+             # Calculate the number of frames for the desired appearance duration
+             num_frames = appearance_duration * fps // 1000
+             for _ in range(num_frames):
+                 video.write(image)
+
+     # Release the video writer
+     video.release()
+
+ def create_gif(image_folder, gif_name, fps, appearance_duration=None):
+     """
+     Creates a GIF from a sequence of images sorted by step number, with customizable appearance duration.
+
+     Args:
+         image_folder (str): The path to the folder containing the images.
+         gif_name (str): The name of the output GIF file.
+         fps (int): The frames per second of the GIF.
+         appearance_duration (int, optional): The desired appearance duration for each image in milliseconds.
+             If None, the default duration based on frame rate is used.
+
+     Example:
+         image_folder = '/path/to/image/folder'
+         gif_name = 'output_animation.gif'
+         fps = 12
+         appearance_duration = 300  # Appearance duration of 300ms for each image
+
+         create_gif(image_folder, gif_name, fps, appearance_duration)
+     """
+
+     # Get a list of all image files in the folder
+     image_files = [f for f in os.listdir(image_folder) if f.endswith('.png')]
+
+     # Sort the image files based on the step number
+     image_files = sorted(image_files, key=lambda x: int(x.split('-')[1].split('.')[0]))
+
+     # Load the images into a list
+     images = []
+     for file in image_files:
+         images.append(imageio.imread(os.path.join(image_folder, file)))
+
+     # Create a list to store the repeated images
+     repeated_images = []
+
+     # Repeat each image for the desired duration
+     if appearance_duration is not None:
+         for image in images:
+             repeated_images.extend([image] * (appearance_duration * fps // 1000))
+     else:
+         repeated_images = images  # Default appearance duration (based on fps)
+
+     # Save the repeated images as a GIF
+     imageio.mimsave(gif_name, repeated_images, fps=fps)
weights/epoch=266-step=42186.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac94fc32bc10114294d5b0fe772847d1f6f3f83f28eaac13154ec2a99a13afec
+ size 686714944
weights/source.txt ADDED
@@ -0,0 +1 @@
+ Dataset (private): https://www.kaggle.com/datasets/muhammadnaufal/pix2pix