Spaces:

farrell236
/

RetinaGAN

Sleeping

App Files Files Community

farrell236 committed on May 2, 2023

Commit

2aa6515

1 Parent(s): 3699172

Upload 37 files

Browse files

Files changed (37) hide show

assets/GauGAN.png +0 -0
assets/RetinaGAN_pipeline.png +0 -0
assets/cStyleGAN.png +0 -0
assets/sample.jpeg +0 -0
assets/sample_images/image_class_0_batch_0_sample_0.png +0 -0
assets/sample_images/image_class_0_batch_0_sample_1.png +0 -0
assets/sample_images/image_class_0_batch_1_sample_0.png +0 -0
assets/sample_images/image_class_0_batch_1_sample_1.png +0 -0
assets/sample_images/image_class_1_batch_0_sample_0.png +0 -0
assets/sample_images/image_class_1_batch_0_sample_1.png +0 -0
assets/sample_images/image_class_1_batch_1_sample_0.png +0 -0
assets/sample_images/image_class_1_batch_1_sample_1.png +0 -0
assets/sample_images/image_class_2_batch_0_sample_0.png +0 -0
assets/sample_images/image_class_2_batch_0_sample_1.png +0 -0
assets/sample_images/image_class_2_batch_1_sample_0.png +0 -0
assets/sample_images/image_class_2_batch_1_sample_1.png +0 -0
assets/sample_images/image_class_3_batch_0_sample_0.png +0 -0
assets/sample_images/image_class_3_batch_0_sample_1.png +0 -0
assets/sample_images/image_class_3_batch_1_sample_0.png +0 -0
assets/sample_images/image_class_3_batch_1_sample_1.png +0 -0
assets/sample_images/image_class_4_batch_0_sample_0.png +0 -0
assets/sample_images/image_class_4_batch_0_sample_1.png +0 -0
assets/sample_images/image_class_4_batch_1_sample_0.png +0 -0
assets/sample_images/image_class_4_batch_1_sample_1.png +0 -0
assets/sample_images/mask_class_0_batch_0.png +0 -0
assets/sample_images/mask_class_0_batch_1.png +0 -0
assets/sample_images/mask_class_1_batch_0.png +0 -0
assets/sample_images/mask_class_1_batch_1.png +0 -0
assets/sample_images/mask_class_2_batch_0.png +0 -0
assets/sample_images/mask_class_2_batch_1.png +0 -0
assets/sample_images/mask_class_3_batch_0.png +0 -0
assets/sample_images/mask_class_3_batch_1.png +0 -0
assets/sample_images/mask_class_4_batch_0.png +0 -0
assets/sample_images/mask_class_4_batch_1.png +0 -0
models/cstylegan.py +530 -0
models/gaugan.py +403 -0
utils.py +71 -0

assets/GauGAN.png ADDED Viewed

assets/RetinaGAN_pipeline.png ADDED Viewed

assets/cStyleGAN.png ADDED Viewed

assets/sample.jpeg ADDED Viewed

assets/sample_images/image_class_0_batch_0_sample_0.png ADDED Viewed

assets/sample_images/image_class_0_batch_0_sample_1.png ADDED Viewed

assets/sample_images/image_class_0_batch_1_sample_0.png ADDED Viewed

assets/sample_images/image_class_0_batch_1_sample_1.png ADDED Viewed

assets/sample_images/image_class_1_batch_0_sample_0.png ADDED Viewed

assets/sample_images/image_class_1_batch_0_sample_1.png ADDED Viewed

assets/sample_images/image_class_1_batch_1_sample_0.png ADDED Viewed

assets/sample_images/image_class_1_batch_1_sample_1.png ADDED Viewed

assets/sample_images/image_class_2_batch_0_sample_0.png ADDED Viewed

assets/sample_images/image_class_2_batch_0_sample_1.png ADDED Viewed

assets/sample_images/image_class_2_batch_1_sample_0.png ADDED Viewed

assets/sample_images/image_class_2_batch_1_sample_1.png ADDED Viewed

assets/sample_images/image_class_3_batch_0_sample_0.png ADDED Viewed

assets/sample_images/image_class_3_batch_0_sample_1.png ADDED Viewed

assets/sample_images/image_class_3_batch_1_sample_0.png ADDED Viewed

assets/sample_images/image_class_3_batch_1_sample_1.png ADDED Viewed

assets/sample_images/image_class_4_batch_0_sample_0.png ADDED Viewed

assets/sample_images/image_class_4_batch_0_sample_1.png ADDED Viewed

assets/sample_images/image_class_4_batch_1_sample_0.png ADDED Viewed

assets/sample_images/image_class_4_batch_1_sample_1.png ADDED Viewed

assets/sample_images/mask_class_0_batch_0.png ADDED Viewed

assets/sample_images/mask_class_0_batch_1.png ADDED Viewed

assets/sample_images/mask_class_1_batch_0.png ADDED Viewed

assets/sample_images/mask_class_1_batch_1.png ADDED Viewed

assets/sample_images/mask_class_2_batch_0.png ADDED Viewed

assets/sample_images/mask_class_2_batch_1.png ADDED Viewed

assets/sample_images/mask_class_3_batch_0.png ADDED Viewed

assets/sample_images/mask_class_3_batch_1.png ADDED Viewed

assets/sample_images/mask_class_4_batch_0.png ADDED Viewed

assets/sample_images/mask_class_4_batch_1.png ADDED Viewed

models/cstylegan.py ADDED Viewed

	@@ -0,0 +1,530 @@

+# This file is based on the StyleGAN example by Cheong et al.
+# https://keras.io/examples/generative/stylegan/
+import numpy as np
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.models import Sequential
+from tensorflow_addons.layers import InstanceNormalization
+def log2(x):
+    return int(np.log2(x))
+# we use different batch size for different resolution, so larger image size
+# could fit into GPU memory. The keys is image resolution in log2
+batch_sizes = {2: 16, 3: 16, 4: 16, 5: 16, 6: 16, 7: 8, 8: 4, 9: 2, 10: 1}
+# We adjust the train step accordingly
+train_step_ratio = {k: batch_sizes[2] / v for k, v in batch_sizes.items()}
+def fade_in(alpha, a, b):
+    return alpha * a + (1.0 - alpha) * b
+def wasserstein_loss(y_true, y_pred):
+    return -tf.reduce_mean(y_true * y_pred)
+def pixel_norm(x, epsilon=1e-8):
+    return x / tf.math.sqrt(tf.reduce_mean(x ** 2, axis=-1, keepdims=True) + epsilon)
+def minibatch_std(input_tensor, epsilon=1e-8):
+    n, h, w, c = tf.shape(input_tensor)
+    group_size = tf.minimum(4, n)
+    x = tf.reshape(input_tensor, [group_size, -1, h, w, c])
+    group_mean, group_var = tf.nn.moments(x, axes=(0), keepdims=False)
+    group_std = tf.sqrt(group_var + epsilon)
+    avg_std = tf.reduce_mean(group_std, axis=[1, 2, 3], keepdims=True)
+    x = tf.tile(avg_std, [group_size, h, w, 1])
+    return tf.concat([input_tensor, x], axis=-1)
+class EqualizedConv(layers.Layer):
+    def __init__(self, out_channels, kernel=3, gain=2, **kwargs):
+        super(EqualizedConv, self).__init__(**kwargs)
+        self.kernel = kernel
+        self.out_channels = out_channels
+        self.gain = gain
+        self.pad = kernel != 1
+    def build(self, input_shape):
+        self.in_channels = input_shape[-1]
+        initializer = keras.initializers.RandomNormal(mean=0.0, stddev=1.0)
+        self.w = self.add_weight(
+            shape=[self.kernel, self.kernel, self.in_channels, self.out_channels],
+            initializer=initializer,
+            trainable=True,
+            name="kernel",
+        )
+        self.b = self.add_weight(
+            shape=(self.out_channels,), initializer="zeros", trainable=True, name="bias"
+        )
+        fan_in = self.kernel * self.kernel * self.in_channels
+        self.scale = tf.sqrt(self.gain / fan_in)
+    def call(self, inputs):
+        if self.pad:
+            x = tf.pad(inputs, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="REFLECT")
+        else:
+            x = inputs
+        output = (
+            tf.nn.conv2d(x, self.scale * self.w, strides=1, padding="VALID") + self.b
+        )
+        return output
+class EqualizedDense(layers.Layer):
+    def __init__(self, units, gain=2, learning_rate_multiplier=1, **kwargs):
+        super(EqualizedDense, self).__init__(**kwargs)
+        self.units = units
+        self.gain = gain
+        self.learning_rate_multiplier = learning_rate_multiplier
+    def build(self, input_shape):
+        self.in_channels = input_shape[-1]
+        initializer = keras.initializers.RandomNormal(
+            mean=0.0, stddev=1.0 / self.learning_rate_multiplier
+        )
+        self.w = self.add_weight(
+            shape=[self.in_channels, self.units],
+            initializer=initializer,
+            trainable=True,
+            name="kernel",
+        )
+        self.b = self.add_weight(
+            shape=(self.units,), initializer="zeros", trainable=True, name="bias"
+        )
+        fan_in = self.in_channels
+        self.scale = tf.sqrt(self.gain / fan_in)
+    def call(self, inputs):
+        output = tf.add(tf.matmul(inputs, self.scale * self.w), self.b)
+        return output * self.learning_rate_multiplier
+class AddNoise(layers.Layer):
+    def build(self, input_shape):
+        n, h, w, c = input_shape[0]
+        initializer = keras.initializers.RandomNormal(mean=0.0, stddev=1.0)
+        self.b = self.add_weight(
+            shape=[1, 1, 1, c], initializer=initializer, trainable=True, name="kernel"
+        )
+    def call(self, inputs):
+        x, noise = inputs
+        output = x + self.b * noise
+        return output
+class AdaIN(layers.Layer):
+    def __init__(self, gain=1, **kwargs):
+        super(AdaIN, self).__init__(**kwargs)
+        self.gain = gain
+    def build(self, input_shapes):
+        x_shape = input_shapes[0]
+        w_shape = input_shapes[1]
+        self.w_channels = w_shape[-1]
+        self.x_channels = x_shape[-1]
+        self.dense_1 = EqualizedDense(self.x_channels, gain=1)
+        self.dense_2 = EqualizedDense(self.x_channels, gain=1)
+    def call(self, inputs):
+        x, w = inputs
+        ys = tf.reshape(self.dense_1(w), (-1, 1, 1, self.x_channels))
+        yb = tf.reshape(self.dense_2(w), (-1, 1, 1, self.x_channels))
+        return ys * x + yb
+def Mapping(num_stages, input_shape=512):
+    z = layers.Input(shape=(input_shape,))
+    w = pixel_norm(z)
+    class_embedding = layers.Input(shape=512)
+    for i in range(8):
+        w = EqualizedDense(512, learning_rate_multiplier=0.01)(w)
+        w = w + class_embedding
+        w = layers.LeakyReLU(0.2)(w)
+    w = tf.tile(tf.expand_dims(w, 1), (1, num_stages, 1))
+    return keras.Model([z, class_embedding], w, name="mapping")
+class Generator:
+    """Builder that holds per-resolution generator blocks and assembles a model with ``grow``.
+
+    ``__init__`` creates one generator block, one to-RGB head and one noise
+    input for every resolution from ``2 ** start_res_log2`` up to
+    ``2 ** target_res_log2``. ``grow`` wires the blocks up to a requested
+    resolution into a ``keras.Model``.
+    """
+    def __init__(self, start_res_log2, target_res_log2):
+        self.start_res_log2 = start_res_log2
+        self.target_res_log2 = target_res_log2
+        self.num_stages = target_res_log2 - start_res_log2 + 1
+        # list of generator blocks at increasing resolution
+        self.g_blocks = []
+        # list of layers to convert g_block activation to RGB
+        self.to_rgb = []
+        # list of noise input of different resolutions into g_blocks
+        self.noise_inputs = []
+        # filter size to use at each stage, keys are log2(resolution)
+        self.filter_nums = {
+            0: 512,
+            1: 512,
+            2: 512,  # 4x4
+            3: 512,  # 8x8
+            4: 512,  # 16x16
+            5: 512,  # 32x32
+            6: 256,  # 64x64
+            7: 128,  # 128x128
+            8: 64,  # 256x256
+            9: 32,  # 512x512
+            10: 16,
+        }  # 1024x1024
+        start_res = 2 ** start_res_log2
+        # NOTE(review): the generator input uses filter_nums[start_res_log2]
+        # channels, while the base block below is built with
+        # filter_nums[start_res_log2 - 1] input channels. The two agree only
+        # when those table entries are equal - confirm for other start sizes.
+        self.input_shape = (start_res, start_res, self.filter_nums[start_res_log2])
+        self.g_input = layers.Input(self.input_shape, name="generator_input")
+        for i in range(start_res_log2, target_res_log2 + 1):
+            filter_num = self.filter_nums[i]
+            res = 2 ** i
+            self.noise_inputs.append(
+                layers.Input(shape=(res, res, 1), name=f"noise_{res}x{res}")
+            )
+            # 1x1 equalized convolution producing 7 output channels.
+            to_rgb = Sequential(
+                [
+                    layers.InputLayer(input_shape=(res, res, filter_num)),
+                    EqualizedConv(7, 1, gain=1),  # CHANGE NO OF CHANNELS
+                ],
+                name=f"to_rgb_{res}x{res}",
+            )
+            self.to_rgb.append(to_rgb)
+            is_base = i == self.start_res_log2
+            # The base block receives its input at its own resolution; every
+            # other block receives the previous (half-size) resolution and
+            # upsamples it internally.
+            if is_base:
+                input_shape = (res, res, self.filter_nums[i - 1])
+            else:
+                input_shape = (2 ** (i - 1), 2 ** (i - 1), self.filter_nums[i - 1])
+            g_block = self.build_block(
+                filter_num, res=res, input_shape=input_shape, is_base=is_base
+            )
+            self.g_blocks.append(g_block)
+    def build_block(self, filter_num, res, input_shape, is_base):
+        """Return the ``keras.Model`` for one resolution, taking ``[input, w, noise]``.
+
+        Non-base blocks start with a 2x upsampling and an equalized
+        convolution. Every block then applies noise, LeakyReLU, instance
+        normalisation and AdaIN, followed by a second convolution and the
+        same four steps again.
+        """
+        input_tensor = layers.Input(shape=input_shape, name=f"g_{res}")
+        noise = layers.Input(shape=(res, res, 1), name=f"noise_{res}")
+        w = layers.Input(shape=512)
+        x = input_tensor
+        if not is_base:
+            x = layers.UpSampling2D((2, 2))(x)
+            x = EqualizedConv(filter_num, 3)(x)
+        x = AddNoise()([x, noise])
+        x = layers.LeakyReLU(0.2)(x)
+        x = InstanceNormalization()(x)
+        x = AdaIN()([x, w])
+        x = EqualizedConv(filter_num, 3)(x)
+        x = AddNoise()([x, noise])
+        x = layers.LeakyReLU(0.2)(x)
+        x = InstanceNormalization()(x)
+        x = AdaIN()([x, w])
+        return keras.Model([input_tensor, w, noise], x, name=f"genblock_{res}x{res}")
+    def grow(self, res_log2):
+        """Assemble the generator model for resolution ``2 ** res_log2``.
+
+        The returned model takes ``[g_input, w, noise_inputs, alpha]``. With
+        more than one stage, the output is ``fade_in`` between the RGB of the
+        last stage and the 2x-upsampled RGB of the stage before it.
+        """
+        res = 2 ** res_log2
+        # Number of stages used by this model (local, not self.num_stages).
+        num_stages = res_log2 - self.start_res_log2 + 1
+        # The style input always has one entry per stage of the full target
+        # resolution, even when fewer stages are wired below.
+        w = layers.Input(shape=(self.num_stages, 512), name="w")
+        alpha = layers.Input(shape=(1), name="g_alpha")
+        x = self.g_blocks[0]([self.g_input, w[:, 0], self.noise_inputs[0]])
+        if num_stages == 1:
+            rgb = self.to_rgb[0](x)
+        else:
+            # Run every stage except the last one.
+            for i in range(1, num_stages - 1):
+                x = self.g_blocks[i]([x, w[:, i], self.noise_inputs[i]])
+            # RGB of the previous resolution, upsampled to the new one.
+            old_rgb = self.to_rgb[num_stages - 2](x)
+            old_rgb = layers.UpSampling2D((2, 2))(old_rgb)
+            i = num_stages - 1
+            x = self.g_blocks[i]([x, w[:, i], self.noise_inputs[i]])
+            new_rgb = self.to_rgb[i](x)
+            rgb = fade_in(alpha[0], new_rgb, old_rgb)
+        # All noise inputs are declared as model inputs, including those of
+        # stages that are not wired at this resolution.
+        return keras.Model(
+            [self.g_input, w, self.noise_inputs, alpha],
+            rgb,
+            name=f"generator_{res}_x_{res}",
+        )
+class Discriminator:
+    """Builder that holds per-resolution discriminator blocks and assembles a model with ``grow``.
+
+    ``__init__`` creates one from-RGB stem and one discriminator block for
+    every resolution from ``2 ** start_res_log2`` up to
+    ``2 ** target_res_log2``. ``grow`` wires them into a ``keras.Model`` for
+    a requested resolution.
+    """
+    def __init__(self, start_res_log2, target_res_log2):
+        self.start_res_log2 = start_res_log2
+        self.target_res_log2 = target_res_log2
+        self.num_stages = target_res_log2 - start_res_log2 + 1
+        # filter size to use at each stage, keys are log2(resolution)
+        self.filter_nums = {
+            0: 512,
+            1: 512,
+            2: 512,  # 4x4
+            3: 512,  # 8x8
+            4: 512,  # 16x16
+            5: 512,  # 32x32
+            6: 256,  # 64x64
+            7: 128,  # 128x128
+            8: 64,  # 256x256
+            9: 32,  # 512x512
+            10: 16,
+        }  # 1024x1024
+        # list of discriminator blocks at increasing resolution
+        self.d_blocks = []
+        # list of layers to convert RGB into activation for d_blocks inputs
+        self.from_rgb = []
+        # Conditional embedding
+        # self.embedding = layers.Embedding(5, 256)
+        for res_log2 in range(self.start_res_log2, self.target_res_log2 + 1):
+            res = 2 ** res_log2
+            filter_num = self.filter_nums[res_log2]
+            # Stem: 7-channel image -> 1x1 equalized convolution -> LeakyReLU.
+            from_rgb = Sequential(
+                [
+                    layers.InputLayer(
+                        input_shape=(res, res, 7), name=f"from_rgb_input_{res}" # CHANGE NO OF CHANNELS
+                    ),
+                    EqualizedConv(filter_num, 1),
+                    layers.LeakyReLU(0.2),
+                ],
+                name=f"from_rgb_{res}",
+            )
+            self.from_rgb.append(from_rgb)
+            # NOTE(review): input_shape is assigned but not read afterwards.
+            input_shape = (res, res, filter_num)
+            # The first (lowest-resolution) block is the base block that
+            # ends in a single-unit dense layer; later blocks downsample.
+            if len(self.d_blocks) == 0:
+                d_block = self.build_base(filter_num, res)
+            else:
+                d_block = self.build_block(
+                    filter_num, self.filter_nums[res_log2 - 1], res
+                )
+            self.d_blocks.append(d_block)
+    def build_base(self, filter_num, res):
+        """Return the base block: minibatch-std, conv, flatten and two dense layers (last has 1 unit)."""
+        input_tensor = layers.Input(shape=(res, res, filter_num), name=f"d_{res}")
+        x = minibatch_std(input_tensor)
+        x = EqualizedConv(filter_num, 3)(x)
+        x = layers.LeakyReLU(0.2)(x)
+        x = layers.Flatten()(x)
+        x = EqualizedDense(filter_num)(x)
+        x = layers.LeakyReLU(0.2)(x)
+        x = EqualizedDense(1)(x)
+        return keras.Model(input_tensor, x, name=f"d_{res}")
+    def build_block(self, filter_num_1, filter_num_2, res):
+        """Return a block of two equalized convolutions followed by 2x average pooling.
+
+        ``filter_num_1`` is the input/first-convolution channel count and
+        ``filter_num_2`` the output channel count.
+        """
+        input_tensor = layers.Input(shape=(res, res, filter_num_1), name=f"d_{res}")
+        x = EqualizedConv(filter_num_1, 3)(input_tensor)
+        x = layers.LeakyReLU(0.2)(x)
+        x = EqualizedConv(filter_num_2)(x)
+        x = layers.LeakyReLU(0.2)(x)
+        x = layers.AveragePooling2D((2, 2))(x)
+        return keras.Model(input_tensor, x, name=f"d_{res}")
+    def grow(self, res_log2):
+        """Assemble the discriminator model for resolution ``2 ** res_log2``.
+
+        The returned model takes ``[input_image, class_embedding, alpha]``.
+        Above the start resolution, the output of the newest block is blended
+        with the stem output of the average-pooled image via ``fade_in``
+        before running the remaining lower-resolution blocks.
+        """
+        res = 2 ** res_log2
+        idx = res_log2 - self.start_res_log2
+        alpha = layers.Input(shape=(1), name="d_alpha")
+        input_image = layers.Input(shape=(res, res, 7), name="input_image") # CHANGE NO OF CHANNELS
+        class_embedding = layers.Input(shape=512, name="class_embedding")
+        x = self.from_rgb[idx](input_image)
+        # A new AdaIN layer is created here (and in the loop below) on every
+        # call to grow(); these layers are not stored on the builder.
+        x = AdaIN()([x, class_embedding])
+        x = self.d_blocks[idx](x)
+        if idx > 0:
+            idx -= 1
+            # Old path: downsample the image and use the previous stem.
+            downsized_image = layers.AveragePooling2D((2, 2))(input_image)
+            y = self.from_rgb[idx](downsized_image)
+            x = fade_in(alpha[0], x, y)
+            # Run the remaining blocks from the previous resolution down to
+            # the base block, conditioning on the class before each one.
+            for i in range(idx, -1, -1):
+                x = AdaIN()([x, class_embedding])
+                x = self.d_blocks[i](x)
+        return keras.Model([input_image, class_embedding, alpha], x, name=f"discriminator_{res}_x_{res}")
+class cStyleGAN(tf.keras.Model):
+    """Class-conditional StyleGAN that ties together mapping, embedding, generator and discriminator.
+
+    The generator and discriminator models are not created in ``__init__``;
+    they are built by ``grow_model`` (called directly or through ``compile``
+    when the requested resolution differs from the current one).
+    """
+    def __init__(self, z_dim=512, target_res=64, start_res=4):
+        super(cStyleGAN, self).__init__()
+        self.z_dim = z_dim
+        self.target_res_log2 = log2(target_res)
+        self.start_res_log2 = log2(start_res)
+        # Starts at the target resolution; compile() only calls grow_model()
+        # when asked for a different resolution.
+        self.current_res_log2 = self.target_res_log2
+        self.num_stages = self.target_res_log2 - self.start_res_log2 + 1
+        self.alpha = tf.Variable(1.0, dtype=tf.float32, trainable=False, name="alpha")
+        self.mapping = Mapping(num_stages=self.num_stages)
+        # Embedding table with 5 entries of size 512, looked up by class label.
+        self.embedding = layers.Embedding(5, 512)
+        self.d_builder = Discriminator(self.start_res_log2, self.target_res_log2)
+        self.g_builder = Generator(self.start_res_log2, self.target_res_log2)
+        self.g_input_shape = self.g_builder.input_shape
+        self.phase = None
+        self.train_step_counter = tf.Variable(0, dtype=tf.int32, trainable=False)
+        self.loss_weights = {"gradient_penalty": 10, "drift": 0.001}
+    def grow_model(self, res):
+        """Clear the Keras session and (re)build generator and discriminator for ``res``."""
+        tf.keras.backend.clear_session()
+        res_log2 = log2(res)
+        self.generator = self.g_builder.grow(res_log2)
+        self.discriminator = self.d_builder.grow(res_log2)
+        self.current_res_log2 = res_log2
+        print(f"\nModel resolution:{res}x{res}")
+    def compile(
+        self, steps_per_epoch, phase, res, d_optimizer, g_optimizer, *args, **kwargs
+    ):
+        """Configure the training phase and resolution, then call the Keras ``compile``.
+
+        ``phase`` is expected to be ``"TRANSITION"`` or ``"STABLE"`` (see
+        ``train_step``). An optional ``loss_weights`` keyword replaces the
+        default loss weights and is not forwarded to the parent ``compile``.
+        """
+        self.loss_weights = kwargs.pop("loss_weights", self.loss_weights)
+        self.steps_per_epoch = steps_per_epoch
+        # NOTE(review): the optimizers are stored only when the resolution
+        # changes; when ``res`` equals the current resolution the arguments
+        # are ignored and any previously stored optimizers are kept. Confirm
+        # that this is intended.
+        if res != 2 ** self.current_res_log2:
+            self.grow_model(res)
+            self.d_optimizer = d_optimizer
+            self.g_optimizer = g_optimizer
+        self.train_step_counter.assign(0)
+        self.phase = phase
+        self.d_loss_metric = keras.metrics.Mean(name="d_loss")
+        self.g_loss_metric = keras.metrics.Mean(name="g_loss")
+        super(cStyleGAN, self).compile(*args, **kwargs)
+    @property
+    def metrics(self):
+        """Return the discriminator and generator loss metrics created in ``compile``."""
+        return [self.d_loss_metric, self.g_loss_metric]
+    def generate_noise(self, batch_size):
+        """Return one normal noise tensor ``(batch_size, r, r, 1)`` per resolution from start to target."""
+        noise = [
+            tf.random.normal((batch_size, 2 ** res, 2 ** res, 1))
+            for res in range(self.start_res_log2, self.target_res_log2 + 1)
+        ]
+        return noise
+    def gradient_loss(self, grad):
+        """Return the mean of ``(norm - 1) ** 2`` where the norm sums squares over all axes but the first.
+
+        NOTE(review): ``train_step`` passes the one-element list returned by
+        ``gradient_tape.gradient(..., [interpolates])``. If ``tf.square``
+        stacks that list into a tensor, the first axis is the list axis and
+        the batch axis is part of the reduction - confirm this is intended.
+        """
+        loss = tf.square(grad)
+        loss = tf.reduce_sum(loss, axis=tf.range(1, tf.size(tf.shape(loss))))
+        loss = tf.sqrt(loss)
+        loss = tf.reduce_mean(tf.square(loss - 1))
+        return loss
+    def train_step(self, data_tuple):
+        """Run one generator update followed by one discriminator update.
+
+        ``data_tuple`` is unpacked as ``(real_images, class_label)``. Returns
+        a dict with the running ``d_loss`` and ``g_loss`` metric values.
+        Raises ``NotImplementedError`` when ``self.phase`` is neither
+        ``"TRANSITION"`` nor ``"STABLE"``.
+        """
+        real_images, class_label = data_tuple
+        self.train_step_counter.assign_add(1)
+        # During a transition alpha follows step / steps_per_epoch; in the
+        # stable phase it is pinned to 1.
+        if self.phase == "TRANSITION":
+            self.alpha.assign(
+                tf.cast(self.train_step_counter / self.steps_per_epoch, tf.float32)
+            )
+        elif self.phase == "STABLE":
+            self.alpha.assign(1.0)
+        else:
+            raise NotImplementedError
+        alpha = tf.expand_dims(self.alpha, 0)
+        batch_size = tf.shape(real_images)[0]
+        # Targets of +1 / -1 select the sign of the Wasserstein loss terms.
+        real_labels = tf.ones(batch_size)
+        fake_labels = -tf.ones(batch_size)
+        z = tf.random.normal((batch_size, self.z_dim))
+        # The generator starts from a constant all-ones input.
+        const_input = tf.ones(tuple([batch_size] + list(self.g_input_shape)))
+        noise = self.generate_noise(batch_size)
+        # generator
+        with tf.GradientTape() as g_tape:
+            class_embedding = self.embedding(class_label)
+            w = self.mapping([z, class_embedding])
+            fake_images = self.generator([const_input, w, noise, alpha])
+            pred_fake = self.discriminator([fake_images, class_embedding, alpha])
+            g_loss = wasserstein_loss(real_labels, pred_fake)
+            # The generator step also trains the embedding and the mapping.
+            trainable_weights = (
+                self.embedding.trainable_weights + self.mapping.trainable_weights + self.generator.trainable_weights
+            )
+            gradients = g_tape.gradient(g_loss, trainable_weights)
+            self.g_optimizer.apply_gradients(zip(gradients, trainable_weights))
+        # discriminator
+        # gradient_tape is used for the gradient penalty with respect to the
+        # interpolated images; total_tape differentiates the whole
+        # discriminator loss. fake_images and class_embedding are reused from
+        # the generator step above.
+        with tf.GradientTape() as gradient_tape, tf.GradientTape() as total_tape:
+            # class_embedding = self.embedding(class_label)
+            # forward pass
+            pred_fake = self.discriminator([fake_images, class_embedding, alpha])
+            pred_real = self.discriminator([real_images, class_embedding, alpha])
+            # Random per-sample mix of real and fake images.
+            epsilon = tf.random.uniform((batch_size, 1, 1, 1))
+            interpolates = epsilon * real_images + (1 - epsilon) * fake_images
+            gradient_tape.watch(interpolates)
+            pred_fake_grad = self.discriminator([interpolates, class_embedding, alpha])
+            # calculate losses
+            loss_fake = wasserstein_loss(fake_labels, pred_fake)
+            loss_real = wasserstein_loss(real_labels, pred_real)
+            loss_fake_grad = wasserstein_loss(fake_labels, pred_fake_grad)
+            # gradient penalty
+            gradients_fake = gradient_tape.gradient(loss_fake_grad, [interpolates])
+            gradient_penalty = self.loss_weights[
+                "gradient_penalty"
+            ] * self.gradient_loss(gradients_fake)
+            # drift loss
+            all_pred = tf.concat([pred_fake, pred_real], axis=0)
+            drift_loss = self.loss_weights["drift"] * tf.reduce_mean(all_pred ** 2)
+            d_loss = loss_fake + loss_real + gradient_penalty + drift_loss
+            gradients = total_tape.gradient(
+                d_loss, self.discriminator.trainable_weights
+            )
+            self.d_optimizer.apply_gradients(
+                zip(gradients, self.discriminator.trainable_weights)
+            )
+        # Update metrics
+        self.d_loss_metric.update_state(d_loss)
+        self.g_loss_metric.update_state(g_loss)
+        return {
+            "d_loss": self.d_loss_metric.result(),
+            "g_loss": self.g_loss_metric.result(),
+        }
+    def call(self, inputs: dict):
+        """Generate images from a dict of optional inputs.
+
+        Recognised keys and their defaults: ``style_code`` (None), ``z``
+        (None), ``noise`` (None), ``class_label`` (0), ``batch_size`` (1) and
+        ``alpha`` (1.0). Missing ``z``/``noise`` are sampled randomly and a
+        missing ``style_code`` is computed with the mapping network. The
+        generator output is rescaled with ``(x * 0.5 + 0.5) * 255`` and
+        clipped to ``[0, 255]``.
+        """
+        style_code = inputs.get("style_code", None)
+        z = inputs.get("z", None)
+        noise = inputs.get("noise", None)
+        class_label = inputs.get("class_label", 0)
+        batch_size = inputs.get("batch_size", 1)
+        alpha = inputs.get("alpha", 1.0)
+        alpha = tf.expand_dims(alpha, 0)
+        class_embedding = self.embedding(class_label)
+        if style_code is None:
+            if z is None:
+                z = tf.random.normal((batch_size, self.z_dim))
+            style_code = self.mapping([z, class_embedding])
+        if noise is None:
+            noise = self.generate_noise(batch_size)
+        # self.alpha.assign(alpha)
+        const_input = tf.ones(tuple([batch_size] + list(self.g_input_shape)))
+        images = self.generator([const_input, style_code, noise, alpha])
+        # images = np.clip((images * 0.5 + 0.5) * 255, 0, 255).astype(np.uint8)
+        images = tf.clip_by_value((images * 0.5 + 0.5) * 255, 0, 255)
+        return images

models/gaugan.py ADDED Viewed

	@@ -0,0 +1,403 @@

+# This file is based on the GauGAN example by Rakshit et al.
+# https://keras.io/examples/generative/gaugan/
+import tensorflow as tf
+import tensorflow_addons as tfa
+class SPADE(tf.keras.layers.Layer):
+    def __init__(self, filters, epsilon=1e-5, **kwargs):
+        super().__init__(**kwargs)
+        self.epsilon = epsilon
+        self.conv = tf.keras.layers.Conv2D(128, 3, padding="same", activation="relu")
+        self.conv_gamma = tf.keras.layers.Conv2D(filters, 3, padding="same")
+        self.conv_beta = tf.keras.layers.Conv2D(filters, 3, padding="same")
+    def build(self, input_shape):
+        self.resize_shape = input_shape[1:3]
+    def call(self, input_tensor, raw_mask):
+        mask = tf.image.resize(raw_mask, self.resize_shape, method="nearest")
+        x = self.conv(mask)
+        gamma = self.conv_gamma(x)
+        beta = self.conv_beta(x)
+        mean, var = tf.nn.moments(input_tensor, axes=(0, 1, 2), keepdims=True)
+        std = tf.sqrt(var + self.epsilon)
+        normalized = (input_tensor - mean) / std
+        output = gamma * normalized + beta
+        return output
+class ResBlock(tf.keras.layers.Layer):
+    def __init__(self, filters, **kwargs):
+        super().__init__(**kwargs)
+        self.filters = filters
+    def build(self, input_shape):
+        input_filter = input_shape[-1]
+        self.spade_1 = SPADE(input_filter)
+        self.spade_2 = SPADE(self.filters)
+        self.conv_1 = tf.keras.layers.Conv2D(self.filters, 3, padding="same")
+        self.conv_2 = tf.keras.layers.Conv2D(self.filters, 3, padding="same")
+        self.learned_skip = False
+        if self.filters != input_filter:
+            self.learned_skip = True
+            self.spade_3 = SPADE(input_filter)
+            self.conv_3 = tf.keras.layers.Conv2D(self.filters, 3, padding="same")
+    def call(self, input_tensor, mask):
+        x = self.spade_1(input_tensor, mask)
+        x = self.conv_1(tf.nn.leaky_relu(x, 0.2))
+        x = self.spade_2(x, mask)
+        x = self.conv_2(tf.nn.leaky_relu(x, 0.2))
+        skip = (
+            self.conv_3(tf.nn.leaky_relu(self.spade_3(input_tensor, mask), 0.2))
+            if self.learned_skip
+            else input_tensor
+        )
+        output = skip + x
+        return output
+class GaussianSampler(tf.keras.layers.Layer):
+    def __init__(self, batch_size, latent_dim, **kwargs):
+        super().__init__(**kwargs)
+        self.batch_size = batch_size
+        self.latent_dim = latent_dim
+    def call(self, inputs):
+        means, variance = inputs
+        epsilon = tf.random.normal(
+            shape=(self.batch_size, self.latent_dim), mean=0.0, stddev=1.0
+        )
+        samples = means + tf.exp(0.5 * variance) * epsilon
+        return samples
+def downsample(
+    channels,
+    kernels,
+    strides=2,
+    apply_norm=True,
+    apply_activation=True,
+    apply_dropout=False,
+):
+    block = tf.keras.Sequential()
+    block.add(
+        tf.keras.layers.Conv2D(
+            channels,
+            kernels,
+            strides=strides,
+            padding="same",
+            use_bias=False,
+            kernel_initializer=tf.keras.initializers.GlorotNormal(),
+        )
+    )
+    if apply_norm:
+        block.add(tfa.layers.InstanceNormalization())
+    if apply_activation:
+        block.add(tf.keras.layers.LeakyReLU(0.2))
+    if apply_dropout:
+        block.add(tf.keras.layers.Dropout(0.5))
+    return block
+def build_encoder(image_shape, encoder_downsample_factor=64, latent_dim=256):
+    input_image = tf.keras.Input(shape=image_shape)
+    x = downsample(encoder_downsample_factor, 3, apply_norm=False)(input_image)
+    x = downsample(2 * encoder_downsample_factor, 3)(x)
+    x = downsample(4 * encoder_downsample_factor, 3)(x)
+    x = downsample(8 * encoder_downsample_factor, 3)(x)
+    x = downsample(8 * encoder_downsample_factor, 3)(x)
+    x = downsample(8 * encoder_downsample_factor, 3)(x)
+    x = downsample(16 * encoder_downsample_factor, 3)(x)
+    x = tf.keras.layers.Flatten()(x)
+    mean = tf.keras.layers.Dense(latent_dim, name="mean")(x)
+    variance = tf.keras.layers.Dense(latent_dim, name="variance")(x)
+    return tf.keras.Model(input_image, [mean, variance], name="encoder")
+def build_generator(mask_shape, latent_dim=256):
+    latent = tf.keras.Input(shape=(latent_dim))
+    mask = tf.keras.Input(shape=mask_shape)
+    x = tf.keras.layers.Dense(16384)(latent)
+    x = tf.keras.layers.Reshape((4, 4, 1024))(x)
+    x = ResBlock(filters=1024)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=1024)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=1024)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=512)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=256)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=128)(x, mask)
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)
+    x = ResBlock(filters=64)(x, mask)               # These 2 added layers
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)     # to make input 512x512
+    x = ResBlock(filters=32)(x, mask)               # These 2 added layers
+    x = tf.keras.layers.UpSampling2D((2, 2))(x)     # to make input 1024x1024
+    x = tf.nn.leaky_relu(x, 0.2)
+    output_image = tf.nn.sigmoid(tf.keras.layers.Conv2D(3, 4, padding="same")(x))
+    return tf.keras.Model([latent, mask], output_image, name="generator")
+def build_discriminator(image_shape, downsample_factor=64):
+    input_image_A = tf.keras.Input(shape=image_shape, name="discriminator_image_A")
+    input_image_B = tf.keras.Input(shape=image_shape, name="discriminator_image_B")
+    x = tf.keras.layers.Concatenate()([input_image_A, input_image_B])
+    x1 = downsample(downsample_factor, 4, apply_norm=False)(x)
+    x2 = downsample(2 * downsample_factor, 4)(x1)
+    x3 = downsample(4 * downsample_factor, 4)(x2)
+    x4 = downsample(8 * downsample_factor, 4)(x3)
+    x5 = downsample(8 * downsample_factor, 4)(x4)
+    x6 = downsample(8 * downsample_factor, 4)(x5)
+    x7 = downsample(16 * downsample_factor, 4)(x6)
+    x8 = tf.keras.layers.Conv2D(1, 4)(x7)
+    outputs = [x1, x2, x3, x4, x5, x6, x7, x8]
+    return tf.keras.Model([input_image_A, input_image_B], outputs)
+def generator_loss(y):
+    return -tf.reduce_mean(y)
+def kl_divergence_loss(mean, variance):
+    return -0.5 * tf.reduce_sum(1 + variance - tf.square(mean) - tf.exp(variance))
+class FeatureMatchingLoss(tf.keras.losses.Loss):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.mae = tf.keras.losses.MeanAbsoluteError()
+    def call(self, y_true, y_pred):
+        loss = 0
+        for i in range(len(y_true) - 1):
+            loss += self.mae(y_true[i], y_pred[i])
+        return loss
+class VGGFeatureMatchingLoss(tf.keras.losses.Loss):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.encoder_layers = [
+            "block1_conv1",
+            "block2_conv1",
+            "block3_conv1",
+            "block4_conv1",
+            "block5_conv1",
+        ]
+        self.weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
+        vgg = tf.keras.applications.VGG19(include_top=False, weights="imagenet")
+        layer_outputs = [vgg.get_layer(x).output for x in self.encoder_layers]
+        self.vgg_model = tf.keras.Model(vgg.input, layer_outputs, name="VGG")
+        self.mae = tf.keras.losses.MeanAbsoluteError()
+    def call(self, y_true, y_pred):
+        y_true = tf.keras.applications.vgg19.preprocess_input(127.5 * (y_true + 1))
+        y_pred = tf.keras.applications.vgg19.preprocess_input(127.5 * (y_pred + 1))
+        real_features = self.vgg_model(y_true)
+        fake_features = self.vgg_model(y_pred)
+        loss = 0
+        for i in range(len(real_features)):
+            loss += self.weights[i] * self.mae(real_features[i], fake_features[i])
+        return loss
+class DiscriminatorLoss(tf.keras.losses.Loss):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.hinge_loss = tf.keras.losses.Hinge()
+    def call(self, y, is_real):
+        label = 1.0 if is_real else -1.0
+        return self.hinge_loss(label, y)
+class GauGAN(tf.keras.Model):
+    def __init__(
+        self,
+        image_size,
+        num_classes,
+        batch_size,
+        latent_dim,
+        feature_loss_coeff=10,
+        vgg_feature_loss_coeff=0.1,
+        kl_divergence_loss_coeff=0.1,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.image_size = image_size
+        self.latent_dim = latent_dim
+        self.batch_size = batch_size
+        self.num_classes = num_classes
+        self.image_shape = (image_size, image_size, 3)
+        self.mask_shape = (image_size, image_size, num_classes)
+        self.feature_loss_coeff = feature_loss_coeff
+        self.vgg_feature_loss_coeff = vgg_feature_loss_coeff
+        self.kl_divergence_loss_coeff = kl_divergence_loss_coeff
+        self.discriminator = build_discriminator(self.image_shape)
+        self.generator = build_generator(self.mask_shape, latent_dim=latent_dim)
+        self.encoder = build_encoder(self.image_shape, latent_dim=latent_dim)
+        self.sampler = GaussianSampler(batch_size, latent_dim)
+        self.patch_size, self.combined_model = self.build_combined_generator()
+        self.disc_loss_tracker = tf.keras.metrics.Mean(name="disc_loss")
+        self.gen_loss_tracker = tf.keras.metrics.Mean(name="gen_loss")
+        self.feat_loss_tracker = tf.keras.metrics.Mean(name="feat_loss")
+        self.vgg_loss_tracker = tf.keras.metrics.Mean(name="vgg_loss")
+        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")
+    @property
+    def metrics(self):
+        return [
+            self.disc_loss_tracker,
+            self.gen_loss_tracker,
+            self.feat_loss_tracker,
+            self.vgg_loss_tracker,
+            self.kl_loss_tracker,
+        ]
+    def build_combined_generator(self):
+        # This method builds a model that takes as inputs the following:
+        # latent vector, one-hot encoded segmentation label map, and
+        # a segmentation map. It then (i) generates an image with the generator,
+        # (ii) passes the generated images and segmentation map to the discriminator.
+        # Finally, the model produces the following outputs: (a) discriminator outputs,
+        # (b) generated image.
+        # We will be using this model to simplify the implementation.
+        self.discriminator.trainable = False
+        mask_input = tf.keras.Input(shape=self.mask_shape, name="mask")
+        image_input = tf.keras.Input(shape=self.image_shape, name="image")
+        latent_input = tf.keras.Input(shape=(self.latent_dim), name="latent")
+        generated_image = self.generator([latent_input, mask_input])
+        discriminator_output = self.discriminator([image_input, generated_image])
+        patch_size = discriminator_output[-1].shape[1]
+        combined_model = tf.keras.Model(
+            [latent_input, mask_input, image_input],
+            [discriminator_output, generated_image],
+        )
+        return patch_size, combined_model
+    def compile(self, gen_lr=1e-4, disc_lr=4e-4, **kwargs):
+        super().compile(**kwargs)
+        self.generator_optimizer = tf.keras.optimizers.Adam(
+            gen_lr, beta_1=0.0, beta_2=0.999
+        )
+        self.discriminator_optimizer = tf.keras.optimizers.Adam(
+            disc_lr, beta_1=0.0, beta_2=0.999
+        )
+        self.discriminator_loss = DiscriminatorLoss()
+        self.feature_matching_loss = FeatureMatchingLoss()
+        self.vgg_loss = VGGFeatureMatchingLoss()
+    def train_discriminator(self, latent_vector, segmentation_map, real_image, labels):
+        fake_images = self.generator([latent_vector, labels])
+        with tf.GradientTape() as gradient_tape:
+            pred_fake = self.discriminator([segmentation_map, fake_images])[-1]
+            pred_real = self.discriminator([segmentation_map, real_image])[-1]
+            loss_fake = self.discriminator_loss(pred_fake, False)
+            loss_real = self.discriminator_loss(pred_real, True)
+            total_loss = 0.5 * (loss_fake + loss_real)
+        self.discriminator.trainable = True
+        gradients = gradient_tape.gradient(
+            total_loss, self.discriminator.trainable_variables
+        )
+        self.discriminator_optimizer.apply_gradients(
+            zip(gradients, self.discriminator.trainable_variables)
+        )
+        return total_loss
+    def train_generator(
+        self, latent_vector, segmentation_map, labels, image, mean, variance
+    ):
+        # Generator learns through the signal provided by the discriminator. During
+        # backpropagation, we only update the generator parameters.
+        self.discriminator.trainable = False
+        with tf.GradientTape() as tape:
+            real_d_output = self.discriminator([segmentation_map, image])
+            fake_d_output, fake_image = self.combined_model(
+                [latent_vector, labels, segmentation_map]
+            )
+            pred = fake_d_output[-1]
+            # Compute generator losses.
+            g_loss = generator_loss(pred)
+            kl_loss = self.kl_divergence_loss_coeff * kl_divergence_loss(mean, variance)
+            vgg_loss = self.vgg_feature_loss_coeff * self.vgg_loss(image, fake_image)
+            feature_loss = self.feature_loss_coeff * self.feature_matching_loss(real_d_output, fake_d_output)
+            total_loss = g_loss + kl_loss + vgg_loss + feature_loss
+        gradients = tape.gradient(total_loss, self.combined_model.trainable_variables)
+        self.generator_optimizer.apply_gradients(
+            zip(gradients, self.combined_model.trainable_variables)
+        )
+        return total_loss, feature_loss, vgg_loss, kl_loss
+    def train_step(self, data):
+        segmentation_map, image, labels = data
+        mean, variance = self.encoder(image)
+        latent_vector = self.sampler([mean, variance])
+        discriminator_loss = self.train_discriminator(
+            latent_vector, segmentation_map, image, labels
+        )
+        (generator_loss, feature_loss, vgg_loss, kl_loss) = self.train_generator(
+            latent_vector, segmentation_map, labels, image, mean, variance
+        )
+        # Report progress.
+        self.disc_loss_tracker.update_state(discriminator_loss)
+        self.gen_loss_tracker.update_state(generator_loss)
+        self.feat_loss_tracker.update_state(feature_loss)
+        self.vgg_loss_tracker.update_state(vgg_loss)
+        self.kl_loss_tracker.update_state(kl_loss)
+        results = {m.name: m.result() for m in self.metrics}
+        return results
+    def test_step(self, data):
+        segmentation_map, image, labels = data
+        # Obtain the learned moments of the real image distribution.
+        mean, variance = self.encoder(image)
+        # Sample a latent from the distribution defined by the learned moments.
+        latent_vector = self.sampler([mean, variance])
+        # Generate the fake images.
+        fake_images = self.generator([latent_vector, labels])
+        # Calculate the losses.
+        pred_fake = self.discriminator([segmentation_map, fake_images])[-1]
+        pred_real = self.discriminator([segmentation_map, image])[-1]
+        loss_fake = self.discriminator_loss(pred_fake, False)
+        loss_real = self.discriminator_loss(pred_real, True)
+        total_discriminator_loss = 0.5 * (loss_fake + loss_real)
+        real_d_output = self.discriminator([segmentation_map, image])
+        fake_d_output, fake_image = self.combined_model(
+            [latent_vector, labels, segmentation_map]
+        )
+        pred = fake_d_output[-1]
+        g_loss = generator_loss(pred)
+        kl_loss = self.kl_divergence_loss_coeff * kl_divergence_loss(mean, variance)
+        vgg_loss = self.vgg_feature_loss_coeff * self.vgg_loss(image, fake_image)
+        feature_loss = self.feature_loss_coeff * self.feature_matching_loss(
+            real_d_output, fake_d_output
+        )
+        total_generator_loss = g_loss + kl_loss + vgg_loss + feature_loss
+        # Report progress.
+        self.disc_loss_tracker.update_state(total_discriminator_loss)
+        self.gen_loss_tracker.update_state(total_generator_loss)
+        self.feat_loss_tracker.update_state(feature_loss)
+        self.vgg_loss_tracker.update_state(vgg_loss)
+        self.kl_loss_tracker.update_state(kl_loss)
+        results = {m.name: m.result() for m in self.metrics}
+        return results
+    def call(self, inputs):
+        latent_vectors, labels = inputs
+        return self.generator([latent_vectors, labels])

utils.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import cv2
+import numpy as np
+# Class index -> RGB colour palette used to convert label maps to and from images.
+# NOTE(review): BG is the (black) background and VB the vitreous body, as
+# described in fix_pred_label; the remaining abbreviations presumably name
+# retinal lesion / anatomy classes - confirm against the dataset documentation.
+color_dict = {
+    0: (0, 0, 0),        # BG
+    1: (239, 164, 0),    # EX
+    2: (0, 186, 127),    # HE
+    3: (0, 185, 255),    # SE
+    4: (34, 80, 242),    # MA
+    5: (73, 73, 73),     # OD
+    6: (255, 255, 255),  # VB
+}
+def rgb_to_onehot(rgb_arr, color_dict):
+    """
+    Converts a rgb label map to onehot label map defined by color_dict
+        Parameters:
+            rgb_arr (array): rgb label mask with shape (H x W x 3)
+            color_dict (dict): dictionary mapping of class to colour
+        Returns:
+            arr (array): onehot label map of shape (H x W x n_classes)
+    """
+    num_classes = len(color_dict)
+    shape = rgb_arr.shape[:2]+(num_classes,)
+    arr = np.zeros(shape, dtype=np.int8)
+    for i, cls in enumerate(color_dict):
+        arr[:, :, i] = np.all(rgb_arr.reshape((-1, 3)) == color_dict[i], axis=1).reshape(shape[:2])
+    return arr
+def onehot_to_rgb(onehot_arr, color_dict):
+    """
+    Converts an onehot label map to rgb label map defined by color_dict
+        Parameters:
+            onehot_arr (array): onehot label mask with shape (H x W x n_classes)
+            color_dict (dict): dictionary mapping of class to colour
+        Returns:
+            arr (array): rgb label map of shape (H x W x 3)
+    """
+    shape = onehot_arr.shape[:2]
+    mask = np.argmax(onehot_arr, axis=-1)
+    arr = np.zeros(shape+(3,), dtype=np.uint8)
+    for i, cls in enumerate(color_dict):
+        arr = arr + np.tile(color_dict[cls], shape + (1,)) * (mask[..., None] == cls)
+    return arr
+def fix_pred_label(labels):
+    """
+    Post-processing fixes for the prediction of VB and BG label class,
+    the Vitrous Body should be consistently spherical on a black background
+        Parameters:
+            labels (tensor): A 4-D array of predicted label
+              with shape (batch x H x W x 7)
+        Returns:
+            fixed_labels (array): shape (batch x H x W x 7)
+    """
+    shape = labels.shape[1:-1]
+    VB = np.uint8(cv2.circle(np.zeros(shape), (shape[0]//2, shape[1]//2), min(shape) // 2, 1, -1))[..., None]
+    BG = np.uint8(VB == 0)
+    VB = VB - np.sum(labels[..., 1:-1], axis=-1)[..., None]
+    BG = np.broadcast_to(BG, VB.shape)
+    fixed_labels = np.concatenate([BG, labels[..., 1:-1], VB], axis=-1)
+    return fixed_labels