Documenting model.py
model.py CHANGED
```diff
@@ -1,11 +1,15 @@
-import tensorflow as tf
-import os
+# Deep learning
+import tensorflow as tf
+
+# Methods for loading the weights into the model
+import os
 import inspect
 
 
 _CAP = 3501 # Cap for the number of notes
 
 class Encoder_Z(tf.keras.layers.Layer):
+    # Encoder part of the VAE
 
     def __init__(self, dim_z, name="encoder", **kwargs):
         super(Encoder_Z, self).__init__(name=name, **kwargs)
@@ -31,6 +35,7 @@ class Encoder_Z(tf.keras.layers.Layer):
 
 
 class Decoder_X(tf.keras.layers.Layer):
+    # Decoder part of the VAE.
 
     def __init__(self, dim_z, name="decoder", **kwargs):
         super(Decoder_X, self).__init__(name=name, **kwargs)
@@ -64,12 +69,14 @@ kl_weight = tf.keras.backend.variable(0.125)
 
 
 class VAECost:
-
-
-
-
-
-
+    """
+    VAE cost with a schedule based on the Microsoft Research Blog's article
+    "Less pain, more gain: A simple method for VAE training with less of that KL-vanishing agony".
+
+    The KL weight increases linearly until it reaches a certain threshold,
+    stays constant there for the same number of epochs, then drops abruptly
+    back to zero, and the cycle repeats.
+    """
 
     def __init__(self, model):
         self.model = model
```
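The cyclical schedule described in the `VAECost` docstring is easy to picture as a standalone function. The sketch below is only an illustration: the actual update logic lives inside `VAECost` and is not part of this diff, so the function name `kl_weight_at` and the parameters `ramp_epochs` and `max_weight` are assumptions; only the 0.125 cap is taken from the `kl_weight` variable visible in the hunk header above.

```python
import tensorflow as tf

# Illustrative sketch of the cyclical KL schedule described in the VAECost
# docstring. The real update logic is inside VAECost and is not shown in
# this diff; kl_weight_at, ramp_epochs and max_weight are assumed names.
def kl_weight_at(epoch: int, ramp_epochs: int = 10, max_weight: float = 0.125) -> float:
    """Linear ramp up to max_weight, plateau for as many epochs, then reset to zero."""
    phase = epoch % (2 * ramp_epochs)   # position inside the current cycle
    if phase < ramp_epochs:             # ramp: grow linearly from 0 towards the cap
        return max_weight * phase / ramp_epochs
    return max_weight                   # plateau: hold the cap until the cycle restarts

# One way to apply it: refresh the backend variable at the start of each epoch.
kl_weight = tf.keras.backend.variable(0.125)
for epoch in range(4):
    tf.keras.backend.set_value(kl_weight, kl_weight_at(epoch, ramp_epochs=2))
    # epochs 0..3 with ramp_epochs=2 give weights 0.0, 0.0625, 0.125, 0.125
```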
```diff
@@ -113,6 +120,7 @@ class VAECost:
 
 
 class VAE(tf.keras.Model):
+    # Main architecture, which connects the encoder with the decoder.
 
     def __init__(self, name="variational autoencoder", **kwargs):
         super(VAE, self).__init__(name=name, **kwargs)
```
```diff
@@ -147,12 +155,22 @@ class VAE(tf.keras.Model):
                 "mean recons": mean_recons_error,
                 "kl weight": kl_weight}
 
-    def encode(self, x_input):
-
-
+    def encode(self, x_input: tf.Tensor) -> tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
+        """
+        Take a "song map" and make a forward pass through the encoder to
+        return a sampled latent representation and the distribution's parameters.
+
+        Parameters:
+            x_input (tf.Tensor): Song map to be encoded by the VAE.
+
+        Returns:
+            tf.Tensor: A latent representation sampled from the encoding
+                       distribution (z_sample) and the parameters of that
+                       distribution (mu, sd).
+        """
 
         mu, rho = tf.split(self.encoder(x_input), num_or_size_splits=2, axis=1)
         sd = tf.math.log(1 + tf.math.exp(rho))
         z_sample = mu + sd * tf.random.normal(shape=(120,))
         return z_sample, mu, sd
 
```
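The body of `encode` is the standard reparameterization trick, worth spelling out since the diff only shows it in passing. The sketch below reruns those three lines with dummy inputs; the latent width of 120 is taken from the shapes in the code, while the zero-valued `mu` and `rho` are placeholders.

```python
import tensorflow as tf

# Standalone rerun of the reparameterization step from VAE.encode. The
# latent width 120 matches the code; mu and rho are placeholder values.
mu = tf.zeros((1, 120))    # encoder output, first half: the mean
rho = tf.zeros((1, 120))   # encoder output, second half: unconstrained pre-std

# log(1 + exp(rho)) is the softplus function (tf.math.softplus is equivalent):
# it maps any real rho to a strictly positive standard deviation.
sd = tf.math.log(1 + tf.math.exp(rho))   # softplus(0) = log 2 ≈ 0.693

# Sampling z ~ N(mu, sd) directly would block gradients through mu and sd;
# sampling eps ~ N(0, 1) and shifting/scaling keeps the pass differentiable.
eps = tf.random.normal(shape=(120,))
z_sample = mu + sd * eps                 # broadcasts to shape (1, 120)
```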
```diff
@@ -159,5 +177,15 @@ class VAE(tf.keras.Model):
-    def generate(self, z_sample=None):
-
+    def generate(self, z_sample: tf.Tensor = None) -> tf.Tensor:
+        """
+        Decode a latent representation of a song.
+
+        Parameters:
+            z_sample (tf.Tensor): Song encoding output by the encoder. If
+                                  None, the encoding is sampled from a unit
+                                  Gaussian distribution instead.
+
+        Returns:
+            tf.Tensor: Song map corresponding to the encoding.
+        """
 
         if z_sample is None:
             z_sample = tf.expand_dims(tf.random.normal(shape=(120,)), axis=0)
```
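Taken together, the two documented methods give the model a simple public API. A hypothetical round trip might look like the following; the shape of a song map is not visible in this diff, so `song_map` stands in for a correctly shaped input tensor, and weight loading is elided.

```python
vae = VAE()
# ... load weights here; the new import comment suggests os is used for this ...

z_sample, mu, sd = vae.encode(song_map)   # compress an existing song map
reconstruction = vae.generate(z_sample)   # decode it back into a song map
new_song = vae.generate()                 # or sample a fresh song from N(0, I)
```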