Update app.py
app.py CHANGED
@@ -10,9 +10,10 @@ import os
 import pickle
 from PIL import Image
 import numpy as np
-
-
+import gc

+# Set default dtype to float16
+jax.config.update("jax_default_dtype", "float16")

 # Set up cache directories
 cache_dir = "/tmp/huggingface_cache"
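
This hunk moves the script toward half precision. As a minimal standalone check of what float16 arrays look like in JAX (independent of the config flag above, since explicit dtype arguments are the portable route), something like:

import jax.numpy as jnp

x = jnp.ones((2, 2), dtype=jnp.float16)   # explicit float16 allocation
y = x * jnp.float16(0.5)                  # stays float16
print(x.dtype, y.dtype)                   # float16 float16
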
@@ -35,7 +36,7 @@ def get_model(model_id, revision):
     pipeline, params = FlaxStableDiffusionPipeline.from_pretrained(
         model_id,
         revision=revision,
-        dtype=jnp.
+        dtype=jnp.float16,
     )
     with open(model_cache_file, 'wb') as f:
         pickle.dump((pipeline, params), f)
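
The surrounding get_model helper pickles the loaded pipeline so later runs skip the download. A minimal sketch of that load-once, cache-to-disk pattern, with a hypothetical load_fn standing in for FlaxStableDiffusionPipeline.from_pretrained:

import os
import pickle

def cached_load(cache_file, load_fn):
    # Reuse the pickled object if it exists; otherwise load it and cache it.
    if os.path.exists(cache_file):
        with open(cache_file, "rb") as f:
            return pickle.load(f)
    obj = load_fn()
    with open(cache_file, "wb") as f:
        pickle.dump(obj, f)
    return obj
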
@@ -58,14 +59,12 @@ def preprocess_images(examples):
         # Resize and convert to RGB
         image = image.convert("RGB").resize((512, 512))
         # Convert to numpy array and normalize
-        image = np.array(image).astype(np.
+        image = np.array(image).astype(np.float16) / 255.0
         # Ensure the image has the shape (3, height, width)
         return image.transpose(2, 0, 1) # Change to channel-first format

     return {"pixel_values": [process_image(img) for img in examples["image"]]}

-
-
 # Load dataset from Hugging Face
 dataset_name = "uruguayai/montevideo"
 dataset_cache_file = os.path.join(cache_dir, "montevideo_dataset.pkl")
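
A self-contained sketch of the same resize, normalize, channel-first transform on a single PIL image; the size and dtype mirror the values used above:

import numpy as np
from PIL import Image

def to_model_input(img: Image.Image, size=512, dtype=np.float16):
    img = img.convert("RGB").resize((size, size))
    arr = np.asarray(img).astype(dtype) / 255.0   # scale pixels to [0, 1]
    return arr.transpose(2, 0, 1)                 # (3, H, W), channel-first
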
@@ -114,12 +113,16 @@ except Exception as e:

     raise ValueError("Unable to locate or load the dataset. Please check the dataset path and permissions.")

+# Function to clear JIT cache
+def clear_jit_cache():
+    jax.clear_caches()
+    gc.collect()

-# Training function
-def train_step(state, batch, rng):
-    def compute_loss(params):
-        # Convert batch to JAX array
-        pixel_values = jnp.array(
+# Training function with gradient accumulation
+def train_step(state, batch, rng, grad_accumulation_steps=8):
+    def compute_loss(params, batch_slice, rng):
+        # Convert batch slice to JAX array
+        pixel_values = jnp.array(batch_slice["pixel_values"], dtype=jnp.float16)
         batch_size = pixel_values.shape[0]

         # Encode images to latent space
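
jax.clear_caches() (available in recent JAX releases) drops JAX's internal compilation caches; the helper above pairs it with Python's garbage collector. A small usage sketch, with the caveat that how much memory this actually frees depends on the JAX version and backend:

import gc
import jax
import jax.numpy as jnp

@jax.jit
def double(x):
    return x * 2

double(jnp.ones(4))   # compiles and caches an executable
jax.clear_caches()    # drop compilation caches
gc.collect()          # collect unreferenced Python objects
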
@@ -128,11 +131,11 @@ def train_step(state, batch, rng):
             pixel_values,
             method=pipeline.vae.encode
         ).latent_dist.sample(rng)
-        latents = latents * 0.18215 # scaling factor
+        latents = latents * jnp.float16(0.18215) # scaling factor

         # Generate random noise
         noise_rng, timestep_rng, latents_rng = jax.random.split(rng, 3)
-        noise = jax.random.normal(noise_rng, latents.shape)
+        noise = jax.random.normal(noise_rng, latents.shape, dtype=jnp.float16)

         # Sample random timesteps
         timesteps = jax.random.randint(
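
A minimal sketch of the PRNG handling in this hunk: split one key into independent subkeys, then draw half-precision noise and integer timesteps. The latent shape and the 1000-step range are illustrative assumptions:

import jax
import jax.numpy as jnp

rng = jax.random.PRNGKey(0)
noise_rng, timestep_rng, latents_rng = jax.random.split(rng, 3)

latent_shape = (2, 4, 64, 64)   # assumed (batch, channels, height, width)
noise = jax.random.normal(noise_rng, latent_shape, dtype=jnp.float16)
timesteps = jax.random.randint(timestep_rng, (latent_shape[0],), 0, 1000)
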
@@ -151,13 +154,17 @@ def train_step(state, batch, rng):
         )

         # Generate random latents for text encoder
-        encoder_hidden_states = jax.random.normal(
+        encoder_hidden_states = jax.random.normal(
+            latents_rng,
+            (batch_size, pipeline.text_encoder.config.hidden_size),
+            dtype=jnp.float16
+        )

         # Predict noise
         model_output = state.apply_fn.apply(
             {'params': params["unet"]},
-            jnp.array(noisy_latents),
-            jnp.array(timesteps),
+            jnp.array(noisy_latents, dtype=jnp.float16),
+            jnp.array(timesteps, dtype=jnp.float16),
             encoder_hidden_states=encoder_hidden_states,
             train=True,
         )
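
The jnp.array(..., dtype=jnp.float16) wrappers above cast every input to a common dtype before the UNet call. A small sketch of that casting step in isolation; the helper name and shapes are hypothetical:

import jax.numpy as jnp

def cast_inputs(*arrays, dtype=jnp.float16):
    # Cast each input to the requested dtype (no-op when it already matches).
    return tuple(jnp.asarray(a, dtype=dtype) for a in arrays)

latents16, timesteps16 = cast_inputs(jnp.zeros((2, 4, 64, 64)), jnp.arange(2))
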
@@ -166,12 +173,37 @@ def train_step(state, batch, rng):
         loss = jnp.mean((model_output - noise) ** 2)
         return loss

-
-
-
+    grad_fn = jax.value_and_grad(compute_loss)
+
+    # Split the batch into smaller chunks
+    batch_size = len(batch['pixel_values'])
+    chunk_size = batch_size // grad_accumulation_steps
+
+    # Initialize accumulated gradients
+    acc_grads = jax.tree_map(jnp.zeros_like, state.params)
+    acc_loss = jnp.float16(0.0)
+
+    for i in range(grad_accumulation_steps):
+        start_idx = i * chunk_size
+        end_idx = start_idx + chunk_size if i < grad_accumulation_steps - 1 else batch_size
+
+        batch_slice = {
+            'pixel_values': batch['pixel_values'][start_idx:end_idx]
+        }
+
+        rng, step_rng = jax.random.split(rng)
+        loss, grads = grad_fn(state.params, batch_slice, step_rng)
+
+        # Accumulate gradients and loss
+        acc_grads = jax.tree_map(lambda acc, g: acc + g / grad_accumulation_steps, acc_grads, grads)
+        acc_loss += loss / grad_accumulation_steps
+
+    # Update state with accumulated gradients
+    state = state.apply_gradients(grads=acc_grads)
+    return state, acc_loss

 # Initialize training state
-learning_rate = 1e-5
+learning_rate = jnp.float16(1e-5)
 optimizer = optax.adam(learning_rate)
 state = train_state.TrainState.create(
     apply_fn=unet,
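
A self-contained sketch of the gradient-accumulation pattern introduced in this hunk, reduced to a toy linear model so it runs on its own; the parameter pytree, data, and accumulation count are illustrative assumptions, not the app's real model:

import jax
import jax.numpy as jnp
import optax

def loss_fn(params, x):
    pred = x @ params["w"] + params["b"]
    return jnp.mean(pred ** 2)

params = {"w": jnp.ones((4, 1)), "b": jnp.zeros((1,))}
opt = optax.adam(1e-3)
opt_state = opt.init(params)

grad_fn = jax.value_and_grad(loss_fn)
accum_steps = 4
data = jnp.arange(32.0).reshape(8, 4)   # 8 examples split into 4 micro-batches

acc_grads = jax.tree_util.tree_map(jnp.zeros_like, params)
acc_loss = 0.0
for chunk in jnp.split(data, accum_steps):
    loss, grads = grad_fn(params, chunk)
    acc_grads = jax.tree_util.tree_map(lambda a, g: a + g / accum_steps, acc_grads, grads)
    acc_loss += loss / accum_steps

updates, opt_state = opt.update(acc_grads, opt_state, params)
params = optax.apply_updates(params, updates)

Averaging each micro-batch's gradients before a single optimizer step keeps the effective batch size while per-step memory stays small.
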
@@ -181,7 +213,7 @@ state = train_state.TrainState.create(

 # Training loop
 num_epochs = 10
-batch_size =
+batch_size = 2 # Reduced batch size
 rng = jax.random.PRNGKey(0)

 for epoch in range(num_epochs):
@@ -189,19 +221,25 @@ for epoch in range(num_epochs):
     num_batches = 0
     for batch in tqdm(processed_dataset.batch(batch_size)):
         # Convert the list of pixel values to a numpy array for each batch
-        batch['pixel_values'] = np.array(batch['pixel_values'])
+        batch['pixel_values'] = np.array(batch['pixel_values'], dtype=np.float16)
         rng, step_rng = jax.random.split(rng)
         state, loss = train_step(state, batch, step_rng)
         epoch_loss += loss
         num_batches += 1
+
+        # Clear JIT cache every 10 batches
+        if num_batches % 10 == 0:
+            clear_jit_cache()
+
     avg_loss = epoch_loss / num_batches
     print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss}")
-
-
+
+    # Clear JIT cache after each epoch
+    clear_jit_cache()

 # Save the fine-tuned model
 output_dir = "/tmp/montevideo_fine_tuned_model"
 os.makedirs(output_dir, exist_ok=True)
-unet.save_pretrained(output_dir, params=state.params)
+unet.save_pretrained(output_dir, params=state.params["unet"])

 print(f"Model saved to {output_dir}")
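
A hedged sketch of loading the saved UNet weights back, assuming the output directory written above and that diffusers' Flax from_pretrained returns a (model, params) pair:

from diffusers import FlaxUNet2DConditionModel

output_dir = "/tmp/montevideo_fine_tuned_model"
unet, unet_params = FlaxUNet2DConditionModel.from_pretrained(output_dir)
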