Spaces:

uruguayai
/

trainflux

Runtime error

App Files Community

uruguayai committed on Sep 8, 2024

Commit

cc5a61c

verified ·

1 Parent(s): 0d8b9ef

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -94

app.py CHANGED Viewed

@@ -53,12 +53,9 @@ def preprocess_images(examples):
             image = Image.open(image)
         if not isinstance(image, Image.Image):
             raise ValueError(f"Unexpected image type: {type(image)}")
-        # Resize and convert to RGB
-        image = image.convert("RGB").resize((512, 512))
-        # Convert to numpy array and normalize
         image = np.array(image).astype(np.float16) / 255.0
-        # Ensure the image has the shape (3, height, width)
-        return image.transpose(2, 0, 1)  # Change to channel-first format
     return {"pixel_values": [process_image(img) for img in examples["image"]]}
@@ -76,15 +73,9 @@ try:
             processed_dataset = pickle.load(f)
     else:
         print("Loading dataset from Hugging Face...")
-        dataset = load_dataset(dataset_name)
-        print("Dataset structure:", dataset)
-        print("Available splits:", dataset.keys())
-        if "train" not in dataset:
-            raise ValueError("The dataset does not contain a 'train' split.")
         print("Processing dataset...")
-        processed_dataset = dataset["train"].map(preprocess_images, batched=True, remove_columns=dataset["train"].column_names)
         with open(dataset_cache_file, 'wb') as f:
             pickle.dump(processed_dataset, f)
@@ -92,23 +83,7 @@ try:
 except Exception as e:
     print(f"Error loading or processing dataset: {str(e)}")
-    print("Attempting to find dataset...")
-    # List contents of current directory and parent directories
-    print("Current directory contents:")
-    print(os.listdir('.'))
-    print("Parent directory contents:")
-    print(os.listdir('..'))
-    print("Root directory contents:")
-    print(os.listdir('/'))
-    # Try to find any directory that might contain the dataset
-    for root, dirs, files in os.walk('/'):
-        if 'montevideo' in dirs:
-            print(f"Found 'montevideo' directory at: {os.path.join(root, 'montevideo')}")
-            print(f"Contents: {os.listdir(os.path.join(root, 'montevideo'))}")
-    raise ValueError("Unable to locate or load the dataset. Please check the dataset path and permissions.")
 # Function to clear JIT cache
 def clear_jit_cache():
@@ -116,122 +91,78 @@ def clear_jit_cache():
     gc.collect()
 # Training function with gradient accumulation
-def train_step(state, batch, rng, grad_accumulation_steps=8):
-    def compute_loss(params, batch_slice, rng):
-        # Convert batch slice to JAX array
-        pixel_values = jnp.array(batch_slice["pixel_values"], dtype=jnp.float16)
-        batch_size = pixel_values.shape[0]
-        # Encode images to latent space
         latents = pipeline.vae.apply(
             {"params": params["vae"]},
             pixel_values,
             method=pipeline.vae.encode
         ).latent_dist.sample(rng)
-        latents = latents * jnp.float16(0.18215)  # scaling factor
-        # Generate random noise
-        noise_rng, timestep_rng, latents_rng = jax.random.split(rng, 3)
-        noise = jax.random.normal(noise_rng, latents.shape, dtype=jnp.float16)
-        # Sample random timesteps
         timesteps = jax.random.randint(
-            timestep_rng, (batch_size,), 0, pipeline.scheduler.config.num_train_timesteps
         )
-        # Create scheduler state
-        scheduler_state = pipeline.scheduler.create_state()
-        # Add noise to latents using the scheduler
         noisy_latents = pipeline.scheduler.add_noise(
-            scheduler_state,
             original_samples=latents,
             noise=noise,
             timesteps=timesteps
         )
-        # Generate random latents for text encoder
         encoder_hidden_states = jax.random.normal(
-            latents_rng,
-            (batch_size, pipeline.text_encoder.config.hidden_size),
             dtype=jnp.float16
         )
-        # Predict noise
         model_output = state.apply_fn.apply(
             {'params': params["unet"]},
-            jnp.array(noisy_latents, dtype=jnp.float16),
-            jnp.array(timesteps, dtype=jnp.float16),
             encoder_hidden_states=encoder_hidden_states,
             train=True,
         )
-        # Compute loss
-        loss = jnp.mean((model_output - noise) ** 2)
-        return loss
     grad_fn = jax.value_and_grad(compute_loss)
-    # Split the batch into smaller chunks
-    batch_size = len(batch['pixel_values'])
-    chunk_size = batch_size // grad_accumulation_steps
-    # Initialize accumulated gradients
-    acc_grads = jax.tree_map(jnp.zeros_like, state.params)
-    acc_loss = jnp.float16(0.0)
-    for i in range(grad_accumulation_steps):
-        start_idx = i * chunk_size
-        end_idx = start_idx + chunk_size if i < grad_accumulation_steps - 1 else batch_size
-        batch_slice = {
-            'pixel_values': batch['pixel_values'][start_idx:end_idx]
-        }
-        rng, step_rng = jax.random.split(rng)
-        loss, grads = grad_fn(state.params, batch_slice, step_rng)
-        # Accumulate gradients and loss
-        acc_grads = jax.tree_map(lambda acc, g: acc + g / grad_accumulation_steps, acc_grads, grads)
-        acc_loss += loss / grad_accumulation_steps
-    # Update state with accumulated gradients
-    state = state.apply_gradients(grads=acc_grads)
-    return state, acc_loss
 # Initialize training state
 learning_rate = jnp.float16(1e-5)
 optimizer = optax.adam(learning_rate)
 state = train_state.TrainState.create(
     apply_fn=unet,
-    params={"unet": params["unet"], "vae": params["vae"]},  # Include both UNet and VAE params
     tx=optimizer,
 )
 # Training loop
-num_epochs = 10
-batch_size = 2  # Reduced batch size
 rng = jax.random.PRNGKey(0)
 for epoch in range(num_epochs):
     epoch_loss = 0
     num_batches = 0
     for batch in tqdm(processed_dataset.batch(batch_size)):
-        # Convert the list of pixel values to a numpy array for each batch
-        batch['pixel_values'] = np.array(batch['pixel_values'], dtype=np.float16)
         rng, step_rng = jax.random.split(rng)
         state, loss = train_step(state, batch, step_rng)
         epoch_loss += loss
         num_batches += 1
-        # Clear JIT cache every 10 batches
         if num_batches % 10 == 0:
             clear_jit_cache()
     avg_loss = epoch_loss / num_batches
     print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss}")
-    # Clear JIT cache after each epoch
     clear_jit_cache()
 # Save the fine-tuned model

             image = Image.open(image)
         if not isinstance(image, Image.Image):
             raise ValueError(f"Unexpected image type: {type(image)}")
+        image = image.convert("RGB").resize((256, 256))  # Reduced image size
         image = np.array(image).astype(np.float16) / 255.0
+        return image.transpose(2, 0, 1)
     return {"pixel_values": [process_image(img) for img in examples["image"]]}
             processed_dataset = pickle.load(f)
     else:
         print("Loading dataset from Hugging Face...")
+        dataset = load_dataset(dataset_name, split="train[:1000]")  # Load only first 1000 samples
         print("Processing dataset...")
+        processed_dataset = dataset.map(preprocess_images, batched=True, remove_columns=dataset.column_names)
         with open(dataset_cache_file, 'wb') as f:
             pickle.dump(processed_dataset, f)
 except Exception as e:
     print(f"Error loading or processing dataset: {str(e)}")
+    raise ValueError("Unable to load or process the dataset.")
 # Function to clear JIT cache
 def clear_jit_cache():
     gc.collect()
 # Training function with gradient accumulation
+@jax.jit
+def train_step(state, batch, rng):
+    def compute_loss(params, pixel_values, rng):
         latents = pipeline.vae.apply(
             {"params": params["vae"]},
             pixel_values,
             method=pipeline.vae.encode
         ).latent_dist.sample(rng)
+        latents = latents * jnp.float16(0.18215)
+        noise = jax.random.normal(rng, latents.shape, dtype=jnp.float16)
         timesteps = jax.random.randint(
+            rng, (latents.shape[0],), 0, pipeline.scheduler.config.num_train_timesteps
         )
         noisy_latents = pipeline.scheduler.add_noise(
+            pipeline.scheduler.create_state(),
             original_samples=latents,
             noise=noise,
             timesteps=timesteps
         )
         encoder_hidden_states = jax.random.normal(
+            rng,
+            (latents.shape[0], pipeline.text_encoder.config.hidden_size),
             dtype=jnp.float16
         )
         model_output = state.apply_fn.apply(
             {'params': params["unet"]},
+            noisy_latents,
+            timesteps,
             encoder_hidden_states=encoder_hidden_states,
             train=True,
         )
+        return jnp.mean((model_output - noise) ** 2)
     grad_fn = jax.value_and_grad(compute_loss)
+    rng, step_rng = jax.random.split(rng)
+    loss, grads = grad_fn(state.params, batch["pixel_values"], step_rng)
+    state = state.apply_gradients(grads=grads)
+    return state, loss
 # Initialize training state
 learning_rate = jnp.float16(1e-5)
 optimizer = optax.adam(learning_rate)
 state = train_state.TrainState.create(
     apply_fn=unet,
+    params={"unet": params["unet"], "vae": params["vae"]},
     tx=optimizer,
 )
 # Training loop
+num_epochs = 5  # Reduced number of epochs
+batch_size = 4
 rng = jax.random.PRNGKey(0)
 for epoch in range(num_epochs):
     epoch_loss = 0
     num_batches = 0
     for batch in tqdm(processed_dataset.batch(batch_size)):
+        batch['pixel_values'] = jnp.array(batch['pixel_values'], dtype=jnp.float16)
         rng, step_rng = jax.random.split(rng)
         state, loss = train_step(state, batch, step_rng)
         epoch_loss += loss
         num_batches += 1
         if num_batches % 10 == 0:
             clear_jit_cache()
     avg_loss = epoch_loss / num_batches
     print(f"Epoch {epoch+1}/{num_epochs}, Average Loss: {avg_loss}")
     clear_jit_cache()
 # Save the fine-tuned model