cyan2k
/

molmo-7B-O-bnb-4bit

@@ -85,26 +85,26 @@ def resize_and_pad(
     scaled_height = int(np.array(height, np.float32) * image_scale)
     scaled_width = int(np.array(width, np.float32) * image_scale)
-    # if resize_method == "tensorflow":
-    #     FIXME remove
-    import tensorflow as tf
-    image = tf.image.convert_image_dtype(tf.constant(image), dtype=tf.float32)
-    image = tf.image.resize(
-        image,
-        [scaled_height, scaled_width],
-        method=tf.image.ResizeMethod.BILINEAR,
-        antialias=True,
-    )
-    image = tf.clip_by_value(image, 0.0, 1.0)
-    image = image.numpy()
-    # else:
-    #     image = torch.permute(torch.from_numpy(image), [2, 0, 1])
-    #     image = convert_image_dtype(image)  # resize in flaot32
-    #     image = torchvision.transforms.Resize(
-    #         [scaled_height, scaled_width], InterpolationMode.BILINEAR, antialias=True
-    #     )(image)
-    #     image = torch.clip(image, 0.0, 1.0)
-    #     image = torch.permute(image, [1, 2, 0]).numpy()
     top_pad = (desired_height - scaled_height) // 2
     left_pad = (desired_width - scaled_width) // 2

     scaled_height = int(np.array(height, np.float32) * image_scale)
     scaled_width = int(np.array(width, np.float32) * image_scale)
+    if resize_method == "tensorflow":
+        # This option leads to a large GPU memory increase, likely due to how TensorFlow handles memory allocation.
+        import tensorflow as tf
+        image = tf.image.convert_image_dtype(tf.constant(image), dtype=tf.float32)
+        image = tf.image.resize(
+            image,
+            [scaled_height, scaled_width],
+            method=tf.image.ResizeMethod.BILINEAR,
+            antialias=True,
+        )
+        image = tf.clip_by_value(image, 0.0, 1.0)
+        image = image.numpy()
+    else:
+        image = torch.permute(torch.from_numpy(image), [2, 0, 1])
+        image = convert_image_dtype(image)  # resize in flaot32
+        image = torchvision.transforms.Resize(
+            [scaled_height, scaled_width], InterpolationMode.BILINEAR, antialias=True
+        )(image)
+        image = torch.clip(image, 0.0, 1.0)
+        image = torch.permute(image, [1, 2, 0]).numpy()
     top_pad = (desired_height - scaled_height) // 2
     left_pad = (desired_width - scaled_width) // 2