deanna-emery's picture
updates
93528c6
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image-related utilities that are useful to prepare dataset."""
import dataclasses
import imghdr
import io
from typing import Optional, Tuple
import numpy as np
from PIL import Image
@dataclasses.dataclass
class ImageFormat:
  """Canonical (upper-case) names of the image formats handled here.

  The encoded formats are intended to mirror what `tf.io.decode_image`[1]
  accepts, so data prepared with this library can be consumed during model
  development.

  [1]: https://www.tensorflow.org/api_docs/python/tf/io/decode_image
  """

  # Windows bitmap.
  bmp: str = 'BMP'
  # Portable Network Graphics.
  png: str = 'PNG'
  # JPEG.
  jpeg: str = 'JPEG'
  # Array bytes without any container or header.
  raw: str = 'RAW'
def validate_image_format(format_str: str) -> str:
  """Validates `format_str` and returns the canonical format.

  Matching is case-insensitive: the input is upper-cased and the result is
  checked against the values declared on `ImageFormat`. The upper-case string
  is the canonical format.

  Args:
    format_str: Image format string, e.g. 'png' or 'JPEG'.

  Returns:
    Canonical image format string.

  Raises:
    ValueError: If `format_str` is not a string, or if its canonical form is
      not listed in `ImageFormat`.
  """
  # Format sniffers such as `imghdr.what` report "unknown" as None. Reject any
  # non-string here with the documented ValueError instead of letting
  # `.upper()` fail with an unrelated AttributeError.
  if not isinstance(format_str, str):
    raise ValueError(f'Image format is invalid: {format_str}')
  canonical_format = format_str.upper()
  if canonical_format in dataclasses.asdict(ImageFormat()).values():
    return canonical_format
  raise ValueError(f'Image format is invalid: {format_str}')
def encode_image(image_np: np.ndarray, image_format: str) -> bytes:
  """Encodes `image_np` in the format named by `image_format`.

  Args:
    image_np: Numpy image array. For the encoded formats this is a 2-D array
      or a 3-D array whose last axis holds the channels.
    image_format: Name of the target format (case-insensitive). 'RAW' returns
      the array's bytes unchanged; any other value must be listed in
      `ImageFormat`.

  Returns:
    Encoded image string.

  Raises:
    ValueError: If `image_format` is not listed in `ImageFormat`.
  """
  # Compare case-insensitively: `validate_image_format` accepts lower-case
  # names, but 'RAW' is not a format PIL can save, so it must never reach
  # `Image.save` below.
  if image_format.upper() == 'RAW':
    return image_np.tobytes()

  # Fail on an unsupported format before doing any PIL work.
  pil_format = validate_image_format(image_format)

  if len(image_np.shape) > 2 and image_np.shape[2] == 1:
    # Single-channel image: drop only the trailing channel axis. A bare
    # `np.squeeze` would also drop a height or width of 1 and hand PIL an
    # array with the wrong geometry.
    image_pil = Image.fromarray(image_np.reshape(image_np.shape[:2]), 'L')
  else:
    image_pil = Image.fromarray(image_np)

  with io.BytesIO() as output:
    image_pil.save(output, format=pil_format)
    return output.getvalue()
def decode_image(image_bytes: bytes,
                 image_format: Optional[str] = None,
                 image_dtype: str = 'uint8') -> np.ndarray:
  """Decodes `image_bytes` into a numpy array.

  Args:
    image_bytes: Encoded image string, or the array bytes when the format is
      'RAW'.
    image_format: Optional format name (case-insensitive). Only 'RAW' changes
      behavior; for any other value, including None, the bytes are decoded
      with PIL.
    image_dtype: Numpy dtype used to interpret the bytes in the 'RAW' case.
      It is not used for encoded images.

  Returns:
    For 'RAW', a 1-D array over the bytes (no shape is stored in raw bytes).
    Otherwise the decoded image, with a trailing channel axis added when the
    decoded array has fewer than three dimensions.
  """
  # Accept 'raw' as well as 'RAW', matching the case-insensitive handling in
  # `validate_image_format`.
  if image_format is not None and image_format.upper() == 'RAW':
    return np.frombuffer(image_bytes, dtype=image_dtype)

  # `np.array` forces the pixel data to load, so the PIL image can be
  # released as soon as the array has been built.
  with Image.open(io.BytesIO(image_bytes)) as image_pil:
    image_np = np.array(image_pil)

  if len(image_np.shape) < 3:
    # Give single-channel images an explicit channel axis.
    image_np = image_np[..., np.newaxis]
  return image_np
def decode_image_metadata(image_bytes: bytes) -> Tuple[int, int, int, str]:
  """Decodes image metadata from an encoded image string.

  Note that if the image is encoded in RAW format, the metadata cannot be
  inferred from the image bytes.

  Args:
    image_bytes: Encoded image string.

  Returns:
    A tuple of height, width, number of channels, and encoding format.

  Raises:
    ValueError: If the encoding format cannot be determined or is not listed
      in `ImageFormat`.
  """
  image_np = decode_image(image_bytes)
  # https://pillow.readthedocs.io/en/stable/reference/Image.html#image-attributes
  # `decode_image` adds a channel axis to 2-D results, so the shape unpacks
  # into exactly three values here.
  height, width, num_channels = image_np.shape

  image_format = imghdr.what(file=None, h=image_bytes)
  if image_format is None:
    # `imghdr` only inspects a few magic bytes and returns None for images it
    # does not recognize, even when PIL decoded them above. Fall back to the
    # format PIL identified rather than passing None on.
    with Image.open(io.BytesIO(image_bytes)) as image_pil:
      image_format = image_pil.format
  if image_format is None:
    raise ValueError(f'Image format is invalid: {image_format}')
  return height, width, num_channels, validate_image_format(image_format)