from typing import List, Union

import numpy as np

# DEPRECATED: for dataset preprocessing use the torch version instead.
# Known dtype issue: this code can convert torch.float32 data to numpy.float64,
# which causes an error during model training:
# RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type (torch.cuda.FloatTensor) should be the same!
def pad_1D(inputs: List[np.ndarray], pad_value: float = 0.0) -> np.ndarray:
    r"""Pad a list of 1D numpy arrays to the length of the longest one.

    Args:
        inputs (List[np.ndarray]): List of 1D numpy arrays to pad.
        pad_value (float): Value used for padding. Default is 0.0.

    Returns:
        np.ndarray: Padded 2D numpy array of shape (len(inputs), max_len),
            where max_len is the length of the longest input array.
    """
    target_len = max(len(arr) for arr in inputs)
    # np.pad preserves each array's dtype, so no accidental upcasting occurs.
    padded = [
        np.pad(
            arr,
            (0, target_len - arr.shape[0]),
            mode="constant",
            constant_values=pad_value,
        )
        for arr in inputs
    ]
    return np.stack(padded)
def pad_2D(
    inputs: List[np.ndarray], maxlen: Union[int, None] = None, pad_value: float = 0.0,
) -> np.ndarray:
    r"""Pad a list of 2D numpy arrays along their second axis to a common length.

    Args:
        inputs (List[np.ndarray]): List of 2D numpy arrays to pad.
        maxlen (Union[int, None]): Length to pad the second axis to. If None,
            pad to the length of the longest array. Default is None.
        pad_value (float): Value to use for padding. Default is 0.0.

    Returns:
        np.ndarray: Padded 3D numpy array of shape (len(inputs), dim, max_len),
            preserving the dtype of the inputs.

    Raises:
        ValueError: If any input's second axis is longer than the target length.
    """

    def pad(x: np.ndarray, max_len: int) -> np.ndarray:
        r"""Pad a 2D numpy array along axis 1 to a specified length.

        Args:
            x (np.ndarray): 2D numpy array to pad.
            max_len (int): Target length of the second axis.

        Returns:
            np.ndarray: Padded 2D numpy array of shape (x.shape[0], max_len).
        """
        if np.shape(x)[1] > max_len:
            raise ValueError("not max_len")
        # Use np.pad (dtype-preserving) instead of concatenating an np.ones(...)
        # buffer: np.ones defaults to float64 and silently upcast float32 inputs,
        # which is the DoubleTensor/FloatTensor mismatch described at the top of
        # this file.
        return np.pad(
            x,
            ((0, 0), (0, max_len - np.shape(x)[1])),
            mode="constant",
            constant_values=pad_value,
        )

    # `is not None` so an explicit maxlen of 0 is honored rather than being
    # treated as "unset" by truthiness.
    if maxlen is not None:
        target_len = maxlen
    else:
        target_len = max(np.shape(x)[1] for x in inputs)
    return np.stack([pad(x, target_len) for x in inputs])
def pad_3D(inputs: Union[np.ndarray, List[np.ndarray]], B: int, T: int, L: int) -> np.ndarray:
    r"""Pad a batch of 2D arrays into a single zero-filled (B, T, L) array.

    Args:
        inputs (Union[np.ndarray, List[np.ndarray]]): Iterable of 2D arrays to
            copy into the output batch.
        B (int): Batch size of the output array.
        T (int): Number of time steps in the output array.
        L (int): Length of the last axis of the output array.

    Returns:
        np.ndarray: float32 array of shape (B, T, L); each input is placed at
            the top-left corner of its batch slot and the rest stays zero.
    """
    # Output is deliberately float32 (matches the training dtype noted at the
    # top of this file); inputs are cast into it on assignment.
    batch = np.zeros((B, T, L), dtype=np.float32)
    for slot, item in enumerate(inputs):
        rows, cols = np.shape(item)
        batch[slot, :rows, :cols] = item
    return batch