Spaces:
Sleeping
Sleeping
import unittest | |
import pytest | |
import numpy as np | |
import torch | |
import treetensor.torch as ttorch | |
from ding.policy.common_utils import default_preprocess_learn | |
# Parameter grid consumed by test_default_preprocess_learn_action.
# Per-sample action shapes (each entry is unpacked into ``randn``).
shape_test = [[2], [1]]
# Action dtypes, given as the string names that get_action understands.
dtype_test = ["int64", "float32"]
# Container types in which the action is handed to the preprocessing function.
data_type_test = ["numpy", "torch", "treetensor"]
def get_action(shape, dtype, class_type):
    """Create a random action with the requested shape, dtype and container type.

    Args:
        shape: Sequence of ints, unpacked into the backend's ``randn`` call.
        dtype: ``"int64"`` or ``"float32"``; these names are mapped to the
            matching numpy / torch dtype. Any other value is forwarded
            unchanged to the backend's cast.
        class_type: ``"numpy"``, ``"torch"`` or ``"treetensor"``.

    Returns:
        A ``np.ndarray`` for ``"numpy"``, otherwise the result of
        ``torch.randn`` / ``ttorch.randn`` cast with ``.type(dtype)``.

    Raises:
        ValueError: If ``class_type`` is not one of the supported names.
    """
    # Fail loudly instead of silently returning None, which would only surface
    # later as an obscure error inside the code under test.
    if class_type not in ("numpy", "torch", "treetensor"):
        raise ValueError(
            "unsupported class_type {!r}; expected 'numpy', 'torch' or 'treetensor'".format(class_type)
        )

    if class_type == "numpy":
        if dtype == "int64":
            dtype = np.int64
        elif dtype == "float32":
            dtype = np.float32
        return np.random.randn(*shape).astype(dtype)

    # torch and treetensor share the torch dtype objects.
    if dtype == "int64":
        dtype = torch.int64
    elif dtype == "float32":
        dtype = torch.float32

    if class_type == "torch":
        return torch.randn(*shape).type(dtype)
    return ttorch.randn(*shape).type(dtype)
def test_default_preprocess_learn_action():
    """Check batched field shapes for every action shape / dtype / container combination.

    For each combination a list of 10 samples is preprocessed with all option
    flags disabled. The test expects ``obs``/``next_obs`` to be stacked to
    ``[10, 4, 84, 84]``, the scalar fields to have shape ``[10]``, and the
    action to have shape ``[10, *shape]`` -- except for int64 actions of shape
    ``[1]``, which are expected to come back with shape ``[10]``.
    """
    batch_size = 10
    obs_shape = (4, 84, 84)

    # Flatten the three-level parameter grid, keeping the original iteration order.
    cases = [
        (shape, dtype, data_type)
        for shape in shape_test
        for dtype in dtype_test
        for data_type in data_type_test
    ]

    for shape, dtype, data_type in cases:
        samples = []
        for _ in range(batch_size):
            samples.append(
                {
                    'obs': np.random.randn(*obs_shape),
                    'action': get_action(shape, dtype, data_type),
                    'reward': 1.0,
                    'next_obs': np.random.randn(*obs_shape),
                    'done': False,
                    'weight': 1.0,
                    'value': 1.0,
                    'adv': 1.0,
                }
            )

        use_priority_IS_weight = False
        use_priority = False
        use_nstep = False
        ignore_done = False
        batch = default_preprocess_learn(samples, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

        stacked_obs = torch.Size([batch_size, *obs_shape])
        per_sample = torch.Size([batch_size])

        assert batch['obs'].shape == stacked_obs
        if dtype == "int64" and shape[0] == 1:
            expected_action = per_sample
        else:
            expected_action = torch.Size([batch_size, *shape])
        assert batch['action'].shape == expected_action
        assert batch['reward'].shape == per_sample
        assert batch['next_obs'].shape == stacked_obs
        for key in ('done', 'weight', 'value', 'adv'):
            assert batch[key].shape == per_sample
def test_default_preprocess_learn_reward_done_adv_1d():
    """Check that length-1 ``reward`` / ``value`` / ``adv`` arrays batch to shape ``[10]``.

    The samples carry no ``weight`` entry and both priority flags are off; the
    test expects the returned ``weight`` to be ``None``.
    """
    batch_size = 10

    def make_sample():
        # reward, value and adv are 1-element arrays rather than Python scalars.
        return {
            'obs': np.random.randn(4, 84, 84),
            'action': np.random.randn(2),
            'reward': np.array([1.0]),
            'next_obs': np.random.randn(4, 84, 84),
            'done': False,
            'value': np.array([1.0]),
            'adv': np.array([1.0]),
        }

    samples = [make_sample() for _ in range(batch_size)]

    use_priority_IS_weight = False
    use_priority = False
    use_nstep = False
    ignore_done = False
    batch = default_preprocess_learn(samples, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

    per_sample = torch.Size([batch_size])
    assert batch['reward'].shape == per_sample
    assert batch['done'].shape == per_sample
    assert batch['weight'] is None
    assert batch['value'].shape == per_sample
    assert batch['adv'].shape == per_sample
def test_default_preprocess_learn_ignore_done():
    """Check the ``done`` field when ``ignore_done`` is enabled.

    Every sample is marked ``done=True``; with ``ignore_done=True`` the test
    expects the batched ``done`` tensor to be float32 and to sum to zero.
    """
    samples = []
    for _ in range(10):
        samples.append(
            {
                'obs': np.random.randn(4, 84, 84),
                'action': np.random.randn(2),
                'reward': np.array([1.0]),
                'next_obs': np.random.randn(4, 84, 84),
                'done': True,
                'value': np.array([1.0]),
                'adv': np.array([1.0]),
            }
        )

    use_priority_IS_weight = False
    use_priority = False
    use_nstep = False
    ignore_done = True
    batch = default_preprocess_learn(samples, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

    done = batch['done']
    assert done.dtype == torch.float32
    assert torch.sum(done) == 0
def test_default_preprocess_learn_use_priority_IS_weight():
    """Check the ``weight`` field when priority importance-sampling weights are enabled.

    Each of the 10 samples carries ``priority_IS = 1.0``; with both priority
    flags on, the test expects ``weight`` to have shape ``[10]`` and sum to 10.0.
    """
    batch_size = 10

    def make_sample():
        return {
            'obs': np.random.randn(4, 84, 84),
            'action': np.random.randn(2),
            'reward': 1.0,
            'next_obs': np.random.randn(4, 84, 84),
            'done': False,
            'priority_IS': 1.0,
            'value': 1.0,
            'adv': 1.0,
        }

    samples = [make_sample() for _ in range(batch_size)]

    use_priority_IS_weight = True
    use_priority = True
    use_nstep = False
    ignore_done = False
    batch = default_preprocess_learn(samples, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

    weight = batch['weight']
    assert weight.shape == torch.Size([batch_size])
    assert torch.sum(weight) == torch.tensor(10.0)
def test_default_preprocess_learn_nstep():
    """Check the layout of multi-step rewards when ``use_nstep`` is enabled.

    Each of the 10 samples holds a 3-element reward ``[1.0, 2.0, 0.0]``. The
    test expects the batched reward to have shape ``[3, 10]``, with the first
    sample's per-step values readable along the first axis.
    """
    step_rewards = (1.0, 2.0, 0.0)

    samples = []
    for _ in range(10):
        samples.append(
            {
                'obs': np.random.randn(4, 84, 84),
                'action': np.random.randn(2),
                'reward': np.array([1.0, 2.0, 0.0]),
                'next_obs': np.random.randn(4, 84, 84),
                'done': False,
                'value': 1.0,
                'adv': 1.0,
            }
        )

    use_priority_IS_weight = False
    use_priority = False
    use_nstep = True
    ignore_done = False
    batch = default_preprocess_learn(samples, use_priority_IS_weight, use_priority, use_nstep, ignore_done)

    reward = batch['reward']
    assert reward.shape == torch.Size([3, 10])
    # Column 0 is the first sample; walk its rewards step by step.
    for step, expected in enumerate(step_rewards):
        assert reward[step][0] == torch.tensor(expected)