Spaces:
Sleeping
Sleeping
import pytest | |
import numpy as np | |
from easydict import EasyDict | |
from ding.utils import set_pkg_seed | |
from dizoo.evogym.envs import EvoGymEnv | |
def test_evogym_env_eval_episode_return(robot): | |
set_pkg_seed(1234, use_cuda=False) | |
env = EvoGymEnv(EasyDict({'env_id': 'Walker-v0', 'robot': robot, 'robot_dir': '../'})) | |
env.seed(1234) | |
env.reset() | |
action_dim = env.action_space.shape | |
eval_episode_return = np.array([0.], dtype=np.float32) | |
if robot == 'speed_bot': | |
assert env.observation_space.shape == (58, ) | |
assert action_dim == (10, ) | |
while True: | |
action = np.random.random(size=action_dim) | |
timestep = env.step(action) | |
eval_episode_return += timestep.reward | |
print("{}(dtype: {})".format(timestep.reward, timestep.reward.dtype)) | |
if timestep.done: | |
print( | |
"{}({}), {}({})".format( | |
timestep.info['eval_episode_return'], type(timestep.info['eval_episode_return']), | |
eval_episode_return, type(eval_episode_return) | |
) | |
) | |
# timestep.reward and the cumulative reward in wrapper EvalEpisodeReturn are not the same. | |
assert abs(timestep.info['eval_episode_return'].item() - eval_episode_return.item()) / \ | |
abs(timestep.info['eval_episode_return'].item()) < 1e-5 | |
break | |