from typing import Union, Optional, List, Any, Callable, Tuple

import torch

from ding.config import compile_config, read_config
from ding.envs import get_vec_env_setting
from ding.policy import create_policy
from ding.utils import set_pkg_seed
from ding.torch_utils import to_tensor, to_ndarray, tensor_to_list


def eval(
        input_cfg: Union[str, Tuple[dict, dict]],
        seed: int = 0,
        model: Optional[torch.nn.Module] = None,
        state_dict: Optional[dict] = None,
        replay_path: Optional[str] = './video',
) -> float:
r""" | |
Overview: | |
The evaluation entry for NGU policy. | |
Arguments: | |
- input_cfg (:obj:`Union[str, Tuple[dict, dict]]`): Config in dict type. \ | |
``str`` type means config file path. \ | |
``Tuple[dict, dict]`` type means [user_config, create_cfg]. | |
- seed (:obj:`int`): Random seed. | |
- env_setting (:obj:`Optional[List[Any]]`): A list with 3 elements: \ | |
``BaseEnv`` subclass, collector env config, and evaluator env config. | |
- model (:obj:`Optional[torch.nn.Module]`): Instance of torch.nn.Module. | |
- state_dict (:obj:`Optional[dict]`): The state_dict of policy or model. | |
""" | |
    if isinstance(input_cfg, str):
        cfg, create_cfg = read_config(input_cfg)
    else:
        cfg, create_cfg = input_cfg
    create_cfg.policy.type += '_command'
    cfg = compile_config(cfg, auto=True, create_cfg=create_cfg)
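    # Build a single evaluator environment from the compiled env config and fix its seed,
    # so that repeated evaluations with the same seed are reproducible.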
    env_fn, _, evaluator_env_cfg = get_vec_env_setting(cfg.env)
    env = env_fn(evaluator_env_cfg[0])
    env.seed(seed, dynamic_seed=False)
    set_pkg_seed(seed, use_cuda=cfg.policy.cuda)
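    # Create the policy in eval mode and load the checkpoint weights,
    # either from the passed-in state_dict or from cfg.learner.load_path.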
    policy = create_policy(cfg.policy, model=model, enable_field=['eval']).eval_mode
    if state_dict is None:
        state_dict = torch.load(cfg.learner.load_path, map_location='cpu')
    policy.load_state_dict(state_dict)
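    # Record the evaluation episode as a replay video under ``replay_path``.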
    env.enable_save_replay(replay_path=replay_path)
    obs = env.reset()
    obs = {0: obs}
    episode_return = 0.
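    # NGU conditions its policy on an exploration-factor (beta) index, the previous action and the
    # previous extrinsic reward; initialize them for the single environment (env_id 0).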
    beta_index = {i: 0 for i in range(1)}
    beta_index = to_tensor(beta_index, dtype=torch.int64)
    prev_action = {i: torch.tensor(-1) for i in range(1)}
    prev_reward_e = {i: to_tensor(0, dtype=torch.float32) for i in range(1)}
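    # Roll out one full episode with the loaded policy.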
    while True:
        # TODO(pu): r_i, reward embedding
        policy_output = policy.forward(beta_index, obs, prev_action, prev_reward_e)
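        # Extract the action chosen for env 0 and convert it to ndarray before stepping the env.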
        actions = {i: a['action'] for i, a in policy_output.items()}
        actions = to_ndarray(actions)
        action = policy_output[0]['action']
        action = to_ndarray(action)
        timestep = env.step(action)
        # print(action)
        # print(timestep.reward)
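        # Wrap the single timestep and carry the current reward/action over as the "previous"
        # inputs for the next policy forward call.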
        timesteps = {0: timestep}
        timesteps = to_tensor(timesteps, dtype=torch.float32)
        prev_reward_e = {env_id: timestep.reward for env_id, timestep in timesteps.items()}
        prev_reward_e = to_ndarray(prev_reward_e)
        prev_action = actions
        timestep = timesteps[0]
        # print(timestep.info)
        episode_return += timestep.reward
        obs = timestep.obs
        obs = {0: obs}
        if timestep.done:
            print(timestep.info)
            break
    print('Eval is over! The performance of your RL policy is {}'.format(episode_return))
    return episode_return
if __name__ == "__main__": | |
# Users should add their own model path here. Model path should lead to a model. | |
# Absolute path is recommended. | |
# In DI-engine, it is ``exp_name/ckpt/ckpt_best.pth.tar``. | |
    model_path = './debug_minigrid_doorkey_ngu_ul298_er01_n32_rbs3e4_fixepseval/ckpt/ckpt_best.pth.tar'
    # model_path = 'model_path_placeholder'
    cfg = '../config/minigrid_ngu_config.py'
    state_dict = torch.load(model_path, map_location='cpu')
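    # Evaluate the same checkpoint over 10 different seeds.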
    for i in range(0, 10):
        eval(cfg, seed=i, state_dict=state_dict, replay_path='./video')