# Import the necessary libraries and configs based on the model you want to evaluate
from zoo.box2d.lunarlander.config.lunarlander_disc_muzero_config import main_config, create_config
from lzero.entry import eval_muzero
import numpy as np

if __name__ == "__main__":
    """
    Overview:
        Evaluate the model performance by running multiple episodes with different seeds using the MuZero algorithm.
        The evaluation results (returns and mean returns) are printed out for each seed and summarized over all seeds.
    Variables:
        - model_path (:obj:`str`): Path to the pretrained model's checkpoint file, usually something like
          "exp_name/ckpt/ckpt_best.pth.tar". An absolute path is recommended.
        - seeds (:obj:`List[int]`): List of seeds to use for evaluation. Each seed is run for a specified number
          of episodes.
        - num_episodes_each_seed (:obj:`int`): Number of episodes to run for each seed.
        - main_config (:obj:`EasyDict`): Main configuration for the evaluation, imported from the model's config file.
        - returns_mean_seeds (:obj:`List[float]`): List storing the mean return for each seed.
        - returns_seeds (:obj:`List[List[float]]`): List storing the per-episode returns for each seed.
    Outputs:
        Prints the mean return and per-episode returns for each seed, along with the overall mean return across all seeds.

    .. note::
        The eval_muzero function is used here for evaluation. For more details about this function and its parameters,
        please refer to its own documentation.
    """
    # model_path = './ckpt/ckpt_best.pth.tar'
    model_path = None
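    # Note (an assumption about eval_muzero's behavior, not stated in this script):
    # when model_path is None, no checkpoint is loaded and a randomly initialized
    # model is evaluated; point model_path at your own checkpoint to evaluate
    # trained weights.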
    # Initialize a list with a single seed for the experiment
    seeds = [0]
    # Set the number of episodes to run for each seed
    num_episodes_each_seed = 1
    # Specify the number of environments for the evaluator to use
    main_config.env.evaluator_env_num = 1
    # Set the number of episodes for the evaluator to run
    main_config.env.n_evaluator_episode = 1
    # The total number of test episodes is the product of the number of episodes per seed and the number of seeds
    # (informational only; eval_muzero receives num_episodes_each_seed directly)
    total_test_episodes = num_episodes_each_seed * len(seeds)
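    # Hedged suggestion (illustrative values, not from the original script): for a
    # more robust estimate, evaluate several seeds and episodes, e.g.:
    # seeds = [0, 1, 2]
    # num_episodes_each_seed = 5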
    # Uncomment the following line to save a replay of the episodes as an mp4 video
    # main_config.env.replay_path = './video'

    # Enable saving replays as gifs and specify the path to save them
    main_config.env.save_replay_gif = True
    main_config.env.replay_path_gif = './video'
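    # Optional (a minimal sketch, not part of the original script): create the
    # replay output directory up front, in case the environment wrapper does not
    # create it automatically.
    import os
    os.makedirs(main_config.env.replay_path_gif, exist_ok=True)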
    # Initialize lists to store the mean and per-episode returns for each seed
    returns_mean_seeds = []
    returns_seeds = []

    # For each seed, run the evaluation function and store the resulting mean and per-episode returns
    for seed in seeds:
        returns_mean, returns = eval_muzero(
            [main_config, create_config],  # Configuration parameters for the evaluation
            seed=seed,  # The seed for the random number generator
            num_episodes_each_seed=num_episodes_each_seed,  # The number of episodes to run for this seed
            print_seed_details=False,  # Whether to print detailed information for each seed
            model_path=model_path  # The path to the trained model to be evaluated
        )
        # Append the mean and per-episode returns to their respective lists
        returns_mean_seeds.append(returns_mean)
        returns_seeds.append(returns)

    # Convert the lists of returns to numpy arrays for easier statistical analysis
    returns_mean_seeds = np.array(returns_mean_seeds)
    returns_seeds = np.array(returns_seeds)
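    # Optional (hedged sketch): returns_seeds has shape
    # (len(seeds), num_episodes_each_seed), so the spread across episodes can be
    # summarized with standard numpy reductions, e.g.:
    # print(f"Std of per-seed mean returns: {returns_mean_seeds.std():.2f}")
    # print(f"Min/max episode return: {returns_seeds.min():.2f}/{returns_seeds.max():.2f}")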
    # Print evaluation results
    print("=" * 20)
    print(f"We evaluated a total of {len(seeds)} seeds. For each seed, we evaluated {num_episodes_each_seed} episode(s).")
    print(f"For seeds {seeds}, the mean returns are {returns_mean_seeds}, and the returns are {returns_seeds}.")
    print("Across all seeds, the mean return is:", returns_mean_seeds.mean())
    print("=" * 20)