Spaces:

acozma
/

CS581-Algos-Demo

Sleeping

Andrei Cozma

Added monte carlo training and testing scripts for cliff walking gym env

01901c5 about 2 years ago

1.06 kB

	import numpy as np
	import gymnasium as gym
	from tqdm import tqdm

	# Evaluation settings for the Monte Carlo test run.
	n_test_episodes: int = 10  # number of test episodes to play
	n_steps: int = 500  # per-episode cap on environment steps
	policy_file: str = "policy.npy"  # NumPy file the policy is loaded from


	def _run_episode(env, Pi, n_actions, max_steps):
	    """Roll out a single episode by sampling actions from ``Pi``.

	    Args:
	        env: Environment exposing ``reset()`` and a five-value ``step()``.
	        Pi: Array indexed by state; ``Pi[state]`` is passed to
	            ``np.random.choice`` as the action probabilities.
	        n_actions: Number of actions to sample from.
	        max_steps: Upper bound on the number of steps taken.

	    Returns:
	        True if the environment reports termination within ``max_steps``
	        steps (this script treats that as success); False if the episode
	        is truncated or the step budget is exhausted first.
	    """
	    state, _ = env.reset()
	    for _ in range(max_steps):
	        # Sample from the policy's distribution rather than taking argmax.
	        action = np.random.choice(n_actions, p=Pi[state])
	        state, _, terminated, truncated, _ = env.step(action)
	        if terminated:
	            return True
	        if truncated:
	            # The episode was cut short; stop instead of stepping further.
	            return False
	    return False


	def main():
	    """Load the saved policy and replay it on ``CliffWalking-v0``.

	    Reads the policy array from ``policy_file``, prints it, then plays
	    ``n_test_episodes`` episodes of at most ``n_steps`` steps each with
	    ``render_mode="human"``, printing "Success!" or "Failed!" per episode.
	    The environment is closed even if an episode raises.
	    """
	    print("=" * 80)
	    print("# Cliff Walking - Monte Carlo Test")
	    print("=" * 80)
	    # Load the policy from disk.
	    print(f"Loading policy from file: '{policy_file}'...")
	    Pi = np.load(policy_file)
	    print("Policy:")
	    print(Pi)
	    print(f"shape: {Pi.shape}")
	    # Two-value unpacking requires a 2-D array; the second axis is the
	    # number of actions to sample from.
	    _, n_actions = Pi.shape

	    print("=" * 80)
	    print(f"Testing policy for {n_test_episodes} episodes...")
	    env = gym.make("CliffWalking-v0", render_mode="human")
	    try:
	        for e in range(n_test_episodes):
	            print(f"Test #{e + 1}:", end=" ")
	            succeeded = _run_episode(env, Pi, n_actions, n_steps)
	            print("Success!" if succeeded else "Failed!")
	    finally:
	        # Always release the environment, even on error.
	        env.close()


	if __name__ == "__main__":
	    # Run the test only when executed as a script, not on import.
	    main()