""" | |
Overview: | |
Implement games between different bots to test the win rates and the speed. | |
Example: | |
test_tictactoe_mcts_bot_vs_alphabeta_bot means a game between mcts_bot and alphabeta_bot where | |
mcts_bot makes the first move (i.e. bots on the left make the first move). | |
""" | |
import time | |
import numpy as np | |
from easydict import EasyDict | |
from zoo.board_games.gomoku.envs.gomoku_env import GomokuEnv | |
from zoo.board_games.mcts_bot import MCTSBot | |
from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv | |
# Shared TicTacToe environment config. NOTE: the test functions below mutate
# 'bot_action_type' in place before each run, so this dict is stateful across tests.
cfg_tictactoe = dict(
    battle_mode='self_play_mode',
    agent_vs_human=False,
    bot_action_type='v0',  # {'v0', 'alpha_beta_pruning'}
    prob_random_agent=0,
    prob_expert_agent=0,
    channel_last=True,
    scale=True,
    prob_random_action_in_bot=0.,
)
def test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and rule_bot (v0), where rule_bot takes the first move.
        NOTE(review): the function name suggests mcts_bot moves first, but the inline comments and
        player assignments give rule_bot the first move — confirm the intended order.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'v0'
    # Lists to record the time required for each decision round, and the winners.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clean board, player 1 starts.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 (rule_bot) to move first.
        player_index = 0
        while not env.get_done_reward()[0]:
            # The two players take turns to move; each decision is timed.
            # rule_bot plays as player 1: env.bot_action() dispatches on bot_action_type='v0'.
            if player_index == 0:
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                # BUGFIX: rule_bot times were previously appended to mcts_bot_time_list,
                # attributing the statistics to the wrong bot.
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            # mcts_bot plays as player 2.
            else:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                # BUGFIX: MCTS times were previously appended to bot_action_time_list.
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)
    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))
    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between alphabeta_bot and rule_bot, where alphabeta_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): Kept for interface consistency and logged in the summary;
          neither bot in this match-up is MCTS-based, so it does not affect play.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision round, and the winners.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment; player 2 is the start player.
        env.reset(start_player_index=1)
        # Set player 2 (alphabeta_bot) to move first.
        player_index = 1
        while not env.get_done_reward()[0]:
            # The two players take turns to move; each decision is timed.
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.rule_bot_v0()
                rule_bot_v0_time_list.append(time.time() - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.time() - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board when the game ends.
            if env.get_done_reward()[0]:
                print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)
    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))
    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # BUGFIX: the label previously read 'bot_action_var' while printing rule_bot_v0_var.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between alphabeta_bot and mcts_bot, where alphabeta_bot takes the first
        move: the environment starts with player 2 (start_player_index=1) and player_index begins
        at 1, which is the alphabeta branch below.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision round, and the winners.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment; player 2 is the start player.
        env.reset(start_player_index=1)
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 2 to move first.
        player_index = 1
        while not env.get_done_reward()[0]:
            """
            Overview:
                The two players take turns to make moves, and the time required for each decision is recorded.
            """
            # mcts_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                # action = env.rule_bot_v0()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                mcts_bot_time_list.append(t2 - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                # action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                alphabeta_pruning_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the board and the chosen action after every move.
            print(state)
            print(action)
            # Print the final board again when the game ends.
            if env.get_done_reward()[0]:
                print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between rule_bot and alphabeta_bot, where rule_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): Kept for interface consistency and logged in the summary;
          neither bot in this match-up is MCTS-based, so it does not affect play.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision round, and the winners.
    alphabeta_pruning_time_list = []
    rule_bot_v0_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clean board, player 1 starts.
        env.reset()
        # Set player 1 (rule_bot) to move first.
        player_index = 0
        while not env.get_done_reward()[0]:
            # The two players take turns to move; each decision is timed.
            # rule_bot plays as player 1.
            if player_index == 0:
                t1 = time.time()
                action = env.rule_bot_v0()
                rule_bot_v0_time_list.append(time.time() - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.time() - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board when the game ends.
            if env.get_done_reward()[0]:
                print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)
    rule_bot_v0_mu = np.mean(rule_bot_v0_time_list)
    rule_bot_v0_var = np.var(rule_bot_v0_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))
    print('rule_bot_v0_time_list={}\n'.format(rule_bot_v0_time_list))
    # BUGFIX: the label previously read 'bot_action_var' while printing rule_bot_v0_var.
    print('rule_bot_v0_mu={}, rule_bot_v0_var={}\n'.format(rule_bot_v0_mu, rule_bot_v0_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
def test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=50):
    """
    Overview:
        A tictactoe game between mcts_bot and alphabeta_bot, where mcts_bot takes the first move.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    cfg_tictactoe['bot_action_type'] = 'alpha_beta_pruning'
    # Lists to record the time required for each decision round, and the winners.
    alphabeta_pruning_time_list = []
    mcts_bot_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = TicTacToeEnv(EasyDict(cfg_tictactoe))
        # Reset the environment: clean board, player 1 starts.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 (mcts_bot) to move first.
        player_index = 0
        while not env.get_done_reward()[0]:
            # The two players take turns to move; each decision is timed.
            # mcts_bot plays as player 1, selecting the most-visited child as the best action.
            if player_index == 0:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index, best_action_type="most_visit")
                mcts_bot_time_list.append(time.time() - t1)
                player_index = 1
            # alphabeta_bot plays as player 2.
            else:
                t1 = time.time()
                action = env.bot_action_alpha_beta_pruning()
                alphabeta_pruning_time_list.append(time.time() - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board when the game ends.
            if env.get_done_reward()[0]:
                print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    alphabeta_pruning_mu = np.mean(alphabeta_pruning_time_list)
    alphabeta_pruning_var = np.var(alphabeta_pruning_time_list)
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('alphabeta_pruning_time_list={}\n'.format(alphabeta_pruning_time_list))
    print('alphabeta_pruning_mu={}, alphabeta_pruning_var={}\n'.format(alphabeta_pruning_mu, alphabeta_pruning_var))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    # BUGFIX: the label previously read 'bot_action_var' while printing mcts_bot_var.
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
# Shared Gomoku environment config (5x5 board) used by the gomoku test below.
cfg_gomoku = dict(
    board_size=5,
    battle_mode='self_play_mode',
    bot_action_type='v0',  # {'v0', 'alpha_beta_pruning'}
    agent_vs_human=False,
    prob_random_agent=0,
    channel_last=True,
    scale=True,
    prob_random_action_in_bot=0.,
    check_action_to_connect4_in_bot_v0=False,
)
def test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50):
    """
    Overview:
        A gomoku game between mcts_bot and rule_bot (v0), where rule_bot takes the first move.
        NOTE(review): the function name suggests mcts_bot moves first, but the inline comments and
        player assignments give rule_bot the first move — confirm the intended order.
    Arguments:
        - num_simulations (:obj:`int`): The number of the simulations required to find the best move.
    """
    # Lists to record the time required for each decision round, and the winners.
    mcts_bot_time_list = []
    bot_action_time_list = []
    winner = []
    # Repeat the game for 10 rounds.
    for i in range(10):
        print('-' * 10 + str(i) + '-' * 10)
        # Initialize the game, where there are two players: player 1 and player 2.
        env = GomokuEnv(EasyDict(cfg_gomoku))
        # Reset the environment: clean board, player 1 starts.
        env.reset()
        state = env.board
        player = MCTSBot(env, 'a', num_simulations)  # player_index = 0, player = 1
        # Set player 1 (rule_bot) to move first.
        player_index = 0
        while not env.get_done_reward()[0]:
            # The two players take turns to move; each decision is timed.
            # rule_bot plays as player 1: env.bot_action() dispatches on bot_action_type='v0'.
            if player_index == 0:
                t1 = time.time()
                action = env.bot_action()
                t2 = time.time()
                # BUGFIX: rule_bot times were previously appended to mcts_bot_time_list,
                # attributing the statistics to the wrong bot.
                bot_action_time_list.append(t2 - t1)
                player_index = 1
            # mcts_bot plays as player 2.
            else:
                t1 = time.time()
                action = player.get_actions(state, player_index=player_index)
                t2 = time.time()
                # BUGFIX: MCTS times were previously appended to bot_action_time_list.
                mcts_bot_time_list.append(t2 - t1)
                player_index = 0
            env.step(action)
            state = env.board
            # Print the final board when the game ends.
            if env.get_done_reward()[0]:
                print(state)
        # Record the winner (-1 is counted as a draw in the summary print below).
        winner.append(env.get_done_winner()[1])
    # Calculate the mean and variance of decision times.
    mcts_bot_mu = np.mean(mcts_bot_time_list)
    mcts_bot_var = np.var(mcts_bot_time_list)
    bot_action_mu = np.mean(bot_action_time_list)
    bot_action_var = np.var(bot_action_time_list)
    # Print the information of the games.
    print('num_simulations={}\n'.format(num_simulations))
    print('mcts_bot_time_list={}\n'.format(mcts_bot_time_list))
    print('mcts_bot_mu={}, mcts_bot_var={}\n'.format(mcts_bot_mu, mcts_bot_var))
    print('bot_action_time_list={}\n'.format(bot_action_time_list))
    print('bot_action_mu={}, bot_action_var={}\n'.format(bot_action_mu, bot_action_var))
    print(
        'winner={}, draw={}, player1={}, player2={}\n'.format(
            winner, winner.count(-1), winner.count(1), winner.count(2)
        )
    )
if __name__ == '__main__':
    # Uncomment the match-up(s) you want to run; only one is enabled by default.
    # ==============================================================
    # test win rate between alphabeta_bot and rule_bot_v0
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_rule_bot_v0_bot()
    # test_tictactoe_rule_bot_v0_bot_vs_alphabeta_bot()
    # ==============================================================
    # test win rate between alphabeta_bot and mcts_bot
    # ==============================================================
    # test_tictactoe_alphabeta_bot_vs_mcts_bot(num_simulations=2000)
    # test_tictactoe_mcts_bot_vs_alphabeta_bot(num_simulations=2000)
    # ==============================================================
    # test win rate between mcts_bot and rule_bot_v0
    # ==============================================================
    test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=50)
    # Larger num_simulations gives a stronger but slower MCTS bot:
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=500)
    # test_tictactoe_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)
    # test_gomoku_mcts_bot_vs_rule_bot_v0_bot(num_simulations=1000)