Spaces:
Sleeping
Sleeping
File size: 3,096 Bytes
3dfe8fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import pytest
from easydict import EasyDict
from zoo.board_games.tictactoe.envs.tictactoe_env import TicTacToeEnv
@pytest.mark.envtest
class TestTicTacToeEnv:
    """Smoke tests that play one complete TicTacToe game per battle mode.

    Neither test contains assertions: a test passes when the game reaches
    ``done`` without raising. Every move and board is printed so a run can
    be inspected by eye.
    """

    def test_self_play_mode(self):
        """Play one game in ``self_play_mode``, stepping both sides by hand.

        Player 1 uses ``env.random_action()`` and player 2 uses
        ``env.bot_action()``; the loop ends as soon as a step reports
        ``done``.
        """
        settings = {
            'battle_mode': 'self_play_mode',
            'channel_last': True,
            'scale': True,
            'agent_vs_human': False,
            'prob_random_agent': 0,
            'prob_expert_agent': 0,
            'bot_action_type': 'v0',
        }
        env = TicTacToeEnv(EasyDict(settings))
        env.reset()
        print('init board state: ')
        env.render()

        while True:
            # Player 1. For manual experiments the move source can be
            # swapped for env.human_to_action(), env.bot_action() or
            # env.legal_actions[-1].
            move = env.random_action()
            print('player 1: ' + env.action_to_string(move))
            _, reward, done, _ = env.step(move)
            env.render()
            if done:
                print('player 1 (human player) win' if reward > 0 else 'draw')
                break

            # Player 2 always follows the bot policy.
            move = env.bot_action()
            print('player 2 (computer player): ' + env.action_to_string(move))
            _, reward, done, _ = env.step(move)
            env.render()
            if done:
                print('player 2 (computer player) win' if reward > 0 else 'draw')
                break

    def test_play_with_bot_mode(self):
        """Play one game in ``play_with_bot_mode`` with random player-1 moves.

        Only player 1's action is submitted each iteration.
        NOTE(review): presumably the environment plays the bot's reply
        inside ``step`` in this mode -- confirm against ``TicTacToeEnv``.
        """
        settings = {
            'battle_mode': 'play_with_bot_mode',
            # channel_last=False / scale=False is the other layout that can
            # be tried here.
            'channel_last': True,
            'scale': True,
            'agent_vs_human': False,
            'prob_random_agent': 0,
            'prob_expert_agent': 0,
            'bot_action_type': 'v0',
        }
        env = TicTacToeEnv(EasyDict(settings))
        env.reset()
        print('init board state: ')
        env.render()

        while True:
            # Player 1. For manual experiments the move source can be
            # swapped for env.human_to_action() or env.legal_actions[-1].
            move = env.random_action()
            print('player 1: ' + env.action_to_string(move))
            _, reward, done, info = env.step(move)
            # reward is in the perspective of player1
            env.render()
            if not done:
                continue

            # A non-zero reward means somebody won; the winner is read off
            # which player would move next. Anything else is reported as a
            # draw. ``info`` is only consulted when the reward is non-zero.
            outcome = 'draw'
            if reward != 0:
                if info['next player to play'] == 2:
                    outcome = 'player 1 (human player) win'
                elif info['next player to play'] == 1:
                    outcome = 'player 2 (computer player) win'
            print(outcome)
            break
# Manual entry point: run both smoke tests when this file is executed as a
# script. The guard keeps importing the module (e.g. during pytest
# collection) from playing the games as an import-time side effect, which
# would otherwise run them once at import and again as collected tests.
if __name__ == "__main__":
    test = TestTicTacToeEnv()
    test.test_self_play_mode()
    test.test_play_with_bot_mode()
|