# Kano001's picture
# Upload 919 files
# 375a1cf verified
import os
from typing import Optional
import numpy as np
import gym
from gym import spaces
from gym.error import DependencyNotInstalled
def cmp(a, b):
    """Three-way comparison of ``a`` and ``b``.

    Returns:
        ``1.0`` when ``a > b``, ``-1.0`` when ``a < b`` and ``0.0`` otherwise.
    """
    greater = float(a > b)
    lesser = float(a < b)
    return greater - lesser
# Card values of one suit: the ace is 1, number cards 2-10 keep their value,
# and the three face cards (Jack, Queen, King) each count as 10.
deck = [1, *range(2, 11), 10, 10, 10]
def draw_card(np_random):
    """Draw one card value from ``deck``.

    Args:
        np_random: Random generator object exposing a ``choice`` method.

    Returns:
        The chosen card value converted to a plain ``int``.
    """
    card = np_random.choice(deck)
    return int(card)
def draw_hand(np_random):
    """Deal a starting hand: a list of two cards drawn one after the other.

    Args:
        np_random: Random generator forwarded to ``draw_card``.
    """
    return [draw_card(np_random) for _ in range(2)]
def usable_ace(hand):
    """Tell whether the hand holds an ace that can count as 11 without busting.

    Args:
        hand: Collection of card values, where an ace is stored as 1.

    Returns:
        ``True`` if the hand contains a 1 and its sum plus 10 does not exceed 21.
    """
    if 1 not in hand:
        return False
    return sum(hand) + 10 <= 21
def sum_hand(hand):
    """Return the current total of the hand.

    When the hand has a usable ace, that ace counts as 11, i.e. 10 is added
    on top of the plain sum of the card values.
    """
    bonus = 10 if usable_ace(hand) else 0
    return sum(hand) + bonus
def is_bust(hand):
    """Return ``True`` when the hand total (per ``sum_hand``) is above 21."""
    total = sum_hand(hand)
    return total > 21
def score(hand):
    """Return the score of the hand: 0 for a bust, otherwise the hand total."""
    if is_bust(hand):
        return 0
    return sum_hand(hand)
def is_natural(hand):
    """Return ``True`` when the hand is a natural blackjack.

    A natural is exactly two cards: one ace (1) and one ten-valued card (10),
    in either order.
    """
    ordered = list(hand)
    ordered.sort()
    return ordered == [1, 10]
class BlackjackEnv(gym.Env):
    """
    Blackjack is a card game where the goal is to beat the dealer by obtaining cards
    that sum to closer to 21 (without going over 21) than the dealer's cards.

    ### Description
    Card Values:

    - Face cards (Jack, Queen, King) have a point value of 10.
    - Aces can either count as 11 (called a 'usable ace') or 1.
    - Numerical cards (2-10) have a value equal to their number.

    This game is played with an infinite deck (or with replacement).
    The game starts with the dealer having one face up and one face down card,
    while the player has two face up cards.

    The player can request additional cards (hit, action=1) until they decide to stop (stick, action=0)
    or exceed 21 (bust, immediate loss).
    After the player sticks, the dealer reveals their facedown card, and draws
    until their sum is 17 or greater. If the dealer goes bust, the player wins.
    If neither the player nor the dealer busts, the outcome (win, lose, draw) is
    decided by whose sum is closer to 21.

    ### Action Space
    There are two actions: stick (0), and hit (1).

    ### Observation Space
    The observation consists of a 3-tuple containing: the player's current sum,
    the value of the dealer's one showing card (1-10 where 1 is ace),
    and whether the player holds a usable ace (0 or 1).

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto (http://incompleteideas.net/book/the-book-2nd.html).

    ### Rewards
    - win game: +1
    - lose game: -1
    - draw game: 0
    - win game with natural blackjack:

        +1.5 (if <a href="#nat">natural</a> is True)

        +1 (if <a href="#nat">natural</a> is False)

    ### Arguments

    ```
    gym.make('Blackjack-v1', natural=False, sab=False)
    ```

    <a id="nat">`natural=False`</a>: Whether to give an additional reward for
    starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).

    <a id="sab">`sab=False`</a>: Whether to follow the exact rules outlined in the book by
    Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.
    If the player achieves a natural blackjack and the dealer does not, the player
    will win (i.e. get a reward of +1). The reverse rule does not apply.
    If both the player and the dealer get a natural, it will be a draw (i.e. reward 0).

    ### Version History
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 4,
    }

    def __init__(self, render_mode: Optional[str] = None, natural=False, sab=False):
        """Set up the action/observation spaces and store the rule flags.

        Args:
            render_mode: ``None``, ``"human"`` or ``"rgb_array"`` (see ``metadata``).
            natural: Pay 1.5 instead of 1 for a win with a natural blackjack.
            sab: Follow the Sutton and Barto rules; takes precedence over ``natural``.
        """
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Tuple(
            (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))
        )

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural

        # Flag for full agreement with the (Sutton and Barto, 2018) definition. Overrides self.natural
        self.sab = sab

        self.render_mode = render_mode

    def step(self, action):
        """Apply one player action and advance the game.

        Args:
            action: 1 to hit (draw a card), 0 to stick (let the dealer play).

        Returns:
            A 5-tuple ``(observation, reward, terminated, False, {})``. The
            fourth element is always ``False`` and the info dict is always empty.

        Raises:
            AssertionError: If ``action`` is not contained in the action space.
        """
        assert self.action_space.contains(action), f"{action!r} is not a valid action"
        if action:  # hit: add a card to players hand and return
            self.player.append(draw_card(self.np_random))
            if is_bust(self.player):
                terminated = True
                reward = -1.0
            else:
                terminated = False
                reward = 0.0
        else:  # stick: play out the dealers hand, and score
            terminated = True
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.np_random))
            reward = cmp(score(self.player), score(self.dealer))
            if self.sab and is_natural(self.player) and not is_natural(self.dealer):
                # Player automatically wins. Rules consistent with S&B
                reward = 1.0
            elif (
                not self.sab
                and self.natural
                and is_natural(self.player)
                and reward == 1.0
            ):
                # Natural gives extra points, but doesn't autowin. Legacy implementation
                reward = 1.5

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), reward, terminated, False, {}

    def _get_obs(self):
        """Return ``(player sum, dealer's first card, player has usable ace)``."""
        return (sum_hand(self.player), self.dealer[0], usable_ace(self.player))

    def reset(
        self,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Deal fresh hands and pick how the dealer's showing card is drawn.

        Args:
            seed: Forwarded to ``gym.Env.reset``.
            options: Accepted for API compatibility; not used here.

        Returns:
            A 2-tuple ``(observation, {})``.
        """
        super().reset(seed=seed)
        self.dealer = draw_hand(self.np_random)
        self.player = draw_hand(self.np_random)

        _, dealer_card_value, _ = self._get_obs()

        # The suit and the face (for ten-valued cards) only matter for picking
        # the card image in render(); they do not influence the game itself.
        suits = ["C", "D", "H", "S"]
        self.dealer_top_card_suit = self.np_random.choice(suits)

        if dealer_card_value == 1:
            self.dealer_top_card_value_str = "A"
        elif dealer_card_value == 10:
            self.dealer_top_card_value_str = self.np_random.choice(["J", "Q", "K"])
        else:
            self.dealer_top_card_value_str = str(dealer_card_value)

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), {}

    def render(self):
        """Draw the current table with pygame.

        Returns:
            ``None`` when ``render_mode`` is ``None`` (after logging a warning) or
            ``"human"``; otherwise the screen pixels as an array transposed with
            ``axes=(1, 0, 2)``.

        Raises:
            DependencyNotInstalled: If pygame cannot be imported.
        """
        if self.render_mode is None:
            # `spec` is None when the env is constructed directly rather than
            # through gym.make, so fall back to the documented environment id.
            spec = getattr(self, "spec", None)
            env_id = spec.id if spec is not None else "Blackjack-v1"
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym.make("{env_id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
        except ImportError as e:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[toy_text]`"
            ) from e

        # Named has_usable_ace so the module-level usable_ace() is not shadowed.
        player_sum, dealer_card_value, has_usable_ace = self._get_obs()
        screen_width, screen_height = 600, 500
        card_img_height = screen_height // 3
        card_img_width = int(card_img_height * 142 / 197)
        spacing = screen_height // 20

        bg_color = (7, 99, 36)
        white = (255, 255, 255)

        if not hasattr(self, "screen"):
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode((screen_width, screen_height))
            else:
                pygame.font.init()
                self.screen = pygame.Surface((screen_width, screen_height))

        if not hasattr(self, "clock"):
            self.clock = pygame.time.Clock()

        self.screen.fill(bg_color)

        def get_image(path):
            # Asset paths are resolved relative to this module's directory.
            cwd = os.path.dirname(__file__)
            image = pygame.image.load(os.path.join(cwd, path))
            return image

        def get_font(path, size):
            cwd = os.path.dirname(__file__)
            font = pygame.font.Font(os.path.join(cwd, path), size)
            return font

        small_font = get_font(
            os.path.join("font", "Minecraft.ttf"), screen_height // 15
        )
        dealer_text = small_font.render(
            "Dealer: " + str(dealer_card_value), True, white
        )
        dealer_text_rect = self.screen.blit(dealer_text, (spacing, spacing))

        def scale_card_img(card_img):
            return pygame.transform.scale(card_img, (card_img_width, card_img_height))

        dealer_card_img = scale_card_img(
            get_image(
                os.path.join(
                    "img",
                    f"{self.dealer_top_card_suit}{self.dealer_top_card_value_str}.png",
                )
            )
        )
        dealer_card_rect = self.screen.blit(
            dealer_card_img,
            (
                screen_width // 2 - card_img_width - spacing // 2,
                dealer_text_rect.bottom + spacing,
            ),
        )

        # The dealer's second card is always shown face down.
        hidden_card_img = scale_card_img(get_image(os.path.join("img", "Card.png")))
        self.screen.blit(
            hidden_card_img,
            (
                screen_width // 2 + spacing // 2,
                dealer_text_rect.bottom + spacing,
            ),
        )

        player_text = small_font.render("Player", True, white)
        player_text_rect = self.screen.blit(
            player_text, (spacing, dealer_card_rect.bottom + 1.5 * spacing)
        )

        large_font = get_font(os.path.join("font", "Minecraft.ttf"), screen_height // 6)
        player_sum_text = large_font.render(str(player_sum), True, white)
        player_sum_text_rect = self.screen.blit(
            player_sum_text,
            (
                screen_width // 2 - player_sum_text.get_width() // 2,
                player_text_rect.bottom + spacing,
            ),
        )

        if has_usable_ace:
            usable_ace_text = small_font.render("usable ace", True, white)
            self.screen.blit(
                usable_ace_text,
                (
                    screen_width // 2 - usable_ace_text.get_width() // 2,
                    player_sum_text_rect.bottom + spacing // 2,
                ),
            )
        if self.render_mode == "human":
            pygame.event.pump()
            pygame.display.update()
            self.clock.tick(self.metadata["render_fps"])
        else:
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down if a screen was ever created by ``render``."""
        if hasattr(self, "screen"):
            import pygame

            pygame.display.quit()
            pygame.quit()
# Pixel art from Mariia Khmelnytska (https://www.123rf.com/photo_104453049_stock-vector-pixel-art-playing-cards-standart-deck-vector-set.html)