JaishreeramCoder committed
Commit a905106 · 1 Parent(s): a443ce6

new file: Dockerfile
new file: app.py
new file: blackjack_env.py
new file: models/final_model.zip
new file: models/phase-1-model.zip
new file: requirements.txt

Dockerfile ADDED
@@ -0,0 +1,21 @@
+ # Use an official lightweight Python image.
+ FROM python:3.9-slim
+
+ # Set the working directory.
+ WORKDIR /app
+
+ # Copy the requirements file and install dependencies.
+ COPY requirements.txt .
+ RUN pip install --upgrade pip
+ RUN pip install -r requirements.txt
+
+ # Copy the application code.
+ COPY . .
+
+ # Expose the default port (Hugging Face Spaces supplies $PORT at runtime).
+ EXPOSE 7860
+
+ # Start the FastAPI app with uvicorn.
+ # The $PORT environment variable is provided by Hugging Face Spaces.
+ CMD sh -c "uvicorn app:app --host 0.0.0.0 --port ${PORT:-7860}"
+
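To try the image locally, a minimal sketch (not part of the commit): build and run the container, then hit the root endpoint. The image name, port mapping, and `requests` dependency below are assumptions for illustration.

```python
# Sketch: verify the container responds after something like
#   docker build -t blackjack-api .
#   docker run -p 7860:7860 blackjack-api
# (image name and port mapping are illustrative, not from the commit).
import requests

resp = requests.get("http://localhost:7860/")
print(resp.json())  # expected: {"message": "Blackjack AI is live!"}
```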
app.py ADDED
@@ -0,0 +1,39 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from blackjack_env import BlackjackEnvCountingFirstMove
+ from stable_baselines3 import PPO
+ import numpy as np
+ from fastapi.middleware.cors import CORSMiddleware
+
+ app = FastAPI()
+
+ # Load model and env once at startup
+ env = BlackjackEnvCountingFirstMove()
+ model = PPO.load("models/final_model.zip", env=env)
+
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],  # or ["https://yourdomain.com"]
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ # Input schema
+ class StateInput(BaseModel):
+     observation: list  # list of 14 integers, matching the environment's MultiDiscrete observation
+
+ @app.get("/")
+ def root():
+     return {"message": "Blackjack AI is live!"}
+
+ @app.post("/predict")
+ def predict(state: StateInput):
+     try:
+         obs = np.array(state.observation)
+         action, _ = model.predict(obs, deterministic=True)
+         return {"action": int(action)}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
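A minimal client sketch for the /predict endpoint (not part of the commit): it assumes the service is reachable at a placeholder base URL and that `requests` is installed. The observation is the 14-integer vector defined by BlackjackEnvCountingFirstMove; the sample values are illustrative.

```python
# Sketch: call the /predict endpoint with an illustrative observation.
import requests

BASE_URL = "http://localhost:7860"  # placeholder; replace with the actual Space URL

# Observation layout (14 integers):
# [player_sum, usable_ace, dealer_card, is_first_move,
#  cnt_A, cnt_2, ..., cnt_9, cnt_10group]
observation = [15, 0, 10, 1] + [0] * 9 + [2]

resp = requests.post(f"{BASE_URL}/predict", json={"observation": observation})
resp.raise_for_status()
print(resp.json())  # e.g. {"action": 0} -> 0=HIT, 1=STK, 2=DBL, 3=SUR
```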
blackjack_env.py ADDED
@@ -0,0 +1,208 @@
+ import json
+ import numpy as np
+ from stable_baselines3 import PPO
+ import gymnasium as gym
+ from gymnasium import spaces
+ import random
+
+ # ----------------------------
+ # Environment Definition
+ # ----------------------------
+ class BlackjackEnvCountingFirstMove(gym.Env):
+     """
+     Custom Blackjack environment with card counting and a first-move flag.
+
+     State (MultiDiscrete):
+       [player_sum, usable_ace, dealer_card, is_first_move, cnt_A, cnt_2, ..., cnt_9, cnt_10group]
+
+       - player_sum: Sum of player's hand (0 to 31)
+       - usable_ace: 0 (no usable ace) or 1 (usable ace exists)
+       - dealer_card: Dealer's face-up card (1 for Ace, 2–10 for number cards)
+       - is_first_move: 1 if it's the first decision of the episode, 0 otherwise.
+       - cnt_A: Count for Ace (0–4)
+       - cnt_2 to cnt_9: Count for cards 2 through 9 (each 0–4)
+       - cnt_10group: Count for 10, Jack, Queen, King (0–16)
+
+     Actions (Discrete(4)):
+       0: HIT – Request another card.
+       1: STK – Stand.
+       2: DBL – Double Down (allowed only on the first move).
+       3: SUR – Surrender (allowed only on the first move).
+
+     On moves after the first, only HIT (0) and STK (1) are allowed.
+
+     Reward Structure:
+       - Blackjack pays 3:2 (payout_blackjack=1.5) if only the player has blackjack.
+       - Regular win pays 1:1.
+       - Push returns 0.
+       - Loss costs the bet.
+       - Surrender returns -0.5 times the base bet.
+       - Double Down outcomes are scaled (bet multiplied by 2).
+     """
+     def __init__(self, payout_blackjack=1.5, deck_threshold=15):
+         super(BlackjackEnvCountingFirstMove, self).__init__()
+         # Define the action space: 4 discrete actions
+         self.action_space = spaces.Discrete(4)
+
+         # Observation space:
+         # [player_sum (32), usable_ace (2), dealer_card (11), is_first_move (2),
+         #  cnt_A (5), cnt_2,...,cnt_9 (each 5), cnt_10group (17)]
+         self.observation_space = spaces.MultiDiscrete([32, 2, 11, 2] + [5]*9 + [17])
+
+         self.payout_blackjack = payout_blackjack
+         self.base_bet = 1.0
+         self.deck_threshold = deck_threshold
+
+         self._init_deck()
+         self.reset()
+
+     def _init_deck(self):
+         """Initialize a single deck and reset card counts."""
+         self.deck = []
+         self.deck += ['A'] * 4
+         for card in range(2, 10):
+             self.deck += [str(card)] * 4
+         self.deck += ['10'] * 16
+         random.shuffle(self.deck)
+
+         self.card_counts = {'A': 0}
+         for card in range(2, 10):
+             self.card_counts[str(card)] = 0
+         self.card_counts['10'] = 0
+
+     def _draw_card(self):
+         if len(self.deck) == 0:
+             self._init_deck()
+         card = self.deck.pop()
+         if card == 'A':
+             self.card_counts['A'] = min(self.card_counts['A'] + 1, 4)
+         elif card == '10':
+             self.card_counts['10'] = min(self.card_counts['10'] + 1, 16)
+         else:
+             self.card_counts[card] = min(self.card_counts[card] + 1, 4)
+         return card
+
+     def _hand_value(self, hand):
+         total = 0
+         ace_count = 0
+         for card in hand:
+             if card == 'A':
+                 total += 1
+                 ace_count += 1
+             else:
+                 total += int(card)
+         usable_ace = 0
+         if ace_count > 0 and total + 10 <= 21:
+             total += 10
+             usable_ace = 1
+         return total, usable_ace
+
+     def _card_value(self, card):
+         return 1 if card == 'A' else int(card)
+
+     def _get_observation(self):
+         player_sum, usable_ace = self._hand_value(self.player_hand)
+         dealer_card_val = self._card_value(self.dealer_hand[0])
+         first_move_flag = 1 if self.first_move else 0
+         counts = [self.card_counts['A']]
+         for card in range(2, 10):
+             counts.append(self.card_counts[str(card)])
+         counts.append(self.card_counts['10'])
+         obs = np.array([player_sum, usable_ace, dealer_card_val, first_move_flag] + counts, dtype=np.int32)
+         return obs
+
+     def reset(self, seed=None, options=None):
+         self.first_move = True
+         self.done = False
+         self.natural_blackjack = False
+         if len(self.deck) < self.deck_threshold:
+             self._init_deck()
+         self.player_hand = [self._draw_card(), self._draw_card()]
+         self.dealer_hand = [self._draw_card(), self._draw_card()]
+         player_total, _ = self._hand_value(self.player_hand)
+         dealer_total, _ = self._hand_value(self.dealer_hand)
+         if player_total == 21:
+             self.reward = 0.0 if dealer_total == 21 else self.payout_blackjack * self.base_bet
+             self.natural_blackjack = True
+         else:
+             self.reward = 0.0
+         return self._get_observation(), {}
+
+     def step(self, action):
+         if self.natural_blackjack:
+             self.natural_blackjack = False
+             self.done = True
+             info = {"bet": 1.0}
+             return self._get_observation(), self.reward, True, False, info
+
+         if self.done:
+             return self._get_observation(), 0.0, True, False, {}
+
+         if not self.first_move and action in [2, 3]:
+             self.done = True
+             return self._get_observation(), -1.0, True, False, {"illegal_action": True}
+
+         if action == 0:  # HIT
+             card = self._draw_card()
+             self.player_hand.append(card)
+             player_total, _ = self._hand_value(self.player_hand)
+             if player_total > 21:
+                 self.done = True
+                 reward = -self.base_bet
+             else:
+                 reward = 0.0
+             self.first_move = False
+             return self._get_observation(), reward, self.done, False, {}
+
+         elif action == 1:  # STAND
+             reward = self._dealer_play()
+             self.done = True
+             return self._get_observation(), reward, self.done, False, {}
+
+         elif action == 2:  # DOUBLE DOWN
+             self.first_move = False
+             card = self._draw_card()
+             self.player_hand.append(card)
+             player_total, _ = self._hand_value(self.player_hand)
+             if player_total > 21:
+                 reward = -2 * self.base_bet
+                 self.done = True
+                 return self._get_observation(), reward, self.done, False, {}
+             reward = self._dealer_play(double_down=True)
+             self.done = True
+             return self._get_observation(), reward, self.done, False, {}
+
+         elif action == 3:  # SURRENDER
+             self.first_move = False
+             self.done = True
+             reward = -0.5 * self.base_bet
+             return self._get_observation(), reward, self.done, False, {}
+         else:
+             self.done = True
+             return self._get_observation(), -1.0, True, False, {"illegal_action": True}
+
+     def _dealer_play(self, double_down=False):
+         player_total, _ = self._hand_value(self.player_hand)
+         dealer_total, _ = self._hand_value(self.dealer_hand)
+         while dealer_total < 17:
+             card = self._draw_card()
+             self.dealer_hand.append(card)
+             dealer_total, _ = self._hand_value(self.dealer_hand)
+         bet = self.base_bet * (2 if double_down else 1)
+         if dealer_total > 21:
+             return bet
+         elif dealer_total > player_total:
+             return -bet
+         elif dealer_total < player_total:
+             return bet
+         else:
+             return 0.0
+
+     def render(self, mode='human'):
+         player_total, usable = self._hand_value(self.player_hand)
+         dealer_total, _ = self._hand_value(self.dealer_hand)
+         print(f"Player hand: {self.player_hand} (Total: {player_total}, Usable Ace: {usable})")
+         print(f"Dealer hand: {self.dealer_hand} (Total: {dealer_total})")
+         print("Card counts:", self.card_counts)
+         print("First move:", self.first_move)
models/final_model.zip ADDED
Binary file (303 kB). View file
 
models/phase-1-model.zip ADDED
Binary file (303 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ stable-baselines3
+ gymnasium
+ torch
+ numpy
+