Kaushik Rajan committed on
Commit
898b55a
·
1 Parent(s): 56d247f

Feat: Replace Tic-Tac-Toe with Strategic Business Competition

Browse files
Files changed (2) hide show
  1. app.py +268 -347
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,397 +1,318 @@
1
  """
2
- SPIRAL: Interactive Reasoning Game Simulator
3
 
4
- Demonstrates key concepts from "Self-Play in Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning"
 
5
 
6
- This simplified demo shows how strategic reasoning emerges from self-play in zero-sum games like TicTacToe.
 
7
  """
8
 
9
  import gradio as gr
10
  import numpy as np
11
- import random
12
- import spaces
13
 
 
 
 
 
 
 
14
 
15
- class TicTacToeEnv:
16
- """Simple TicTacToe environment for SPIRAL demonstration."""
17
-
 
18
  def __init__(self):
19
  self.reset()
20
-
21
  def reset(self):
22
- """Reset the game to initial state."""
23
- self.board = np.zeros((3, 3), dtype=np.int8)
24
- self.current_player = 1 # Player 1 starts (X)
25
  self.game_over = False
26
- self.winner = None
27
- self.move_count = 0
28
- return self.board.copy()
29
-
30
- def step(self, action):
31
- """Execute one step in the environment."""
32
- if self.game_over:
33
- return self.board.copy(), 0, True, {}
34
 
35
- # Convert action to row, col
36
- row, col = divmod(action, 3)
37
-
38
- # Check if move is valid
39
- if self.board[row, col] != 0:
40
- return self.board.copy(), -1, True, {"invalid_move": True}
 
 
 
 
41
 
42
- # Make the move
43
- self.board[row, col] = self.current_player
44
- self.move_count += 1
45
 
46
- # Check for win
47
- winner = self._check_winner()
48
- if winner is not None:
49
- self.game_over = True
50
- self.winner = winner
51
- reward = 1 if winner == self.current_player else -1
52
- return self.board.copy(), reward, True, {}
53
- elif self.move_count >= 9:
54
- # Draw
55
- self.game_over = True
56
- return self.board.copy(), 0, True, {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  else:
58
- # Game continues
59
- self.current_player *= -1 # Switch player
60
- return self.board.copy(), 0, False, {}
61
-
62
- def _check_winner(self):
63
- """Check if there's a winner."""
64
- # Check rows
65
- for row in range(3):
66
- if abs(self.board[row, :].sum()) == 3:
67
- return self.board[row, 0]
68
-
69
- # Check columns
70
- for col in range(3):
71
- if abs(self.board[:, col].sum()) == 3:
72
- return self.board[0, col]
73
-
74
- # Check diagonals
75
- if abs(self.board.diagonal().sum()) == 3:
76
- return self.board[0, 0]
77
-
78
- if abs(np.fliplr(self.board).diagonal().sum()) == 3:
79
- return self.board[0, 2]
80
-
81
- return None
82
-
83
- def get_valid_actions(self):
84
- """Get list of valid actions (empty positions)."""
85
- valid_actions = []
86
- for i in range(9):
87
- row, col = divmod(i, 3)
88
- if self.board[row, col] == 0:
89
- valid_actions.append(i)
90
- return valid_actions
91
-
92
-
93
- # Global game environment
94
- tictactoe_env = TicTacToeEnv()
95
-
96
-
97
- def check_winner(board):
98
- """Check if there's a winner on the given board."""
99
- # Check rows
100
- for row in range(3):
101
- if abs(board[row, :].sum()) == 3:
102
- return board[row, 0]
103
-
104
- # Check columns
105
- for col in range(3):
106
- if abs(board[:, col].sum()) == 3:
107
- return board[0, col]
108
-
109
- # Check diagonals
110
- if abs(board.diagonal().sum()) == 3:
111
- return board[0, 0]
112
-
113
- if abs(np.fliplr(board).diagonal().sum()) == 3:
114
- return board[0, 2]
115
-
116
- return None
117
-
118
-
119
- def get_valid_moves(board):
120
- """Get valid moves for the given board."""
121
- valid_moves = []
122
- for i in range(9):
123
- row, col = divmod(i, 3)
124
- if board[row, col] == 0:
125
- valid_moves.append(i)
126
- return valid_moves
127
-
128
-
129
- def minimax(board, player, depth=0):
130
- """Minimax algorithm - demonstrates strategic reasoning."""
131
- # Base cases
132
- winner = check_winner(board)
133
- if winner == 1: # Human wins
134
- return -10 + depth, None
135
- elif winner == -1: # AI wins
136
- return 10 - depth, None
137
- elif len(get_valid_moves(board)) == 0: # Draw
138
- return 0, None
139
-
140
- best_move = None
141
- if player == -1: # AI is maximizing player
142
- best_score = -float('inf')
143
- for move in get_valid_moves(board):
144
- row, col = divmod(move, 3)
145
- board[row, col] = -1
146
- score, _ = minimax(board.copy(), 1, depth + 1)
147
- board[row, col] = 0 # Undo move
148
- if score > best_score:
149
- best_score = score
150
- best_move = move
151
- else: # Human is minimizing player
152
- best_score = float('inf')
153
- for move in get_valid_moves(board):
154
- row, col = divmod(move, 3)
155
- board[row, col] = 1
156
- score, _ = minimax(board.copy(), -1, depth + 1)
157
- board[row, col] = 0 # Undo move
158
- if score < best_score:
159
- best_score = score
160
- best_move = move
161
-
162
- return best_score, best_move
163
 
 
 
 
164
 
165
- def generate_reasoning(board_state, human_move, ai_move):
166
- """Generate reasoning explanation based on game state."""
167
- reasoning_templates = [
168
- f"I analyzed all possible moves from the current position. After you played position {human_move}, I considered {len(get_valid_moves(board_state))} possible responses. Using minimax tree search, I determined that position {ai_move} gives me the best strategic advantage.",
169
 
170
- f"My decision process: (1) Evaluate immediate threats and opportunities, (2) Project future game states, (3) Choose move that maximizes my winning probability. Position {ai_move} emerged as optimal after analyzing the full game tree.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
- f"Strategic analysis: Your move at {human_move} created a new board configuration. I used recursive tree search to evaluate all possible future sequences. Position {ai_move} either creates a winning opportunity or blocks your potential victories.",
 
 
 
 
173
 
174
- f"SPIRAL reasoning: Through self-play training, I learned that position {ai_move} is strategically superior in this configuration. This demonstrates how strategic reasoning emerges from multi-agent interaction in zero-sum games."
175
- ]
 
 
 
 
 
 
 
 
 
 
 
 
176
 
177
- return random.choice(reasoning_templates)
 
178
 
 
 
 
 
 
 
 
179
 
180
- def create_interface():
181
- """Create the main Gradio interface."""
 
 
 
 
 
 
 
 
 
 
182
 
183
- # Custom CSS to style the TicTacToe board
184
- css = """
185
- .ttt-board {
186
- display: flex;
187
- flex-direction: column;
188
- align-items: center;
189
- max-width: 300px;
190
- margin: 0 auto;
191
- }
192
- .ttt-board > div {
193
- display: flex;
194
- flex-direction: row;
195
- justify-content: center;
196
- gap: 8px;
197
- margin: 4px 0;
198
- }
199
- .ttt-board button {
200
- width: 80px !important;
201
- height: 80px !important;
202
- min-width: 80px !important;
203
- min-height: 80px !important;
204
- max-width: 80px !important;
205
- max-height: 80px !important;
206
- font-size: 24px !important;
207
- font-weight: bold !important;
208
- border: 2px solid #374151 !important;
209
- border-radius: 8px !important;
210
- background: #1f2937 !important;
211
- color: white !important;
212
- display: flex !important;
213
- align-items: center !important;
214
- justify-content: center !important;
215
- }
216
- .ttt-board button:hover {
217
- background: #374151 !important;
218
- border-color: #6b7280 !important;
219
- }
220
- .ttt-board button:disabled {
221
- opacity: 0.8 !important;
222
- cursor: not-allowed !important;
223
- }
224
- .ttt-stats {
225
- text-align: center !important;
226
- margin: 20px 0 !important;
227
- font-size: 16px !important;
228
- }
229
- .ttt-stats p {
230
- margin: 0 !important;
231
- color: #9ca3af !important;
232
- }
233
- """
234
 
235
- with gr.Blocks(title="SPIRAL: Self-Play Reasoning Demo", theme=gr.themes.Soft(), css=css) as demo:
236
- gr.Markdown("# ๐ŸŽฎ SPIRAL: Self-Play Reasoning Demo")
237
- gr.Markdown("**Demonstrating how strategic reasoning emerges from self-play in zero-sum games**")
238
- gr.Markdown("*Based on: \"Self-Play in Zero-Sum Games Incentivizes Reasoning via Multi-Agent Multi-Turn Reinforcement Learning\"*")
239
-
240
- def update_board_buttons():
241
- """Create a list of gr.Button updates from the current board state."""
242
- updates = []
243
- for i in range(9):
244
- row, col = divmod(i, 3)
245
- cell = tictactoe_env.board[row, col]
246
- val = ""
247
- interactive = True
248
- if cell == 1:
249
- val = 'โŒ'
250
- interactive = False
251
- elif cell == -1:
252
- val = 'โญ•'
253
- interactive = False
254
-
255
- if tictactoe_env.game_over:
256
- interactive = False
257
 
258
- updates.append(gr.Button(value=val, interactive=interactive))
259
- return updates
260
 
261
- ttt_stats = gr.State({'wins': 0, 'losses': 0, 'draws': 0})
262
-
263
- @spaces.GPU
264
- def play_tictactoe(position, stats):
265
- """
266
- Main game loop for TicTacToe. Handles human move, AI response, and updates state.
267
- This function is decorated with @spaces.GPU to satisfy the Hugging Face Spaces
268
- runtime, even though the TicTacToe logic does not require GPU acceleration.
269
- The underlying issue is a mismatch between the selected GPU hardware and the
270
- CPU-bound nature of the application.
271
- """
272
- if tictactoe_env.game_over:
273
- yield *update_board_buttons(), "Game is over! Click 'New Game' to start again.", "", stats
274
- return
275
-
276
- try:
277
- position = int(position)
278
-
279
- # Human move
280
- board_state, reward, done, info = tictactoe_env.step(position)
281
-
282
- if done:
283
- if info.get("invalid_move"):
284
- yield *update_board_buttons(), "Invalid move! Try again.", "", stats
285
- return
286
-
287
- winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
288
- if winner == "You": stats['wins'] += 1
289
- elif winner == "AI": stats['losses'] += 1
290
- else: stats['draws'] += 1
291
- yield *update_board_buttons(), f"Game Over! {winner} won!", "", stats
292
- return
293
-
294
- # Show AI thinking
295
- yield *update_board_buttons(), "AI is analyzing the game tree...", "๐Ÿง  Strategic reasoning in progress...", stats
296
-
297
- # AI move using minimax
298
- _, ai_action = minimax(tictactoe_env.board.copy(), -1)
299
- if ai_action is None:
300
- valid_actions = tictactoe_env.get_valid_actions()
301
- if not valid_actions:
302
- yield *update_board_buttons(), "Game is a draw!", "", stats
303
- return
304
- ai_action = random.choice(valid_actions)
305
-
306
- # Generate reasoning explanation
307
- reasoning = generate_reasoning(tictactoe_env.board.copy(), position, ai_action)
308
-
309
- # AI makes move
310
- board_state, reward, done, info = tictactoe_env.step(ai_action)
311
-
312
- if done:
313
- winner = "You" if tictactoe_env.winner == 1 else "AI" if tictactoe_env.winner == -1 else "Draw"
314
- if winner == "You": stats['wins'] += 1
315
- elif winner == "AI": stats['losses'] += 1
316
- else: stats['draws'] += 1
317
- yield *update_board_buttons(), f"Game Over! {winner} won! AI played position {ai_action}.", reasoning, stats
318
- else:
319
- yield *update_board_buttons(), f"AI chose position {ai_action}. Your turn!", reasoning, stats
320
-
321
- except Exception as e:
322
- yield *update_board_buttons(), f"Error: {str(e)}", "", stats
323
 
324
- def reset_tictactoe(stats):
325
- """Reset TicTacToe game."""
326
- tictactoe_env.reset()
327
- return *update_board_buttons(), "New game started! You are โŒ (X). Click a square to demonstrate strategic reasoning.", "The AI will explain its strategic decision-making process...", stats
 
328
 
329
  with gr.Row():
 
 
 
 
 
 
 
330
  with gr.Column(scale=2):
331
- status_box = gr.Textbox("Welcome to SPIRAL TicTacToe! You are โŒ (X). Click a square to begin.", label="Game Status", interactive=False)
332
- reasoning_box = gr.Textbox("The AI will explain its strategic moves here.", label="AI Reasoning", interactive=False, lines=4)
333
 
334
- with gr.Column(elem_classes=["ttt-board"]):
335
- board_buttons = []
336
- for i in range(3):
337
- with gr.Row():
338
- for j in range(3):
339
- pos = i * 3 + j
340
- btn = gr.Button("", elem_id=f"ttt-btn-{pos}")
341
- board_buttons.append(btn)
342
 
 
 
343
  with gr.Row():
344
- new_game_btn = gr.Button("New Game", variant="primary")
345
-
346
- # Hidden state for passing button clicks
347
- clicked_pos = gr.Textbox(visible=False)
348
 
349
- with gr.Column(scale=1):
350
- gr.Markdown("### ๐Ÿ“Š Game Stats")
351
- stats_display = gr.Markdown("Wins: 0 | Losses: 0 | Draws: 0", elem_classes=["ttt-stats"])
352
-
353
- def update_stats_display(stats):
354
- return f"Wins: {stats['wins']} | Losses: {stats['losses']} | Draws: {stats['draws']}"
355
-
356
- gr.Markdown("""
357
- ### ๐Ÿค” What is SPIRAL?
358
- SPIRAL stands for **Self-Play in Reinforcement Learning**. This demo illustrates a core concept from the paper: by playing against itself millions of times, an AI can learn complex, human-like strategic reasoning without being explicitly programmed with rules like "take the center square."
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
- The AI here uses a simple **minimax** algorithm, a classic game theory tree search method, to find the optimal move. This serves as a stand-in for the more complex neural networks used in the actual SPIRAL research.
361
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  # --- Event Handlers ---
364
-
365
- def on_board_click(pos, stats):
366
- """Handler for board button clicks. Propagates to main game logic."""
367
- yield from play_tictactoe(pos, stats)
368
-
369
- # Link button clicks to the handler
370
- for i, btn in enumerate(board_buttons):
371
- btn.click(
372
- fn=on_board_click,
373
- inputs=[gr.Textbox(str(i), visible=False), ttt_stats],
374
- outputs=[*board_buttons, status_box, reasoning_box, ttt_stats]
375
- )
376
-
377
- # Link new game button to reset function
378
- new_game_btn.click(
379
- fn=reset_tictactoe,
380
- inputs=[ttt_stats],
381
- outputs=[*board_buttons, status_box, reasoning_box, ttt_stats]
382
  )
383
 
384
- # Update stats display when ttt_stats changes
385
- ttt_stats.change(
386
- fn=update_stats_display,
387
- inputs=ttt_stats,
388
- outputs=stats_display
 
 
 
 
389
  )
390
 
 
 
 
 
 
391
  return demo
392
 
393
 
394
  if __name__ == "__main__":
395
- # Create and launch the Gradio interface
396
  spiral_demo = create_interface()
397
  spiral_demo.launch()
 
1
  """
2
+ SPIRAL: Strategic Business Competition Simulator
3
 
4
+ This demo has been updated to more intuitively demonstrate the key concepts from the
5
+ "Self-Play in Zero-Sum Games Incentivizes Reasoning" (SPIRAL) research paper.
6
 
7
+ Instead of Tic-Tac-Toe, this simulation uses a zero-sum business competition to showcase
8
+ complex, multi-turn strategic reasoning in a more practical and relatable context.
9
  """
10
 
11
  import gradio as gr
12
  import numpy as np
13
+ import pandas as pd
14
+ import plotly.express as px
15
 
16
+ # --- Game Configuration ---
17
+ INITIAL_BUDGET = 1000
18
+ INITIAL_MARKET_SHARE = 50
19
+ INITIAL_PRODUCT_QUALITY = 50
20
+ NUM_QUARTERS = 12
21
+ TITLE = "SPIRAL: Strategic Business Competition"
22
 
23
+ # --- Game Environment ---
24
+
25
class BusinessCompetitionEnv:
    """Manages the state of the strategic business competition.

    Two companies (the human player and the AI) each hold a budget, a market
    share and a product-quality index. Every call to ``step`` plays one
    quarter: both sides submit an allocation dict with the keys ``"rd"``,
    ``"marketing"`` and ``"sales"``, and the stats are updated from those
    amounts. Market share is zero-sum: the two shares always add up to 100.
    The game ends once ``NUM_QUARTERS`` quarters have been played.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Resets the game to its initial state and returns that state."""
        self.quarter = 0
        self.game_over = False

        self.player_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }
        self.ai_stats = {
            "budget": INITIAL_BUDGET,
            "market_share": INITIAL_MARKET_SHARE,
            "product_quality": INITIAL_PRODUCT_QUALITY,
        }

        # History stores the state at the *end* of each quarter;
        # the first entry is the starting position (quarter 0).
        self.history = []
        self._add_to_history()

        return self.get_state()

    def _add_to_history(self):
        """Appends a snapshot of both companies' current stats to the history log."""
        self.history.append({
            "Quarter": self.quarter,
            "Player Budget": self.player_stats["budget"],
            "AI Budget": self.ai_stats["budget"],
            "Player Market Share": self.player_stats["market_share"],
            "AI Market Share": self.ai_stats["market_share"],
            "Player Product Quality": self.player_stats["product_quality"],
            "AI Product Quality": self.ai_stats["product_quality"],
        })

    def get_state(self):
        """Returns the complete current state of the game as a dict.

        The returned dict references the live ``player_stats``, ``ai_stats``
        and ``history`` objects (no copies are made).
        """
        return {
            "quarter": self.quarter,
            "player_stats": self.player_stats,
            "ai_stats": self.ai_stats,
            "game_over": self.game_over,
            "history": self.history,
        }

    def get_winner(self):
        """Returns "You", "AI" or "It's a Draw" by market share, or None while the game is running."""
        if not self.game_over:
            return None
        player_share = self.player_stats["market_share"]
        ai_share = self.ai_stats["market_share"]
        if player_share > ai_share:
            return "You"
        if ai_share > player_share:
            return "AI"
        return "It's a Draw"

    @staticmethod
    def _sanitize_allocation(allocation):
        """Returns a copy of ``allocation`` with negative amounts clamped to zero."""
        return {key: max(0, amount) for key, amount in allocation.items()}

    def step(self, player_allocation, ai_allocation):
        """Executes one quarter of the game and returns the resulting state.

        Args:
            player_allocation: dict with "rd", "marketing" and "sales" amounts
                spent by the player this quarter.
            ai_allocation: the same three amounts for the AI.

        Returns:
            The state dict from ``get_state``. Once the game is over the call
            is a no-op that returns the final state.
        """
        if self.game_over:
            return self.get_state()

        # A negative spend is meaningless and would make np.sqrt return NaN,
        # which int() cannot convert; treat it as "nothing invested".
        player_allocation = self._sanitize_allocation(player_allocation)
        ai_allocation = self._sanitize_allocation(ai_allocation)

        self.quarter += 1

        # 1. Update Product Quality from R&D investment (square root gives
        #    diminishing returns on larger spends).
        self.player_stats["product_quality"] += int(np.sqrt(player_allocation["rd"]) * 1.5)
        self.ai_stats["product_quality"] += int(np.sqrt(ai_allocation["rd"]) * 1.5)

        # 2. Calculate market share shift from Marketing and Quality.
        mkt_diff = player_allocation["marketing"] - ai_allocation["marketing"]
        quality_diff = self.player_stats["product_quality"] - self.ai_stats["product_quality"]

        # Marketing only counts in the quarter it is spent; quality accumulates,
        # so its contribution persists across quarters.
        market_share_shift = (mkt_diff / 100.0) + (quality_diff / 50.0)
        market_share_shift = np.clip(market_share_shift, -7, 7)  # Cap shifts per quarter

        # Zero-sum: the AI's share is always the complement of the player's.
        self.player_stats["market_share"] = np.clip(
            self.player_stats["market_share"] + market_share_shift, 0, 100
        )
        self.ai_stats["market_share"] = 100 - self.player_stats["market_share"]

        # 3. Calculate next quarter's budget: unspent money carries over and
        #    the Sales spend is returned with a market-share-dependent ROI.
        player_remaining_budget = self.player_stats["budget"] - sum(player_allocation.values())
        ai_remaining_budget = self.ai_stats["budget"] - sum(ai_allocation.values())

        player_sales_roi = 1.2 + (self.player_stats["market_share"] / 200.0)
        ai_sales_roi = 1.2 + (self.ai_stats["market_share"] / 200.0)

        self.player_stats["budget"] = int(player_allocation["sales"] * player_sales_roi + player_remaining_budget)
        self.ai_stats["budget"] = int(ai_allocation["sales"] * ai_sales_roi + ai_remaining_budget)

        if self.quarter >= NUM_QUARTERS:
            self.game_over = True

        self._add_to_history()

        return self.get_state()
125
+
126
+ # --- AI Logic ---
127
+
128
def ai_strategy(ai_stats, player_stats):
    """
    A heuristic-based AI to simulate a strategic opponent.

    Starts from a balanced split of the budget across R&D, Marketing and Sales
    and shifts weight between them according to four rules that compare the
    AI's stats with the player's (quality gap, market-share gap, quality lead,
    budget gap). The weights are then normalised and converted to whole-dollar
    amounts that sum exactly to the AI's budget.

    Args:
        ai_stats: dict with "budget", "market_share" and "product_quality".
        player_stats: dict with the same three keys for the player.

    Returns:
        A tuple ``(allocation, reasoning)`` where ``allocation`` maps "rd",
        "marketing" and "sales" to non-negative integer amounts, and
        ``reasoning`` is the explanation text for the rules that fired.
    """
    # A negative budget cannot be spent; treat it as zero so no amount goes negative.
    budget = max(0, ai_stats["budget"])
    reasoning = []

    # Default balanced strategy (weights, not dollars)
    allocation = {"rd": 0.33, "marketing": 0.34, "sales": 0.33}

    # --- Strategic Adjustments based on SPIRAL principles ---
    # 1. React to quality gap (long-term planning)
    if ai_stats["product_quality"] < player_stats["product_quality"] - 15:
        allocation["rd"] += 0.2
        allocation["marketing"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("My analysis indicates a growing product quality gap. I'm increasing R&D investment to innovate and secure a long-term competitive advantage.")

    # 2. React to market share loss (short-term defense)
    elif ai_stats["market_share"] < player_stats["market_share"] - 10:
        allocation["marketing"] += 0.2
        allocation["rd"] -= 0.1
        allocation["sales"] -= 0.1
        reasoning.append("You've recently captured significant market share. I'm launching an aggressive marketing campaign to win back customers and regain my position.")

    # 3. Exploit a quality advantage (pressing an advantage)
    if ai_stats["product_quality"] > player_stats["product_quality"] + 20:
        allocation["marketing"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append(f"My product quality ({ai_stats['product_quality']:.0f}) is superior. I will leverage this with a marketing push to translate product leadership into market dominance.")

    # 4. Manage budget (resource management)
    if ai_stats["budget"] < player_stats["budget"] * 0.8:
        allocation["sales"] += 0.15
        allocation["rd"] -= 0.15
        reasoning.append("My projections show a potential budget shortfall. I am focusing on sales to ensure strong revenue growth for future quarters.")

    if not reasoning:
        reasoning.append("I am pursuing a balanced strategy, investing across R&D, Marketing, and Sales to ensure steady, long-term growth and market presence.")

    # Rules 2, 3 and 4 can fire together and push the R&D weight below zero
    # (0.33 - 0.10 - 0.15 - 0.15). A negative weight would become a negative
    # dollar amount, so floor every weight at zero before normalising.
    weights = {key: max(0.0, weight) for key, weight in allocation.items()}
    total_weight = sum(weights.values())
    final_allocation = {key: int(budget * (weight / total_weight)) for key, weight in weights.items()}

    # int() truncates, so hand the leftover dollars to sales to spend the budget exactly.
    final_allocation["sales"] += budget - sum(final_allocation.values())

    return final_allocation, " ".join(reasoning)
 
179
 
180
+ # --- Gradio UI ---
181
+
182
def create_interface():
    """Creates the Gradio web interface for the simulator.

    Builds a ``gr.Blocks`` app with a three-chart dashboard, three budget
    sliders, "End Quarter" / "Start New Game" buttons and a textbox showing
    the AI's rationale. The game environment is kept in a ``gr.State`` so the
    handlers receive and return the ``BusinessCompetitionEnv`` instance.

    Returns:
        The ``gr.Blocks`` app; the caller is responsible for launching it.
    """
    # Local import so this block does not depend on a file-level import change.
    from pathlib import Path

    player_color, ai_color = "#3b82f6", "#ef4444"

    def even_split(budget):
        """Splits a budget into three integer parts that sum to the budget."""
        third = int(budget / 3)
        return third, third, budget - 2 * third

    # Read the optional takeaways document up front; a missing file should not
    # prevent the whole app from starting.
    try:
        takeaways_md = Path("spiral_paper_takeaways.md").read_text(encoding="utf-8")
    except OSError:
        takeaways_md = "*The SPIRAL takeaways document (`spiral_paper_takeaways.md`) could not be loaded.*"

    initial_rd, initial_mkt, initial_sales = even_split(INITIAL_BUDGET)

    with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
        game_env = gr.State(BusinessCompetitionEnv())

        gr.Markdown(f"# 🎮 {TITLE}")
        gr.Markdown(
            "**Demonstrating how complex, multi-turn strategic reasoning emerges from self-play.**\n"
            "*This simulation replaces Tic-Tac-Toe with a business competition to better illustrate the practical takeaways from the SPIRAL paper.*"
        )

        with gr.Row():
            with gr.Column(scale=3):
                gr.Markdown("### 📈 Market Dashboard")
                plot_market_share = gr.Plot()
                with gr.Row():
                    plot_budget = gr.Plot()
                    plot_quality = gr.Plot()

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Your Decisions")
                status_box = gr.Textbox(f"Quarter 1 of {NUM_QUARTERS}. Your move.", label="Game Status", interactive=False)

                # gr.Box was removed in Gradio 4; gr.Group is its replacement.
                with gr.Group():
                    player_budget_display = gr.Label(f"Your Budget: ${INITIAL_BUDGET}")
                    rd_slider = gr.Slider(0, INITIAL_BUDGET, label="R&D Investment", value=initial_rd, step=10)
                    mkt_slider = gr.Slider(0, INITIAL_BUDGET, label="Marketing Investment", value=initial_mkt, step=10)
                    sales_slider = gr.Slider(0, INITIAL_BUDGET, label="Sales Investment", value=initial_sales, step=10)

                total_allocated_display = gr.Label(f"Total Allocated: ${INITIAL_BUDGET}")

                with gr.Row():
                    submit_btn = gr.Button("End Quarter", variant="primary")
                    new_game_btn = gr.Button("Start New Game")

                gr.Markdown("### 🧠 AI Strategic Reasoning")
                ai_reasoning_box = gr.Textbox("", label="AI Decision Rationale", lines=5, interactive=False)

        gr.Markdown("---")
        with gr.Accordion("Key Takeaways from the SPIRAL Research Paper", open=False):
            gr.Markdown(takeaways_md)

        def line_chart(df, metric, chart_title):
            """Plots the player's and the AI's series for one metric against the quarter."""
            player_col, ai_col = f"Player {metric}", f"AI {metric}"
            fig = px.line(
                df, x="Quarter", y=[player_col, ai_col], title=chart_title, markers=True,
                color_discrete_map={player_col: player_color, ai_col: ai_color},
            )
            fig.update_layout(legend_title_text='')
            return fig

        def create_plots(history):
            """Returns (market share, budget, quality) figures, or three Nones for an empty history."""
            df = pd.DataFrame(history)
            if df.empty:
                return None, None, None

            fig_ms = line_chart(df, "Market Share", "Market Share (%)")
            fig_ms.update_layout(yaxis_range=[0, 100])
            fig_b = line_chart(df, "Budget", "Budget ($)")
            fig_q = line_chart(df, "Product Quality", "Product Quality Index")
            return fig_ms, fig_b, fig_q

        def game_step_and_update(env, rd, mkt, sales):
            """Plays one quarter with the slider values and returns updates for every output."""
            player_budget = env.player_stats["budget"]
            if (rd + mkt + sales) > player_budget:
                status_text = f"Error: Allocation (${rd + mkt + sales}) exceeds budget (${player_budget})."
                # Nothing was played: keep the AI rationale as it is and only
                # re-bound the sliders to the real budget.
                return (env, status_text, gr.update(), *create_plots(env.history),
                        gr.Label(f"Your Budget: ${player_budget}"),
                        gr.Slider(maximum=player_budget),
                        gr.Slider(maximum=player_budget),
                        gr.Slider(maximum=player_budget),
                        gr.Button(interactive=not env.game_over))

            player_alloc = {"rd": rd, "marketing": mkt, "sales": sales}
            ai_alloc, ai_reasoning = ai_strategy(env.ai_stats, env.player_stats)

            env.step(player_alloc, ai_alloc)
            state = env.get_state()

            plots = create_plots(state["history"])

            if state["game_over"]:
                winner = env.get_winner()
                status_text = f"Game Over! Winner: {winner}. Final market share: You ({state['player_stats']['market_share']:.1f}%) vs AI ({state['ai_stats']['market_share']:.1f}%)."
            else:
                status_text = f"End of Quarter {state['quarter']}. Your turn."

            new_budget = state["player_stats"]["budget"]
            next_rd, next_mkt, next_sales = even_split(new_budget)

            # The first output feeds the gr.State, so it must be the env object
            # itself (not the state dict) for the next click to work. The
            # button is disabled through its own output once the game is over.
            return (env, status_text, ai_reasoning, *plots,
                    gr.Label(f"Your Budget: ${new_budget}"),
                    gr.Slider(maximum=new_budget, value=next_rd),
                    gr.Slider(maximum=new_budget, value=next_mkt),
                    gr.Slider(maximum=new_budget, value=next_sales),
                    gr.Button(interactive=not state["game_over"]))

        def on_new_game():
            """Creates a fresh environment and resets every widget to its starting value."""
            env = BusinessCompetitionEnv()
            state = env.get_state()
            plots = create_plots(state["history"])
            return (
                env, f"Quarter 1 of {NUM_QUARTERS}. Your move.", "", *plots,
                gr.Label(f"Your Budget: ${INITIAL_BUDGET}"),
                gr.Slider(maximum=INITIAL_BUDGET, value=initial_rd),
                gr.Slider(maximum=INITIAL_BUDGET, value=initial_mkt),
                gr.Slider(maximum=INITIAL_BUDGET, value=initial_sales),
                gr.Button(interactive=True)
            )

        def update_total_display(rd, mkt, sales):
            """Shows the running total of the three sliders."""
            return gr.Label(f"Total Allocated: ${rd + mkt + sales}")

        # --- Event Handlers ---
        # Both handlers return the same 11 values, in this order.
        game_outputs = [
            game_env, status_box, ai_reasoning_box,
            plot_market_share, plot_budget, plot_quality,
            player_budget_display, rd_slider, mkt_slider, sales_slider,
            submit_btn,
        ]

        submit_btn.click(
            fn=game_step_and_update,
            inputs=[game_env, rd_slider, mkt_slider, sales_slider],
            outputs=game_outputs,
        )

        new_game_btn.click(
            fn=on_new_game,
            inputs=[],
            outputs=game_outputs,
        )

        for slider in [rd_slider, mkt_slider, sales_slider]:
            slider.change(fn=update_total_display, inputs=[rd_slider, mkt_slider, sales_slider], outputs=total_allocated_display)

        # Populate the charts with the quarter-0 state when the page loads.
        demo.load(on_new_game, outputs=game_outputs)

    return demo
314
 
315
 
316
  if __name__ == "__main__":
 
317
  spiral_demo = create_interface()
318
  spiral_demo.launch()
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio==4.44.0
2
- numpy==1.24.3
 
 
 
1
  gradio==4.44.0
2
+ numpy==1.24.3
3
+ pandas
4
+ plotly